/modules/crawler/Utilities/Database/DatabaseLoader.py - Komentovat - Aplikace nad otevřenými daty (KIV) – BHVS - Redmine

aswi2020sebela-gitlab/modules/crawler/Utilities/Database/DatabaseLoader.py @ 2d129043

from urllib.parse import quote_plus

import pymongo

from Utilities.Database import DatabaseDataLine
3
# mongodb account name
MONGODB_ACC_NAME = "root"
# mongodb account password
MONGODB_ACC_PASSWORD = "root"
# mongodb connection URI; built from the account constants above so the
# credentials are defined in exactly one place, and percent-encoded because
# reserved characters (':', '/', '@', ...) may not appear verbatim in the
# user-info part of a URI
MONGODB_CONNECTION = "mongodb://{}:{}@database".format(
    quote_plus(MONGODB_ACC_NAME), quote_plus(MONGODB_ACC_PASSWORD)
)
# name of the mongodb database that holds the data
MONGODB_DATA_DATABASE = "DATA"
# name of the mongodb collection listing the available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
17
18
def create_database_connection():
    """
    Creates connection to mongoDB

    The account credentials are handed to MongoClient directly instead of
    calling Database.authenticate afterwards, because that method is
    deprecated in PyMongo 3.x and no longer exists in PyMongo 4.

    Returns:
        Handle to the database named by MONGODB_DATA_DATABASE

    Raises:
        pymongo.errors.PyMongoError: if the server cannot be reached or
            rejects the connection
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        # the account is authenticated against the admin database,
        # as the previous client.admin.authenticate(...) call did
        authSource="admin",
    )

    # MongoClient connects lazily; force one round trip to the server so
    # that connection problems surface here rather than at the first query
    client.admin.command("ping")

    return client[MONGODB_DATA_DATABASE]
34
35
def get_data_from_file(filename, config):
    """
    Opens processed file and reads it line by line. Each line holds
    name;occurrence;date
    The name is looked up in the "devices" section of config to obtain the
    device map coordinates ("x" and "y"), and the resulting data lines are
    grouped in a dictionary keyed by the date without hours.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset; the keys
            "dataset-name" and "devices" are read

    Returns:
        dictionary with date without hours as key
        and list of data lines (as dictionaries) as value

    Raises:
        KeyError: if a name from the file is missing in config["devices"]
        IndexError: if a non-empty line has fewer than three columns
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = dict()

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the line terminator; slicing off the last character
            # would damage the date when the final line has no newline
            line = line.rstrip("\n")

            # tolerate blank lines (e.g. a trailing empty line)
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            occurrence = csv_column[1]
            date = csv_column[2]

            device = devices[name]
            database_data_line = DatabaseDataLine.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # if you want to change table split by hours or months change this
            date_without_hours = date[:-2]

            date_dict.setdefault(date_without_hours, []).append(
                database_data_line.to_dictionary())

    return date_dict
82
83
def load_data_to_database(dataset_name, data_dic):
    """
    Stores the dictionary produced by get_data_from_file in the database.

    The dataset name is recorded in the collection of available datasets
    unless an entry with that name already exists. Then, for every key of
    data_dic, the name dataset_name + key is recorded in the collection
    called dataset_name, and the data lines stored under that key are
    inserted into the collection called dataset_name + key.

    Args:
        dataset_name: name of dataset that has existing configuration file
        data_dic: dictionary of data lines created in get_data_from_file
    """
    db = create_database_connection()

    # register the dataset among the available ones if it is not there yet
    available_datasets = db[MONGODB_DATASET_COLLECTION]
    if available_datasets.find_one({'name': dataset_name}) is None:
        available_datasets.insert_one({'name': dataset_name})

    for day, data_lines in data_dic.items():
        day_collection_name = dataset_name + day
        # note the per-day collection name in the dataset's own collection
        db[dataset_name].insert_one({'name': day_collection_name})
        # store the data lines of that day
        db[day_collection_name].insert_many(data_lines)

Projekt

Obecné

Profil

ASWI - Pokročilé softwarové inženýrství » ASWI 2020 » Aplikace nad otevřenými daty (KIV) – BHVS