Revize 0a2832fb
Přidáno uživatelem Jakub Vašta před asi 4 roky(ů)
modules/crawler/Utilities/Database/DatabaseLoader.py | ||
---|---|---|
11 | 11 |
MONGODB_DATA_DATABASE = "open-data-db" |
12 | 12 |
# mongodb collection with available datasets |
13 | 13 |
MONGODB_DATASET_COLLECTION = "DATASETS" |
14 |
# mongodb collection with available devices of datasets |
|
15 |
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES" |
|
16 |
# mongodb collection with already loaded files of datasets |
|
17 |
MONGODB_DATASET_LOADED_FILES_COLLECTION = "FILES" |
|
14 | 18 |
|
15 | 19 |
# Path to processed data |
16 | 20 |
PROCESSED_DATA_PATH = "ProcessedData/" |
... | ... | |
63 | 67 |
|
64 | 68 |
name = csv_column[0] |
65 | 69 |
|
70 |
if devices[name]["x"] == "SKIP" or devices[name]["y"] == "SKIP": |
|
71 |
continue |
|
72 |
|
|
66 | 73 |
occurrence = csv_column[1] |
67 | 74 |
date = csv_column[2] |
68 | 75 |
database_data_line = DatabaseDataLine.DatabaseDataLine( |
... | ... | |
79 | 86 |
return date_dict |
80 | 87 |
|
81 | 88 |
|
82 |
def load_data_to_database(dataset_name, data_dic):
|
|
89 |
def load_data_to_database(database_connection, dataset_name, data_dic, file_name):
    """
    Stores data lines created in get_data_from_file in the database
    and records the source file as loaded.

    For every key (date) of data_dic the name dataset_name + date is
    registered in the collection named dataset_name, and the data lines
    of that date are inserted into the collection named dataset_name + date.
    Finally file_name is inserted into the collection named
    dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as prefix of collection names
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data
    """
    # the collection listing the per-date collections is the same for every date
    dataset_collections = database_connection[dataset_name]

    for date, data_lines in data_dic.items():
        dataset_collections.insert_one({'name': dataset_name + date})
        # insert_many refuses an empty list of documents, so skip dates without lines
        if data_lines:
            database_connection[dataset_name + date].insert_many(data_lines)

    collection_loaded_files = database_connection[dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION]
    collection_loaded_files.insert_one({'file': file_name})
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
def check_or_update_datasets_collection(database_connection, config):
    """
    Checks if DATASETS collection contains dataset and if display name was not updated

    The dataset is looked up by its 'key-name'. When it is missing, a new
    document with 'key-name' and 'display-name' is inserted. When it exists
    with a different (or missing) 'display-name', the display name is updated.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # collection where available datasets are specified
    collection_datasets = database_connection[MONGODB_DATASET_COLLECTION]

    dataset_name = config['dataset-name']
    display_name = config['display-name']

    query = {'key-name': dataset_name}

    # check if newly added data already have a dataset specified in collection
    dataset_present = collection_datasets.find_one(query)

    if dataset_present is None:
        collection_datasets.insert_one({'key-name': dataset_name, 'display-name': display_name})
    # .get: a stored document without 'display-name' is repaired instead of raising KeyError
    elif dataset_present.get('display-name') != display_name:
        newvalues = {"$set": {'display-name': display_name}}
        collection_datasets.update_one(query, newvalues)
|
102 | 137 |
|
103 |
for date in data_dic: |
|
104 |
dataset_collections = database[dataset_name] |
|
105 |
dataset_collections.insert_one({'name': dataset_name + date}) |
|
106 |
date_dataset = database[dataset_name + date] |
|
107 |
date_dataset.insert_many(data_dic[date]) |
|
138 |
|
|
139 |
def update_devices_collection(database_connection, config):
    """
    Replaces the content of the dataset's devices collection with the
    devices from the current configuration.

    The collection named dataset_name + MONGODB_DATASET_DEVICES_COLLECTION
    is emptied and then filled with one document per device holding its
    'name', 'x' and 'y'. A device whose 'x' or 'y' is "SKIP" is left out.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    dataset_name = config['dataset-name']

    collection_devices = database_connection[dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # drop the previous set up so the collection mirrors the configuration
    collection_devices.delete_many({})

    # a device is stored only when both coordinates are set; a single "SKIP"
    # is enough to leave it out
    devices_list = [
        {'name': name, 'x': device['x'], 'y': device['y']}
        for name, device in config['devices'].items()
        if device['x'] != "SKIP" and device['y'] != "SKIP"
    ]

    # insert_many refuses an empty list of documents
    if devices_list:
        collection_devices.insert_many(devices_list)
|
162 |
|
|
163 |
|
|
164 |
def check_if_database_doesnt_contain_file(database_connection, dataset_name, file_name):
    """
    Checks that the file is not yet recorded in the dataset's collection of loaded files

    The lookup is done in the collection named
    dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as prefix of the collection name
        file_name: checked file name

    Returns:
        True if no document with 'file' equal to file_name was found,
        False otherwise
    """
    collection_loaded_files = database_connection[dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION]

    # find_one returns None when no document matches the query
    return collection_loaded_files.find_one({'file': file_name}) is None
Také k dispozici: Unified diff
Re #8089
- přidána nová proměnná do konfiguračního souboru
- předělána tabulka v databázi s názvem DATASETS
- nově obsahuje "key-name" jako klíč a "display-name" pro zobrazení