Revize 81980e82
Přidáno uživatelem Tomáš Ballák před téměř 4 lety
modules/crawler/Utilities/Database/database_loader.py | ||
---|---|---|
1 | 1 |
from Utilities.Database import database_data_line, database_record_logs |
2 | 2 |
from Utilities import configure_functions |
3 |
from Utilities.helpers import should_skip |
|
3 | 4 |
import pymongo |
4 | 5 |
import re |
5 | 6 |
|
... | ... | |
67 | 68 |
|
68 | 69 |
name = csv_column[0] |
69 | 70 |
|
70 |
if devices[name]["x"] == "SKIP" or devices[name]["y"] == "SKIP":
|
|
71 |
if should_skip(devices[name]):
|
|
71 | 72 |
continue |
72 | 73 |
|
73 | 74 |
occurrence = csv_column[1] |
... | ... | |
80 | 81 |
if date_without_hours not in date_dict: |
81 | 82 |
date_dict[date_without_hours] = list() |
82 | 83 |
|
83 |
date_dict[date_without_hours].append( |
|
84 |
data_line.to_dictionary()) |
|
84 |
date_dict[date_without_hours].append(data_line.to_dictionary()) |
|
85 | 85 |
|
86 | 86 |
return date_dict |
87 | 87 |
|
88 | 88 |
|
89 |
def load_data_to_database(database_connection,dataset_name, data_dic, file_name): |
|
89 |
def load_data_to_database(database_connection, dataset_name, data_dic, |
|
90 |
file_name): |
|
90 | 91 |
""" |
91 | 92 |
Takes data_dic created in method get_data_from_file |
92 | 93 |
and loads it into the database, where the collection name is dataset_name + data_dic key |
... | ... | |
106 | 107 |
date_dataset.insert_many(data_dic[date]) |
107 | 108 |
|
108 | 109 |
|
109 |
|
|
110 |
def check_or_update_datasets_collection(database_connection,config): |
|
110 |
def check_or_update_datasets_collection(database_connection, config): |
|
111 | 111 |
""" |
112 | 112 |
Checks whether the DATASETS collection contains the dataset and whether its display name is up to date |
113 | 113 |
|
... | ... | |
127 | 127 |
dataset_present = collection_datasets.find_one(query) |
128 | 128 |
|
129 | 129 |
if dataset_present is None: |
130 |
collection_datasets.insert_one({'key-name': dataset_name, 'display-name': display_name,'updated': 0}) |
|
130 |
collection_datasets.insert_one({ |
|
131 |
'key-name': dataset_name, |
|
132 |
'display-name': display_name, |
|
133 |
'updated': 0 |
|
134 |
}) |
|
131 | 135 |
elif dataset_present['display-name'] != display_name: |
132 |
newvalues = { "$set": { 'display-name': display_name } }
|
|
136 |
newvalues = {"$set": {'display-name': display_name}}
|
|
133 | 137 |
collection_datasets.update_one(query, newvalues) |
134 | 138 |
|
135 | 139 |
|
... | ... | |
153 | 157 |
|
154 | 158 |
change_in_devices = False |
155 | 159 |
|
156 |
collection_devices = database_connection[dataset_name + MONGODB_DATASET_DEVICES_COLLECTION] |
|
160 |
collection_devices = database_connection[ |
|
161 |
dataset_name + MONGODB_DATASET_DEVICES_COLLECTION] |
|
157 | 162 |
|
158 | 163 |
devices_cursor = collection_devices.find() |
159 | 164 |
|
... | ... | |
161 | 166 |
|
162 | 167 |
for device in devices_cursor: |
163 | 168 |
name = device['name'] |
164 |
db_device_dict[name] = {'name': name, 'x': device['x'] , 'y': device['y']} |
|
165 |
|
|
169 |
db_device_dict[name] = { |
|
170 |
'name': name, |
|
171 |
'x': device['x'], |
|
172 |
'y': device['y'] |
|
173 |
} |
|
166 | 174 |
|
167 |
valid_devices = configure_functions.return_dictionary_of_valid_devices(devices) |
|
175 |
valid_devices = configure_functions.return_dictionary_of_valid_devices( |
|
176 |
devices) |
|
168 | 177 |
|
169 | 178 |
if len(valid_devices.keys()) != len(db_device_dict.keys()): |
170 | 179 |
change_in_devices = True |
171 |
|
|
180 |
|
|
172 | 181 |
if change_in_devices == False: |
173 | 182 |
for device in valid_devices.keys(): |
174 | 183 |
if device in db_device_dict: |
... | ... | |
180 | 189 |
change_in_devices = True |
181 | 190 |
break |
182 | 191 |
|
183 |
|
|
184 | 192 |
if change_in_devices == True: |
185 | 193 |
collection_devices.delete_many({}) |
186 | 194 |
devices_list = list() |
187 | 195 |
|
188 | 196 |
for device in devices.keys(): |
189 |
x = devices[device]['x'] |
|
190 |
y = devices[device]['y'] |
|
191 |
if not (x == "SKIP" or x == "UNKNOWN!" or y == "SKIP" or y == "UNKNOWN!"): |
|
192 |
devices_list.append({'name': device , 'x': x , 'y': y }) |
|
197 |
if not (should_skip(devices[device])): |
|
198 |
devices_list.append({ |
|
199 |
'name': device, |
|
200 |
'x': devices[device]['x'], |
|
201 |
'y': devices[device]['y'] |
|
202 |
}) |
|
193 | 203 |
|
194 | 204 |
collection_devices.insert_many(devices_list) |
195 | 205 |
|
196 | 206 |
return change_in_devices |
197 | 207 |
|
198 |
|
|
208 |
|
|
199 | 209 |
def remove_dataset_database(dataset_name): |
200 | 210 |
""" |
201 | 211 |
Removes dataset entries from database |
... | ... | |
211 | 221 |
collection_datasets.delete_one({"key-name": dataset_name}) |
212 | 222 |
print("Removing record from DATASETS collection") |
213 | 223 |
|
214 |
|
|
215 | 224 |
# Retrieve list of all collections |
216 | 225 |
collections = mydb.list_collection_names() |
217 | 226 |
|
... | ... | |
232 | 241 |
# Creating connection |
233 | 242 |
mydb = create_database_connection() |
234 | 243 |
|
235 |
pattern = re.compile(dataset_name+'[0-9]+-[0-9]+-+[0-9]+')
|
|
244 |
pattern = re.compile(dataset_name + '[0-9]+-[0-9]+-+[0-9]+')
|
|
236 | 245 |
|
237 | 246 |
# Retrieve list of all collections |
238 | 247 |
collections = mydb.list_collection_names() |
Také k dispozici: Unified diff
Re #8160 new dataset