Projekt

Obecné

Profil

Stáhnout (8.34 KB) Statistiky
| Větev: | Revize:
1 d6ca840d petrh
import re
from typing import Any, Dict

import pymongo

from Utilities.Database import database_data_line, database_record_logs
from Utilities import configure_functions
from Utilities.helpers import should_skip, detect_change
from shared_types import ConfigType
8 527abccd petrh
9 04a2b5a4 petrh
# MongoDB connection string
MONGODB_CONNECTION = "mongodb://root:root@database"
# MongoDB account name
MONGODB_ACC_NAME = "root"
# MongoDB account password
# NOTE(review): credentials are hard-coded in this module - consider loading
# them from the environment or a configuration file instead.
MONGODB_ACC_PASSWORD = "root"
# Name of the MongoDB database that holds the data
MONGODB_DATA_DATABASE = "open-data-db"
# MongoDB collection with available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# MongoDB collection with available devices of datasets
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"

# Alias used to annotate database connection parameters.
# `typing.Any` is used because the builtin `any` is a function, not a type.
DatabaseConnectionType = Dict[str, Any]
26 04a2b5a4 petrh
27 af7609b5 Tomáš Ballák
28
def create_database_connection() -> pymongo.database.Database:
    """
    Creates connection to mongoDB

    Credentials are handed to ``MongoClient`` directly instead of calling
    ``client.admin.authenticate``, which no longer exists in PyMongo 4.
    The client connects lazily, so an authentication failure is reported
    by the first database operation rather than by this function.

    Returns:
        Handle to the MONGODB_DATA_DATABASE database of the created client
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        # the original code authenticated against the "admin" database
        authSource="admin")

    return client[MONGODB_DATA_DATABASE]
43
44
45 af7609b5 Tomáš Ballák
def get_data_from_file(filename: str, config: ConfigType) -> Dict[str, any]:
    """
        Opens processed file and reads it line by line. Every line has
        the form
        name;occurrence;date
        The name is looked up in the devices of the config to obtain the
        device map coordinates. Lines of devices for which should_skip
        returns a truthy value are ignored, as are empty lines.
        The result is a dictionary keyed by the date with its last three
        characters removed (the date without hours) whose values are
        lists of data lines.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with date without hours as key
        and list of data line dictionaries as value
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = {}

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # remove only the line terminator - slicing off the last
            # character would damage a final line without a newline
            line = line.rstrip("\n")
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            device = devices[name]

            if should_skip(device):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # change this slice to split the tables by a different period
            date_without_hours = date[:-3]
            date_dict.setdefault(date_without_hours, []).append(
                data_line.to_dictionary())

    return date_dict
91
92
93 af7609b5 Tomáš Ballák
def load_data_to_database(database_connection: DatabaseConnectionType,
                          dataset_name: str, data_dic: Dict[str, any],
                          file_name: str) -> None:
    """
    Stores the data lines grouped in data_dic in the database.

    For every key of data_dic a {'date': key} document is inserted into the
    collection named dataset_name and the data lines of that key are
    inserted into the collection named dataset_name + key.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (not used by this function)
    """
    for day, records in data_dic.items():
        # remember that the dataset has data for this day
        database_connection[dataset_name].insert_one({'date': day})
        # the day's data lines live in their own collection
        database_connection[dataset_name + day].insert_many(records)
113
114
115 af7609b5 Tomáš Ballák
def check_or_update_datasets_collection(
        database_connection: DatabaseConnectionType, config: ConfigType):
    """
    Makes sure the DATASETS collection describes the dataset of the config.

    A missing dataset is inserted with its key name, display name, display
    color and 'updated' set to 0. For an existing one the display name and
    display color are rewritten when detect_change reports a difference.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    tracked_keys = ['display-name', 'display-color']
    # collection listing the available datasets
    datasets = database_connection[MONGODB_DATASET_COLLECTION]
    selector = {'key-name': config['dataset-name']}

    stored = datasets.find_one(selector)

    # dataset not registered yet - create its record and finish
    if stored is None:
        datasets.insert_one({
            'key-name': config['dataset-name'],
            'display-name': config['display-name'],
            'display-color': config['display-color'],
            'updated': 0
        })
        return

    if detect_change(stored, config, tracked_keys):
        refreshed = {key: config[key] for key in tracked_keys}
        datasets.update_one(selector, {"$set": refreshed})
145 527abccd petrh
146 0a2832fb vastja
147 af7609b5 Tomáš Ballák
def _devices_changed(valid_devices, db_device_dict) -> bool:
    """
    Tells whether the devices of the config differ from the stored ones

    Args:
        valid_devices: dictionary of valid config devices keyed by name
        db_device_dict: dictionary of database devices keyed by name

    Returns:
        True when the number of devices, their names or their
        coordinates differ, False otherwise
    """
    if len(valid_devices) != len(db_device_dict):
        return True

    for name, device in valid_devices.items():
        stored = db_device_dict.get(name)
        # same count but unknown name (e.g. a renamed device) is a change too
        if stored is None:
            return True
        if device['x'] != stored['x'] or device['y'] != stored['y']:
            return True

    return False


def update_devices_collection(config: ConfigType):
    """
    Checks if there are any changes in devices specified in config file
    against devices processed and loaded into the database

    If there are changes, the devices in the database are replaced by the
    devices of the config for which should_skip returns a falsy value

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # devices currently stored in the database, keyed by name
    db_device_dict = {}
    for device in collection_devices.find():
        name = device['name']
        db_device_dict[name] = {
            'name': name,
            'x': device['x'],
            'y': device['y']
        }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    change_in_devices = _devices_changed(valid_devices, db_device_dict)

    if change_in_devices:
        collection_devices.delete_many({})

        devices_list = [
            {'name': name, 'x': device['x'], 'y': device['y']}
            for name, device in devices.items()
            if not should_skip(device)
        ]

        # insert_many rejects an empty list of documents
        if devices_list:
            collection_devices.insert_many(devices_list)

    return change_in_devices
214 0a2832fb vastja
215 81980e82 ballakt
216 af7609b5 Tomáš Ballák
def remove_dataset_database(dataset_name: str):
    """
    Deletes a dataset from the database.

    The record of the dataset is removed from the collection of available
    datasets and every collection whose name starts with dataset_name is
    dropped.
    NOTE(review): the prefix match also covers collections of any other
    dataset whose name begins with dataset_name - confirm this is intended.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    database = create_database_connection()

    # forget the dataset in the collection of available datasets
    database[MONGODB_DATASET_COLLECTION].delete_one(
        {"key-name": dataset_name})
    print("Odstraňování záznamu z DATASETS kolekce")

    # walk over all collections and drop those belonging to the dataset
    for collection_name in database.list_collection_names():
        if not collection_name.startswith(dataset_name):
            continue
        database[collection_name].drop()
        print("Odstraňuji: " + collection_name)
239 d6ca840d petrh
240
241 af7609b5 Tomáš Ballák
def reset_dataset_database(dataset_name: str):
    """
    Reset dataset in database

    Drops every collection whose name begins with dataset_name directly
    followed by a dash separated group of numbers (the per-date data
    collections) and then calls
    database_record_logs.reset_ignore_set_loaded for the dataset.
    Collections that do not match the pattern are left untouched.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # the dataset name is escaped so that regex metacharacters in it
    # (for example '.' or '+') are matched literally
    pattern = re.compile(
        re.escape(dataset_name) + r'[0-9]+-[0-9]+-+[0-9]+')

    # Drop all matching collections
    for name in mydb.list_collection_names():
        if pattern.match(name):
            mydb[name].drop()
            print("Odstraňuji: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)