Projekt

Obecné

Profil

Stáhnout (8.38 KB) Statistiky
| Větev: | Revize:
1 d6ca840d petrh
import re
from typing import Any, Dict

import pymongo

from shared_types import ConfigType
from Utilities import configure_functions
from Utilities.Database import database_data_line, database_record_logs
from Utilities.helpers import should_skip, detect_change
8 527abccd petrh
9 20513e9f Martin Sebela
#
# TODO: set MongoDB credentials
#

# MongoDB connection string
MONGODB_CONNECTION = "mongodb://root:root@database"
# MongoDB account name
MONGODB_ACC_NAME = "root"
# MongoDB account password
MONGODB_ACC_PASSWORD = "root"
# name of the MongoDB database that holds the data
MONGODB_DATA_DATABASE = "open-data-db"
# MongoDB collection listing the available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# suffix appended to a dataset name to get the MongoDB collection
# listing the available devices of that dataset
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"

# Type used to annotate the database handle passed between the functions
# of this module (typing.Any, not the builtin function any())
DatabaseConnectionType = Dict[str, Any]
30 04a2b5a4 petrh
31 af7609b5 Tomáš Ballák
32
def create_database_connection() -> pymongo.database.Database:
    """
    Creates connection to MongoDB

    The client is built from MONGODB_CONNECTION and authenticates as
    MONGODB_ACC_NAME / MONGODB_ACC_PASSWORD against the "admin" database.

    Returns:
        Handle of the MONGODB_DATA_DATABASE database
    """
    # Credentials are handed to the client constructor because
    # Database.authenticate() is deprecated in PyMongo 3.x and removed in
    # PyMongo 4.0, so client.admin.authenticate(...) fails on current drivers.
    # Authentication now happens when the client first talks to the server
    # instead of eagerly inside this function.
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        authSource="admin")

    return client[MONGODB_DATA_DATABASE]
47
48
49 af7609b5 Tomáš Ballák
def get_data_from_file(filename: str, config: ConfigType) -> Dict[str, Any]:
    """
    Opens processed file and reads it line by line. Each line holds
    semicolon separated columns
        name;occurrence;date
    The name is looked up in the devices of the config to get the device
    map coordinates. Lines of devices for which should_skip is true and
    empty lines are ignored.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with date without hours as key
        and list of data lines (as dictionaries) as value

    Raises:
        KeyError: when a line names a device missing from config["devices"]
        IndexError: when a line has fewer than three columns
    """
    dataset_path = PROCESSED_DATA_PATH + config["dataset-name"] + '/'
    devices = config["devices"]
    date_dict = {}

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the line terminator - cutting the last character
            # unconditionally would eat data when the final line of the
            # file does not end with a newline
            line = line.rstrip("\n")
            if not line:
                continue

            csv_column = line.split(";")
            name = csv_column[0]
            device = devices[name]

            if should_skip(device):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # drops the last three characters of the date (presumably the
            # hour part - confirm against the processed file format);
            # change this slice to split collections by hours or months
            date_without_hours = date[:-3]
            date_dict.setdefault(date_without_hours, []).append(
                data_line.to_dictionary())

    return date_dict
95
96
97 af7609b5 Tomáš Ballák
def load_data_to_database(database_connection: DatabaseConnectionType,
                          dataset_name: str, data_dic: Dict[str, Any],
                          file_name: str) -> None:
    """
    Takes data_dic created in method get_data_from_file
    and loads it into database. For every key of data_dic a document
    {'date': key} is added to the collection named dataset_name and the
    data lines are inserted into the collection named dataset_name + key.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (not used by this function)
    """
    # the collection of loaded dates is the same for every key,
    # so it is looked up once
    dataset_collection = database_connection[dataset_name]

    for date, data_lines in data_dic.items():
        dataset_collection.insert_one({'date': date})
        database_connection[dataset_name + date].insert_many(data_lines)
117
118
119 af7609b5 Tomáš Ballák
def check_or_update_datasets_collection(
        database_connection: DatabaseConnectionType, config: ConfigType):
    """
    Keeps the entry of the dataset in the DATASETS collection up to date

    When the collection has no document for the dataset a new one is
    inserted. Otherwise the display name and display color are rewritten
    from the config if detect_change reports a difference.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # keys that are compared between the stored document and the config
    tracked_keys = ['display-name', 'display-color']
    # collection where the available datasets are specified
    datasets = database_connection[MONGODB_DATASET_COLLECTION]
    selector = {'key-name': config['dataset-name']}

    stored = datasets.find_one(selector)

    # dataset is not registered yet
    if stored is None:
        datasets.insert_one({
            'key-name': config['dataset-name'],
            'display-name': config['display-name'],
            'display-color': config['display-color'],
            'updated': 0
        })
        return

    if detect_change(stored, config, tracked_keys):
        datasets.update_one(
            selector, {"$set": {key: config[key] for key in tracked_keys}})
149 527abccd petrh
150 0a2832fb vastja
151 af7609b5 Tomáš Ballák
def update_devices_collection(config: ConfigType):
    """
    Checks if there are any changes in devices specified in config file against
    devices processed and loaded into the database

    If there are changes, replaces the old devices in the database by the
    devices from the config file (without the skipped ones)

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    devices = config['devices']

    collection_devices = database_connection[
        config['dataset-name'] + MONGODB_DATASET_DEVICES_COLLECTION]

    # coordinates currently stored in the database, keyed by device name
    db_coordinates = {
        device['name']: (device['x'], device['y'])
        for device in collection_devices.find()
    }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)
    # coordinates of the valid devices from the config, keyed by device name
    config_coordinates = {
        name: (device['x'], device['y'])
        for name, device in valid_devices.items()
    }

    # Comparing the whole mappings detects a different number of devices,
    # changed coordinates and also a device that was renamed or swapped for
    # another one while the number of devices stayed the same.
    if config_coordinates == db_coordinates:
        return False

    collection_devices.delete_many({})

    devices_list = [
        {'name': name, 'x': device['x'], 'y': device['y']}
        for name, device in devices.items()
        if not should_skip(device)
    ]

    # insert_many raises on an empty list, which happens when every
    # device of the config is skipped
    if devices_list:
        collection_devices.insert_many(devices_list)

    return True
218 0a2832fb vastja
219 81980e82 ballakt
220 af7609b5 Tomáš Ballák
def remove_dataset_database(dataset_name: str):
    """
    Removes dataset entries from database

    Deletes the entry of the dataset from the DATASETS collection and drops
    every collection whose name starts with dataset_name.

    Args:
        dataset_name: name of dataset that has existing configuration file

    Raises:
        ValueError: when dataset_name is empty
    """
    # every collection name starts with an empty string, so an empty
    # dataset name would drop all collections of the database
    if not dataset_name:
        raise ValueError("dataset_name must not be empty")

    # Creating connection
    mydb = create_database_connection()

    # collection where the available datasets are specified
    mydb[MONGODB_DATASET_COLLECTION].delete_one({"key-name": dataset_name})
    print("Odstraňování záznamu z DATASETS kolekce")

    # Drop all collections that belong to the dataset
    # NOTE(review): the prefix match also hits collections of another dataset
    # whose name merely begins with dataset_name - confirm the naming scheme
    for name in mydb.list_collection_names():
        if name.startswith(dataset_name):
            mydb[name].drop()
            print("Odstraňuji: " + name)
243 d6ca840d petrh
244
245 af7609b5 Tomáš Ballák
def reset_dataset_database(dataset_name: str):
    """
    Reset dataset in database
     - drops the collections whose name is dataset_name followed by a date
       (digits separated by dashes); other collections and the mention in
       DATASETS collection are kept
     - afterwards calls database_record_logs.reset_ignore_set_loaded
       for the dataset

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # the dataset name is literal text, so it is escaped - otherwise regex
    # metacharacters in the name would change which collections match
    pattern = re.compile(re.escape(dataset_name) + r'[0-9]+-[0-9]+-+[0-9]+')

    # Drop all collections with data of the dataset
    for name in mydb.list_collection_names():
        if pattern.match(name):
            mydb[name].drop()
            print("Odstraňuji: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)