Projekt

Obecné

Profil

Stáhnout (8.38 KB) Statistiky
| Větev: | Revize:
1
import re
from typing import Any, Dict

import pymongo

from Utilities.Database import database_data_line, database_record_logs
from Utilities import configure_functions
from Utilities.helpers import should_skip, detect_change
from shared_types import ConfigType
8

    
9
#
# TODO: set MongoDB credentials
#

# MongoDB connection URI (note: the credentials are embedded here as well)
MONGODB_CONNECTION = "mongodb://root:root@database"
# MongoDB account name
MONGODB_ACC_NAME = "root"
# MongoDB account password
MONGODB_ACC_PASSWORD = "root"
# MongoDB database that holds the data
MONGODB_DATA_DATABASE = "open-data-db"
# MongoDB collection listing the available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# Suffix of the per-dataset MongoDB collection listing the available devices
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"

# Type alias used to annotate database-connection parameters.
# The value type is typing.Any (the builtin function `any` is not a type).
# NOTE(review): create_database_connection() returns a pymongo Database,
# which is only dict-like -- confirm whether this alias should name that type.
DatabaseConnectionType = Dict[str, Any]
30

    
31

    
32
def create_database_connection() -> pymongo.database.Database:
    """
    Creates connection to mongoDB

    The credentials are handed to MongoClient directly instead of calling
    Database.authenticate(), which no longer exists in PyMongo 4.
    authSource="admin" keeps authentication against the admin database,
    as the former client.admin.authenticate(...) call did.

    MongoClient connects lazily, so a wrong host or wrong credentials
    surface on the first database operation, not inside this function.

    Returns:
        Handle to the MONGODB_DATA_DATABASE database
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        authSource="admin",
    )

    return client[MONGODB_DATA_DATABASE]
47

    
48

    
49
def get_data_from_file(filename: str, config: ConfigType) -> Dict[str, any]:
    """
    Reads a processed file line by line and groups its records by date.

    Every line is split on ';' into name, occurrence and date. The name is
    looked up in config["devices"]; devices for which should_skip() is true
    are ignored, the others contribute their "x" and "y" values to a
    DatabaseDataLine. Records are grouped under the date string with its
    last three characters removed.

    Args:
        filename: name of processed file, located in
            PROCESSED_DATA_PATH + config["dataset-name"] + '/'
        config: loaded configuration file of dataset

    Returns:
        dictionary with the shortened date as key
        and list of data lines (as dictionaries) as value

    Raises:
        KeyError: when a line names a device missing from config["devices"]
        IndexError: when a non-empty line has fewer than three columns
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = {}

    # The context manager closes the file even when a line fails to parse.
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # Strip only the line terminator; slicing off the last character
            # would damage the final line of a file without trailing newline.
            line = line.rstrip("\n")

            # Blank lines (e.g. at the end of the file) carry no record.
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            device = devices[name]

            if should_skip(device):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # Change this slice to split collections by hours or months.
            # NOTE(review): the original comment gave the date format as
            # YYYY-mm-hh-dd; presumably YYYY-mm-dd-hh is meant -- confirm.
            date_without_hours = date[:-3]
            date_dict.setdefault(date_without_hours, []).append(
                data_line.to_dictionary())

    return date_dict
95

    
96

    
97
def load_data_to_database(database_connection: DatabaseConnectionType,
                          dataset_name: str, data_dic: Dict[str, any],
                          file_name: str) -> None:
    """
    Stores the grouped data lines produced by get_data_from_file.

    For every key of data_dic one {'date': key} document is inserted into
    the collection named dataset_name, and all data lines of that key are
    inserted into the collection named dataset_name + key.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (not used by this function)
    """
    for day, day_lines in data_dic.items():
        # Register the date in the dataset's own collection ...
        database_connection[dataset_name].insert_one({'date': day})
        # ... and put the records into the per-date collection.
        database_connection[dataset_name + day].insert_many(day_lines)
117

    
118

    
119
def check_or_update_datasets_collection(
        database_connection: DatabaseConnectionType, config: ConfigType):
    """
    Keeps the dataset's entry in the DATASETS collection in sync with config.

    When no document with 'key-name' equal to config['dataset-name'] exists,
    a new one is inserted with 'updated' set to 0. Otherwise, when
    detect_change() reports a difference in 'display-name' or
    'display-color', both fields are overwritten with the config values.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    tracked_keys = ['display-name', 'display-color']
    datasets = database_connection[MONGODB_DATASET_COLLECTION]
    selector = {'key-name': config['dataset-name']}

    stored = datasets.find_one(selector)

    # Unknown dataset: register it and finish.
    if stored is None:
        datasets.insert_one({
            'key-name': config['dataset-name'],
            'display-name': config['display-name'],
            'display-color': config['display-color'],
            'updated': 0
        })
        return

    # Known dataset: refresh the display fields only when they differ.
    if detect_change(stored, config, tracked_keys):
        fresh_values = {key: config[key] for key in tracked_keys}
        datasets.update_one(selector, {"$set": fresh_values})
149

    
150

    
151
def update_devices_collection(config: ConfigType):
    """
    Checks if there are any changes in devices specified in config file
    against devices loaded into the database, and replaces the stored
    devices when they differ.

    The devices differ when the set of valid device names is not the same
    as the set of names stored in the database (this also covers a renamed
    device, where the counts match), or when any device has a different
    'x' or 'y' value.

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # name -> (x, y) of every device currently stored in the database
    db_device_coords = {
        device['name']: (device['x'], device['y'])
        for device in collection_devices.find()
    }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    # Compare the name sets first; coordinates are only compared when the
    # names match, so every lookup in db_device_coords is guaranteed to hit.
    change_in_devices = (
        valid_devices.keys() != db_device_coords.keys()
        or any(
            (device['x'], device['y']) != db_device_coords[name]
            for name, device in valid_devices.items()
        )
    )

    if change_in_devices:
        collection_devices.delete_many({})
        devices_list = [
            {'name': name, 'x': device['x'], 'y': device['y']}
            for name, device in devices.items()
            if not should_skip(device)
        ]

        # insert_many() rejects an empty list, so skip it when no device
        # is left after filtering.
        if devices_list:
            collection_devices.insert_many(devices_list)

    return change_in_devices
218

    
219

    
220
def remove_dataset_database(dataset_name: str):
    """
    Removes dataset entries from database

    Deletes the dataset's document from the DATASETS collection and then
    drops every collection whose name starts with dataset_name.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    database = create_database_connection()

    # Remove the dataset's record from the collection of available datasets
    database[MONGODB_DATASET_COLLECTION].delete_one({"key-name": dataset_name})
    print("Odstraňování záznamu z DATASETS kolekce")

    # NOTE(review): prefix matching also hits collections of any other
    # dataset whose name begins with dataset_name -- confirm this is intended.
    owned_collections = [
        collection_name
        for collection_name in database.list_collection_names()
        if collection_name.startswith(dataset_name)
    ]

    for collection_name in owned_collections:
        database[collection_name].drop()
        print("Odstraňuji: " + collection_name)
243

    
244

    
245
def reset_dataset_database(dataset_name: str):
    """
    Reset dataset in database

    Drops every collection whose name starts with dataset_name followed by
    a date-like suffix (digits-digits-digits) and then calls
    database_record_logs.reset_ignore_set_loaded() for the dataset.
    Other collections, including the DATASETS entry, are left untouched.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # Escape the name so that regex metacharacters in it are matched
    # literally and cannot widen or break the pattern.
    pattern = re.compile(re.escape(dataset_name) + r'[0-9]+-[0-9]+-+[0-9]+')

    # Drop all per-date collections of the dataset
    for name in mydb.list_collection_names():
        if pattern.match(name):
            mydb[name].drop()
            print("Odstraňuji: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)
(2-2/3)