from Utilities.Database import database_data_line, database_record_logs
from Utilities import configure_functions
from Utilities.helpers import should_skip, detect_change
from shared_types import ConfigType
from typing import Any, Dict
import pymongo
import re

# specify mongodb connection
MONGODB_CONNECTION = "mongodb://root:root@database"
# mongodb account name
MONGODB_ACC_NAME = "root"
# mongodb account password
MONGODB_ACC_PASSWORD = "root"
# mongodb data database
MONGODB_DATA_DATABASE = "open-data-db"
# mongodb collection with available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# mongodb collection with available devices of datasets
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"

DatabaseConnectionType = Dict[str, Any]

def create_database_connection() -> pymongo.database.Database:
    """
    Creates a connection to MongoDB

    Returns:
        Connection to the MongoDB data database
    """
    client = pymongo.MongoClient(MONGODB_CONNECTION)

    # Authenticating
    client.admin.authenticate(MONGODB_ACC_NAME, MONGODB_ACC_PASSWORD)

    database = client[MONGODB_DATA_DATABASE]

    return database
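
# Example usage (illustrative sketch only; assumes the MongoDB instance from
# MONGODB_CONNECTION is reachable and the credentials above are valid):
#
#   database = create_database_connection()
#   print(database.list_collection_names())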


def get_data_from_file(filename: str, config: ConfigType) -> Dict[str, Any]:
    """
    Opens a processed file and reads it line by line
    (name, occurrence, date), looks the name up in the config
    and adds the device map coordinates, then creates a dictionary
    with the date without hours as key and a list of data lines as value.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with date without hours as key
        and list of data line dictionaries as value
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = {}

    with open(dataset_path + filename, "r") as f:
        for line in f:
            line = line[:-1]

            csv_column = line.split(";")

            name = csv_column[0]

            if should_skip(devices[name]):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, devices[name]["x"], devices[name]["y"], date, occurrence)

            # if you want to change the table split by hours or months,
            # change this slice of the YYYY-mm-dd-hh date
            date_without_hours = date[:-3]
            if date_without_hours not in date_dict:
                date_dict[date_without_hours] = list()

            date_dict[date_without_hours].append(data_line.to_dictionary())

    return date_dict
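
# Example usage (illustrative sketch only; assumes `config` is an already loaded
# dataset configuration containing "dataset-name" and "devices", and that a processed
# file with "name;occurrence;date" lines exists under PROCESSED_DATA_PATH/<dataset-name>/;
# the file name below is hypothetical):
#
#   date_dict = get_data_from_file("2021-05-03-10", config)
#   for date, data_lines in date_dict.items():
#       print(date, len(data_lines))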


def load_data_to_database(database_connection: DatabaseConnectionType,
                          dataset_name: str, data_dic: Dict[str, Any],
                          file_name: str) -> None:
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database, where the collection name is dataset_name + data_dic key
    and the data lines are the documents in that collection

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data
    """

    for date in data_dic:
        dataset_collections = database_connection[dataset_name]
        dataset_collections.insert_one({'date': date})
        date_dataset = database_connection[dataset_name + date]
        date_dataset.insert_many(data_dic[date])
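
# Example usage (illustrative sketch only; assumes `config` is the dataset's loaded
# configuration, and the dataset name "JIS" and the file name are hypothetical):
#
#   connection = create_database_connection()
#   data_dic = get_data_from_file("2021-05-03-10", config)
#   load_data_to_database(connection, "JIS", data_dic, "2021-05-03-10")
#   # for a date key such as "2021-05-03" this inserts {'date': '2021-05-03'} into
#   # the "JIS" collection and the data lines into the "JIS2021-05-03" collection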


def check_or_update_datasets_collection(
        database_connection: DatabaseConnectionType, config: ConfigType):
    """
    Checks if the DATASETS collection contains the dataset and whether its
    display name or color has changed

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # collection where available datasets are specified
    compareKeys = ['display-name', 'display-color']
    collection_datasets = database_connection[MONGODB_DATASET_COLLECTION]

    query = {'key-name': config['dataset-name']}

    # check if newly added data already have a dataset specified in the collection
    current_dataset = collection_datasets.find_one(query)

    if current_dataset is None:
        collection_datasets.insert_one({
            'key-name': config['dataset-name'],
            'display-name': config['display-name'],
            'display-color': config['display-color'],
            'updated': 0
        })
    elif detect_change(current_dataset, config, compareKeys):
        newVal = {}
        for key in compareKeys:
            newVal[key] = config[key]
        collection_datasets.update_one(query, {"$set": newVal})
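
# Shape of the document kept in the DATASETS collection for one dataset
# (illustrative sketch only; the values are hypothetical):
#
#   {
#       'key-name': 'JIS',
#       'display-name': 'My dataset',
#       'display-color': '#00FF00',
#       'updated': 0
#   }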


def update_devices_collection(config: ConfigType):
    """
    Checks the devices specified in the config file against the devices
    already processed and loaded into the database

    If there are changes, replaces the old devices in the database with the new ones

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices are replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    change_in_devices = False

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    devices_cursor = collection_devices.find()

    db_device_dict = {}

    for device in devices_cursor:
        name = device['name']
        db_device_dict[name] = {
            'name': name,
            'x': device['x'],
            'y': device['y']
        }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    if len(valid_devices.keys()) != len(db_device_dict.keys()):
        change_in_devices = True

    if not change_in_devices:
        for device in valid_devices.keys():
            if device in db_device_dict:
                config_x = valid_devices[device]['x']
                config_y = valid_devices[device]['y']
                db_x = db_device_dict[device]['x']
                db_y = db_device_dict[device]['y']
                if config_x != db_x or config_y != db_y:
                    change_in_devices = True
                    break

    if change_in_devices:
        collection_devices.delete_many({})
        devices_list = list()

        for device in devices.keys():
            if not should_skip(devices[device]):
                devices_list.append({
                    'name': device,
                    'x': devices[device]['x'],
                    'y': devices[device]['y']
                })

        if devices_list:
            collection_devices.insert_many(devices_list)

    return change_in_devices
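
# Example usage (illustrative sketch only; assumes `config` is a loaded dataset
# configuration whose "devices" entries carry "x" and "y" map coordinates):
#
#   if update_devices_collection(config):
#       print("Devices changed, the dataset's data should be reloaded")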


def remove_dataset_database(dataset_name: str):
    """
    Removes dataset entries from the database
    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # collection where available datasets are specified
    collection_datasets = mydb[MONGODB_DATASET_COLLECTION]

    collection_datasets.delete_one({"key-name": dataset_name})
    print("Removing record from the DATASETS collection")

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop all of the dataset's collections
    for name in collections:
        if name.startswith(dataset_name):
            mydb[name].drop()
            print("Removing: " + name)
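
# Example usage (illustrative sketch only; the dataset name "JIS" is hypothetical):
#
#   remove_dataset_database("JIS")
#   # deletes the "JIS" entry from the DATASETS collection and drops every
#   # collection whose name starts with "JIS", e.g. "JISDEVICES" and "JIS2021-05-03"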


def reset_dataset_database(dataset_name: str):
    """
    Resets the dataset in the database
     - deletes everything except crawled links and the entry in the DATASETS collection
    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    pattern = re.compile(dataset_name + '[0-9]+-[0-9]+-+[0-9]+')

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop all of the dataset's date collections
    for name in collections:
        if pattern.match(name):
            mydb[name].drop()
            print("Removing: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)
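
# Example usage (illustrative sketch only; the dataset name "JIS" is hypothetical):
#
#   reset_dataset_database("JIS")
#   # drops only the per-date collections matching "JIS<year>-<month>-<day>",
#   # keeping the DEVICES collection and the DATASETS entry, and calls
#   # database_record_logs.reset_ignore_set_loaded, which presumably clears the
#   # record of already loaded files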