Projekt

Obecné

Profil

Stáhnout (8.05 KB) Statistiky
| Větev: | Revize:
1 d6ca840d petrh
from Utilities.Database import database_data_line, database_record_logs
2
from Utilities import configure_functions
3 2f227a6c ballakt
from Utilities.helpers import should_skip, detect_change
4 527abccd petrh
import pymongo
5 d6ca840d petrh
import re
6 527abccd petrh
7 04a2b5a4 petrh
# MongoDB connection URI (account and host are embedded in the URI)
MONGODB_CONNECTION = "mongodb://root:root@database"
# Name of the MongoDB account
MONGODB_ACC_NAME = "root"
# Password of the MongoDB account
MONGODB_ACC_PASSWORD = "root"
# Name of the MongoDB database that stores the data
MONGODB_DATA_DATABASE = "open-data-db"
# Name of the collection listing the available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# Suffix appended to a dataset name to get the collection
# listing the available devices of that dataset
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to the directory with processed data
PROCESSED_DATA_PATH = "ProcessedData/"
22
23
24
def create_database_connection():
    """
    Creates connection to mongoDB

    The account credentials are handed to the MongoClient constructor
    instead of calling Database.authenticate(), which was deprecated in
    PyMongo 3.x and removed in PyMongo 4.0.

    Note that MongoClient connects lazily, so wrong credentials are
    reported by the first database operation rather than by this function.

    Returns:
        Database object for MONGODB_DATA_DATABASE on the created client
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        # the account was previously authenticated against the admin database
        authSource="admin")

    return client[MONGODB_DATA_DATABASE]
39
40
41
def get_data_from_file(filename, config):
    """
    Reads a processed file and groups its records by date

    Every non-empty line is expected to hold three ';' separated columns:
    name, occurrence, date. The name is looked up in config["devices"]
    to obtain the device map coordinates ("x" and "y"). Devices for which
    should_skip returns a truthy value are left out.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with date without hours as key
        and list of data lines (converted to dictionaries) as value

    Raises:
        KeyError: when a device name from the file is missing in the config
        IndexError: when a non-empty line has fewer than three columns
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'
    devices = config["devices"]
    date_dict = dict()

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the line terminator; cutting the last character
            # unconditionally corrupted a final line without a newline
            line = line.rstrip("\n")

            if not line:
                # blank line carries no record
                continue

            csv_column = line.split(";")
            name = csv_column[0]
            device = devices[name]

            if should_skip(device):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # the key is the date without its last three characters
            # (presumably the "-hh" hour part - confirm against the
            # processed file format); change the slice to split the
            # collections by a different period
            date_without_hours = date[:-3]
            date_dict.setdefault(date_without_hours, list()).append(
                data_line.to_dictionary())

    return date_dict
87
88
89 81980e82 ballakt
def load_data_to_database(database_connection, dataset_name, data_dic,
                          file_name):
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database.

    For every date with at least one data line a {'date': date} record is
    added to the collection named dataset_name and the data lines are
    inserted into the collection named dataset_name + date.

    Dates with an empty list of data lines are skipped: insert_many rejects
    an empty list, and the date would otherwise be registered without any
    data behind it.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (not used by this function)
    """
    # the collection with the list of dates is the same for every date
    dataset_collections = database_connection[dataset_name]

    for date, data_lines in data_dic.items():
        if not data_lines:
            continue

        dataset_collections.insert_one({'date': date})
        database_connection[dataset_name + date].insert_many(data_lines)
108
109
110 81980e82 ballakt
def check_or_update_datasets_collection(database_connection, config):
    """
    Makes sure the DATASETS collection describes the dataset from config

    When no record with the dataset key name exists, a new one is inserted
    with the key name, display name, display color and 'updated' set to 0.
    When a record exists and detect_change returns a truthy value for the
    display name / display color keys, both fields are overwritten with
    the values from config.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # fields that are refreshed from the config when a change is detected
    tracked_keys = ['display-name', 'display-color']
    # collection where the available datasets are specified
    datasets = database_connection[MONGODB_DATASET_COLLECTION]
    selector = {'key-name': config['dataset-name']}

    # check if the dataset already has a record in the collection
    stored_record = datasets.find_one(selector)

    if stored_record is None:
        new_record = {
            'key-name': config['dataset-name'],
            'display-name': config['display-name'],
            'display-color': config['display-color'],
            'updated': 0
        }
        datasets.insert_one(new_record)
        return

    if detect_change(stored_record, config, tracked_keys):
        refreshed = {key: config[key] for key in tracked_keys}
        datasets.update_one(selector, {"$set": refreshed})
140 527abccd petrh
141 0a2832fb vastja
142 d6ca840d petrh
def update_devices_collection(config):
    """
    Checks if there are any changes in devices specified in config file
    against devices processed and loaded into the database

    The devices differ when the sets of device names differ or when any
    device has different "x" / "y" coordinates. If they differ, all devices
    stored in the database are replaced by the devices from the config for
    which should_skip returns a falsy value.

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # name -> (x, y) of the devices currently stored in the database
    db_coordinates = {
        device['name']: (device['x'], device['y'])
        for device in collection_devices.find()
    }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    # name -> (x, y) of the valid devices from the config
    # (assumes every valid device carries "x" and "y" - TODO confirm)
    config_coordinates = {
        name: (device['x'], device['y'])
        for name, device in valid_devices.items()
    }

    # Comparing the whole mappings also catches a renamed device: with an
    # equal device count, a name missing in the database used to be ignored.
    change_in_devices = config_coordinates != db_coordinates

    if change_in_devices:
        collection_devices.delete_many({})

        devices_list = [
            {'name': name, 'x': device['x'], 'y': device['y']}
            for name, device in devices.items()
            if not should_skip(device)
        ]

        # insert_many rejects an empty list of documents
        if devices_list:
            collection_devices.insert_many(devices_list)

    return change_in_devices
209 0a2832fb vastja
210 81980e82 ballakt
211 d6ca840d petrh
def remove_dataset_database(dataset_name):
    """
    Removes dataset entries from database

    Deletes the record of the dataset from the DATASETS collection and
    drops every collection whose name starts with dataset_name.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    database = create_database_connection()

    # unregister the dataset from the collection of available datasets
    database[MONGODB_DATASET_COLLECTION].delete_one({"key-name": dataset_name})
    print("Removing record from DATASETS collection")

    # NOTE(review): a plain prefix match also selects collections of any
    # other dataset whose name begins with dataset_name - confirm that
    # dataset names never share a prefix
    owned_collections = [
        collection_name
        for collection_name in database.list_collection_names()
        if collection_name.startswith(dataset_name)
    ]

    for collection_name in owned_collections:
        database[collection_name].drop()
        print("Dropping: " + collection_name)
234
235
236
def reset_dataset_database(dataset_name):
    """
    Reset dataset in database

    Drops every collection whose name begins with dataset_name followed by
    a date-like part (digits-digits-digits) and then resets the loaded
    records through database_record_logs.reset_ignore_set_loaded.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # The dataset name is escaped so that it is matched literally; regex
    # metacharacters in a name would otherwise change what is dropped or
    # make the pattern fail to compile.
    # NOTE(review): "-+" accepts several hyphens before the last group,
    # kept as it was - confirm whether a single "-" was intended.
    pattern = re.compile(
        re.escape(dataset_name) + r'[0-9]+-[0-9]+-+[0-9]+')

    # Drop all matching collections
    for name in mydb.list_collection_names():
        if pattern.match(name):
            mydb[name].drop()
            print("Dropping: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)