Projekt

Obecné

Profil

Stáhnout (7.93 KB) Statistiky
| Větev: | Revize:
1 d6ca840d petrh
from Utilities.Database import database_data_line, database_record_logs
2
from Utilities import configure_functions
3 81980e82 ballakt
from Utilities.helpers import should_skip
4 527abccd petrh
import pymongo
5 d6ca840d petrh
import re
6 527abccd petrh
7 04a2b5a4 petrh
# specify mongodb connection
8 728f8c5d petrh
MONGODB_CONNECTION = "mongodb://root:root@database"
9 04a2b5a4 petrh
# mongodb account name
10
MONGODB_ACC_NAME = "root"
11
# mongodb account password
12
MONGODB_ACC_PASSWORD = "root"
13
# mongodb data database
14 ce22f1ff petrh
MONGODB_DATA_DATABASE = "open-data-db"
15 04a2b5a4 petrh
# mongodb collection with available datasets
16
MONGODB_DATASET_COLLECTION = "DATASETS"
17 0a2832fb vastja
# mongodb collection with available devices of datasets
18
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"
19 04a2b5a4 petrh
20
# Path to processed data
21
PROCESSED_DATA_PATH = "ProcessedData/"
22
23
24
def create_database_connection():
    """
    Creates connection to mongoDB

    The account name and password are passed directly to the client
    constructor and are verified against the "admin" database.

    Returns:
        Handle to the database named by MONGODB_DATA_DATABASE
    """
    # Database.authenticate() was deprecated in PyMongo 3.x and removed in
    # PyMongo 4.0, so the credentials are supplied as client options instead.
    # Keyword options take precedence over credentials embedded in the URI.
    # NOTE: authentication now happens lazily on the first real operation
    # rather than eagerly inside this function.
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        authSource="admin")

    return client[MONGODB_DATA_DATABASE]
39
40
41
def get_data_from_file(filename, config):
    """
    Reads a processed data file and groups its rows by date.

    Every non-empty line is split on ';' into name, occurrence and date.
    The name is looked up in the devices of the config to obtain the
    device's "x" and "y" values; devices for which should_skip() is true
    are left out. Each remaining row is turned into a DatabaseDataLine
    and stored, in dictionary form, under the date with its last three
    characters removed (presumably the hour part - verify against the
    format written by the data processor).

    Args:
        filename: name of processed file, relative to
            PROCESSED_DATA_PATH/<dataset-name>/
        config: loaded configuration file of dataset; must contain
            "dataset-name" and "devices"

    Returns:
        dictionary with the shortened date as key
        and list of data line dictionaries as value

    Raises:
        KeyError: when a row names a device missing from config["devices"]
        IndexError: when a row has fewer than three ';'-separated columns
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = dict()

    # Context manager guarantees the file is closed even if a row fails.
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # Strip only the newline; slicing off the last character would
            # corrupt the final row of a file without a trailing newline.
            line = line.rstrip("\n")

            # Blank lines carry no data and would otherwise fail the
            # device lookup below.
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            device = devices[name]

            if should_skip(device):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # if you want to change table split by hours or months
            # change this slice of the date string
            date_without_hours = date[:-3]
            date_dict.setdefault(date_without_hours, []).append(
                data_line.to_dictionary())

    return date_dict
87
88
89 81980e82 ballakt
def load_data_to_database(database_connection, dataset_name, data_dic,
                          file_name):
    """
    Stores grouped data lines in the database.

    For every key of data_dic a {'date': key} document is inserted into
    the collection named dataset_name, and the list of data lines stored
    under that key is inserted into the collection named
    dataset_name + key.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (not used by this function)
    """
    for day, rows in data_dic.items():
        # remember that the dataset has data for this day
        database_connection[dataset_name].insert_one({'date': day})
        # the rows themselves live in a per-day collection
        database_connection[dataset_name + day].insert_many(rows)
108
109
110 81980e82 ballakt
def check_or_update_datasets_collection(database_connection, config):
    """
    Makes sure the dataset is registered in the DATASETS collection.

    When no document with the dataset's key name exists, a new one is
    inserted with 'updated' set to 0. When a document exists but carries a
    different display name, only its display name is updated.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset; must contain
            'dataset-name' and 'display-name'
    """
    # collection listing the available datasets
    datasets = database_connection[MONGODB_DATASET_COLLECTION]

    key_name = config['dataset-name']
    shown_name = config['display-name']
    selector = {'key-name': key_name}

    existing = datasets.find_one(selector)

    # dataset not registered yet -> create its record and finish
    if existing is None:
        datasets.insert_one({
            'key-name': key_name,
            'display-name': shown_name,
            'updated': 0
        })
        return

    # dataset registered, refresh the display name only when it differs
    if existing['display-name'] != shown_name:
        datasets.update_one(selector,
                            {"$set": {'display-name': shown_name}})
138 527abccd petrh
139 0a2832fb vastja
140 d6ca840d petrh
def update_devices_collection(config):
    """
    Checks if there are any changes in devices specified in config file
    against devices processed and loaded into the database

    A change is reported when the set of valid device names differs from
    the names stored in the database, or when any device present in both
    has a different 'x' or 'y' value. On a change all stored devices are
    deleted and replaced by the non-skipped devices of the config.

    Args:
        config: loaded configuration file of dataset; must contain
            'dataset-name' and 'devices'

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # Snapshot of what the database currently holds, keyed by device name.
    db_device_dict = {
        device['name']: {
            'name': device['name'],
            'x': device['x'],
            'y': device['y']
        }
        for device in collection_devices.find()
    }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    # Comparing the name sets (not just their sizes) also catches a device
    # that was renamed or swapped for another one while the count stayed
    # the same. The coordinate check only runs when the names match, so
    # every looked-up name is guaranteed to exist in db_device_dict.
    change_in_devices = set(valid_devices) != set(db_device_dict) or any(
        valid_devices[name]['x'] != db_device_dict[name]['x']
        or valid_devices[name]['y'] != db_device_dict[name]['y']
        for name in valid_devices)

    if change_in_devices:
        collection_devices.delete_many({})

        devices_list = [
            {
                'name': name,
                'x': device['x'],
                'y': device['y']
            }
            for name, device in devices.items()
            if not should_skip(device)
        ]

        # insert_many() rejects an empty list, which happens when every
        # configured device is skipped.
        if devices_list:
            collection_devices.insert_many(devices_list)

    return change_in_devices
207 0a2832fb vastja
208 81980e82 ballakt
209 d6ca840d petrh
def remove_dataset_database(dataset_name):
    """
    Removes dataset entries from database

    Deletes the dataset's record from the DATASETS collection and drops
    every collection whose name starts with dataset_name.

    NOTE(review): the prefix match also hits collections of any other
    dataset whose name begins with dataset_name - confirm dataset names
    are never prefixes of each other.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    db = create_database_connection()

    # forget the dataset in the list of available datasets
    db[MONGODB_DATASET_COLLECTION].delete_one({"key-name": dataset_name})
    print("Removing record from DATASETS collection")

    # pick the collections that belong to the dataset, then drop them
    doomed = [collection for collection in db.list_collection_names()
              if collection.startswith(dataset_name)]

    for collection in doomed:
        db[collection].drop()
        print("Dropping: " + collection)
232
233
234
def reset_dataset_database(dataset_name):
    """
    Reset dataset in database

    Drops every collection whose name is dataset_name followed by a
    date-like suffix (digits-digits-digits) and then calls
    database_record_logs.reset_ignore_set_loaded for the dataset.
    Collections that do not match the pattern and the dataset's record in
    the DATASETS collection are left untouched.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # The dataset name is matched literally; without escaping, characters
    # such as '.', '+' or '(' in a name would be read as regex syntax and
    # could match the wrong collections or fail to compile.
    pattern = re.compile(re.escape(dataset_name) + '[0-9]+-[0-9]+-+[0-9]+')

    # Drop all per-date collections of the dataset
    for name in mydb.list_collection_names():
        if pattern.match(name):
            mydb[name].drop()
            print("Dropping: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)