Projekt

Obecné

Profil

Stáhnout (7.8 KB) Statistiky
| Větev: | Revize:
1 d6ca840d petrh
from Utilities.Database import database_data_line, database_record_logs
2
from Utilities import configure_functions
3 527abccd petrh
import pymongo
4 d6ca840d petrh
import re
5 527abccd petrh
6 04a2b5a4 petrh
# specify mongodb connection
7 728f8c5d petrh
MONGODB_CONNECTION = "mongodb://root:root@database"
8 04a2b5a4 petrh
# mongodb account name
9
MONGODB_ACC_NAME = "root"
10
# mongodb account password
11
MONGODB_ACC_PASSWORD = "root"
12
# mongodb data database
13 ce22f1ff petrh
MONGODB_DATA_DATABASE = "open-data-db"
14 04a2b5a4 petrh
# mongodb collection with available datasets
15
MONGODB_DATASET_COLLECTION = "DATASETS"
16 0a2832fb vastja
# mongodb collection with available devices of datasets
17
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"
18 04a2b5a4 petrh
19
# Path to processed data
20
PROCESSED_DATA_PATH = "ProcessedData/"
21
22
23
def create_database_connection():
    """
    Creates connection to mongoDB

    The credentials are handed to ``MongoClient`` directly and checked
    against the ``admin`` database. The separate
    ``client.admin.authenticate(...)`` call is not used because
    ``Database.authenticate`` was removed in PyMongo 4.0.

    Returns:
        Handle of the database named by MONGODB_DATA_DATABASE
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        authSource="admin",
    )

    # MongoClient connects lazily. One cheap round trip keeps the fail-fast
    # behaviour the explicit authenticate() call used to provide, so a bad
    # address or bad credentials surface here and not at the first query.
    client.admin.command("ping")

    return client[MONGODB_DATA_DATABASE]
38
39
40
def get_data_from_file(filename, config):
    """
    Reads a processed file and groups its data lines by date.

    Every non-empty line is split on ';' into name, occurrence and date.
    The name is looked up in the devices of the config to obtain the map
    coordinates. Devices whose x or y coordinate is "SKIP" are left out.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset
            (reads the keys "dataset-name" and "devices")

    Returns:
        dictionary with the date minus its last three characters as key
        and list of data line dictionaries as value

    Raises:
        KeyError: when a device name from the file is missing in the config
        IndexError: when a non-empty line has fewer than three columns
    """
    dataset_path = PROCESSED_DATA_PATH + config["dataset-name"] + '/'
    devices = config["devices"]
    date_dict = dict()

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the newline, so the last character of a final line
            # without a trailing newline is not lost
            line = line.rstrip("\n")

            # blank lines (e.g. at the end of the file) carry no data
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            device = devices[name]

            if device["x"] == "SKIP" or device["y"] == "SKIP":
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # The key decides how the data is split into collections.
            # Cutting the last three characters presumably removes the
            # "-hh" hour suffix - TODO confirm the date format.
            date_without_hours = date[:-3]
            date_dict.setdefault(date_without_hours, []).append(
                data_line.to_dictionary())

    return date_dict
87
88
89 0a2832fb vastja
def load_data_to_database(database_connection, dataset_name, data_dic, file_name):
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database.

    For every date key one {'date': date} document is inserted into the
    collection named dataset_name, and the data lines of that date are
    inserted into the collection named dataset_name + date.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (currently not used)
    """
    # the collection of dates is the same for every key, look it up once
    dataset_collections = database_connection[dataset_name]

    for date, data_lines in data_dic.items():
        # insert_many refuses an empty list; skipping here also avoids
        # leaving a date marker that has no data collection behind it
        if not data_lines:
            continue

        dataset_collections.insert_one({'date': date})
        database_connection[dataset_name + date].insert_many(data_lines)
107
108
109
110
def check_or_update_datasets_collection(database_connection, config):
    """
    Makes sure the collection of available datasets has an up to date
    record of the dataset described by config.

    A missing record is inserted with 'updated' set to 0. An existing
    record only gets its display name rewritten when it differs from
    the one in config.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
            (reads the keys 'dataset-name' and 'display-name')
    """
    # collection that lists the available datasets
    datasets = database_connection[MONGODB_DATASET_COLLECTION]

    key_name = config['dataset-name']
    shown_name = config['display-name']
    selector = {'key-name': key_name}

    stored = datasets.find_one(selector)

    # dataset seen for the first time - register it and finish
    if stored is None:
        datasets.insert_one(
            {'key-name': key_name, 'display-name': shown_name, 'updated': 0})
        return

    # dataset is known - only the display name may need a refresh
    if stored['display-name'] != shown_name:
        datasets.update_one(selector, {"$set": {'display-name': shown_name}})
134 527abccd petrh
135 0a2832fb vastja
136 d6ca840d petrh
def update_devices_collection(config):
    """
    Checks if there are any changes in devices specified in config file
    against devices stored in the database.

    The valid devices of the config and the stored devices are compared
    as name -> (x, y) mappings, so an added, removed, renamed or moved
    device all count as a change. On a change the stored devices are
    deleted and replaced by the config devices whose coordinates are
    neither "SKIP" nor "UNKNOWN!".

    Args:
        config: loaded configuration file of dataset
            (reads the keys 'dataset-name' and 'devices')

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # coordinates currently stored in the database, keyed by device name
    db_coordinates = {
        device['name']: (device['x'], device['y'])
        for device in collection_devices.find()
    }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(devices)

    # coordinates the config asks for, keyed by device name
    config_coordinates = {
        name: (device['x'], device['y'])
        for name, device in valid_devices.items()
    }

    # Comparing the whole mappings also catches the case where both sides
    # hold the same number of devices but under different names.
    change_in_devices = config_coordinates != db_coordinates

    if change_in_devices:
        collection_devices.delete_many({})

        devices_list = [
            {'name': name, 'x': device['x'], 'y': device['y']}
            for name, device in devices.items()
            if device['x'] not in ("SKIP", "UNKNOWN!")
            and device['y'] not in ("SKIP", "UNKNOWN!")
        ]

        # insert_many refuses an empty list; with no usable device the
        # collection simply stays empty
        if devices_list:
            collection_devices.insert_many(devices_list)

    return change_in_devices
197 0a2832fb vastja
198 d6ca840d petrh
    
199
def remove_dataset_database(dataset_name):
    """
    Removes dataset entries from database

    Deletes the record of the dataset from the collection of available
    datasets and then drops every collection whose name starts with
    dataset_name. The match is a plain prefix test, so collections of
    another dataset whose name begins with the same text are dropped too.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    database = create_database_connection()

    # forget the dataset in the collection of available datasets
    database[MONGODB_DATASET_COLLECTION].delete_one({"key-name": dataset_name})
    print("Removing record from DATASETS collection")

    # drop all collections that carry the dataset name as prefix
    for collection_name in database.list_collection_names():
        if not collection_name.startswith(dataset_name):
            continue

        database[collection_name].drop()
        print("Dropping: " + collection_name)
223
224
225
def reset_dataset_database(dataset_name):
    """
    Reset dataset in database

    Drops every collection whose name begins with dataset_name followed
    by a dash separated group of numbers (the per date data collections)
    and then calls database_record_logs.reset_ignore_set_loaded for the
    dataset. Collections that do not match the pattern are kept.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # The name is escaped so characters such as '.' or '+' in a dataset
    # name are matched literally and not read as regex syntax.
    pattern = re.compile(re.escape(dataset_name) + '[0-9]+-[0-9]+-+[0-9]+')

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop the collections holding data of single dates
    for name in collections:
        if pattern.match(name):
            mydb[name].drop()
            print("Dropping: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)