from Utilities.Database import database_data_line, database_record_logs
from Utilities import configure_functions
from Utilities.helpers import should_skip, detect_change
import pymongo
import re

# specify mongodb connection
MONGODB_CONNECTION = "mongodb://root:root@database"
# mongodb account name
MONGODB_ACC_NAME = "root"
# mongodb account password
MONGODB_ACC_PASSWORD = "root"
# mongodb data database
MONGODB_DATA_DATABASE = "open-data-db"
# mongodb collection with available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# mongodb collection with available devices of datasets
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"


def create_database_connection():
    """
    Creates a connection to MongoDB.

    Returns:
        Connection to the MongoDB data database
    """
    client = pymongo.MongoClient(MONGODB_CONNECTION)

    # Authenticating
    client.admin.authenticate(MONGODB_ACC_NAME, MONGODB_ACC_PASSWORD)

    database = client[MONGODB_DATA_DATABASE]

    return database


def get_data_from_file(filename, config):
    """
    Opens a processed file and reads it line by line; each line has the form
    name;occurrence;date. The name is looked up in the config to add the
    device map coordinates, then a dictionary is built with the date without
    hours as key and a list of data lines as value.

    Args:
        filename: name of the processed file
        config: loaded configuration file of the dataset

    Returns:
        dictionary with the date without hours as key
        and a list of data lines as value
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = dict()

    with open(dataset_path + filename, "r") as f:
        for line in f:
            # drop the trailing newline
            line = line[:-1]

            csv_column = line.split(";")

            name = csv_column[0]

            # skip devices marked to be ignored in the configuration
            if should_skip(devices[name]):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, devices[name]["x"], devices[name]["y"], date,
                occurrence)

            # dates have the form YYYY-mm-dd-hh; cutting the last three
            # characters drops the hours - change this slice if you want to
            # split the tables by hours or months instead
            date_without_hours = date[:-3]
            if date_without_hours not in date_dict:
                date_dict[date_without_hours] = list()

            date_dict[date_without_hours].append(data_line.to_dictionary())

    return date_dict

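# Illustrative sketch of what get_data_from_file produces (the dataset name
# "WIFI", the device "AP-01" and its coordinates are hypothetical): a processed
# line such as
#
#     AP-01;42;2021-05-03-14
#
# with devices = {'AP-01': {'x': 10.0, 'y': 20.0, ...}} ends up roughly as
#
#     {'2021-05-03': [{'name': 'AP-01', 'x': 10.0, 'y': 20.0,
#                      'date': '2021-05-03-14', 'occurrence': '42'}]}
#
# The exact keys of the inner dictionaries are defined by
# DatabaseDataLine.to_dictionary() in Utilities.Database.
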
def load_data_to_database(database_connection, dataset_name, data_dic,
                          file_name):
    """
    Takes the data_dic created in get_data_from_file and loads it into the
    database, where the collection name is dataset_name + data_dic key and
    the data lines are documents in that collection.

    Args:
        database_connection: created connection to a MongoDB
        dataset_name: name of the dataset the data belong to
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of the file containing the data
    """
    for date in data_dic:
        dataset_collections = database_connection[dataset_name]
        dataset_collections.insert_one({'date': date})
        date_dataset = database_connection[dataset_name + date]
        date_dataset.insert_many(data_dic[date])

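# Illustrative sketch of the resulting layout (dataset name and date are
# hypothetical): for a data_dic key '2021-05-03' of a dataset "WIFI",
# load_data_to_database appends {'date': '2021-05-03'} to the collection
# "WIFI" and bulk-inserts the corresponding data lines into a per-date
# collection named "WIFI2021-05-03".
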
def check_or_update_datasets_collection(database_connection, config):
    """
    Checks whether the DATASETS collection contains the dataset and whether
    its display name or display color has changed.

    Args:
        database_connection: created connection to a MongoDB
        config: loaded configuration file of the dataset
    """
    # collection where the available datasets are specified
    compareKeys = ['display-name',
                   'display-color']
    collection_datasets = database_connection[MONGODB_DATASET_COLLECTION]

    query = {'key-name': config['dataset-name']}

    # check if the newly added data already have a dataset specified in the collection
    current_dataset = collection_datasets.find_one(query)

    if current_dataset is None:
        collection_datasets.insert_one({
            'key-name': config['dataset-name'],
            'display-name': config['display-name'],
            'display-color': config['display-color'],
            'updated': 0
        })
    elif detect_change(current_dataset, config, compareKeys):
        newVal = {}
        for key in compareKeys:
            newVal[key] = config[key]
        collection_datasets.update_one(query, {"$set": newVal})

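# Illustrative shape of a document kept in the DATASETS collection by the
# function above (all values are hypothetical):
#
#     {'key-name': 'WIFI',
#      'display-name': 'WiFi access points',
#      'display-color': '#00ff00',
#      'updated': 0}
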
def update_devices_collection(config):
    """
    Checks whether the devices specified in the config file differ from the
    devices already processed and loaded into the database.

    If there are any changes, replaces the old devices in the database with
    the new ones.

    Args:
        config: loaded configuration file of the dataset

    Returns:
        True - when changes are found and devices are replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    change_in_devices = False

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    devices_cursor = collection_devices.find()

    db_device_dict = dict()

    for device in devices_cursor:
        name = device['name']
        db_device_dict[name] = {
            'name': name,
            'x': device['x'],
            'y': device['y']
        }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    # a different number of valid devices always means a change
    if len(valid_devices.keys()) != len(db_device_dict.keys()):
        change_in_devices = True

    # otherwise compare the coordinates of every device present in both
    if not change_in_devices:
        for device in valid_devices.keys():
            if device in db_device_dict:
                config_x = valid_devices[device]['x']
                config_y = valid_devices[device]['y']
                db_x = db_device_dict[device]['x']
                db_y = db_device_dict[device]['y']
                if config_x != db_x or config_y != db_y:
                    change_in_devices = True
                    break

    # on any change, rebuild the dataset's DEVICES collection from the config
    if change_in_devices:
        collection_devices.delete_many({})
        devices_list = list()

        for device in devices.keys():
            if not should_skip(devices[device]):
                devices_list.append({
                    'name': device,
                    'x': devices[device]['x'],
                    'y': devices[device]['y']
                })

        collection_devices.insert_many(devices_list)

    return change_in_devices

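# Illustrative sketch (the config values are hypothetical): with
#
#     config = {'dataset-name': 'WIFI',
#               'devices': {'AP-01': {'x': 10.0, 'y': 20.0, ...}}}
#
# update_devices_collection(config) rebuilds the "WIFIDEVICES" collection
# whenever the set of valid devices or any stored coordinate differs from the
# configuration, and returns True in that case; otherwise it leaves the
# collection untouched and returns False.
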
def remove_dataset_database(dataset_name):
    """
    Removes all entries of a dataset from the database.

    Args:
        dataset_name: name of dataset that has an existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # collection where the available datasets are specified
    collection_datasets = mydb[MONGODB_DATASET_COLLECTION]

    collection_datasets.delete_one({"key-name": dataset_name})
    print("Removing record from DATASETS collection")

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop every collection whose name starts with the dataset name
    for name in collections:
        if name.startswith(dataset_name):
            mydb[name].drop()
            print("Dropping: " + name)


def reset_dataset_database(dataset_name):
    """
    Resets a dataset in the database
     - deletes everything except the crawled links and the dataset's record
       in the DATASETS collection

    Args:
        dataset_name: name of dataset that has an existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # matches the per-date collections of the dataset, e.g. <dataset_name>YYYY-mm-dd
    pattern = re.compile(dataset_name + '[0-9]+-[0-9]+-+[0-9]+')

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop only the per-date collections of the dataset
    for name in collections:
        if pattern.match(name):
            mydb[name].drop()
            print("Dropping: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)

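# Minimal usage sketch tying the helpers above together; the dataset name and
# file name are hypothetical and the configuration is assumed to be loaded
# elsewhere (e.g. via configure_functions):
#
# if __name__ == "__main__":
#     connection = create_database_connection()
#     config = ...  # loaded configuration of the "WIFI" dataset
#     check_or_update_datasets_collection(connection, config)
#     update_devices_collection(config)
#     data = get_data_from_file("WIFI_2021-05-03.csv", config)
#     load_data_to_database(connection, config['dataset-name'], data,
#                           "WIFI_2021-05-03.csv")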