from Utilities.Database import database_data_line, database_record_logs
from Utilities import configure_functions
from Utilities.helpers import should_skip
import pymongo
import re

# specify mongodb connection
MONGODB_CONNECTION = "mongodb://root:root@database"
# mongodb account name
MONGODB_ACC_NAME = "root"
# mongodb account password
MONGODB_ACC_PASSWORD = "root"
# mongodb data database
MONGODB_DATA_DATABASE = "open-data-db"
# mongodb collection with available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# mongodb collection with available devices of datasets
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
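
# Illustrative shape of the dataset config consumed by the functions below
# (a sketch inferred from the keys accessed in this module; the concrete
# values are hypothetical and real configs may carry additional fields):
#
#     config = {
#         "dataset-name": "EXAMPLE",
#         "display-name": "Example dataset",
#         "devices": {
#             "device-1": {"x": "49.7248", "y": "13.3521"},
#         },
#     }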


def create_database_connection():
    """
    Creates connection to mongoDB

    Returns:
        Connection to mongoDB
    """
    client = pymongo.MongoClient(MONGODB_CONNECTION)

    # Authenticating
    client.admin.authenticate(MONGODB_ACC_NAME, MONGODB_ACC_PASSWORD)

    database = client[MONGODB_DATA_DATABASE]

    return database
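
# Example usage (sketch): obtain a database handle and inspect it. Assumes the
# MongoDB instance behind MONGODB_CONNECTION is reachable. Note that the
# explicit authenticate() call above exists only on older PyMongo releases
# (it was removed in PyMongo 4.0, where the credentials in the URI suffice).
#
#     db = create_database_connection()
#     print(db.list_collection_names())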


def get_data_from_file(filename, config):
    """
    Opens a processed file and reads it line by line
    (name, occurrence, date),
    searches the name in config and adds the device map coordinates,
    then creates a dictionary with the date without hours as key
    and a list of data lines as value.
    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with date without hours as key
        and list of data lines as value
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = dict()

    with open(dataset_path + filename, "r") as f:
        for line in f:
            line = line.rstrip("\n")

            csv_column = line.split(";")

            name = csv_column[0]

            if should_skip(devices[name]):
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, devices[name]["x"], devices[name]["y"], date, occurrence)

            # to split the tables by hours or months instead of days,
            # change this slice of the YYYY-mm-dd-hh date
            date_without_hours = date[:-3]
            if date_without_hours not in date_dict:
                date_dict[date_without_hours] = list()

            date_dict[date_without_hours].append(data_line.to_dictionary())

    return date_dict
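
# Example (sketch): for a processed-file line such as
#
#     device-1;5;2021-05-01-10
#
# and a config whose "devices" entry for "device-1" carries map coordinates,
# the record is grouped under the "2021-05-01" key:
#
#     {"2021-05-01": [<data line as dictionary>, ...]}
#
# The exact field names of each dictionary come from
# DatabaseDataLine.to_dictionary(), which is defined elsewhere.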


def load_data_to_database(database_connection, dataset_name, data_dic,
                          file_name):
    """
    Takes data_dic created in get_data_from_file and loads it into the
    database, where the collection name is dataset_name + data_dic key
    and each data line becomes a document in that collection

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset the data belongs to
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data
    """

    for date in data_dic:
        dataset_collections = database_connection[dataset_name]
        dataset_collections.insert_one({'date': date})
        date_dataset = database_connection[dataset_name + date]
        date_dataset.insert_many(data_dic[date])
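
# Example usage (sketch, with hypothetical file and dataset names):
#
#     db = create_database_connection()
#     data = get_data_from_file("2021-05-01.csv", config)
#     load_data_to_database(db, config["dataset-name"], data, "2021-05-01.csv")
#
# Each key of `data` then gets its own collection named
# config["dataset-name"] + date, e.g. "EXAMPLE2021-05-01".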


def check_or_update_datasets_collection(database_connection, config):
    """
    Checks if the DATASETS collection contains the dataset and whether its
    display name needs to be updated

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # collection where available datasets are specified
    collection_datasets = database_connection[MONGODB_DATASET_COLLECTION]

    dataset_name = config['dataset-name']
    display_name = config['display-name']

    query = {'key-name': dataset_name}

    # check if the newly added data already have a dataset record in the collection
    dataset_present = collection_datasets.find_one(query)

    if dataset_present is None:
        collection_datasets.insert_one({
            'key-name': dataset_name,
            'display-name': display_name,
            'updated': 0
        })
    elif dataset_present['display-name'] != display_name:
        newvalues = {"$set": {'display-name': display_name}}
        collection_datasets.update_one(query, newvalues)
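
# A DATASETS record created above looks like this (values illustrative):
#
#     {"key-name": "EXAMPLE", "display-name": "Example dataset", "updated": 0}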


def update_devices_collection(config):
    """
    Checks if there are any changes in the devices specified in the config
    file compared to the devices processed and loaded into the database

    If there are changes, replaces the old devices in the database
    with the new ones

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    change_in_devices = False

    collection_devices = database_connection[
        dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    devices_cursor = collection_devices.find()

    db_device_dict = dict()

    for device in devices_cursor:
        name = device['name']
        db_device_dict[name] = {
            'name': name,
            'x': device['x'],
            'y': device['y']
        }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(
        devices)

    if len(valid_devices.keys()) != len(db_device_dict.keys()):
        change_in_devices = True

    if not change_in_devices:
        for device in valid_devices.keys():
            if device in db_device_dict:
                config_x = valid_devices[device]['x']
                config_y = valid_devices[device]['y']
                db_x = db_device_dict[device]['x']
                db_y = db_device_dict[device]['y']
                if config_x != db_x or config_y != db_y:
                    change_in_devices = True
                    break

    if change_in_devices:
        collection_devices.delete_many({})
        devices_list = list()

        for device in devices.keys():
            if not should_skip(devices[device]):
                devices_list.append({
                    'name': device,
                    'x': devices[device]['x'],
                    'y': devices[device]['y']
                })

        collection_devices.insert_many(devices_list)

    return change_in_devices
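
# Example usage (sketch): refresh the per-dataset DEVICES collection after a
# config change and react to the result.
#
#     if update_devices_collection(config):
#         print("device list changed, dependent collections may need a reload")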


def remove_dataset_database(dataset_name):
    """
    Removes dataset entries from database
    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # collection where available datasets are specified
    collection_datasets = mydb[MONGODB_DATASET_COLLECTION]

    collection_datasets.delete_one({"key-name": dataset_name})
    print("Removing record from DATASETS collection")

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop all collections belonging to the dataset
    for name in collections:
        if name.startswith(dataset_name):
            mydb[name].drop()
            print("Dropping: " + name)


def reset_dataset_database(dataset_name):
    """
    Resets the dataset in the database
     - deletes everything except crawled links and the record in the
       DATASETS collection
    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    pattern = re.compile(dataset_name + '[0-9]+-[0-9]+-+[0-9]+')

    # Retrieve list of all collections
    collections = mydb.list_collection_names()

    # Drop all collections matching the per-date pattern
    for name in collections:
        if pattern.match(name):
            mydb[name].drop()
            print("Dropping: " + name)

    database_record_logs.reset_ignore_set_loaded(dataset_name)
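
# Example (sketch): for dataset_name "EXAMPLE" the pattern above matches the
# per-date collections such as "EXAMPLE2021-05-01", so only those are dropped;
# the DATASETS record and the dataset's DEVICES collection are left in place.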