Projekt

Obecné

Profil

Stáhnout (7.82 KB) Statistiky
| Větev: | Revize:
1
from Utilities.Database import database_data_line, database_record_logs
2
from Utilities import configure_functions
3
import pymongo
4
import re
5

    
6
# MongoDB connection URI (contains the credentials and the host name).
# NOTE(review): credentials are hard-coded in the source - consider reading
# them from the environment or a secrets store instead.
MONGODB_CONNECTION = "mongodb://root:root@database"
# MongoDB account name
MONGODB_ACC_NAME = "root"
# MongoDB account password
MONGODB_ACC_PASSWORD = "root"
# Name of the MongoDB database that stores the data
MONGODB_DATA_DATABASE = "open-data-db"
# Name of the collection listing the available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"
# Name part used for the collection listing the available devices of a dataset
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
21

    
22

    
23
def create_database_connection():
    """
    Creates connection to mongoDB

    The account name and password are handed to ``MongoClient`` directly
    instead of calling ``client.admin.authenticate(...)`` afterwards, because
    ``Database.authenticate`` is no longer available in PyMongo 4.

    NOTE(review): with this form the credentials are presumably verified on
    the first real database operation rather than inside this function -
    confirm that no caller relies on an immediate authentication error.

    Returns:
        Handle to the database named by MONGODB_DATA_DATABASE
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        # the previous explicit authentication was done against "admin"
        authSource="admin",
    )

    return client[MONGODB_DATA_DATABASE]
38

    
39

    
40
def get_data_from_file(filename, config):
    """
    Reads a processed file and groups its data lines by date.

    Every non-empty line is split on ';' into name, occurrence and date.
    The name is looked up in the devices of the config to obtain the map
    coordinates; lines of devices whose x or y is "SKIP" are ignored.
    The remaining lines are converted to dictionaries and collected under
    the date with its last three characters removed.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with the shortened date as key
        and list of data line dictionaries as value

    Raises:
        KeyError: when a line names a device that is missing in the config
        IndexError: when a line has fewer than three ';' separated columns
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = dict()

    # the context manager closes the file even if a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # remove only the line terminator - the last line of a file may
            # not have one, and slicing would cut a real character there
            line = line.rstrip("\n")

            # blank lines carry no data
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            device = devices[name]

            if device["x"] == "SKIP" or device["y"] == "SKIP":
                continue

            occurrence = csv_column[1]
            date = csv_column[2]
            data_line = database_data_line.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # Drops the last three characters of the date (presumably the
            # "-hh" hour part - confirm against the processed file format).
            # Change this slice to split the tables by hours or months.
            date_without_hours = date[:-3]

            date_dict.setdefault(date_without_hours, list()).append(
                data_line.to_dictionary())

    return date_dict
87

    
88

    
89
def load_data_to_database(database_connection,dataset_name, data_dic, file_name):
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database.

    For every date key a document {'date': date} is inserted into the
    collection named dataset_name, and the data lines of that date are
    inserted into the collection named dataset_name + date.
    Dates without any data lines are skipped, because insert_many does not
    accept an empty list of documents.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset, used as collection name prefix
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data (currently unused)
    """
    if not data_dic:
        return

    # the collection of known dates is the same for every iteration
    dataset_collections = database_connection[dataset_name]

    for date, data_lines in data_dic.items():
        if not data_lines:
            continue

        dataset_collections.insert_one({'date': date})
        date_dataset = database_connection[dataset_name + date]
        date_dataset.insert_many(data_lines)
107

    
108

    
109

    
110
def check_or_update_datasets_collection(database_connection,config):
    """
    Makes sure the dataset is registered in the DATASETS collection
    and that its stored display name matches the configuration.

    A missing dataset is inserted with 'updated' set to 0; for an existing
    one only a differing display name is rewritten.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # collection holding one record per available dataset
    datasets = database_connection[MONGODB_DATASET_COLLECTION]

    key_name = config['dataset-name']
    shown_name = config['display-name']

    selector = {'key-name': key_name}
    stored_record = datasets.find_one(selector)

    # dataset not registered yet - create its record and finish
    if stored_record is None:
        datasets.insert_one({'key-name': key_name, 'display-name': shown_name,'updated': 0})
        return

    # dataset registered - refresh the display name only when it differs
    if stored_record['display-name'] != shown_name:
        datasets.update_one(selector, {"$set": {'display-name': shown_name}})
134

    
135

    
136
def update_devices_collection(config):
    """
    Checks if there are any changes in devices specified in config file against
    devices processed and loaded into the database

    The valid devices of the config (as returned by
    configure_functions.return_dictionary_of_valid_devices) are compared with
    the devices stored in the database by name and by x, y coordinates.
    A differing number of devices, a device missing on either side or a
    changed coordinate all count as a change.

    If there is a change, the old devices in the database are replaced by
    the devices of the config whose coordinates are neither "SKIP" nor
    "UNKNOWN!".

    Args:
        config: loaded configuration file of dataset

    Returns:
        True - when changes are found and devices replaced
        False - when there were no changes
    """
    database_connection = create_database_connection()
    dataset_name = config['dataset-name']
    devices = config['devices']

    collection_devices = database_connection[dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    # coordinates currently stored in the database, keyed by device name
    db_coordinates = {
        device['name']: (device['x'], device['y'])
        for device in collection_devices.find()
    }

    valid_devices = configure_functions.return_dictionary_of_valid_devices(devices)

    # coordinates requested by the configuration, keyed by device name
    config_coordinates = {
        name: (device['x'], device['y'])
        for name, device in valid_devices.items()
    }

    # Comparing the whole dictionaries also catches a renamed device, which
    # keeps the device count equal but is missing in the database.
    if config_coordinates == db_coordinates:
        return False

    collection_devices.delete_many({})

    not_placed = ("SKIP", "UNKNOWN!")
    devices_list = [
        {'name': name, 'x': device['x'], 'y': device['y']}
        for name, device in devices.items()
        if device['x'] not in not_placed and device['y'] not in not_placed
    ]

    # insert_many does not accept an empty list of documents
    if devices_list:
        collection_devices.insert_many(devices_list)

    return True
197

    
198
    
199
def remove_dataset_database(dataset_name):
    """
    Removes dataset entries from database

    Deletes the record of the dataset from the DATASETS collection and then
    drops every collection whose name starts with dataset_name.

    NOTE(review): the prefix test also matches collections of any other
    dataset whose name begins with the same characters - confirm that
    dataset names can never be prefixes of each other.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    database = create_database_connection()

    # forget the dataset in the list of available datasets
    database[MONGODB_DATASET_COLLECTION].delete_one({"key-name": dataset_name})
    print("Removing record from DATASETS collection")

    # drop every collection belonging to the dataset
    for collection_name in database.list_collection_names():
        if not collection_name.startswith(dataset_name):
            continue
        database[collection_name].drop()
        print("Dropping: " + collection_name)
223

    
224

    
225
def reset_dataset_database(dataset_name):
    """
    Reset dataset in database

    Drops every collection whose name starts with dataset_name directly
    followed by three dash separated groups of digits, then calls
    reset_ignore_set_processed and reset_ignore_set_loaded of
    database_record_logs for the dataset.
    Collections with other names and the record in the DATASETS collection
    are not touched by this function.

    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Creating connection
    mydb = create_database_connection()

    # the dataset name is escaped so that characters with a special meaning
    # in regular expressions are matched literally
    pattern = re.compile(re.escape(dataset_name) + '[0-9]+-[0-9]+-+[0-9]+')

    # Drop all collections matching the pattern
    for name in mydb.list_collection_names():
        if pattern.match(name):
            mydb[name].drop()

    database_record_logs.reset_ignore_set_processed(dataset_name)
    database_record_logs.reset_ignore_set_loaded(dataset_name)
(2-2/3)