Projekt

Obecné

Profil

Stáhnout (5.94 KB) Statistiky
| Větev: | Revize:
1 527abccd petrh
from Utilities.Database import DatabaseDataLine
2
import pymongo
3
4 04a2b5a4 petrh
# specify mongodb connection
5 728f8c5d petrh
MONGODB_CONNECTION = "mongodb://root:root@database"
6 04a2b5a4 petrh
# mongodb account name
7
MONGODB_ACC_NAME = "root"
8
# mongodb account password
9
MONGODB_ACC_PASSWORD = "root"
10
# mongodb data database
11 ce22f1ff petrh
MONGODB_DATA_DATABASE = "open-data-db"
12 04a2b5a4 petrh
# mongodb collection with available datasets
13
MONGODB_DATASET_COLLECTION = "DATASETS"
14 0a2832fb vastja
# mongodb collection with available devices of datasets
15
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"
16
# mongodb collection with already loaded files of datasets
17
MONGODB_DATASET_LOADED_FILES_COLLECTION = "FILES"
18 04a2b5a4 petrh
19
# Path to processed data
20
PROCESSED_DATA_PATH = "ProcessedData/"
21
22
23
def create_database_connection():
    """
    Creates an authenticated connection to mongoDB

    Credentials are passed straight to MongoClient instead of calling
    Database.authenticate, which was deprecated in PyMongo 3.5 and
    removed in PyMongo 4.0.

    Returns:
        Handle of the MONGODB_DATA_DATABASE database

    Raises:
        pymongo.errors.PyMongoError: when the server is unreachable
            or refuses the credentials
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        # the original code authenticated against the admin database
        authSource="admin",
    )

    # MongoClient connects lazily; ping now so that a wrong password or an
    # unreachable server fails here, as the explicit authenticate call did
    client.admin.command("ping")

    return client[MONGODB_DATA_DATABASE]
38
39
40
def get_data_from_file(filename, config):
    """
    Reads a processed file and groups its data lines by date.

    Every line of the file has the form ``name;occurrence;date``.
    The name is looked up in the devices of the config to get the
    device map coordinates. Lines of devices whose x or y coordinate
    is "SKIP" are ignored, as are empty lines.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset

    Returns:
        dictionary with date without hours as key
        and list of data lines (converted to dictionaries) as value

    Raises:
        OSError: when the processed file can not be opened
        KeyError: when a line names a device missing in the config
        IndexError: when a line has fewer than three columns
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'
    devices = config["devices"]

    date_dict = dict()

    # context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the line break; the last line may not have one,
            # so blindly cutting the last character would damage the date
            line = line.rstrip("\n")

            # tolerate blank lines (e.g. a trailing one at the end of file)
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            device = devices[name]

            if device["x"] == "SKIP" or device["y"] == "SKIP":
                continue

            occurrence = csv_column[1]
            date = csv_column[2]

            database_data_line = DatabaseDataLine.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # drops the last three characters of the date (presumably the
            # hour part); change this slice to split collections differently
            date_without_hours = date[:-3]

            date_dict.setdefault(date_without_hours, []).append(
                database_data_line.to_dictionary())

    return date_dict
87
88
89 0a2832fb vastja
def load_data_to_database(database_connection, dataset_name, data_dic, file_name):
    """
    Stores the dictionary produced by get_data_from_file in the database.

    For every key (date) of data_dic a document {'date': date} is added
    to the collection named dataset_name, and the data lines of that date
    are inserted into the collection named dataset_name + date.
    Finally the file name is recorded in the collection of loaded files
    of the dataset.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset the data belong to
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data
    """
    for date, data_lines in data_dic.items():
        # register the date in the dataset collection
        database_connection[dataset_name].insert_one({'date': date})
        # store the data lines in the per-date collection
        database_connection[dataset_name + date].insert_many(data_lines)

    loaded_files_name = dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION
    database_connection[loaded_files_name].insert_one({'file': file_name})
110
111
112
113
def check_or_update_datasets_collection(database_connection, config):
    """
    Keeps the entry of the dataset in the DATASETS collection up to date.

    A missing dataset is inserted with its key name and display name.
    An existing dataset whose stored display name differs from the
    config gets its display name overwritten. Otherwise nothing is written.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # collection listing the available datasets
    datasets = database_connection[MONGODB_DATASET_COLLECTION]

    key_name = config['dataset-name']
    shown_name = config['display-name']

    selector = {'key-name': key_name}
    stored = datasets.find_one(selector)

    # dataset not registered yet
    if stored is None:
        datasets.insert_one({'key-name': key_name, 'display-name': shown_name})
        return

    # dataset registered under an outdated display name
    if stored['display-name'] != shown_name:
        datasets.update_one(selector, {"$set": {'display-name': shown_name}})
137 527abccd petrh
138 0a2832fb vastja
139
def update_devices_collection(database_connection, config):
    """
    Replaces the content of the devices collection of the dataset
    with the devices of the current config.

    A device is stored only when neither of its coordinates is "SKIP",
    which is the same rule get_data_from_file uses to drop data lines.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    dataset_name = config['dataset-name']
    devices = config['devices']

    # build the new content first, so a broken config raises
    # before the old content of the collection is deleted
    devices_list = [
        {'name': name, 'x': device['x'], 'y': device['y']}
        for name, device in devices.items()
        # skip the device if ANY coordinate is "SKIP"
        if device['x'] != "SKIP" and device['y'] != "SKIP"
    ]

    collection_devices = database_connection[dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    collection_devices.delete_many({})

    # insert_many refuses an empty list of documents
    if devices_list:
        collection_devices.insert_many(devices_list)
162
163
164
def check_if_database_doesnt_contain_file(database_connection, dataset_name, file_name):
    """
    Tells whether a file is not yet recorded as loaded for the dataset.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of the dataset the file belongs to
        file_name: checked file name

    Returns:
        True when the collection of loaded files of the dataset has
        no document for file_name, False otherwise
    """
    loaded_files = database_connection[dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION]

    # find_one gives None when no document matches the query
    return loaded_files.find_one({'file': file_name}) is None