Projekt

Obecné

Profil

Stáhnout (3.32 KB) Statistiky
| Větev: | Revize:
1 527abccd petrh
from Utilities.Database import DatabaseDataLine
2
import pymongo
3
4 04a2b5a4 petrh
# MongoDB connection URI
MONGODB_CONNECTION = "mongodb://root:root@database"
# MongoDB account name
MONGODB_ACC_NAME = "root"
# MongoDB account password
MONGODB_ACC_PASSWORD = "root"
# Name of the MongoDB database that holds the data
MONGODB_DATA_DATABASE = "DATA"
# Name of the MongoDB collection that lists the available datasets
MONGODB_DATASET_COLLECTION = "DATASETS"

# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
17
18
19
def create_database_connection():
    """
    Creates connection to mongoDB

    The account name and password are passed to ``MongoClient`` itself
    (authenticating against the ``admin`` database) instead of calling
    ``Database.authenticate()``, which newer PyMongo releases no longer
    provide.

    Returns:
        Connection to mongoDB (the database named by MONGODB_DATA_DATABASE)
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        authSource="admin"
    )

    # MongoClient connects lazily; a cheap command forces the connection and
    # the authentication to happen here, so an unreachable server or wrong
    # credentials are reported by this function and not by a later query.
    client.admin.command("ping")

    return client[MONGODB_DATA_DATABASE]
34
35
36
def get_data_from_file(filename, config):
    """
        Opens processed file and reads it line by line. Each line holds
        three ';'-separated columns:
        name, occurrence, date
        The name is looked up in config["devices"] to get the device map
        coordinates ("x" and "y"). Lines are then grouped into a dictionary
        keyed by the date with its last two characters removed (the hour
        part, according to the original comment).
    Args:
        filename: name of processed file
        config: loaded configuration file of dataset; the keys
            "dataset-name" and "devices" are read

    Returns:
        dictionary with date without hours as key
        and list of data lines (as dictionaries) as value
    """
    dataset_name = config["dataset-name"]
    dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

    devices = config["devices"]
    date_dict = dict()

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the line terminator; slicing off the last character
            # would damage a final line that has no trailing newline
            line = line.rstrip("\n")

            # blank lines (e.g. at the end of the file) carry no data
            if not line:
                continue

            csv_column = line.split(";")

            name = csv_column[0]
            occurrence = csv_column[1]
            date = csv_column[2]

            device = devices[name]
            database_data_line = DatabaseDataLine.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # if you want to change table split by hours or months change this
            date_without_hours = date[:-2]

            date_dict.setdefault(date_without_hours, []).append(
                database_data_line.to_dictionary())

    return date_dict
82
83
84
def load_data_to_database(dataset_name, data_dic):
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database, where the collection name is
    dataset_name + data_dic key and the data lines are the documents
    of that collection.

    The dataset is registered in the MONGODB_DATASET_COLLECTION collection
    if it is not there yet, and every loaded date collection is recorded in
    the collection named dataset_name.

    Args:
        dataset_name: name of dataset that has existing configuration file
        data_dic: dictionary of data lines created in get_data_from_file
    """
    database = create_database_connection()

    try:
        # collection where the available datasets are specified
        collection_datasets = database[MONGODB_DATASET_COLLECTION]

        # check if newly added data already have a dataset specified in collection
        dataset_present = collection_datasets.find_one({'name': dataset_name})

        if dataset_present is None:
            collection_datasets.insert_one({'name': dataset_name})

        # the same for every date, so it is looked up once
        dataset_collections = database[dataset_name]

        for date, data_lines in data_dic.items():
            # insert_many rejects an empty list; a date without data lines
            # has nothing to store or register
            if not data_lines:
                continue

            # NOTE(review): a new entry is added on every load, so loading
            # the same date twice records it twice - confirm this is intended
            dataset_collections.insert_one({'name': dataset_name + date})
            date_dataset = database[dataset_name + date]
            date_dataset.insert_many(data_lines)
    finally:
        # the connection was opened by this function, so release it here
        database.client.close()