Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 04a2b5a4

Přidáno uživatelem Petr Hlaváč před asi 4 roky(ů)

Re #7939
- pridana dokumentace metod a trid
- korekce chyb v jmenech promenych
- pridani informaci pro vygenerovane skripty

Zobrazit rozdíly:

python-module/Utilities/Database/DatabaseLoader.py
1 1
from Utilities.Database import DatabaseDataLine
2 2
import pymongo
3 3

  
4
# specify mongodb connection
5
MONGODB_CONNECTION = "mongodb://localhost:27017/"
6
# mongodb account name
7
MONGODB_ACC_NAME = "root"
8
# mongodb account password
9
MONGODB_ACC_PASSWORD = "root"
10
# mongodb data database
11
MONGODB_DATA_DATABASE = "DATA"
12
# mongodb collection with available datasets
13
MONGODB_DATASET_COLLECTION = "DATASETS"
14

  
15
# Path to processed data
16
PROCESSED_DATA_PATH = "ProcessedData/"
17

  
18

  
19
def create_database_connection():
    """
    Opens a client for the MongoDB server and returns the data database.

    The credentials are handed to ``MongoClient`` itself instead of calling
    ``Database.authenticate``: that method is deprecated in PyMongo 3.x and
    no longer exists in PyMongo 4. As a consequence the client authenticates
    when the first operation is sent, not inside this function.

    Returns:
        Handle of the MONGODB_DATA_DATABASE database
    """
    client = pymongo.MongoClient(
        MONGODB_CONNECTION,
        username=MONGODB_ACC_NAME,
        password=MONGODB_ACC_PASSWORD,
        # the account was previously authenticated against the "admin" database
        authSource="admin",
    )

    return client[MONGODB_DATA_DATABASE]
34

  
35

  
36
def get_data_from_file(filename, config):
    """
    Reads one processed file of a dataset and groups its records by date.

    Every non-empty line is expected to hold three ';' separated columns:
    name, occurrence, date. The name is looked up in the devices of the
    configuration to get the device map coordinates. The resulting data line
    is stored under its date with the last two characters (the hours)
    cut off.

    Args:
        filename: name of processed file
        config: loaded configuration file of dataset, has to provide
            the keys "dataset-name" and "devices"

    Returns:
        dictionary with date without hours as key
        and list of converted data lines (to_dictionary) as value

    Raises:
        IndexError: a non-empty line has fewer than three columns
        KeyError: a name from the file is missing in config["devices"]
    """
    dataset_path = PROCESSED_DATA_PATH + config["dataset-name"] + '/'
    devices = config["devices"]
    date_dict = dict()

    # the context manager closes the file even when a line fails to parse
    with open(dataset_path + filename, "r") as f:
        for line in f:
            # strip only the line break; slicing with [:-1] would cut a real
            # character from a last line that has no trailing newline
            line = line.rstrip("\n")
            if not line:
                # blank line (e.g. at the end of the file) carries no record
                continue

            csv_column = line.split(";")
            name = csv_column[0]
            occurrence = csv_column[1]
            date = csv_column[2]

            device = devices[name]
            database_data_line = DatabaseDataLine.DatabaseDataLine(
                name, device["x"], device["y"], date, occurrence)

            # store the converted record, not the bound method itself;
            # NOTE(review): DatabaseDataLine is not visible here - the call is
            # skipped in case to_dictionary is a property, confirm and simplify
            record = database_data_line.to_dictionary
            if callable(record):
                record = record()

            # if you want to change table split by hours or months change this
            date_without_hours = date[:-2]
            date_dict.setdefault(date_without_hours, []).append(record)

    return date_dict
32 79

  
33 80

  
34 81
def load_data_to_database(dataset_name, data_dic):
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database.

    The dataset is registered in the collection of available datasets when
    it is not there yet. For every key of data_dic a document
    {'name': dataset_name + key} is added to the collection dataset_name and
    the data lines are inserted into the collection dataset_name + key.

    Args:
        dataset_name: name of dataset that has existing configuration file
        data_dic: dictionary of data lines created in get_data_from_file
    """
    database = create_database_connection()

    # collection where the available datasets are specified
    collection_datasets = database[MONGODB_DATASET_COLLECTION]

    # check if newly added data already have a dataset specified in collection;
    # the name has to be the filter (first argument) - as a projection with an
    # empty filter any stored dataset would count as "already present"
    dataset_present = collection_datasets.find_one({'name': dataset_name})

    if dataset_present is None:
        collection_datasets.insert_one({'name': dataset_name})

    # collection listing the per-date collections of this dataset
    dataset_collections = database[dataset_name]

    for date, data_lines in data_dic.items():
        if not data_lines:
            # insert_many refuses an empty list, nothing to store for this key
            continue

        dataset_collections.insert_one({'name': dataset_name + date})
        database[dataset_name + date].insert_many(data_lines)

Také k dispozici: Unified diff