Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 0a2832fb

Přidáno uživatelem Jakub Vašta před asi 4 roky(ů)

Re #8089
- přidána nová proměnná do konfiguračního souboru
- předělána tabulka v databázi s názvem DATASETS
- nově obsahuje "key-name" jako klíč a "display-name" pro zobrazení

Zobrazit rozdíly:

modules/crawler/Utilities/Database/DatabaseLoader.py
11 11
MONGODB_DATA_DATABASE = "open-data-db"
12 12
# mongodb collection with available datasets
13 13
MONGODB_DATASET_COLLECTION = "DATASETS"
14
# mongodb collection with available devices of datasets
15
MONGODB_DATASET_DEVICES_COLLECTION = "DEVICES"
16
# mongodb collection with loaded files of datasets
17
MONGODB_DATASET_LOADED_FILES_COLLECTION = "FILES"
14 18

  
15 19
# Path to processed data
16 20
PROCESSED_DATA_PATH = "ProcessedData/"
......
63 67

  
64 68
        name = csv_column[0]
65 69

  
70
        if devices[name]["x"] == "SKIP" or devices[name]["y"] == "SKIP":
71
            continue
72

  
66 73
        occurrence = csv_column[1]
67 74
        date = csv_column[2]
68 75
        database_data_line = DatabaseDataLine.DatabaseDataLine(
......
79 86
    return date_dict
80 87

  
81 88

  
def load_data_to_database(database_connection, dataset_name, data_dic, file_name):
    """
    Takes data_dic created in method get_data_from_file
    and loads it into the database, where each collection name is
    dataset_name + data_dic key and the data lines are the documents
    of that collection.

    Every created collection name is also registered in the collection
    called dataset_name, and file_name is recorded in the collection
    dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION.

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of dataset, used as prefix of the collection names
        data_dic: dictionary of data lines created in get_data_from_file
        file_name: name of file containing data
    """
    # collection listing the names of the per-date collections of this dataset;
    # the lookup does not depend on the date, so it is done once
    dataset_collections = database_connection[dataset_name]

    for date, data_lines in data_dic.items():
        dataset_collections.insert_one({'name': dataset_name + date})
        database_connection[dataset_name + date].insert_many(data_lines)

    # record the processed file name next to the data
    collection_loaded_files = database_connection[dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION]
    collection_loaded_files.insert_one({'file': file_name})
110

  
111

  
112

  
def check_or_update_datasets_collection(database_connection, config):
    """
    Makes sure the DATASETS collection contains the dataset with its current display name

    The dataset is identified by 'key-name'. It is inserted when missing and
    its 'display-name' is rewritten when it differs from the configuration.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    # collection where available datasets are specified
    collection_datasets = database_connection[MONGODB_DATASET_COLLECTION]

    dataset_name = config['dataset-name']
    display_name = config['display-name']

    query = {'key-name': dataset_name}
    newvalues = {"$set": {'display-name': display_name}}

    # A single upsert replaces the former find / insert / update sequence:
    # a missing dataset is inserted as {'key-name', 'display-name'}, an
    # existing one only gets 'display-name' set. This avoids the gap between
    # the check and the write and also works for a stored document that has
    # no 'display-name' field yet.
    collection_datasets.update_one(query, newvalues, upsert=True)
102 137

  
103
    for date in data_dic:
104
        dataset_collections = database[dataset_name]
105
        dataset_collections.insert_one({'name': dataset_name + date})
106
        date_dataset = database[dataset_name + date]
107
        date_dataset.insert_many(data_dic[date])
138

  
def update_devices_collection(database_connection, config):
    """
    Rebuilds the devices collection of the dataset from the current configuration

    All documents of the collection dataset_name + MONGODB_DATASET_DEVICES_COLLECTION
    are deleted and every device from config['devices'] whose 'x' and 'y'
    are both different from "SKIP" is inserted again.

    Args:
        database_connection: created connection to a MONGODB
        config: loaded configuration file of dataset
    """
    dataset_name = config['dataset-name']

    collection_devices = database_connection[dataset_name + MONGODB_DATASET_DEVICES_COLLECTION]

    collection_devices.delete_many({})

    # A device is left out as soon as either coordinate is "SKIP"; this is
    # the same condition under which this module skips data lines of a device.
    devices_list = [
        {'name': name, 'x': device['x'], 'y': device['y']}
        for name, device in config['devices'].items()
        if device['x'] != "SKIP" and device['y'] != "SKIP"
    ]

    # insert_many rejects an empty list, so nothing is inserted
    # when every device is skipped
    if devices_list:
        collection_devices.insert_many(devices_list)
162

  
163

  
def check_if_database_doesnt_contain_file(database_connection, dataset_name, file_name):
    """
    Checks that file_name is not recorded in the loaded files collection of the dataset

    Args:
        database_connection: created connection to a MONGODB
        dataset_name: name of dataset, prefix of the loaded files collection name
        file_name: checked file name

    Returns:
        True if the collection holds no document with this file name,
        False otherwise
    """
    collection_loaded_files = database_connection[dataset_name + MONGODB_DATASET_LOADED_FILES_COLLECTION]

    query = {'file': file_name}

    # find_one returns None when no document matches the query
    return collection_loaded_files.find_one(query) is None

Také k dispozici: Unified diff