Projekt

Obecné

Profil

« Předchozí | Další » 

Revize af7609b5

Přidáno uživatelem Tomáš Ballák před více než 3 lety

Re #8193 - refactoring crawler

Zobrazit rozdíly:

modules/crawler/Utilities/Database/database_loader.py
1 1
from Utilities.Database import database_data_line, database_record_logs
2 2
from Utilities import configure_functions
3 3
from Utilities.helpers import should_skip, detect_change
4
from shared_types import ConfigType
5
from typing import Dict
4 6
import pymongo
5 7
import re
6 8

  
......
20 22
# Path to processed data
21 23
PROCESSED_DATA_PATH = "ProcessedData/"
22 24

  
25
DatabaseConnectionType = Dict[str, any]
23 26

  
24
def create_database_connection():
27

  
28
def create_database_connection() -> pymongo.database.Database:
25 29
    """
26 30
    Creates connection to mongoDB
27 31

  
......
38 42
    return database
39 43

  
40 44

  
41
def get_data_from_file(filename, config):
45
def get_data_from_file(filename: str, config: ConfigType) -> Dict[str, any]:
42 46
    """
43 47
        Opens processed file, reads it line by line
44 48
        name, occurrence, date
......
59 63
    f = open(dataset_path + filename, "r")
60 64

  
61 65
    devices = config["devices"]
62
    date_dict = dict()
66
    date_dict = {}
63 67

  
64 68
    for line in f:
65 69
        line = line[:-1]
......
86 90
    return date_dict
87 91

  
88 92

  
89
def load_data_to_database(database_connection, dataset_name, data_dic,
90
                          file_name):
93
def load_data_to_database(database_connection: DatabaseConnectionType,
94
                          dataset_name: str, data_dic: Dict[str, any],
95
                          file_name: str) -> None:
91 96
    """
92 97
    Takes data_dic created in method get_data_from_file
93 98
    and loads it into database where collection name is dataset_name + data_dic key
......
107 112
        date_dataset.insert_many(data_dic[date])
108 113

  
109 114

  
110
def check_or_update_datasets_collection(database_connection, config):
115
def check_or_update_datasets_collection(
116
        database_connection: DatabaseConnectionType, config: ConfigType):
111 117
    """
112 118
    Checks if DATASETS collection contains dataset and if display name was not updated
113 119

  
......
116 122
        config: loaded configuration file of dataset
117 123
    """
118 124
    # collection where available datasets are specified
119
    compareKeys = ['display-name',
120
                   'display-color']
125
    compareKeys = ['display-name', 'display-color']
121 126
    collection_datasets = database_connection[MONGODB_DATASET_COLLECTION]
122 127

  
123 128
    query = {'key-name': config['dataset-name']}
......
139 144
        collection_datasets.update_one(query, {"$set": newVal})
140 145

  
141 146

  
142
def update_devices_collection(config):
147
def update_devices_collection(config: ConfigType):
143 148
    """
144 149
    Checks if there are any changes in devices specified in config file against 
145 150
    devices processed and loaded into the database
......
164 169

  
165 170
    devices_cursor = collection_devices.find()
166 171

  
167
    db_device_dict = dict()
172
    db_device_dict = {}
168 173

  
169 174
    for device in devices_cursor:
170 175
        name = device['name']
......
208 213
    return change_in_devices
209 214

  
210 215

  
211
def remove_dataset_database(dataset_name):
216
def remove_dataset_database(dataset_name: str):
212 217
    """
213 218
    Removes dataset entries from database
214 219
    Args:
......
221 226
    collection_datasets = mydb[MONGODB_DATASET_COLLECTION]
222 227

  
223 228
    collection_datasets.delete_one({"key-name": dataset_name})
224
    print("Removing record from DATASETS collection")
229
    print("Odstraňování záznamu z DATASETS kolekce")
225 230

  
226 231
    # Retrieve list of all collections
227 232
    collections = mydb.list_collection_names()
......
230 235
    for name in collections:
231 236
        if name.startswith(dataset_name):
232 237
            mydb[name].drop()
233
            print("Dropping: " + name)
238
            print("Odstraňuji: " + name)
234 239

  
235 240

  
236
def reset_dataset_database(dataset_name):
241
def reset_dataset_database(dataset_name: str):
237 242
    """
238 243
    Reset dataset in database 
239 244
     - delete everything except crawled links and the mention in DATASETS collection
......
252 257
    for name in collections:
253 258
        if pattern.match(name):
254 259
            mydb[name].drop()
255
            print("Dropping: " + name)
260
            print("Odstraňuji: " + name)
256 261

  
257 262
    database_record_logs.reset_ignore_set_loaded(dataset_name)

Také k dispozici: Unified diff