Projekt

Obecné

Profil

« Předchozí | Další » 

Revize af7609b5

Přidáno uživatelem Tomáš Ballák před více než 3 roky(ů)

Re #8193 - refactoring crawler

Zobrazit rozdíly:

modules/crawler/Utilities/Database/database_record_logs.py
1 1
from Utilities.Database import database_loader
2

  
2
from shared_types import StringSetType
3 3
# mongodb collection with already downloaded links
4 4
MONGODB_DATASET_LINK_COLLECTION = "LINKS"
5 5
# mongodb collection with already processed files
......
10 10
MONGODB_DATASET_COLLECTION = "DATASETS"
11 11

  
12 12

  
13
def load_ignore_set_links(dataset_name):
13
def load_ignore_set_links(dataset_name: str) -> StringSetType:
14 14
    """
15 15
    Loads from database links of already downloaded files by crawler
16 16
    
......
32 32
    return ignore_set
33 33

  
34 34

  
35
def update_ignore_set_links(dataset_name,link):
35
def update_ignore_set_links(dataset_name: str, link: str) -> None:
36 36
    """
37 37
    Adds links of newly crawled files to the database
38 38
    
......
44 44

  
45 45
    my_col = connection[dataset_name + MONGODB_DATASET_LINK_COLLECTION]
46 46

  
47
    my_col.insert({ "name": link})
47
    my_col.insert({"name": link})
48 48

  
49 49

  
50
def reset_ignore_set_links(dataset_name):
50
def reset_ignore_set_links(dataset_name: str) -> None:
51 51
    """
52 52
    Drops collection of already downloaded links
53 53
    
......
62 62
    my_col.drop()
63 63

  
64 64

  
65

  
66
def load_ignore_set_processed(dataset_name):
65
def load_ignore_set_processed(dataset_name: str) -> StringSetType:
67 66
    """
68 67
    Loads from database set of already processed files
69 68
    
......
85 84
    return ignore_set
86 85

  
87 86

  
88
def update_ignore_set_processed(dataset_name,filename):
87
def update_ignore_set_processed(dataset_name: str, filename: str) -> None:
89 88
    """
90 89
    Adds files of newly processed files to the database
91 90
    
......
97 96

  
98 97
    my_col = connection[dataset_name + MONGODB_DATASET_PROCESSED_COLLECTION]
99 98

  
100
    my_col.insert({ "name": filename})
101

  
99
    my_col.insert({"name": filename})
102 100

  
103 101

  
104
def reset_ignore_set_processed(dataset_name):
102
def reset_ignore_set_processed(dataset_name: str) -> None:
105 103
    """
106 104
    Drops collection of already processed files
107 105
    
......
116 114
    my_col.drop()
117 115

  
118 116

  
119

  
120
def load_ignore_set_loaded(dataset_name):
117
def load_ignore_set_loaded(dataset_name: str) -> StringSetType:
121 118
    """
122 119
    Loads from database set of already loaded files in database
123 120
    
......
139 136
    return ignore_set
140 137

  
141 138

  
142

  
143
def update_ignore_set_loaded(dataset_name,filename):
139
def update_ignore_set_loaded(dataset_name: str, filename: str) -> None:
144 140
    """
145 141
    Adds files of newly loaded files to the database
146 142
    
......
152 148

  
153 149
    my_col = connection[dataset_name + MONGODB_DATASET_LOADED_COLLECTION]
154 150

  
155
    my_col.insert({ "name": filename})
151
    my_col.insert({"name": filename})
156 152

  
157 153

  
158
def reset_ignore_set_loaded(dataset_name):
154
def reset_ignore_set_loaded(dataset_name: str) -> None:
159 155
    """
160 156
    Drops collection of already loaded files
161 157
    
......
170 166
    my_col.drop()
171 167

  
172 168

  
173
def load_updated(dataset_name):
169
def load_updated(dataset_name: str) -> int:
174 170
    """
175 171
    Loads value of (days from last update) from db
176 172
    
......
184 180

  
185 181
    my_col = connection[MONGODB_DATASET_COLLECTION]
186 182

  
187
    data = my_col.find_one({'key-name': dataset_name},{'updated'})
183
    data = my_col.find_one({'key-name': dataset_name}, {'updated'})
188 184

  
189 185
    updated = int(data['updated'])
190 186

  
191 187
    return updated
192 188

  
193 189

  
194
def update_updated(dataset_name,value):
190
def update_updated(dataset_name: str, value: int):
195 191
    """
196 192
    Updates value of (days from last update) in db
197 193
    
......
203 199

  
204 200
    my_col = connection[MONGODB_DATASET_COLLECTION]
205 201

  
206
    myquery = { 'key-name': dataset_name }
207
    new_values = { "$set": { "updated": value } }
202
    myquery = {'key-name': dataset_name}
203
    new_values = {"$set": {"updated": value}}
208 204

  
209
    my_col.update_one(myquery,new_values)
205
    my_col.update_one(myquery, new_values)

Také k dispozici: Unified diff