Revize af7609b5
Přidáno uživatelem Tomáš Ballák před více než 3 lety
modules/crawler/Utilities/folder_processor.py | ||
---|---|---|
1 | 1 |
import os |
2 | 2 |
import zipfile |
3 |
from shared_types import ConfigType, StringSetType |
|
3 | 4 |
from Utilities.CSV import csv_utils |
4 | 5 |
from Utilities.Database import database_record_logs |
5 | 6 |
|
6 | 7 |
|
7 |
def list_of_all_new_files(ignore_set,path): |
|
8 |
def list_of_all_new_files(ignore_set: StringSetType, |
|
9 |
path: str) -> StringSetType: |
|
8 | 10 |
""" |
9 | 11 |
Get all files from directory and all files written in ignore.txt |
10 | 12 |
and return the difference |
... | ... | |
16 | 18 |
""" |
17 | 19 |
files_in_dir = os.listdir(path) |
18 | 20 |
|
19 |
|
|
20 | 21 |
return set(files_in_dir).difference(ignore_set) |
21 | 22 |
|
22 | 23 |
|
23 |
|
|
24 |
def get_devices_set(dataset_name,path): |
|
24 |
def get_devices_set(dataset_name: str, path: str) -> StringSetType: |
|
25 | 25 |
""" |
26 | 26 |
Goes through every not loaded file (not contained in ProcessedData/ignore.txt) |
27 | 27 |
Extracts names from not loaded file which should be in first column |
... | ... | |
34 | 34 |
set of unique names contained in not loaded files |
35 | 35 |
""" |
36 | 36 |
ignore_set = database_record_logs.load_ignore_set_loaded(dataset_name) |
37 |
files_in_dir = list_of_all_new_files(ignore_set,path) |
|
37 |
files_in_dir = list_of_all_new_files(ignore_set, path)
|
|
38 | 38 |
|
39 | 39 |
unique_names = set() |
40 | 40 |
|
41 | 41 |
for file_path in files_in_dir: |
42 |
unique_names.update(csv_utils.get_unique_names_from_file(path+file_path, 0)) |
|
42 |
unique_names.update( |
|
43 |
csv_utils.get_unique_names_from_file(path + file_path, 0)) |
|
43 | 44 |
|
44 | 45 |
return unique_names |
45 | 46 |
|
46 | 47 |
|
47 |
def get_unknown_devices_set(config, devices): |
|
48 |
def get_unknown_devices_set(config: ConfigType, |
|
49 |
devices: StringSetType) -> StringSetType: |
|
48 | 50 |
""" |
49 | 51 |
Compares config and devices and returns the difference |
50 | 52 |
|
... | ... | |
61 | 63 |
return unknown_devices_set |
62 | 64 |
|
63 | 65 |
|
64 |
def unzip_all_csv_zip_files_in_folder(path):
|
|
66 |
def unzip_all_csv_zip_files_in_folder(path: str) -> None:
|
|
65 | 67 |
""" |
66 | 68 |
Loads all files from the directory and unzips those which end with .zip |
67 | 69 |
After unzipping, deletes the zip file |
... | ... | |
83 | 85 |
os.remove(zip_file) |
84 | 86 |
|
85 | 87 |
|
86 |
def clean_folder(path):
|
|
88 |
def clean_folder(path: str) -> None:
|
|
87 | 89 |
""" |
88 | 90 |
Deletes all files in folder |
89 | 91 |
|
... | ... | |
93 | 95 |
files = os.listdir(path) |
94 | 96 |
|
95 | 97 |
for file in files: |
96 |
os.remove(path+file) |
|
98 |
os.remove(path + file) |
Také k dispozici: Unified diff
Re #8193 - refactoring crawler