Revize 04a2b5a4
Přidáno uživatelem Petr Hlaváč před asi 4 roky(ů)
python-module/Utilities/FolderProcessor.py | ||
---|---|---|
1 | 1 |
import os |
2 | 2 |
import zipfile |
3 |
from CSV import CSVutils |
|
3 | 4 |
|
4 | 5 |
|
5 | 6 |
def list_of_all_files(path): |
7 |
""" |
|
8 |
Get all files from directory and all files written in ignore.txt |
|
9 |
and return the difference |
|
10 |
Args: |
|
11 |
path: path to Directory |
|
12 |
|
|
13 |
Returns: |
|
14 |
list with names of all files in directory |
|
15 |
""" |
|
6 | 16 |
files_in_dir = os.listdir(path) |
7 | 17 |
|
8 | 18 |
ignore_set = load_ignore_set(path) |
... | ... | |
11 | 21 |
|
12 | 22 |
|
13 | 23 |
def load_ignore_set(path): |
24 |
""" |
|
25 |
Reads ignore.txt line by line and adds each line to a set |
|
26 |
Args: |
|
27 |
path: Path to directory containing ignore.txt file |
|
28 |
|
|
29 |
Returns: |
|
30 |
set of names contained in ignore.txt file |
|
31 |
""" |
|
14 | 32 |
ignore_set = set() |
15 | 33 |
|
16 | 34 |
with open(path + "ignore.txt", "r") as file: |
... | ... | |
21 | 39 |
return ignore_set |
22 | 40 |
|
23 | 41 |
|
24 |
def update_ignore_set(path,file_name): |
|
25 |
|
|
42 |
def update_ignore_set(path, file_name): |
|
43 |
""" |
|
44 |
Adds file_name to the ignore file |
|
45 |
Args: |
|
46 |
path: Path to directory containing ignore.txt file |
|
47 |
file_name: name of file you want to add to ignore file |
|
48 |
""" |
|
26 | 49 |
with open(path + "ignore.txt", "a") as file: |
27 | 50 |
file.write(file_name + '\n') |
28 | 51 |
|
29 | 52 |
|
30 |
def get_devices_set(folder): |
|
53 |
def get_devices_set(path): |
|
54 |
""" |
|
55 |
Goes through every file that has not been loaded yet (not contained in ProcessedData/ignore.txt) |
|
56 |
Extracts names from each not-yet-loaded file; the names should be in the first column |
|
57 |
Creates a set of unique device names |
|
58 |
|
|
59 |
Args: |
|
60 |
path: Path to Processed directory containing ignore.txt file |
|
31 | 61 |
|
32 |
files_in_dir = list_of_all_files(folder) |
|
62 |
Returns: |
|
63 |
set of unique names contained in not loaded files |
|
64 |
""" |
|
65 |
files_in_dir = list_of_all_files(path) |
|
33 | 66 |
|
34 | 67 |
unique_names = set() |
35 | 68 |
|
36 | 69 |
for file_path in files_in_dir: |
37 |
with open(folder+file_path) as file: |
|
38 |
for line in file: |
|
39 |
array = line.split(";") |
|
40 |
name = array[0] |
|
41 |
unique_names.add(name) |
|
70 |
unique_names.add(CSVutils.get_unique_names_from_file(path+file_path, 0)) |
|
42 | 71 |
|
43 | 72 |
return unique_names |
44 | 73 |
|
45 | 74 |
|
46 |
def get_unknown_devices_set(config,devices): |
|
75 |
def get_unknown_devices_set(config, devices): |
|
76 |
""" |
|
77 |
Compares config and devices and returns the difference |
|
78 |
|
|
79 |
Args: |
|
80 |
config: loaded configuration file of dataset |
|
81 |
devices: set of unique devices contained in dataset |
|
82 |
|
|
83 |
Returns: |
|
84 |
|
|
85 |
""" |
|
47 | 86 |
devices_set = set(config["devices"].keys()) |
48 | 87 |
unknown_devices_set = devices.difference(devices_set) |
49 | 88 |
|
50 | 89 |
return unknown_devices_set |
51 | 90 |
|
52 | 91 |
|
53 |
def unzip_all_csv_zip_files_in_folder(folder): |
|
54 |
|
|
55 |
files_in_dir = os.listdir(folder) |
|
92 |
def unzip_all_csv_zip_files_in_folder(path): |
|
93 |
""" |
|
94 |
Loads all files from the directory and unzips those that end with .zip |
|
95 |
After unzipping, deletes the zip file |
|
96 |
Args: |
|
97 |
path: Path to CrawledData directory containing ignore.txt file |
|
98 |
""" |
|
99 |
files_in_dir = os.listdir(path) |
|
56 | 100 |
zips = [] |
57 | 101 |
|
58 | 102 |
for file in files_in_dir: |
59 | 103 |
if file.endswith(".zip"): |
60 |
zips.append(folder + file)
|
|
104 |
zips.append(path + file)
|
|
61 | 105 |
|
62 | 106 |
for zip_file in zips: |
63 | 107 |
|
64 | 108 |
with zipfile.ZipFile(zip_file, "r") as unziped_file: |
65 |
unziped_file.extractall(folder)
|
|
109 |
unziped_file.extractall(path)
|
|
66 | 110 |
|
67 | 111 |
os.remove(zip_file) |
68 | 112 |
|
Také k dispozici: Unified diff
Re #7939
- pridana dokumentace metod a trid
- korekce chyb v jmenech promenych
- pridani informaci pro vygenerovane skripty