Projekt

Obecné

Profil

Stáhnout (2.96 KB) Statistiky
| Větev: | Revize:
1 c8f3051b petrh
import os
2
import zipfile
3 34cf65cd petrh
from Utilities.CSV import CSVutils
4 c8f3051b petrh
5
6
def list_of_all_files(path):
    """
    Return the directory entries that are not listed in ignore.txt.

    Args:
        path: path to the directory; it is listed with os.listdir and is
              also handed to load_ignore_set to locate ignore.txt

    Returns:
        set with the names of all directory entries that are not
        mentioned in ignore.txt
    """
    remaining = set(os.listdir(path))

    # Drop every name that the ignore file mentions.
    remaining.difference_update(load_ignore_set(path))

    return remaining
21
22
23
def load_ignore_set(path):
    """
    Read ignore.txt line by line and collect the lines in a set.

    Only the line terminator is removed from each line, so a final line
    without a trailing newline is kept intact instead of losing its last
    character.

    Args:
        path: Path to directory containing ignore.txt file

    Returns:
        set of names contained in ignore.txt file

    Raises:
        OSError: if ignore.txt cannot be opened (e.g. it does not exist)
    """
    # os.path.join inserts a separator only when `path` does not already
    # end with one, so "dir/" and "dir" both resolve to "dir/ignore.txt".
    ignore_file_path = os.path.join(path, "ignore.txt")

    with open(ignore_file_path, "r") as file:
        return {line.rstrip("\n") for line in file}
40
41
42 04a2b5a4 petrh
def update_ignore_set(path, file_name):
    """
    Append file_name as a new line to the ignore file.

    The file is opened in append mode, so it is created when it does not
    exist yet and existing content is preserved.

    Args:
        path: Path to directory containing ignore.txt file
        file_name: name of file you want to add to ignore file

    Raises:
        OSError: if ignore.txt cannot be opened for appending
    """
    # os.path.join inserts a separator only when `path` does not already
    # end with one, so "dir/" and "dir" both resolve to "dir/ignore.txt".
    ignore_file_path = os.path.join(path, "ignore.txt")

    with open(ignore_file_path, "a") as file:
        file.write(file_name + '\n')
51
52
53 04a2b5a4 petrh
def get_devices_set(path):
    """
    Build the set of unique device names found in the not yet loaded files.

    Goes through every entry returned by list_of_all_files(path), i.e.
    every directory entry that is not listed in ignore.txt, and merges
    the names CSVutils.get_unique_names_from_file reports for it.

    Args:
        path: Path to Processed directory containing ignore.txt file

    Returns:
        set of unique names contained in not loaded files
    """
    unique_names = set()

    for file_name in list_of_all_files(path):
        # os.path.join inserts a separator only when `path` lacks one, so
        # the file is found whether or not `path` ends with a slash.
        file_path = os.path.join(path, file_name)
        # NOTE(review): the 0 is presumably the index of the column that
        # holds the device name (first column) - confirm in CSVutils.
        unique_names.update(CSVutils.get_unique_names_from_file(file_path, 0))

    return unique_names
73
74
75 04a2b5a4 petrh
def get_unknown_devices_set(config, devices):
    """
    Return the devices that have no entry in the configuration.

    Args:
        config:  loaded configuration file of dataset; the keys of its
                 "devices" entry are the known device names
        devices: set of unique devices contained in dataset

    Returns:
        the members of devices that are not keys of config["devices"]
    """
    known_devices = config["devices"].keys()

    return devices.difference(known_devices)
90
91
92 04a2b5a4 petrh
def unzip_all_csv_zip_files_in_folder(path):
    """
    Extract every .zip archive in the directory and delete it afterwards.

    All archives are extracted directly into the same directory. The list
    of archives is taken before any extraction starts, so archives that
    are themselves produced by an extraction are not processed in the
    same call.

    Args:
        path: Path to CrawledData directory containing ignore.txt file

    Raises:
        OSError: if the directory cannot be listed or an archive cannot
            be read or removed
        zipfile.BadZipFile: if a file ending in .zip is not a valid archive
    """
    # os.path.join inserts a separator only when `path` lacks one, so the
    # archives are found whether or not `path` ends with a slash.
    zip_paths = [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if file_name.endswith(".zip")
    ]

    for zip_path in zip_paths:
        with zipfile.ZipFile(zip_path, "r") as archive:
            archive.extractall(path)

        # Reached only after a successful extraction, so an archive that
        # failed to unpack is never deleted.
        os.remove(zip_path)
112
113
114 1187e871 petrh
def clean_folder(path):
    """
    Deletes all files in folder

    Sub-directories are left in place: os.remove cannot delete a
    directory, and raising on one would abort the clean-up halfway.
    Symbolic links are removed as links, whatever they point to.

    Args:
        path: path to folder

    Raises:
        OSError: if the folder cannot be listed or a file cannot be removed
    """
    for name in os.listdir(path):
        # os.path.join inserts a separator only when `path` lacks one, so
        # the entry is found whether or not `path` ends with a slash.
        entry_path = os.path.join(path, name)

        # Skip real directories; a link to a directory is still removed.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            continue

        os.remove(entry_path)