Projekt

Obecné

Profil

Stáhnout (2.69 KB) Statistiky
| Větev: | Revize:
1 c8f3051b petrh
import os
2
import zipfile
3 af7609b5 Tomáš Ballák
from shared_types import ConfigType, StringSetType
4 d6ca840d petrh
from Utilities.CSV import csv_utils
5
from Utilities.Database import database_record_logs
6 c8f3051b petrh
7
8 af7609b5 Tomáš Ballák
def list_of_all_new_files(ignore_set: StringSetType,
                          path: str) -> StringSetType:
    """
    Return the directory entries of ``path`` that are not in ``ignore_set``.

    Args:
        ignore_set: names of entries that should be left out of the result
        path: path to the directory that is listed

    Returns:
        set with the names (not full paths) of all entries in the
        directory that are not contained in ``ignore_set``
    """
    # os.listdir yields bare names, so they are compared with the ignore
    # set as-is; no path prefix is added here.
    return set(os.listdir(path)).difference(ignore_set)
22
23
24 af7609b5 Tomáš Ballák
def get_devices_set(dataset_name: str, path: str) -> StringSetType:
    """
    Collect the unique device names found in the not yet loaded files.

    Goes through every entry of ``path`` that is not in the ignore set
    loaded for ``dataset_name`` and merges the names extracted from each
    of those files into one set.

    Args:
        dataset_name: name of the dataset, passed to
            ``database_record_logs.load_ignore_set_loaded`` to obtain the
            set of file names to skip
        path: path to the Processed directory

    Returns:
        set of unique names contained in not loaded files
    """
    ignore_set = database_record_logs.load_ignore_set_loaded(dataset_name)
    new_files = list_of_all_new_files(ignore_set, path)

    unique_names = set()

    for file_name in new_files:
        # os.path.join keeps the path valid whether or not ``path`` ends
        # with a separator (plain concatenation did not).
        # NOTE(review): the 0 is presumably the index of the column that
        # holds the device name (first column) - confirm in csv_utils.
        unique_names.update(
            csv_utils.get_unique_names_from_file(
                os.path.join(path, file_name), 0))

    return unique_names
46
47
48 af7609b5 Tomáš Ballák
def get_unknown_devices_set(config: ConfigType,
                            devices: StringSetType) -> StringSetType:
    """
    Compare the devices in the config with ``devices`` and return the
    difference.

    Args:
        config: loaded configuration file of dataset; its "devices" entry
            is a mapping keyed by device name
        devices: set of unique devices contained in dataset

    Returns:
        names from ``devices`` that are not keys of ``config["devices"]``
        (unknown devices)
    """
    # set.difference accepts any iterable and iterating a mapping yields
    # its keys, so no intermediate set of known devices is needed.
    return devices.difference(config["devices"])
64
65
66 af7609b5 Tomáš Ballák
def unzip_all_csv_zip_files_in_folder(path: str) -> None:
    """
    Unzip every ``.zip`` file found directly in ``path``.

    Each archive is extracted into ``path`` itself and the zip file is
    deleted after it has been extracted.

    Args:
        path: path to the directory that contains the zip files
    """
    # The directory is listed once up front, so archives that only appear
    # as a result of an extraction are not processed in the same call.
    # os.path.join keeps the paths valid whether or not ``path`` ends with
    # a separator (plain concatenation did not).
    zip_paths = [os.path.join(path, name)
                 for name in os.listdir(path)
                 if name.endswith(".zip")]

    for zip_path in zip_paths:
        with zipfile.ZipFile(zip_path, "r") as archive:
            archive.extractall(path)

        # Remove the archive only after it has been closed.
        os.remove(zip_path)
86
87
88 af7609b5 Tomáš Ballák
def clean_folder(path: str) -> None:
    """
    Delete all files in folder.

    Every entry listed in ``path`` is removed with ``os.remove``. That
    call cannot delete directories, so the folder is expected to contain
    files only; a subdirectory makes ``os.remove`` raise ``OSError``.

    Args:
        path: path to folder
    """
    for name in os.listdir(path):
        # os.path.join keeps the path valid whether or not ``path`` ends
        # with a separator (plain concatenation did not).
        os.remove(os.path.join(path, name))