Projekt

Obecné

Profil

Stáhnout (2.69 KB) Statistiky
| Větev: | Revize:
1
import os
2
import zipfile
3
from shared_types import ConfigType, StringSetType
4
from Utilities.CSV import csv_utils
5
from Utilities.Database import database_record_logs
6

    
7

    
8
def list_of_all_new_files(ignore_set: StringSetType,
                          path: str) -> StringSetType:
    """
    Return the directory entries that are not listed in the ignore set.

    Args:
        ignore_set: names of entries to leave out of the result
        path: path to the directory to list

    Returns:
        set of entry names found in the directory (as reported by
        os.listdir) that are not present in ignore_set
    """
    # os.listdir yields bare names (no directory prefix), so the ignore set
    # is compared against names, not full paths.
    return set(os.listdir(path)).difference(ignore_set)
22

    
23

    
24
def get_devices_set(dataset_name: str, path: str) -> StringSetType:
    """
    Collect the unique device names found in files that are not yet loaded.

    Goes through every entry of the directory that is not in the ignore set
    obtained for the dataset and gathers the names found in column 0 of each
    of those files into one set.

    Args:
        dataset_name: name of the dataset, used to look up its ignore set
            (presumably the files that were already loaded)
        path: path to the Processed directory

    Returns:
        set of unique names contained in the not yet loaded files
    """
    ignore_set = database_record_logs.load_ignore_set_loaded(dataset_name)
    new_files = list_of_all_new_files(ignore_set, path)

    unique_names = set()

    for file_name in new_files:
        # os.path.join keeps this working whether or not `path` ends with
        # a separator; plain string concatenation did not.
        unique_names.update(
            csv_utils.get_unique_names_from_file(
                os.path.join(path, file_name), 0))

    return unique_names
46

    
47

    
48
def get_unknown_devices_set(config: ConfigType,
                            devices: StringSetType) -> StringSetType:
    """
    Compare the devices of the config with the given devices and return
    those that the config does not know.

    Args:
        config: loaded configuration file of the dataset; its "devices"
            entry is a mapping keyed by device name
        devices: set of unique devices contained in the dataset

    Returns:
        names from devices that are not keys of config["devices"]
        (unknown devices)
    """
    # Iterating a mapping yields its keys, so no intermediate set is needed.
    return devices.difference(config["devices"])
64

    
65

    
66
def unzip_all_csv_zip_files_in_folder(path: str) -> None:
    """
    Extract every .zip archive found directly in the directory into that
    same directory and delete the archive afterwards.

    Args:
        path: path to the directory holding the archives (the CrawledData
            directory); works with or without a trailing separator
    """
    # Snapshot the archives first so anything extracted below is not picked
    # up again during this call.
    zip_paths = [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if file_name.endswith(".zip")
    ]

    for zip_path in zip_paths:
        with zipfile.ZipFile(zip_path, "r") as archive:
            archive.extractall(path)

        # Remove only after the archive has been closed.
        os.remove(zip_path)
86

    
87

    
88
def clean_folder(path: str) -> None:
    """
    Delete all files in the folder.

    Every entry reported by os.listdir is passed to os.remove, so the folder
    is expected to contain files only.

    Args:
        path: path to the folder; works with or without a trailing separator
    """
    for file_name in os.listdir(path):
        # os.path.join instead of string concatenation, which silently
        # pointed outside the folder when `path` had no trailing separator.
        os.remove(os.path.join(path, file_name))
(3-3/4)