Projekt

Obecné

Profil

Stáhnout (2.44 KB) Statistiky
| Větev: | Revize:
1 c8f3051b petrh
import os
2
import zipfile
3 d6ca840d petrh
from Utilities.CSV import csv_utils
4
from Utilities.Database import database_record_logs
5 c8f3051b petrh
6
7 d6ca840d petrh
def list_of_all_new_files(ignore_set, path):
    """
    List the entries of a directory that are not in the ignore set

    Args:
        ignore_set: iterable of entry names to leave out
            (presumably the names of already processed files - confirm
            against the caller)
        path: path to the directory to list

    Returns:
        set with the names of all directory entries not contained
        in ignore_set
    """
    # os.listdir yields bare names (no directory prefix), so ignore_set has
    # to hold bare names as well for the difference to filter anything out
    return set(os.listdir(path)).difference(ignore_set)
21
22
23
24 d6ca840d petrh
def get_devices_set(dataset_name, path):
    """
    Collect the unique device names found in the not yet loaded files

    Goes through every entry of the directory that is not part of the
    ignore set returned by database_record_logs.load_ignore_set_loaded
    and merges the names read from each of those files into one set

    Args:
        dataset_name: value handed to
            database_record_logs.load_ignore_set_loaded (presumably the
            name of the dataset whose loaded files should be skipped)
        path: path to the directory with the files to inspect,
            with or without a trailing separator

    Returns:
        set of unique names contained in the not loaded files
    """
    ignore_set = database_record_logs.load_ignore_set_loaded(dataset_name)
    files_in_dir = list_of_all_new_files(ignore_set, path)

    unique_names = set()

    for file_name in files_in_dir:
        # os.path.join instead of plain concatenation, so the directory
        # does not have to end with a path separator
        file_path = os.path.join(path, file_name)
        # second argument 0 - presumably the index of the column holding
        # the device name (first column); confirm in csv_utils
        unique_names.update(csv_utils.get_unique_names_from_file(file_path, 0))

    return unique_names
45
46
47 04a2b5a4 petrh
def get_unknown_devices_set(config, devices):
    """
    Compares config and devices and returns the difference

    Args:
        config: loaded configuration of the dataset, a mapping whose
            "devices" entry is itself a mapping keyed by device name
        devices: set of unique devices contained in dataset

    Returns:
        set of the devices that are not keys of config["devices"]
        (unknown devices)
    """
    # Iterating a mapping yields its keys, so it can be handed to
    # difference() directly without building an intermediate set
    return devices.difference(config["devices"])
62
63
64 04a2b5a4 petrh
def unzip_all_csv_zip_files_in_folder(path):
    """
    Unzip every .zip archive of a directory into that directory

    Each archive is deleted once its content has been extracted.
    Entries that do not end with ".zip" are left untouched

    Args:
        path: path to the directory holding the archives,
            with or without a trailing separator
    """
    # The list of archives is built before anything is extracted, so files
    # coming out of one archive are never picked up as further archives.
    # os.path.join keeps this working when path has no trailing separator
    zips = [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if file_name.endswith(".zip")
    ]

    for zip_file in zips:
        with zipfile.ZipFile(zip_file, "r") as unziped_file:
            unziped_file.extractall(path)

        # Removed only after the archive has been closed by the with block
        os.remove(zip_file)
84
85
86 1187e871 petrh
def clean_folder(path):
    """
    Deletes all files in folder

    Every entry of the folder is handed to os.remove, so an entry that
    is itself a directory makes the call raise OSError

    Args:
        path: path to folder, with or without a trailing separator
    """
    for file_name in os.listdir(path):
        # os.path.join instead of plain concatenation, so the folder path
        # does not have to end with a path separator
        os.remove(os.path.join(path, file_name))