Projekt

Obecné

Profil

Stáhnout (2.75 KB) Statistiky
| Větev: | Revize:
1
import os
2
import zipfile
3
from CSV import CSVutils
4

    
5

    
6
def list_of_all_files(path):
    """
    List the entries of a directory that are not named in its ignore.txt

    Args:
        path: path to Directory (must also contain the ignore.txt file)

    Returns:
        set with names of all directory entries that are not listed
        in ignore.txt
    """
    directory_entries = os.listdir(path)
    ignored_names = load_ignore_set(path)

    # Keep only the entries that the ignore file does not mention
    return {entry for entry in directory_entries if entry not in ignored_names}
21

    
22

    
23
def load_ignore_set(path):
    """
    Reads ignore.txt line by line and collects the names in a set

    Args:
        path: Path to directory containing ignore.txt file
              (with or without a trailing path separator)

    Returns:
        set of names contained in ignore.txt file

    Raises:
        OSError: if ignore.txt cannot be opened (e.g. it does not exist)
    """
    ignore_file_path = os.path.join(path, "ignore.txt")

    with open(ignore_file_path, "r") as file:
        # Strip only the line terminator: slicing with [:-1] would cut off
        # the last character of a final line without a trailing newline.
        return {line.rstrip("\n") for line in file}
40

    
41

    
42
def update_ignore_set(path, file_name):
    """
    Adds file_name as a new line at the end of the ignore file

    The file is opened in append mode, so it is created when missing.

    Args:
        path: Path to directory containing ignore.txt file
              (with or without a trailing path separator)
        file_name: name of file you want to add to ignore file
    """
    # os.path.join keeps the target inside the directory even when
    # path does not end with a separator
    ignore_file_path = os.path.join(path, "ignore.txt")

    with open(ignore_file_path, "a") as file:
        file.write(file_name + '\n')
51

    
52

    
53
def get_devices_set(path):
    """
    Goes through every not loaded file (not listed in the ignore.txt of path),
    extracts the names from column 0 of each file via
    CSVutils.get_unique_names_from_file and merges them into one set

    Args:
        path: Path to Processed directory containing ignore.txt file

    Returns:
        set of unique names contained in not loaded files
    """
    files_in_dir = list_of_all_files(path)

    unique_names = set()

    for file_name in files_in_dir:
        names = CSVutils.get_unique_names_from_file(
            os.path.join(path, file_name), 0)

        # NOTE(review): the helper is not visible here; presumably it returns
        # a collection of names - confirm. set.add() would store that whole
        # collection as one element (or raise TypeError for a list/set),
        # so merge its items instead. A single string is still added as-is.
        if isinstance(names, str):
            unique_names.add(names)
        else:
            unique_names.update(names)

    return unique_names
73

    
74

    
75
def get_unknown_devices_set(config, devices):
    """
    Compares config and devices and returns the difference

    Args:
        config:  loaded configuration file of dataset, must contain
                 the "devices" key
        devices: unique devices contained in dataset (any iterable of names)

    Returns:
        set of names from devices that are not present in config["devices"]

    Raises:
        KeyError: if config has no "devices" key
    """
    # An empty devices section may be loaded as None; treat it as
    # "no known devices" instead of failing on it
    known_devices = set(config["devices"] or ())

    # set() makes the comparison work for lists and other iterables too
    return set(devices) - known_devices
90

    
91

    
92
def unzip_all_csv_zip_files_in_folder(path):
    """
    Finds all entries of the directory whose name ends with .zip,
    extracts each of them into the same directory and then
    deletes the zip file

    Args:
        path: Path to CrawledData directory
              (with or without a trailing path separator)
    """
    # Collect the archives first so that extracted files never influence
    # which entries are processed
    zip_paths = [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if file_name.endswith(".zip")
    ]

    for zip_path in zip_paths:
        with zipfile.ZipFile(zip_path, "r") as zip_archive:
            zip_archive.extractall(path)

        # Delete the archive only after it was extracted and closed
        os.remove(zip_path)
112

    
113

    
(3-3/3)