Projekt

Obecné

Profil

Stáhnout (2.44 KB) Statistiky
| Větev: | Revize:
1
import os
2
import zipfile
3
from Utilities.CSV import csv_utils
4
from Utilities.Database import database_record_logs
5

    
6

    
7
def list_of_all_new_files(ignore_set, path):
    """
    List the entries of a directory and drop those named in ignore_set.

    Args:
        ignore_set: iterable of entry names that must be left out of the result
        path: path to the directory whose entries are listed

    Returns:
        set with the names found in the directory that are not in ignore_set
    """
    directory_entries = set(os.listdir(path))
    new_entries = directory_entries.difference(ignore_set)
    return new_entries
21

    
22

    
23

    
24
def get_devices_set(dataset_name, path):
    """
    Collect the unique device names found in files that were not loaded yet.

    The set of already loaded files is obtained from
    database_record_logs.load_ignore_set_loaded(dataset_name); every other
    entry of the directory is handed to csv_utils.get_unique_names_from_file
    with column index 0 and the results are merged into one set.

    Args:
        dataset_name: name of the dataset, passed to load_ignore_set_loaded
        path: Path to Processed directory

    Returns:
        set of unique names contained in not loaded files
    """
    ignore_set = database_record_logs.load_ignore_set_loaded(dataset_name)
    unique_names = set()

    for file_name in list_of_all_new_files(ignore_set, path):
        # os.path.join works whether or not path ends with a separator;
        # plain concatenation produced a wrong path when it did not
        file_path = os.path.join(path, file_name)
        unique_names.update(csv_utils.get_unique_names_from_file(file_path, 0))

    return unique_names
45

    
46

    
47
def get_unknown_devices_set(config, devices):
    """
    Compare the devices found in the data with the configured ones and
    return the difference.

    Args:
        config: loaded configuration file of dataset; its "devices" entry is
            a mapping keyed by device name
        devices: set of unique devices contained in dataset

    Returns:
        set of the devices that are not keys of config["devices"]
        (unknown devices)
    """
    # difference() accepts any iterable and iterating a mapping yields its
    # keys, so no intermediate set of keys has to be built
    return devices.difference(config["devices"])
62

    
63

    
64
def unzip_all_csv_zip_files_in_folder(path):
    """
    Extract every .zip archive stored directly in the folder and delete it.

    The list of archives is collected before anything is extracted. Each
    archive is extracted into the folder itself and removed only after its
    extraction finished.

    Args:
        path: path to the folder holding the zip archives; a trailing
            separator is optional
    """
    # os.path.join instead of string concatenation so that a path without
    # a trailing separator still points inside the folder
    zips = [
        os.path.join(path, file_name)
        for file_name in os.listdir(path)
        if file_name.endswith(".zip")
    ]

    for zip_file in zips:
        with zipfile.ZipFile(zip_file, "r") as unzipped_file:
            unzipped_file.extractall(path)

        os.remove(zip_file)
84

    
85

    
86
def clean_folder(path):
    """
    Deletes all files in folder

    Every entry returned by os.listdir is passed to os.remove, so an entry
    that is a directory makes os.remove raise OSError.

    Args:
        path: path to folder; a trailing separator is optional
    """
    for file_name in os.listdir(path):
        # os.path.join instead of string concatenation so that a path
        # without a trailing separator still points inside the folder
        os.remove(os.path.join(path, file_name))
(3-3/4)