Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 04a2b5a4

Přidáno uživatelem Petr Hlaváč před asi 4 roky(ů)

Re #7939
- pridana dokumentace metod a trid
- korekce chyb v jmenech promenych
- pridani informaci pro vygenerovane skripty

Zobrazit rozdíly:

python-module/Utilities/FolderProcessor.py
1 1
import os
2 2
import zipfile
3
from CSV import CSVutils
3 4

  
4 5

  
5 6
def list_of_all_files(path):
7
    """
8
    Get all files from directory and all files written in ignore.txt
9
    and return the difference
10
    Args:
11
        path: path to Directory
12

  
13
    Returns:
14
        list with names of all files in directory
15
    """
6 16
    files_in_dir = os.listdir(path)
7 17

  
8 18
    ignore_set = load_ignore_set(path)
......
11 21

  
12 22

  
13 23
def load_ignore_set(path):
24
    """
25
    Reads ignore.txt line by line and adds each line to a set
26
    Args:
27
        path: Path to directory containing ignore.txt file
28

  
29
    Returns:
30
        set of names contained in ignore.txt file
31
    """
14 32
    ignore_set = set()
15 33

  
16 34
    with open(path + "ignore.txt", "r") as file:
......
21 39
    return ignore_set
22 40

  
23 41

  
24
def update_ignore_set(path,file_name):
25

  
42
def update_ignore_set(path, file_name):
43
    """
44
    Adds file_name to the ignore file
45
    Args:
46
        path: Path to directory containing ignore.txt file
47
        file_name: name of file you want to add to ignore file
48
    """
26 49
    with open(path + "ignore.txt", "a") as file:
27 50
        file.write(file_name + '\n')
28 51

  
29 52

  
30
def get_devices_set(folder):
53
def get_devices_set(path):
54
    """
55
     Goes through every not-yet-loaded file (not contained in ProcessedData/ignore.txt)
56
     Extracts the names, which should be in the first column, from each not-yet-loaded file
57
     Creates a set of unique device names
58

  
59
    Args:
60
        path: Path to Processed directory containing ignore.txt file
31 61

  
32
    files_in_dir = list_of_all_files(folder)
62
    Returns:
63
        set of unique names contained in not loaded files
64
    """
65
    files_in_dir = list_of_all_files(path)
33 66

  
34 67
    unique_names = set()
35 68

  
36 69
    for file_path in files_in_dir:
37
        with open(folder+file_path) as file:
38
            for line in file:
39
                array = line.split(";")
40
                name = array[0]
41
                unique_names.add(name)
70
        unique_names.add(CSVutils.get_unique_names_from_file(path+file_path, 0))
42 71

  
43 72
    return unique_names
44 73

  
45 74

  
46
def get_unknown_devices_set(config,devices):
75
def get_unknown_devices_set(config, devices):
76
    """
77
    Compares config and devices and returns the difference
78

  
79
    Args:
80
        config:  loaded configuration file of dataset
81
        devices: set of unique devices contained in dataset
82

  
83
    Returns:
84

  
85
    """
47 86
    devices_set = set(config["devices"].keys())
48 87
    unknown_devices_set = devices.difference(devices_set)
49 88

  
50 89
    return unknown_devices_set
51 90

  
52 91

  
53
def unzip_all_csv_zip_files_in_folder(folder):
54

  
55
    files_in_dir = os.listdir(folder)
92
def unzip_all_csv_zip_files_in_folder(path):
93
    """
94
    Lists all files in the directory and unzips those ending with .zip
95
    After unzipping, deletes the zip file
96
    Args:
97
        path: Path to CrawledData directory containing ignore.txt file
98
    """
99
    files_in_dir = os.listdir(path)
56 100
    zips = []
57 101

  
58 102
    for file in files_in_dir:
59 103
        if file.endswith(".zip"):
60
            zips.append(folder + file)
104
            zips.append(path + file)
61 105

  
62 106
    for zip_file in zips:
63 107

  
64 108
        with zipfile.ZipFile(zip_file, "r") as unziped_file:
65
            unziped_file.extractall(folder)
109
            unziped_file.extractall(path)
66 110

  
67 111
        os.remove(zip_file)
68 112

  

Také k dispozici: Unified diff