Projekt

Obecné

Profil

Stáhnout (1.78 KB) Statistiky
| Větev: | Revize:
1
import os
2

    
3
# Folder locations used by this script. All of them are relative paths
# written with a trailing slash.

# Dataset configuration files; every entry here is treated as one dataset
CONFIG_FILES_PATH = "DatasetConfigs/"
# Output folder for crawled data
CRAWLED_DATA_PATH = "CrawledData/"
# Output folder for processed data
PROCESSED_DATA_PATH = "ProcessedData/"
# Output folder for crawler logs
CRAWLER_LOGS_PATH = "CrawlerLogs/"
11

    
12

    
13
def prepare_strucure_for_all_datasets() -> None:
    """
    Prepare the data folders and one sub-folder structure per dataset.

    The folders for crawled data, processed data and crawler logs contain no
    code and are excluded from the repository by gitignore, so they are
    created here when missing. A failure to create one of them is printed
    and the remaining folders are still attempted.

    Afterwards every entry in CONFIG_FILES_PATH is treated as a dataset
    configuration: the part of its name before the first dot is the dataset
    name, which is passed to prepare_structure().

    :raises OSError: if CONFIG_FILES_PATH cannot be listed, or if
        prepare_structure() fails to create a dataset folder
    """
    for directory in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH, CRAWLER_LOGS_PATH):
        try:
            # exist_ok=True replaces the racy "isdir() then mkdir()" check
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            print(e)
            print("Nelze vytvořit adresář %s" % directory)

    for file in os.listdir(CONFIG_FILES_PATH):
        dataset_name = file.split('.')[0]
        if not dataset_name:
            # Entries such as ".gitignore" have nothing before the first dot
            # and therefore do not name a dataset
            continue
        prepare_structure(dataset_name)
44

    
45

    
46
def prepare_structure(dataset_name: str) -> None:
    """
    Create the folders of one dataset for crawled and for processed data.

    A folder named after the dataset is created inside CRAWLED_DATA_PATH and
    inside PROCESSED_DATA_PATH. Folders that already exist are left as they
    are, and a missing base folder is created along the way.

    :param dataset_name: name of the dataset, used as the folder name
    :raises OSError: if a folder cannot be created
    """
    for base_path in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        # makedirs(exist_ok=True) avoids the check-then-create race and does
        # not fail when the base folder itself is still missing
        os.makedirs(os.path.join(base_path, dataset_name), exist_ok=True)
58

    
59

    
60
def main() -> None:
    """
    Announce the initialisation on stdout and build the folder structure
    for all datasets.
    """
    # User-facing message, intentionally kept in Czech
    announcement = "Inicializuji počáteční strukturu pro stažená a zpracovaná data"
    print(announcement)
    prepare_strucure_for_all_datasets()
63

    
64

    
65
# Run the initialisation only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
(4-4/13)