Projekt

Obecné

Profil

Stáhnout (1.78 KB) Statistiky
| Větev: | Revize:
1 d6ca840d petrh
import os
2
3
# Directory that holds the dataset configuration files
CONFIG_FILES_PATH: str = "DatasetConfigs/"
# Directory for crawled data
CRAWLED_DATA_PATH: str = "CrawledData/"
# Directory for processed data
PROCESSED_DATA_PATH: str = "ProcessedData/"
# Directory for crawler logs
CRAWLER_LOGS_PATH: str = "CrawlerLogs/"
11
12
13 af7609b5 Tomáš Ballák
def prepare_strucure_for_all_datasets() -> None:
    """
    Create the base data folders and one sub-folder per configured dataset.

    The crawled-data, processed-data and crawler-log folders contain no code
    and are excluded from the repository by gitignore, so they have to be
    created locally. A failure to create one of them is reported on stdout
    and does not stop the remaining folders from being attempted.

    Afterwards every entry found in CONFIG_FILES_PATH is treated as a dataset
    configuration: the part of its name before the first '.' is used as the
    dataset name and passed to prepare_structure().

    Raises:
        OSError: if CONFIG_FILES_PATH cannot be listed, or if
            prepare_structure() fails to create a dataset folder.
    """
    base_directories = (
        CRAWLED_DATA_PATH,
        PROCESSED_DATA_PATH,
        CRAWLER_LOGS_PATH,
    )
    for directory in base_directories:
        try:
            # exist_ok=True replaces the separate isdir() check, so there is
            # no window between "check" and "create" for another process.
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            print(e)
            print("Nelze vytvořit adresář %s" % directory)

    for file_name in os.listdir(CONFIG_FILES_PATH):
        dataset_name = file_name.split('.')[0]
        # A name starting with '.' (e.g. ".gitkeep") yields an empty dataset
        # name, which would only point back at the base folders - skip it.
        if dataset_name:
            prepare_structure(dataset_name)
44
45
46 af7609b5 Tomáš Ballák
def prepare_structure(dataset_name: str) -> None:
    """
    Create the per-dataset folders for crawled and processed data.

    A folder named after the dataset is created inside CRAWLED_DATA_PATH and
    inside PROCESSED_DATA_PATH. Folders that already exist are left as they
    are.

    Args:
        dataset_name: Name of the dataset; used as the folder name.

    Raises:
        OSError: if a folder cannot be created (for example because of
            missing permissions or because a regular file with the same
            name already exists).
    """
    for base_path in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        # makedirs(exist_ok=True) avoids the isdir()/mkdir() race and also
        # creates the base folder when it is still missing.
        os.makedirs(os.path.join(base_path, dataset_name), exist_ok=True)
58
59
60 af7609b5 Tomáš Ballák
def main() -> None:
    """Print a start-up message and create the initial data folder structure."""
    print("Inicializuji počáteční strukturu pro stažená a zpracovaná data")
    prepare_strucure_for_all_datasets()


# Only run the initialisation when this file is executed as a script.
if __name__ == "__main__":
    main()