Projekt

Obecné

Profil

Stáhnout (1.74 KB) Statistiky
| Větev: | Revize:
1
import os
2

    
3
# Folder holding one configuration file per dataset
CONFIG_FILES_PATH = "DatasetConfigs/"
# Folder where the crawled (downloaded) data is stored
CRAWLED_DATA_PATH = "CrawledData/"
# Folder where the processed data is stored
PROCESSED_DATA_PATH = "ProcessedData/"
# Folder where the crawler logs are stored
CRAWLER_LOGS_PATH = "CrawlerLogs/"
11

    
12

    
13
def _ensure_directory(path):
    """
    Create the directory ``path`` if it does not exist yet.

    A failure is reported on standard output instead of being raised, so
    the caller can still attempt the remaining folders.
    """
    if os.path.isdir(path):
        return
    try:
        os.mkdir(path)
    except OSError as e:
        print(e)
        # Report the path that was actually attempted; the original code
        # named the processed-data folder when the crawler-log folder failed.
        print("Creation of the directory %s failed" % path)


def prepare_strucure_for_all_datasets():
    """
    Prepare the folders that are necessary but do not contain code, so they
    are excluded from the repository by gitignore.

    Creates the crawled-data, processed-data and crawler-log folders, then
    calls prepare_structure for every entry found in CONFIG_FILES_PATH. The
    dataset name is the part of the entry name before its first dot.
    """
    for base_path in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH, CRAWLER_LOGS_PATH):
        _ensure_directory(base_path)

    for file_name in os.listdir(CONFIG_FILES_PATH):
        # Everything before the first dot is taken as the dataset name.
        prepare_structure(file_name.split('.')[0])
45

    
46

    
47
def prepare_structure(dataset_name):
    """
    Make sure the given dataset has its own subfolder inside both the
    crawled-data folder and the processed-data folder.
    """
    for base_folder in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        dataset_folder = base_folder + dataset_name
        if os.path.isdir(dataset_folder):
            continue
        os.mkdir(dataset_folder)
59

    
60

    
61
# Runs whenever this module is executed or imported: announce the step
# (message is in Czech), then build the folder layout for all datasets.
print("Inicializuji počáteční strukturu pro stažená a zpracovaná data")
prepare_strucure_for_all_datasets()
(4-4/12)