Revize af7609b5
Přidáno uživatelem Tomáš Ballák před více než 3 roky
modules/crawler/docker_prepare_structure.py | ||
---|---|---|
10 | 10 |
CRAWLER_LOGS_PATH = "CrawlerLogs/" |
11 | 11 |
|
12 | 12 |
|
13 |
def prepare_strucure_for_all_datasets(): |
|
13 |
def prepare_strucure_for_all_datasets() -> None:
|
|
14 | 14 |
""" |
15 | 15 |
Prepares folders that are necessery but does not contain code so they are excluded from gitlab by gitignore |
16 | 16 |
""" |
17 | 17 |
|
18 |
if not os.path.isdir(CRAWLED_DATA_PATH) :
|
|
18 |
if not os.path.isdir(CRAWLED_DATA_PATH):
|
|
19 | 19 |
try: |
20 | 20 |
os.mkdir(CRAWLED_DATA_PATH) |
21 | 21 |
except os.error as e: |
22 | 22 |
print(e) |
23 |
print("Creation of the directory %s failed" % CRAWLED_DATA_PATH)
|
|
23 |
print("Nelze vytvořit adresář %s" % CRAWLED_DATA_PATH)
|
|
24 | 24 |
|
25 |
if not os.path.isdir(PROCESSED_DATA_PATH) :
|
|
25 |
if not os.path.isdir(PROCESSED_DATA_PATH):
|
|
26 | 26 |
try: |
27 | 27 |
os.mkdir(PROCESSED_DATA_PATH) |
28 | 28 |
except os.error as e: |
29 | 29 |
print(e) |
30 |
print("Creation of the directory %s failed" % PROCESSED_DATA_PATH)
|
|
31 |
|
|
32 |
if not os.path.isdir(CRAWLER_LOGS_PATH) :
|
|
30 |
print("Nelze vytvořit adresář %s" % PROCESSED_DATA_PATH)
|
|
31 |
|
|
32 |
if not os.path.isdir(CRAWLER_LOGS_PATH):
|
|
33 | 33 |
try: |
34 | 34 |
os.mkdir(CRAWLER_LOGS_PATH) |
35 | 35 |
except os.error as e: |
36 | 36 |
print(e) |
37 |
print("Creation of the directory %s failed" % PROCESSED_DATA_PATH) |
|
38 |
|
|
37 |
print("Nelze vytvořit adresář %s" % CRAWLER_LOGS_PATH) |
|
39 | 38 |
|
40 | 39 |
files_in_dir = os.listdir(CONFIG_FILES_PATH) |
41 | 40 |
|
... | ... | |
44 | 43 |
prepare_structure(name[0]) |
45 | 44 |
|
46 | 45 |
|
47 |
def prepare_structure(dataset_name):
|
|
46 |
def prepare_structure(dataset_name: str) -> None:
|
|
48 | 47 |
""" |
49 | 48 |
Create folder for every dataset in newly created folder for processed and crawled data |
50 | 49 |
""" |
51 | 50 |
|
52 |
path = CRAWLED_DATA_PATH + dataset_name
|
|
53 |
if not os.path.isdir(path) :
|
|
51 |
path = CRAWLED_DATA_PATH + dataset_name |
|
52 |
if not os.path.isdir(path):
|
|
54 | 53 |
os.mkdir(path) |
55 | 54 |
|
56 |
path = PROCESSED_DATA_PATH + dataset_name
|
|
57 |
if not os.path.isdir(path):
|
|
55 |
path = PROCESSED_DATA_PATH + dataset_name |
|
56 |
if not os.path.isdir(path): |
|
58 | 57 |
os.mkdir(PROCESSED_DATA_PATH + dataset_name) |
59 | 58 |
|
60 | 59 |
|
61 |
print("Inicializuji počáteční strukturu pro stažená a zpracovaná data") |
|
62 |
prepare_strucure_for_all_datasets() |
|
60 |
def main() -> None: |
|
61 |
print("Inicializuji počáteční strukturu pro stažená a zpracovaná data") |
|
62 |
prepare_strucure_for_all_datasets() |
|
63 |
|
|
64 |
|
|
65 |
if __name__ == "__main__": |
|
66 |
main() |
Také k dispozici: Unified diff
Re #8193 - refactoring crawler