1
|
import os
|
2
|
from Utilities import folder_processor
|
3
|
from Utilities.Database import database_loader
|
4
|
from Utilities import configure_functions
|
5
|
|
6
|
# Root folders used by the reset helpers below. Dataset folders are addressed
# as <root> + <dataset name> + "/", so the data roots keep a trailing slash.
CRAWLED_DATA_PATH = "CrawledData/"      # crawled (downloaded) data
PROCESSED_DATA_PATH = "ProcessedData/"  # processed data
CRAWLER_LOGS_PATH = "CrawlerLogs/"      # crawler logs
# Folder with dataset configuration files; it is only listed with
# os.listdir here, hence no trailing slash.
CONFIG_FILES_PATH = "DatasetConfigs"
|
14
|
|
15
|
|
16
|
def hard_reset_dataset(dataset_name):
    """
    Hard reset of one dataset: cleans its crawled-data folder and its
    processed-data folder, then removes its database.

    Args:
        dataset_name: name of the dataset; used as the sub-folder name under
            both data roots and passed to the database loader
    """
    # Same order as a manual sequence: crawled data first, processed second.
    for data_root in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        folder_processor.clean_folder(data_root + dataset_name + "/")

    database_loader.remove_dataset_database(dataset_name)
|
30
|
|
31
|
|
32
|
def soft_reset_dataset(dataset_name):
    """
    Soft reset of one dataset: cleans its processed-data folder and removes
    its database. The crawled-data folder is not touched by this function.

    Args:
        dataset_name: name of the dataset; used as the sub-folder name under
            the processed-data root and passed to the database loader
    """
    folder_processor.clean_folder(PROCESSED_DATA_PATH + dataset_name + "/")
    database_loader.remove_dataset_database(dataset_name)
|
42
|
|
43
|
|
44
|
|
45
|
def soft_reset_all_datasets():
    """
    Soft-reset every dataset that has an entry in the configuration folder.

    Each entry name returned by os.listdir(CONFIG_FILES_PATH) is cut at its
    first '.' to obtain the dataset name, which is then passed to
    soft_reset_dataset.

    Entries whose derived name is empty (names starting with a dot, such as
    ".gitkeep") are skipped. An empty name would make soft_reset_dataset hand
    the processed-data root itself (PROCESSED_DATA_PATH + "/") to
    folder_processor.clean_folder instead of a single dataset folder.
    """
    for config_entry in os.listdir(CONFIG_FILES_PATH):
        dataset_name = config_entry.split('.')[0]
        if not dataset_name:
            # Dot-file in the config folder: not a dataset configuration.
            continue
        soft_reset_dataset(dataset_name)
|
53
|
|
54
|
|
55
|
def hard_reset_all_datasets():
    """
    Hard-reset every dataset that has an entry in the configuration folder.

    Each entry name returned by os.listdir(CONFIG_FILES_PATH) is cut at its
    first '.' to obtain the dataset name, which is then passed to
    hard_reset_dataset.

    Entries whose derived name is empty (names starting with a dot, such as
    ".gitkeep") are skipped. An empty name would make hard_reset_dataset hand
    the crawled-data and processed-data roots themselves to
    folder_processor.clean_folder instead of a single dataset's folders.
    """
    for config_entry in os.listdir(CONFIG_FILES_PATH):
        dataset_name = config_entry.split('.')[0]
        if not dataset_name:
            # Dot-file in the config folder: not a dataset configuration.
            continue
        hard_reset_dataset(dataset_name)
|
63
|
|
64
|
|
65
|
# Interactive part of the script. Both answers are read up front and
# upper-cased, so the comparisons below are case-insensitive.

# Prompt (Czech): enter the dataset to reset, or '-ALL' for all of them.
print("Zadejte jméno Datasetu který chcete resetovat (pokud všechny zadejte '-ALL'):\n")
dataset_name = input().upper()

# Prompt (Czech): should the downloaded data be deleted as well? (ANO = yes, NE = no)
print("Chcete smazat i stažené stažená data ? (ANO/NE) \n")
input_decision = input().upper()

if dataset_name != '-ALL':
    # A single dataset was named: it must have a configuration file.
    test = configure_functions.check_if_there_is_a_config_file(dataset_name)
    if not (test == True):
        # Message (Czech): this dataset does not exist in the architecture.
        print("Tento dataset v architektuře neexistuje")
    elif input_decision == 'ANO':
        hard_reset_dataset(dataset_name)
    elif input_decision == 'NE':
        soft_reset_dataset(dataset_name)
    else:
        # Message (Czech): invalid input.
        print('Neplatný vstup (ANO/NE)')
elif input_decision == 'ANO':
    hard_reset_all_datasets()
elif input_decision == 'NE':
    soft_reset_all_datasets()
else:
    # Message (Czech): invalid input.
    print('Neplatný vstup (ANO/NE)')
|
91
|
|