1
|
import os
|
2
|
from Utilities import FolderProcessor
|
3
|
|
4
|
# Root folder of raw crawled data; datasets live in sub-folders named after them
CRAWLED_DATA_PATH = "CrawledData/"
# Root folder of processed data; same per-dataset sub-folder layout
PROCESSED_DATA_PATH = "ProcessedData/"
# Root folder of crawler logs; same per-dataset sub-folder layout
CRAWLER_LOGS_PATH = "CrawlerLogs/"
# Folder with dataset configuration files
# (note: unlike the paths above this one has no trailing slash)
CONFIG_FILES_PATH = "DatasetConfigs"
|
12
|
|
13
|
|
14
|
def create_ignore_file(path, text):
    """
    Creates ignore file

    Any existing ignore.txt in the directory is overwritten.

    Args:
        path: path to directory for creating ignore.txt
              (with or without a trailing separator)
        text: text that will be on first line of ignore.txt can be None,
              in which case the file is created empty
    """
    # os.path.join avoids a doubled separator when path already ends with
    # "/" and keeps an empty path relative instead of pointing at "/".
    ignore_file_path = os.path.join(path, "ignore.txt")
    with open(ignore_file_path, "w") as file:
        if text is not None:
            file.write(text + "\n")
|
24
|
|
25
|
|
26
|
def create_updated_file(path):
    """
    Creates updated file

    The file is (re)written with a single line containing "0".

    Args:
        path: path to directory for creating updated.txt
              (with or without a trailing separator)
    """
    # os.path.join avoids a doubled separator when path already ends with
    # "/" and keeps an empty path relative instead of pointing at "/".
    updated_file_path = os.path.join(path, "updated.txt")
    with open(updated_file_path, "w") as file:
        file.write("0\n")
|
34
|
|
35
|
|
36
|
def reset_dataset(dataset_name):
    """
    Resets all saved data in dataset except config and implementation

    The dataset's crawled-data and processed-data folders are passed to
    FolderProcessor.clean_folder and then receive an ignore.txt whose first
    line is "ignore.txt". The dataset's crawler-logs folder is cleaned the
    same way, but gets an empty ignore.txt plus a fresh updated.txt.

    Args:
        dataset_name: name of dataset that has existing configuration file
                      (the configuration file itself is not checked here)

    Raises:
        ValueError: if dataset_name is empty or contains only whitespace
    """
    # An empty name would make the paths below resolve to the top-level
    # data folders themselves, so every dataset would be handed to
    # clean_folder at once. Refuse that instead of guessing.
    if not dataset_name or not dataset_name.strip():
        raise ValueError("dataset_name must be a non-empty dataset name")

    # Crawled and processed data are reset identically.
    for data_root in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        path = data_root + dataset_name + "/"
        FolderProcessor.clean_folder(path)
        create_ignore_file(path, "ignore.txt")

    # Logs get an empty ignore file and additionally an updated.txt.
    path = CRAWLER_LOGS_PATH + dataset_name + "/"
    FolderProcessor.clean_folder(path)
    create_ignore_file(path, None)
    create_updated_file(path)
|
54
|
|
55
|
# Prompt (in Czech) for the name of the dataset to reset, then read the
# answer from standard input and reset that dataset.
print("Zadejte jméno Datasetu který chcete resetovat:\n")
requested_dataset = input()
reset_dataset(requested_dataset)
|