Projekt

Obecné

Profil

Stáhnout (1.54 KB) Statistiky
| Větev: | Revize:
1
import os
2
from Utilities import FolderProcessor
3

    
4
# Folders used by the dataset reset helpers. All of them are relative paths,
# so they are resolved against the current working directory.

# Path to crawled data
CRAWLED_DATA_PATH = "../CrawledData/"
# Path to processed data
PROCESSED_DATA_PATH = "../ProcessedData/"
# Path to crawler logs
CRAWLER_LOGS_PATH = "../CrawlerLogs/"
# Path to dataset configuration files (note: no trailing slash, unlike the others)
CONFIG_FILES_PATH = "../DatasetConfigs"
12

    
13

    
14
def create_ignore_file(path, text):
    """
    Creates (or overwrites) ignore.txt inside the given directory
    Args:
        path: path to directory for creating ignore.txt
        text: text that will be on first line of ignore.txt; can be None,
            in which case the file is created empty
    """
    # os.path.join avoids a doubled separator when path already ends with "/"
    ignore_path = os.path.join(path, "ignore.txt")
    with open(ignore_path, "w", encoding="utf-8") as file:
        if text is not None:
            file.write(text + "\n")
24

    
25

    
26
def reset_dataset(dataset_name):
    """
    Resets all saved data in dataset except config and implementation

    For the crawled data, processed data and crawler logs folders of the
    dataset (in that order) the folder is cleaned with
    FolderProcessor.clean_folder and a fresh ignore.txt is written into it.
    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # (root folder, first line of ignore.txt); the logs folder gets an
    # empty ignore.txt
    targets = (
        (CRAWLED_DATA_PATH, "ignore.txt"),
        (PROCESSED_DATA_PATH, "ignore.txt"),
        (CRAWLER_LOGS_PATH, None),
    )

    for root, ignore_text in targets:
        path = root + dataset_name + "/"
        FolderProcessor.clean_folder(path)
        create_ignore_file(path, ignore_text)
43

    
44

    
45
def reset_all_datasets():
    """
    Resets all saved data in all datasets with config file except configs and implementation

    The dataset name is the part of each entry name in CONFIG_FILES_PATH
    that precedes the first dot.
    """
    for config_file in os.listdir(CONFIG_FILES_PATH):
        dataset_name = config_file.split('.')[0]
        # Hidden entries such as ".gitignore" give an empty name, which would
        # make reset_dataset operate on the data root folders themselves
        if not dataset_name:
            continue
        reset_dataset(dataset_name)
53

    
54

    
55
# Script entry point: resets every dataset that has a configuration file.
# NOTE(review): this call is at module level, so it also runs when the module
# is imported - consider an `if __name__ == "__main__":` guard if that is not intended.
reset_all_datasets()
(5-5/5)