Projekt

Obecné

Profil

Stáhnout (1.73 KB) Statistiky
| Větev: | Revize:
1
import os
2
from Utilities import FolderProcessor
3

    
4
# Root folder of crawled data (one sub-folder per dataset)
CRAWLED_DATA_PATH = "CrawledData/"
# Root folder of processed data (one sub-folder per dataset)
PROCESSED_DATA_PATH = "ProcessedData/"
# Root folder of crawler logs (one sub-folder per dataset)
CRAWLER_LOGS_PATH = "CrawlerLogs/"
# Folder with dataset configuration files (note: no trailing slash)
CONFIG_FILES_PATH = "DatasetConfigs"
12

    
13

    
14
def create_ignore_file(path, text):
    """
    Creates (or overwrites) ignore.txt inside the given directory
    Args:
        path: path to directory for creating ignore.txt
        text: text that will be on first line of ignore.txt, can be None
              (in that case the file is created empty)
    """
    # os.path.join copes with a trailing separator and, unlike plain
    # concatenation, does not turn an empty path into "/ignore.txt"
    ignore_file_path = os.path.join(path, "ignore.txt")
    # Explicit encoding keeps the output independent of the platform default
    with open(ignore_file_path, "w", encoding="utf-8") as file:
        if text is not None:
            file.write(text + "\n")
24

    
25

    
26
def create_updated_file(path):
    """
    Creates (or overwrites) updated.txt inside the given directory
    The file contains a single line with the value 0
    Args:
        path: path to directory for creating updated.txt
    """
    # os.path.join copes with a trailing separator and, unlike plain
    # concatenation, does not turn an empty path into "/updated.txt"
    updated_file_path = os.path.join(path, "updated.txt")
    # Explicit encoding keeps the output independent of the platform default
    with open(updated_file_path, "w", encoding="utf-8") as file:
        file.write("0\n")
34

    
35

    
36
def reset_dataset(dataset_name):
    """
    Clears the crawled data, processed data and crawler log folders
    of one dataset and recreates their marker files
    Args:
        dataset_name: name of dataset that has existing configuration file
    """
    # Crawled and processed data folders get the same treatment:
    # clean the folder, then write ignore.txt whose first line is "ignore.txt"
    for data_root in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        data_dir = data_root + dataset_name + "/"
        FolderProcessor.clean_folder(data_dir)
        create_ignore_file(data_dir, "ignore.txt")

    # The log folder gets an empty ignore.txt and a fresh updated.txt
    logs_dir = CRAWLER_LOGS_PATH + dataset_name + "/"
    FolderProcessor.clean_folder(logs_dir)
    create_ignore_file(logs_dir, None)
    create_updated_file(logs_dir)
54

    
55

    
56
def reset_all_datasets():
    """
    Resets saved data of every dataset that has a file in CONFIG_FILES_PATH

    The dataset name is the part of the config file name before the first dot.
    Entries that yield an empty name (hidden files such as ".gitkeep") are
    skipped, because reset_dataset("") would point at the root data folders
    themselves instead of a single dataset
    """
    for config_file in os.listdir(CONFIG_FILES_PATH):
        dataset_name = config_file.split('.')[0]
        if not dataset_name:
            # Not a dataset config (name starts with a dot) - do not touch anything
            continue
        reset_dataset(dataset_name)
64

    
65

    
66
# Top-level call with no __main__ guard visible: every configured dataset is
# reset as soon as this module is run (or imported)
reset_all_datasets()
(11-11/14)