Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 2494ea3a

Přidáno uživatelem Petr Hlaváč před asi 4 lety

Upraven systém spouštění skriptů

Zobrazit rozdíly:

modules/crawler/PrepareNewDataset.py
1 1
import os
2 2

  
3 3
# Path to crawled data
4
CRAWLED_DATA_PATH = "../CrawledData/"
4
CRAWLED_DATA_PATH = "CrawledData/"
5 5
# Path to processed data
6
PROCESSED_DATA_PATH = "../ProcessedData/"
6
PROCESSED_DATA_PATH = "ProcessedData/"
7 7
# Path to crawler logs
8
CRAWLER_LOGS_PATH = "../CrawlerLogs/"
8
CRAWLER_LOGS_PATH = "CrawlerLogs/"
9 9
# Path for DatasetCrawlers implementations
10
CRAWLER_PROGRAM_PATH = "../DatasetCrawler"
10
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
11 11
# Path for DatasetProcessors implementations
12
PROCESSOR_PROGRAM_PATH = "../DatasetProcessing"
12
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
13 13
# Path to dataset configuration files
14
CONFIG_FILES_PATH = "../DatasetConfigs"
14
CONFIG_FILES_PATH = "DatasetConfigs"
15 15

  
16 16

  
17 17
def create_default_config_file(dataset_name):
......
61 61
        file.write("    False if not implemented\n")
62 62
        file.write("    True when implemented\n")
63 63
        file.write("    \"\"\"\n")
64
        file.write("    #with open(filename, \"r\") as file:\n")
64 65
        file.write("    print(\"You must implements process_file method first!\")\n")
65 66
        file.write("    #CSVutils.export_data_to_csv(filename, date_dict)\n")
66 67
        file.write("    return False\n")
......
154 155
    create_default_processor(dataset_name)
155 156
    create_default_config_file(dataset_name)
156 157

  
157

  
158
prepare_dataset_structure("TEST")
158
print("Zadejte jméno nového datasetu:\n")
159
prepare_dataset_structure(input())
modules/crawler/RemoveDataset.py
29 29
    os.remove(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "Crawler.py")
30 30
    os.remove(PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "Processor.py")
31 31

  
32
    print("Dataset: " + dataset_name + " removed")
32
    print("Dataset " + dataset_name + " removed")
33 33

  
34
remove_dataset("TEST")
34
print("Zadejte jméno Datasetu který chcete odstranit:\n")
35
remove_dataset(input())
modules/crawler/RemoveDatasetDatabase.py
28 28

  
29 29
    print("Database Cleaned")
30 30

  
31

  
32
remove_dataset_database("KOLOBEZKY")
31
print("Zadejte jméno Datasetu který chcete odstranit z databáze:\n")
32
remove_dataset_database(input())
modules/crawler/ResetDataset.py
52 52
    create_ignore_file(path, None)
53 53
    create_updated_file(path)
54 54

  
55

  
56
def reset_all_datasets():
57
    """
58
    Resets all saved data in all datasets with config file except configs and implementation
59
    """
60
    datasets = os.listdir(CONFIG_FILES_PATH)
61

  
62
    for dataset in datasets:
63
        reset_dataset(dataset.split('.')[0])
64

  
65

  
66
reset_all_datasets()
55
print("Zadejte jméno Datasetu který chcete resetovat:\n")
56
reset_dataset(input())
modules/crawler/ResetDatasets.py
1
import os
2
from Utilities import FolderProcessor
3

  
4
# Path to crawled data
5
CRAWLED_DATA_PATH = "CrawledData/"
6
# Path to processed data
7
PROCESSED_DATA_PATH = "ProcessedData/"
8
# Path to crawler logs
9
CRAWLER_LOGS_PATH = "CrawlerLogs/"
10
# Path to dataset configuration files
11
CONFIG_FILES_PATH = "DatasetConfigs"
12

  
13

  
14
def create_ignore_file(path, text):
15
    """
16
    Creates ignore file
17
    Args:
18
        path: path to directory for creating ignore.txt
19
        text: text that will be on first line of ignore.txt can be None
20
    """
21
    with open(path + "/ignore.txt", "w") as file:
22
        if text is not None:
23
            file.write(text + "\n")
24

  
25

  
26
def create_updated_file(path):
27
    """
28
    Creates updated file
29
    Args:
30
        path: path to directory for creating updated.txt
31
    """
32
    with open(path + "/updated.txt", "w") as file:
33
            file.write(str(0) + "\n")
34

  
35

  
36
def reset_dataset(dataset_name):
37
    """
38
    Resets all saved data in dataset except config and implementation
39
    Args:
40
        dataset_name: name of dataset that has existing configuration file
41
    """
42
    path = CRAWLED_DATA_PATH + dataset_name + "/"
43
    FolderProcessor.clean_folder(path)
44
    create_ignore_file(path, "ignore.txt")
45

  
46
    path = PROCESSED_DATA_PATH + dataset_name + "/"
47
    FolderProcessor.clean_folder(path)
48
    create_ignore_file(path, "ignore.txt")
49

  
50
    path = CRAWLER_LOGS_PATH + dataset_name + "/"
51
    FolderProcessor.clean_folder(path)
52
    create_ignore_file(path, None)
53
    create_updated_file(path)
54

  
55

  
56
def reset_all_datasets():
57
    """
58
    Resets all saved data in all datasets with config file except configs and implementation
59
    """
60
    datasets = os.listdir(CONFIG_FILES_PATH)
61

  
62
    for dataset in datasets:
63
        reset_dataset(dataset.split('.')[0])
64

  
65

  
66
reset_all_datasets()

Také k dispozici: Unified diff