
Revision 04a2b5a4

Added by Petr Hlaváč about 4 years ago

Re #7939
- added documentation for methods and classes
- fixed errors in variable names
- added information for the generated scripts

View differences:

python-module/Scripts/PrepareNewDataset.py
 import os
 
+# Path to crawled data
 CRAWLED_DATA_PATH = "../CrawledData/"
+# Path to processed data
 PROCESSED_DATA_PATH = "../ProcessedData/"
+# Path to crawler logs
 CRAWLER_LOGS_PATH = "../CrawlerLogs/"
+# Path for DatasetCrawlers implementations
 CRAWLER_PROGRAM_PATH = "../DatasetCrawler"
+# Path for DatasetProcessors implementations
 PROCESSOR_PROGRAM_PATH = "../DatasetProcessing"
+# Path to dataset configuration files
 CONFIG_FILES_PATH = "../DatasetConfigs"
 
 
 def create_default_config_file(dataset_name):
+    """
+    Creates default config file
+
+    Args:
+        dataset_name: Name of newly created dataset
+    """
     with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file:
         file.write("# dataset name under which it will be shown in the application\n")
         file.write("dataset-name: " + dataset_name + "\n")
         file.write("# root folder that contains links to the dataset\n")
-        file.write("url: INSERT URL HERE/\n")
+        file.write("url: INSERT URL HERE\n")
         file.write("# optional parameter that specifies the name pattern of the datasets to be downloaded\n")
         file.write("regex: INSERT REGEX HERE\n")
         file.write("# optional parameter that says how often to look for new datasets; if empty, "
......
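
For reference, the config file this writes for the dataset created at the bottom of the script would be DatasetConfigs/TEST.yaml, with roughly the following content (the last comment is cut short here because the diff viewer elides the rest of that write call):

    # dataset name under which it will be shown in the application
    dataset-name: TEST
    # root folder that contains links to the dataset
    url: INSERT URL HERE
    # optional parameter that specifies the name pattern of the datasets to be downloaded
    regex: INSERT REGEX HERE
    # optional parameter that says how often to look for new datasets; if empty, ...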
 
 
 def create_default_processor(dataset_name):
+    """
+    Creates default processor for dataset
+
+    Args:
+        dataset_name: Name of newly created dataset
+    """
     with open(PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "Processor.py", "w") as file:
+        file.write("from Utilities.CSV import CSVDataLine, CSVutils")
+        file.write("\n")
+        file.write("\n")
         file.write("def process_file(filename):\n")
+        file.write("    \"\"\"\n")
+        file.write("    Method that takes the path to a crawled file and outputs a date dictionary using:\n")
+        file.write("    CSVutils.export_data_to_csv(filename, date_dict)\n")
+        file.write("    The date dictionary has keys that are dates in format ddmmYYYYhh (0804201815)\n")
+        file.write("    and values that are dictionaries where keys are devices (specified in the configuration file)\n")
+        file.write("    and values are CSVDataLine.CSVDataLine with device, date and occurrence\n")
+        file.write("\n")
+        file.write("    Args:\n")
+        file.write("    filename: name of processed file\n")
+        file.write("\n")
+        file.write("    Returns:\n")
+        file.write("    False if not implemented\n")
+        file.write("    True when implemented\n")
+        file.write("    \"\"\"\n")
         file.write("    print(\"You must implement the process_file method first!\")\n")
+        file.write("    #CSVutils.export_data_to_csv(filename, date_dict)\n")
+        file.write("    return False\n")
 
 
 def create_default_crawler(dataset_name):
+    """
+    Creates default crawler for dataset
+
+    Args:
+        dataset_name: Name of newly created dataset
+    """
 
     with open(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "Crawler.py", "w") as file:
+        file.write("# Path to crawled data\n")
+        file.write("CRAWLED_DATA_PATH = \"CrawledData/\"\n")
+        file.write("\n")
+        file.write("\n")
         file.write("def crawl(config):\n")
+        file.write("    \"\"\"\n")
+        file.write("    Implement a crawl method that downloads new data to path_for_files\n")
+        file.write("    to keep the project structure;\n")
+        file.write("    use url, regex and dataset_name from config\n")
+        file.write("    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n")
+        file.write("\n")
+        file.write("    Args:\n")
+        file.write("        config: loaded configuration file of dataset\n")
+        file.write("    \"\"\"\n")
+        file.write("    dataset_name = config[\"dataset-name\"]\n")
+        file.write("    url = config['url']\n")
+        file.write("    regex = config['regex']\n")
+        file.write("    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n")
         file.write("    print(\"You must implement the crawl method first!\")\n")
 
 
-def create_ignore_file(path,text):
-
+def create_ignore_file(path, text):
+    """
+    Creates ignore file
+    Args:
+        path: path to directory for creating ignore.txt
+        text: text that will be on the first line of ignore.txt; can be None
+    """
     with open(path + "/ignore.txt", "w") as file:
         if text is not None:
             file.write(text + "\n")
 
 
 def prepare_dataset_structure(dataset_name):
+    """
+    Prepares folders for new dataset
+    Args:
+        dataset_name: Name of newly created dataset
+    """
     jump_folder = "../"
 
     # create folder for crawled data
     try:
         path = CRAWLED_DATA_PATH + dataset_name
         os.mkdir(path)
-        create_ignore_file(path,"ignore.txt")
+        create_ignore_file(path, "ignore.txt")
     except os.error as e:
         print(e)
         print("Creation of the directory %s failed" % path)
......
     create_default_config_file(dataset_name)
 
 
-prepare_dataset_structure("WIFI")
+prepare_dataset_structure("TEST")
