Projekt

Obecné

Profil

Stáhnout (5.12 KB) Statistiky
| Větev: | Revize:
1 c8f3051b petrh
import os
2
3 04a2b5a4 petrh
# Directory where crawled (downloaded) data is stored; each dataset gets its own subfolder
4 2494ea3a petrh
CRAWLED_DATA_PATH = "CrawledData/"
5 04a2b5a4 petrh
# Directory where processed data is stored; each dataset gets its own subfolder
6 2494ea3a petrh
PROCESSED_DATA_PATH = "ProcessedData/"
7 04a2b5a4 petrh
# Directory holding the crawler implementations (<dataset_name>_crawler.py)
8 2494ea3a petrh
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
9 04a2b5a4 petrh
# Directory holding the processor implementations (<dataset_name>_processor.py)
10 2494ea3a petrh
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
11 04a2b5a4 petrh
# Directory holding the dataset configuration files (<dataset_name>.yaml)
12 2494ea3a petrh
CONFIG_FILES_PATH = "DatasetConfigs"
13 c8f3051b petrh
14
15
def create_default_config_file(dataset_name):
    """
    Creates the default YAML configuration file for a new dataset.

    The file is written to CONFIG_FILES_PATH/<dataset_name>.yaml and contains
    commented placeholders (URL, regex, update period, devices) that the user
    is expected to fill in by hand. An existing file is overwritten.

    Args:
        dataset_name: Name of newly created dataset
    """
    config_lines = [
        "# jmeno datasetu, pod kterym bude zobrazen v aplikaci",
        # The original template emitted the key "dataset-name" twice, which
        # is a duplicate key in YAML. The first occurrence belongs to the
        # "name shown in the application" comment, so it gets its own key.
        # NOTE(review): confirm "display-name" is the key the application
        # reads for the displayed name.
        "display-name: " + dataset_name,
        "# jednoslovný název datasetu, pod kterym bude reprezentovana v architektuře",
        "dataset-name: " + dataset_name,
        "# root slozka, ktera obsahuje odkazy na dataset",
        "url: ZDE VLOZTE URL",
        "# volitelny parameter, ktery specifikuje vzor jmrna datasetu, ktera se budou stahovat",
        "regex: ZDE VLOZTE REGEX",
        "# volitelny parametr, ktery udava jak casto se budou hledat nove datasety, pokud prazdne, "
        "tak defaultni hodnota (dny)",
        "update-period: ZDE VLOZTE HODNOTU",
        "# pozice jednotlivych zarizeni, ktera jsou v datasetu",
        "devices:",
    ]

    # The template contains non-ASCII characters, so the encoding is fixed to
    # UTF-8 instead of depending on the platform default (which may be unable
    # to encode them or may produce a file YAML loaders misread).
    with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w", encoding="utf-8") as file:
        file.writelines(line + "\n" for line in config_lines)
36 c8f3051b petrh
37
38
def create_default_processor(dataset_name):
    """
    Generates the skeleton processor module for a dataset.

    The generated file, <dataset_name>_processor.py inside
    PROCESSOR_PROGRAM_PATH, contains a stub ``process_file`` function that
    only prints a reminder and returns None until the user implements it.
    An existing file is overwritten.

    Args:
        dataset_name: Name of newly created dataset
    """
    target_path = "/".join((PROCESSOR_PROGRAM_PATH, dataset_name + "_processor.py"))

    # Source of the generated module, one entry per output line.
    template = (
        "from Utilities.CSV import csv_data_line",
        "",
        "def process_file(filename):",
        '    """',
        "    Method that take path to crawled file and outputs date dictionary:",
        "    Date dictionary is a dictionary where keys are dates in format ddmmYYYYhh (0804201815)",
        "    and value is dictionary where keys are devices (specified in configuration file)",
        "    and value is CSVDataLine.csv_data_line with device,date and occurrence",
        "",
        "    Args:",
        "    filename: name of processed file",
        "",
        "    Returns:",
        "    None if not implemented",
        "    date_dict when implemented",
        '    """',
        "    date_dict = dict()",
        "",
        '    #with open(filename, "r") as file:',
        '    print("You must implements process_file method first!")',
        "    return None",
    )

    with open(target_path, "w") as file:
        file.writelines(row + "\n" for row in template)
68 c8f3051b petrh
69
70
def create_default_crawler(dataset_name):
    """
    Generates the skeleton crawler module for a dataset.

    The generated file, <dataset_name>_crawler.py inside CRAWLER_PROGRAM_PATH,
    defines its own CRAWLED_DATA_PATH constant and a stub ``crawl(config)``
    function that reads the dataset name, url and regex from the config and
    prints a reminder that it still has to be implemented.
    An existing file is overwritten.

    Args:
        dataset_name: Name of newly created dataset
    """
    crawler_source = "\n".join([
        "# Path to crawled data",
        'CRAWLED_DATA_PATH = "CrawledData/"',
        "",
        "",
        "def crawl(config):",
        '    """',
        "    Implement crawl method that downloads new data to path_for_files",
        "    For keeping the project structure",
        "    url , regex, and dataset_name from config",
        "    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py",
        "",
        "    Args:",
        "        config: loaded configuration file of dataset",
        '    """',
        '    dataset_name = config["dataset-name"]',
        "    url = config['url']",
        "    regex = config['regex']",
        "    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'",
        '    print("You must implements Crawl method first!")',
    ]) + "\n"

    crawler_file_path = "/".join((CRAWLER_PROGRAM_PATH, dataset_name + "_crawler.py"))
    with open(crawler_file_path, "w") as file:
        file.write(crawler_source)
98
99
100
def prepare_dataset_structure(dataset_name):
    """
    Prepares folders and default source files for a new dataset.

    Creates the dataset's subfolder in both CRAWLED_DATA_PATH and
    PROCESSED_DATA_PATH, then generates the default crawler, processor and
    configuration file. A failure to create a folder is reported on stdout
    and does not stop the remaining steps.

    Args:
        dataset_name: Name of newly created dataset
    """
    # An empty name would produce files such as "_crawler.py" and ".yaml"
    # and no usable data folders, so it is rejected up front.
    if not dataset_name:
        print("Dataset name must not be empty")
        return

    # Create the folder for crawled data and the folder for processed data.
    for base_path in (CRAWLED_DATA_PATH, PROCESSED_DATA_PATH):
        path = base_path + dataset_name
        try:
            # makedirs also creates the base folder when it is missing
            # (os.mkdir would fail in that case); an already existing
            # dataset folder still raises and is reported below.
            os.makedirs(path)
        except OSError as e:
            print(e)
            print("Creation of the directory %s failed" % path)

    create_default_crawler(dataset_name)
    create_default_processor(dataset_name)
    create_default_config_file(dataset_name)
125
126 2494ea3a petrh
if __name__ == "__main__":
    # Prompt for the dataset name only when executed as a script, so that
    # importing this module does not block waiting for console input.
    print("Zadejte jméno nového datasetu:\n")
    # Surrounding whitespace is stripped so it does not end up in the
    # generated folder and file names; the name is stored upper-cased.
    prepare_dataset_structure(input().strip().upper())