# NOTE(review): the following header was repository-browser page chrome
# (project navigation, download link, branch/revision columns) captured by
# the scrape — it is not part of the module source.
import os

# Path to crawled data
CRAWLED_DATA_PATH = "CrawledData/"
# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
# Path for DatasetCrawlers implementations
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
# Path for DatasetProcessors implementations
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
# Path to dataset configuration files
CONFIG_FILES_PATH = "DatasetConfigs"
# Default color for visualization of dataset (bubble info in map)
DEFAULT_COLOR = "#000000"
def create_default_config_file(dataset_name: str):
    """
    Create the default YAML configuration file for a new dataset.

    Writes ``CONFIG_FILES_PATH/<dataset_name>.yaml`` containing placeholder
    keys (display-name, dataset-name, display-color, url, regex,
    update-period, devices) with Czech comments explaining each one.

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file:
        file.write("# jmeno datasetu, pod kterym bude zobrazen v aplikaci\n")
        file.write("display-name: " + dataset_name + "\n")
        file.write(
            "# jednoslovný název datasetu, pod kterym bude reprezentovana v architektuře\n")
        # BUG FIX: dataset-name now follows its own descriptive comment;
        # previously display-color was written here, so both keys in the
        # generated YAML were mislabeled by the comment above them.
        file.write("dataset-name: " + dataset_name + "\n")
        file.write(
            "# barva pro tento dataset v hexadecimální hodnotě (#000000)\n")
        file.write("display-color: " + DEFAULT_COLOR + "\n")
        file.write("# root slozka, ktera obsahuje odkazy na dataset\n")
        file.write("url: ZDE VLOZTE URL\n")
        file.write(
            "# volitelny parameter, ktery specifikuje vzor jmrna datasetu, ktera se budou stahovat\n")
        file.write("regex: ZDE VLOZTE REGEX\n")
        file.write("# volitelny parametr, ktery udava jak casto se budou hledat nove datasety, pokud prazdne, "
                   "tak defaultni hodnota (dny)\n")
        file.write("update-period: ZDE VLOZTE HODNOTU\n")
        file.write("# pozice jednotlivych zarizeni, ktera jsou v datasetu\n")
        file.write("devices:\n")
def create_default_processor(dataset_name):
    """
    Create the default processor skeleton for a new dataset.

    Writes ``PROCESSOR_PROGRAM_PATH/<dataset_name>_processor.py`` containing
    a stub ``process_file`` that the dataset author must implement.

    Args:
        dataset_name: Name of newly created dataset
    """
    # One entry per line of the generated file; joined with "\n" below so
    # the emitted bytes match the historical output exactly.
    skeleton = [
        "from Utilities.CSV import csv_data_line",
        "",
        "def process_file(filename):",
        '    """',
        "    Method that take path to crawled file and outputs date dictionary:",
        "    Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)",
        "    and value is dictionary where keys are devices (specified in configuration file)",
        "    and value is CSVDataLine.csv_data_line with device,date and occurrence",
        "",
        "    Args:",
        "    filename: name of processed file",
        "",
        "    Returns:",
        "    None if not implemented",
        "    date_dict when implemented",
        '    """',
        "    date_dict = dict()",
        "",
        '    #with open(filename, "r") as file:',
        '    print("You must implements process_file method first!")',
        "    return None",
    ]
    target = PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "_processor.py"
    with open(target, "w") as file:
        file.writelines(line + "\n" for line in skeleton)
def create_default_crawler(dataset_name):
    """
    Create the default crawler skeleton for a new dataset.

    Writes ``CRAWLER_PROGRAM_PATH/<dataset_name>_crawler.py`` containing
    a stub ``crawl`` that the dataset author must implement.

    Args:
        dataset_name: Name of newly created dataset
    """
    # One entry per line of the generated file; joined with "\n" below so
    # the emitted bytes match the historical output exactly.
    skeleton = [
        "# Path to crawled data",
        'CRAWLED_DATA_PATH = "CrawledData/"',
        "",
        "",
        "def crawl(config):",
        '    """',
        "    Implement crawl method that downloads new data to path_for_files",
        "    For keeping the project structure",
        "    url , regex, and dataset_name from config",
        "    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py",
        "",
        "    Args:",
        "        config: loaded configuration file of dataset",
        '    """',
        '    dataset_name = config["dataset-name"]',
        "    url = config['url']",
        "    regex = config['regex']",
        "    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'",
        '    print("You must implements Crawl method first!")',
    ]
    target = CRAWLER_PROGRAM_PATH + "/" + dataset_name + "_crawler.py"
    with open(target, "w") as file:
        file.writelines(line + "\n" for line in skeleton)
def prepare_dataset_structure(dataset_name):
    """
    Prepare folders and skeleton files for a new dataset.

    Creates the crawled-data and processed-data directories for the dataset,
    then generates the default crawler, processor and configuration file.
    Directory-creation failures are reported on stdout but do not abort the
    remaining steps (the directory may already exist).

    Args:
        dataset_name: Name of newly created dataset
    """
    # create folder for crawled data
    path = CRAWLED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:  # unified: was os.error here, bare OSError below
        print(e)
        print("Creation of the directory %s failed" % path)

    # create folder for processed data
    path = PROCESSED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:  # now also reports the underlying error, consistently
        print(e)
        print("Creation of the directory %s failed" % path)

    create_default_crawler(dataset_name)
    create_default_processor(dataset_name)
    create_default_config_file(dataset_name)
# Entry point: prompt for a dataset name and scaffold its structure.
# Guarded so that importing this module (e.g. to reuse the helpers above)
# no longer triggers the interactive prompt as a side effect.
if __name__ == "__main__":
    print("Zadejte jméno nového datasetu:\n")

    dataset_name = input().upper()

    if dataset_name.isalpha():
        prepare_dataset_structure(dataset_name)
        print("Architektura vytvořena \n")
    else:
        print("Jméno musí obsahovat pouze písmena z abecedy (bez mezer)\n")