Projekt

Obecné

Profil

Stáhnout (6.33 KB) Statistiky
| Větev: | Revize:
1 c8f3051b petrh
import os
2 04a2b5a4 petrh
# Path to crawled data
3 2494ea3a petrh
CRAWLED_DATA_PATH = "CrawledData/"
4 04a2b5a4 petrh
# Path to processed data
5 2494ea3a petrh
PROCESSED_DATA_PATH = "ProcessedData/"
6 04a2b5a4 petrh
# Path for DatasetCrawlers implementations
7 2494ea3a petrh
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
8 04a2b5a4 petrh
# Path for DatasetProcessors implementations
9 2494ea3a petrh
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
10 04a2b5a4 petrh
# Path to dataset configuration files
11 2494ea3a petrh
CONFIG_FILES_PATH = "DatasetConfigs"
12 2f227a6c ballakt
# Default color for visualization of dataset (bubble info in map)
13
DEFAULT_COLOR = "#000000"
14 c8f3051b petrh
15
16 af7609b5 Tomáš Ballák
def create_default_config_file(dataset_name: str) -> None:
    """
    Writes the default YAML configuration template of a new dataset

    The file is created as CONFIG_FILES_PATH/<dataset_name>.yaml (an already
    existing file is overwritten). It contains commented placeholder entries
    which have to be filled in by hand afterwards.

    Args:
        dataset_name: Name of newly created dataset
    """
    config_path = f"{CONFIG_FILES_PATH}/{dataset_name}.yaml"

    # One entry per emitted line; the line terminator is added while writing.
    template_lines = [
        "# Name of the dataset inside the application",
        f"display-name: {dataset_name}",
        "# Color for the dataset in a hex value (default value #000000)",
        # The trailing space after the closing quote is part of the output.
        f"display-color: '{DEFAULT_COLOR}' ",
        "# One word dataset name (structure of all modules will be affected by this)",
        f"dataset-name: {dataset_name}",
        "# Url for the source of this dataset",
        "url: ENTER URL HERE",
        "# Optional parameter which specifies a pattern of the datasets name",
        "# Example: DATASET_NAME_[0-9][0-9]_[0-9][0-9][0-9][0-9].zip",
        "# - DATASET_NAME_01_2020.zip where '01_2020' specifies date in this dataset",
        "regex: ENTER REGEX HERE",
        "# Optional parameter which specifies the way of searching new datasets (if empty the period is set to every day)",
        "update-period: ENTER UPDATE PERIOD HERE",
        "# Coordinates of every datasets device (entinty)",
        "devices:",
    ]

    with open(config_path, "w") as config_file:
        config_file.writelines(line + "\n" for line in template_lines)
50 c8f3051b petrh
51
52 af7609b5 Tomáš Ballák
def create_default_processor(dataset_name: str) -> None:
    """
    Writes the default (not yet implemented) processor module of a new dataset

    The module is created as PROCESSOR_PROGRAM_PATH/<dataset_name>_processor.py
    (an already existing file is overwritten). The generated process_file
    function only prints a reminder and returns an empty date dictionary.

    Args:
        dataset_name: Name of newly created dataset
    """
    processor_path = f"{PROCESSOR_PROGRAM_PATH}/{dataset_name}_processor.py"

    # Source of the generated module, one entry per line ("" is a blank line).
    stub_lines = [
        "from Utilities.CSV import csv_data_line",
        "from shared_types import DateDict",
        "",
        "def process_file(filename: str) -> DateDict:",
        '    """',
        "    Method that takes the path to crawled file and outputs date dictionary:",
        "    Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)",
        "    and value is dictionary where keys are devices (specified in configuration file)",
        "    and value is CSVDataLine.csv_data_line with device,date and occurrence",
        "",
        "    Args:",
        "    filename: name of the processed file",
        "",
        "    Returns:",
        "    None if not implemented",
        "    date_dict when implemented",
        '    """',
        "    date_dict: DateDict = {}",
        "",
        '    #with open(filename, "r") as file:',
        '    print("You must implement the process_file method first!")',
        "    return date_dict",
    ]

    with open(processor_path, "w") as processor_file:
        processor_file.writelines(line + "\n" for line in stub_lines)
94 c8f3051b petrh
95
96 af7609b5 Tomáš Ballák
def create_default_crawler(dataset_name: str) -> None:
    """
    Creates default crawler for dataset

    The module is created as CRAWLER_PROGRAM_PATH/<dataset_name>_crawler.py
    (an already existing file is overwritten). The generated crawl function
    only reads its settings from the config and prints a reminder that the
    crawler is not implemented yet.

    Args:
        dataset_name: Name of newly created dataset
    """
    # The generated source contains a non-ASCII string literal. Python reads
    # source files as UTF-8 by default, so the file must be written as UTF-8
    # regardless of the platform's default encoding.
    with open(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "_crawler.py",
              "w", encoding="utf-8") as file:
        file.write("from shared_types import ConfigType\n")
        file.write("# Path to crawled data\n")
        # Copy the current value of this module's constant into the stub.
        file.write(f'CRAWLED_DATA_PATH = "{CRAWLED_DATA_PATH}" \n')
        file.write("\n")
        file.write("\n")
        file.write("def crawl(config: ConfigType):\n")
        file.write("    \"\"\"\n")
        file.write(
            "    Implementation the crawl method which downloads new data to the path_for_files\n"
        )
        file.write("    For keeping the project structure\n")
        file.write("    url , regex, and dataset_name from config\n")
        file.write(
            "    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n"
        )
        file.write("\n")
        file.write("    Args:\n")
        file.write("        config: loaded configuration file of dataset\n")
        file.write("    \"\"\"\n")
        file.write("    dataset_name = config[\"dataset-name\"]\n")
        file.write("    url = config['url']\n")
        file.write("    regex = config['regex']\n")
        file.write(
            "    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n")
        file.write(
            "    print(\"Není implementován crawler pro získávání dat!\")\n")
132 c8f3051b petrh
133
134 af7609b5 Tomáš Ballák
def prepare_dataset_structure(dataset_name: str) -> None:
    """
    Prepares folders and default source files for new dataset

    Creates the dataset folder under CRAWLED_DATA_PATH and under
    PROCESSED_DATA_PATH and then generates the default crawler, processor
    and configuration file. Folder creation is best effort: a failure is
    printed and the remaining steps are still executed.

    Args:
        dataset_name: Name of newly created dataset
    """
    # create folder for crawled data
    path = CRAWLED_DATA_PATH + dataset_name
    try:
        # makedirs also creates a missing parent folder; an already existing
        # dataset folder is still reported through the handler below.
        os.makedirs(path)
    except OSError as e:
        print(e)
        print("Creation of the directory %s failed" % path)

    # create folder for processed data
    path = PROCESSED_DATA_PATH + dataset_name
    try:
        os.makedirs(path)
    except OSError as e:
        # Print the reason as well, the same way as for the crawled data folder.
        print(e)
        print("Nelze vytvořit adresář %s" % path)

    create_default_crawler(dataset_name)
    create_default_processor(dataset_name)
    create_default_config_file(dataset_name)
159
160 2f227a6c ballakt
161 af7609b5 Tomáš Ballák
def main() -> None:
    """
    Asks for the name of a new dataset and prepares its structure

    The entered name is converted to upper case. The structure is created
    only when the name passes str.isalpha(); otherwise a message is printed
    and nothing is created.
    """
    print("Zadejte jméno nového datasetu:\n")
    requested_name = input().upper()

    # Refuse anything that is not purely alphabetic (empty input included).
    if not requested_name.isalpha():
        print("Jméno musí obsahovat pouze písmena z abecedy (bez mezer)\n")
        return

    prepare_dataset_structure(requested_name)
    print("Architektura vytvořena \n")
170 3692d853 petrh
171
172 af7609b5 Tomáš Ballák
# Run the interactive dataset setup only when this file is executed as a script.
if __name__ == "__main__":
173
    main()