Revize 2f227a6c
Přidáno uživatelem Tomáš Ballák před více než 3 roky
modules/crawler/prepare_new_dataset.py | ||
---|---|---|
10 | 10 |
PROCESSOR_PROGRAM_PATH = "DatasetProcessing" |
11 | 11 |
# Path to dataset configuration files |
12 | 12 |
CONFIG_FILES_PATH = "DatasetConfigs" |
13 |
# Default color for visualization of dataset (bubble info in map) |
|
14 |
DEFAULT_COLOR = "#000000" |
|
13 | 15 |
|
14 | 16 |
|
15 |
def create_default_config_file(dataset_name): |
|
17 |
def create_default_config_file(dataset_name: str):
|
|
16 | 18 |
""" |
17 | 19 |
Creates default config file |
18 | 20 |
|
... | ... | |
22 | 24 |
with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file: |
23 | 25 |
file.write("# jmeno datasetu, pod kterym bude zobrazen v aplikaci\n") |
24 | 26 |
file.write("display-name: " + dataset_name + "\n") |
25 |
file.write("# jednoslovný název datasetu, pod kterym bude reprezentovana v architektuře\n") |
|
27 |
file.write( |
|
28 |
"# jednoslovný název datasetu, pod kterym bude reprezentovana v architektuře\n") |
|
29 |
file.write("display-color: " + DEFAULT_COLOR + "\n") |
|
30 |
file.write( |
|
31 |
"# barva pro tento dataset v hexadecimální hodnotě (#000000)\n") |
|
26 | 32 |
file.write("dataset-name: " + dataset_name + "\n") |
27 | 33 |
file.write("# root slozka, ktera obsahuje odkazy na dataset\n") |
28 | 34 |
file.write("url: ZDE VLOZTE URL\n") |
29 |
file.write("# volitelny parameter, ktery specifikuje vzor jmrna datasetu, ktera se budou stahovat\n") |
|
35 |
file.write( |
|
36 |
"# volitelny parameter, ktery specifikuje vzor jmrna datasetu, ktera se budou stahovat\n") |
|
30 | 37 |
file.write("regex: ZDE VLOZTE REGEX\n") |
31 | 38 |
file.write("# volitelny parametr, ktery udava jak casto se budou hledat nove datasety, pokud prazdne, " |
32 | 39 |
"tak defaultni hodnota (dny)\n") |
... | ... | |
48 | 55 |
file.write("\n") |
49 | 56 |
file.write("def process_file(filename):\n") |
50 | 57 |
file.write(" \"\"\"\n") |
51 |
file.write(" Method that take path to crawled file and outputs date dictionary:\n") |
|
52 |
file.write(" Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)\n") |
|
53 |
file.write(" and value is dictionary where keys are devices (specified in configuration file)\n") |
|
54 |
file.write(" and value is CSVDataLine.csv_data_line with device,date and occurrence\n") |
|
58 |
file.write( |
|
59 |
" Method that take path to crawled file and outputs date dictionary:\n") |
|
60 |
file.write( |
|
61 |
" Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)\n") |
|
62 |
file.write( |
|
63 |
" and value is dictionary where keys are devices (specified in configuration file)\n") |
|
64 |
file.write( |
|
65 |
" and value is CSVDataLine.csv_data_line with device,date and occurrence\n") |
|
55 | 66 |
file.write("\n") |
56 | 67 |
file.write(" Args:\n") |
57 | 68 |
file.write(" filename: name of processed file\n") |
... | ... | |
63 | 74 |
file.write(" date_dict = dict()\n") |
64 | 75 |
file.write("\n") |
65 | 76 |
file.write(" #with open(filename, \"r\") as file:\n") |
66 |
file.write(" print(\"You must implements process_file method first!\")\n") |
|
77 |
file.write( |
|
78 |
" print(\"You must implements process_file method first!\")\n") |
|
67 | 79 |
file.write(" return None\n") |
68 | 80 |
|
69 | 81 |
|
... | ... | |
82 | 94 |
file.write("\n") |
83 | 95 |
file.write("def crawl(config):\n") |
84 | 96 |
file.write(" \"\"\"\n") |
85 |
file.write(" Implement crawl method that downloads new data to path_for_files\n") |
|
97 |
file.write( |
|
98 |
" Implement crawl method that downloads new data to path_for_files\n") |
|
86 | 99 |
file.write(" For keeping the project structure\n") |
87 | 100 |
file.write(" url , regex, and dataset_name from config\n") |
88 |
file.write(" You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n") |
|
101 |
file.write( |
|
102 |
" You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n") |
|
89 | 103 |
file.write("\n") |
90 | 104 |
file.write(" Args:\n") |
91 | 105 |
file.write(" config: loaded configuration file of dataset\n") |
... | ... | |
93 | 107 |
file.write(" dataset_name = config[\"dataset-name\"]\n") |
94 | 108 |
file.write(" url = config['url']\n") |
95 | 109 |
file.write(" regex = config['regex']\n") |
96 |
file.write(" path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n") |
|
110 |
file.write( |
|
111 |
" path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n") |
|
97 | 112 |
file.write(" print(\"You must implements Crawl method first!\")\n") |
98 | 113 |
|
99 | 114 |
|
... | ... | |
123 | 138 |
create_default_processor(dataset_name) |
124 | 139 |
create_default_config_file(dataset_name) |
125 | 140 |
|
141 |
|
|
126 | 142 |
print("Zadejte jméno nového datasetu:\n") |
127 | 143 |
|
128 | 144 |
dataset_name = input().upper() |
Také k dispozici: Unified diff
Feature Re #8156 show all datasets