Revision 9990127e
Added by Petr Hlaváč almost 5 years ago
python-module/DatasetConfigs/JIS.yaml
-------------------------------------
     x: UNKNOWN!
     y: UNKNOWN!

+ - US 005 - závora vjezd:
+     x: UNKNOWN!
+     y: UNKNOWN!
+
+ - US 005 - m?? vjezd:
+     x: UNKNOWN!
+     y: UNKNOWN!
+
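The UNKNOWN! placeholders mark newly detected devices whose map coordinates still have to be filled in by hand. As a rough illustration of how such a fragment parses, here is a minimal PyYAML sketch; the top-level devices: key layout and the sample device name are assumptions for the example, not taken verbatim from this file.

```python
import yaml

# Hypothetical fragment shaped like the entries appended above; the "devices:"
# key and the sample device name are assumptions for illustration only.
fragment = """
devices:
 - US 005 - zavora vjezd:
     x: UNKNOWN!
     y: UNKNOWN!
"""

config = yaml.safe_load(fragment)
for item in config["devices"]:
    for device, coords in item.items():
        # coords["x"] / coords["y"] stay "UNKNOWN!" until someone fills them in
        print(device, coords["x"], coords["y"])
```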
python-module/DatasetCrawler/JISCrawler.py
-------------------------------------------
  from Utilities import FolderProcessor
  from Utilities.Crawler import BasicCrawler

+ # Path to crawled data
+ CRAWLED_DATA_PATH = "CrawledData/"

- def crawl(config):

+ def crawl(config):
+     """
+     Implement crawl method that downloads new data to path_for_files
+     For keeping the project structure
+     url , regex, and dataset_name from config
+     You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py
+
+     Args:
+         config: loaded configuration file of dataset
+     """
      dataset_name = config["dataset-name"]
      url = config['url']
      regex = config['regex']
+     path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'

      first_level_links = BasicCrawler.get_all_links(url)
      filtered_first_level_links = BasicCrawler.filter_links(first_level_links, "^OD_ZCU")
...
          files.append(file_link)

      for file in files:
-         BasicCrawler.download_file_from_url(file, "CrawledData/" + dataset_name + "/", dataset_name)
+         BasicCrawler.download_file_from_url(file, path_for_files, dataset_name)

-     FolderProcessor.unzip_all_csv_zip_files_in_folder("CrawledData/" + dataset_name + "/")
+     FolderProcessor.unzip_all_csv_zip_files_in_folder(path_for_files)
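For orientation, a minimal sketch of how this crawl function might be invoked; the URL and regex values below are placeholders, not the real JIS data source, which comes from DatasetConfigs/JIS.yaml.

```python
# Illustrative only: url and regex are placeholders, the real values come from
# the dataset's YAML configuration file.
from DatasetCrawler import JISCrawler

config = {
    "dataset-name": "JIS",
    "url": "https://example.org/opendata/",  # placeholder URL
    "regex": "OD_ZCU",                        # placeholder file-name pattern
}

# Downloads the matching files into CrawledData/JIS/ and unzips any .zip archives.
JISCrawler.crawl(config)
```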
python-module/DatasetCrawler/KOLOBEZKYCrawler.py
------------------------------------------------
  from Utilities import FolderProcessor
  from Utilities.Crawler import BasicCrawler

+ # Path to crawled data
+ CRAWLED_DATA_PATH = "CrawledData/"

- def crawl(config):

+ def crawl(config):
+     """
+     Implement crawl method that downloads new data to path_for_files
+     For keeping the project structure
+     url , regex, and dataset_name from config
+     You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py
+
+     Args:
+         config: loaded configuration file of dataset
+     """
      dataset_name = config["dataset-name"]
      url = config['url']
      regex = config['regex']
+     path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'

      first_level_links = BasicCrawler.get_all_links(url)
      filtered_first_level_links = BasicCrawler.filter_links(first_level_links, "^OD_ZCU")
python-module/DatasetCrawler/WIFICrawler.py
--------------------------------------------
  from Utilities import FolderProcessor
  from Utilities.Crawler import BasicCrawler

+ # Path to crawled data
+ CRAWLED_DATA_PATH = "CrawledData/"

- def crawl(config):

+ def crawl(config):
+     """
+     Implement crawl method that downloads new data to path_for_files
+     For keeping the project structure
+     url , regex, and dataset_name from config
+     You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py
+
+     Args:
+         config: loaded configuration file of dataset
+     """
      dataset_name = config["dataset-name"]
      url = config['url']
      regex = config['regex']
+     path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'

      first_level_links = BasicCrawler.get_all_links(url)
      filtered_first_level_links = BasicCrawler.filter_links(first_level_links, "^OD_ZCU")
...
          files.append(file_link)

      for file in files:
-         BasicCrawler.download_file_from_url(file, "CrawledData/" + dataset_name + "/", dataset_name)
+         BasicCrawler.download_file_from_url(file, path_for_files, dataset_name)

-     FolderProcessor.unzip_all_csv_zip_files_in_folder("CrawledData/" + dataset_name + "/")
+     FolderProcessor.unzip_all_csv_zip_files_in_folder(path_for_files)
python-module/DatasetProcessing/JISProcessor.py
-----------------------------------------------


  def process_file(filename):
-
+     """
+     Method that take path to crawled file and outputs date dictionary using method:
+     CSVutils.export_data_to_csv(filename, date_dict)
+     Date dictionary is a dictionary where keys are dates in format ddmmYYYYhh (0804201815)
+     and value is dictionary where keys devices (specified in configuration file)
+     and value is CSVDataLine.CSVDataLine with device,date and occurrence
+
+     Args:
+         filename: name of processed file
+
+     Returns:
+         False if not implemented
+         True when implemented
+     """
      with open(filename, "r", encoding="utf-8") as file:

          date_dict = dict()
...

              array = line.split(";")

-             date = DateFormating.date_time_formater(array[1][1:-1])
+             date = DateFormating.date_time_formatter(array[1][1:-1])
              name = array[0][1:-1]
              occurence = array[2][:-1]

...
              else:
                  date_dict[date][name] = CSVDataLine.CSVDataLine(name, date, int(occurence))

-     CSVutils.export_data_to_csv(filename, date_dict)
+     CSVutils.export_data_to_csv(filename, date_dict)
+     return True
+
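The new docstring describes the date dictionary the processor is expected to build. Below is a small, self-contained sketch of that structure, with hypothetical device names and counts, using the CSVDataLine class from this revision.

```python
from Utilities.CSV import CSVDataLine

# date key in format ddmmYYYYhh, here 8 April 2018, 15:00
date_key = "0804201815"

# hypothetical devices and occurrence counts for illustration
date_dict = {
    date_key: {
        "US 005": CSVDataLine.CSVDataLine("US 005", date_key, 12),
        "US 013": CSVDataLine.CSVDataLine("US 013", date_key, 3),
    }
}

# each data line serializes as "name;occurrence;date"
print(date_dict[date_key]["US 005"].to_csv())  # -> US 005;12;0804201815
```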
python-module/DatasetProcessing/KOLOBEZKYProcessor.py
------------------------------------------------------


  def process_file(filename):
-
+     """
+     Method that take path to crawled file and outputs date dictionary using method:
+     CSVutils.export_data_to_csv(filename, date_dict)
+     Date dictionary is a dictionary where keys are dates in format ddmmYYYYhh (0804201815)
+     and value is dictionary where keys devices (specified in configuration file)
+     and value is CSVDataLine.CSVDataLine with device,date and occurrence
+
+     Args:
+         filename: name of processed file
+
+     Returns:
+         False if not implemented
+         True when implemented
+     """
      with open(filename, "r") as file:

          date_dict = dict()
...

              array = line.split(";")

-             date = DateFormating.date_time_formater(array[0][1:-1])
+             date = DateFormating.date_time_formatter(array[0][1:-1])
              name = array[1][1:-1]

              if date not in date_dict:
...
              else:
                  date_dict[date][name] = CSVDataLine.CSVDataLine(name, date, 1)

-     CSVutils.export_data_to_csv(filename, date_dict)
+     CSVutils.export_data_to_csv(filename, date_dict)
+     return True
+
python-module/DatasetProcessing/WIFIProcessor.py
-------------------------------------------------


  def process_file(filename):
-
+     """
+     Method that take path to crawled file and outputs date dictionary using method:
+     CSVutils.export_data_to_csv(filename, date_dict)
+     Date dictionary is a dictionary where keys are dates in format ddmmYYYYhh (0804201815)
+     and value is dictionary where keys devices (specified in configuration file)
+     and value is CSVDataLine.CSVDataLine with device,date and occurrence
+
+     Args:
+         filename: name of processed file
+
+     Returns:
+         False if not implemented
+         True when implemented
+     """
      with open(filename, "r", encoding="utf-8") as file:

          date_dict = dict()
...

              array = line.split(";")

-             date = DateFormating.date_time_formater(array[4][1:-2])
+             date = DateFormating.date_time_formatter(array[4][1:-2])
              name = array[1][1:-1]
              occurence = array[0]

...
              else:
                  date_dict[date][name] = CSVDataLine.CSVDataLine(name, date, int(occurence))

-     CSVutils.export_data_to_csv(filename, date_dict)
+     CSVutils.export_data_to_csv(filename, date_dict)
+     return True
+
python-module/Pipeline.py
-------------------------
  from Utilities import FolderProcessor, ConfigureFunctions
  from Utilities.Database import DatabaseLoader

-
- CONFIG_FILES_PATH = "DatasetConfigs/"
+ # Path to crawled data
  CRAWLED_DATA_PATH = "CrawledData/"
+ # Path to processed data
  PROCESSED_DATA_PATH = "ProcessedData/"
+ # Path to dataset crawler implementations
  CRAWLER_LIB_PATH = "DatasetCrawler."
+ # Path to dataset processor implementations
  PROCESSOR_LIB_PATH = "DatasetProcessing."


  def crawl_data(config):
+     """
+     Imports dataset crawler in DatasetCrawler/"dataset_name"Crawler.py
+     runs crawler.

+     Args:
+         config: loaded configuration file of dataset
+     """
      dataset_name = config["dataset-name"]

-     my_function = __import__(CRAWLER_LIB_PATH + dataset_name + "Crawler",globals(),locals(),['crawl']).crawl
+     my_function = __import__(CRAWLER_LIB_PATH + dataset_name + "Crawler", globals(), locals(), ['crawl']).crawl
      my_function(config)

      dataset_name += '/'


  def process_data(dataset_name):
+     """
+     Goes trough every not processed file(not contained in CrawledData/"dataset_name"/ignore.txt)
+     Imports dataset processor in DatasetProcessing/"dataset_name"Processor.py
+     Runs processor on every file
+     After successful processing updates ignore.txt
+
+     Args:
+         dataset_name: name of dataset that has existing configuration file
+     """
      dataset_path = dataset_name + '/'

-     process_file_func = __import__(PROCESSOR_LIB_PATH + dataset_name + "Processor",globals(),locals(),
+     process_file_func = __import__(PROCESSOR_LIB_PATH + dataset_name + "Processor", globals(), locals(),
                                     ['process_file']).process_file

-     # get all not processed files from dataset
      not_processed_files = FolderProcessor.list_of_all_files(CRAWLED_DATA_PATH + dataset_path)

-     # process every file
      for not_processed_file in not_processed_files:
-         # call processing for one file in dataset
          process_file_func(CRAWLED_DATA_PATH + dataset_path + not_processed_file)
          FolderProcessor.update_ignore_set(CRAWLED_DATA_PATH + dataset_path, not_processed_file)


  def validate_process_data(config):
+     """
+     Function goes through newly processed data and checks theirs status
+
+     Args:
+         config: loaded configuration file of dataset
+
+     Returns:
+         boolean variable TRUE/FALSE.
+         Data processed correctly - TRUE
+         Wrong format or NEW unknown devices - FALSE
+     """
      processed_devices_set = FolderProcessor.get_devices_set(PROCESSED_DATA_PATH + config["dataset-name"] + '/')
-     unknown_devices_set = FolderProcessor.get_unknown_devices_set(config,processed_devices_set)
+     unknown_devices_set = FolderProcessor.get_unknown_devices_set(config, processed_devices_set)
      unknown_devices_size = len(unknown_devices_set)

      if unknown_devices_size != 0:
          print("There is " + str(unknown_devices_size) + " unknown devies")
-         ConfigureFunctions.update_configuration(CONFIG_FILES_PATH + config["dataset-name"] + ".yaml", unknown_devices_set)
+         ConfigureFunctions.update_configuration(config["dataset-name"], unknown_devices_set)
          return False


  def load_data_to_database(config):
-
+     """
+     Goes trough every not loaded file(not contained in ProcessedData/ignore.txt)
+     loads data appends coordination from configurations
+     and exports it into the database
+     After successful exporting updates ignore.txt
+
+     Args:
+         config: loaded configuration file of dataset
+     """
      dataset_name = config["dataset-name"]
      dataset_path = dataset_name + '/'

...
      # load every file
      for not_loaded_file in not_loaded_files:
          # load processed data
-         processed_data = DatabaseLoader.get_data_from_file(PROCESSED_DATA_PATH + dataset_path + not_loaded_file,
-                                                            config["devices"])
+         processed_data = DatabaseLoader.get_data_from_file(not_loaded_file, config)
          # load processed data to database
          DatabaseLoader.load_data_to_database(dataset_name, processed_data)
          FolderProcessor.update_ignore_set(PROCESSED_DATA_PATH + dataset_path, not_loaded_file)


  def run_full_pipeline(dataset_name):
-     config = ConfigureFunctions.load_configuration(CONFIG_FILES_PATH + dataset_name)
+     """
+     Loads config file and starts full pipeline
+     -crawl data
+     -process data
+     -load data to database
+
+     Args:
+         dataset_name: name of dataset that has existing configuration file
+     """
+     config = ConfigureFunctions.load_configuration(dataset_name)
      crawl_data(config)
      process_data(config["dataset-name"])

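crawl_data and process_data resolve the dataset-specific module at run time with __import__. A short sketch of the same lookup written with importlib, shown only as an illustrative equivalent, not as code from this commit:

```python
import importlib

def load_crawl_function(dataset_name):
    # equivalent of: __import__("DatasetCrawler." + dataset_name + "Crawler",
    #                           globals(), locals(), ['crawl']).crawl
    module = importlib.import_module("DatasetCrawler." + dataset_name + "Crawler")
    return module.crawl

# crawl = load_crawl_function("JIS")
# crawl(config)
```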
python-module/Scripts/PrepareNewDataset.py
-------------------------------------------
  import os

+ # Path to crawled data
  CRAWLED_DATA_PATH = "../CrawledData/"
+ # Path to processed data
  PROCESSED_DATA_PATH = "../ProcessedData/"
+ # Path to crawler logs
  CRAWLER_LOGS_PATH = "../CrawlerLogs/"
+ # Path for DatasetCrawlers implementations
  CRAWLER_PROGRAM_PATH = "../DatasetCrawler"
+ # Path for DatasetProcessors implementations
  PROCESSOR_PROGRAM_PATH = "../DatasetProcessing"
+ # Path to dataset configuration files
  CONFIG_FILES_PATH = "../DatasetConfigs"


  def create_default_config_file(dataset_name):
+     """
+     Creates default config file

+     Args:
+         dataset_name: Name of newly created dataset
+     """
      with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file:
          file.write("# jmeno datasetu, pod kterym bude zobrazen v aplikaci\n")
          file.write("dataset-name: " + dataset_name + "\n")
          file.write("# root slozka, ktera obsahuje odkazy na dataset\n")
-         file.write("url: ZDE VLOZTE URL/\n")
+         file.write("url: ZDE VLOZTE URL\n")
          file.write("# volitelny parameter, ktery specifikuje vzor jmrna datasetu, ktera se budou stahovat\n")
          file.write("regex: ZDE VLOZTE REGEX\n")
          file.write("# volitelny parametr, ktery udava jak casto se budou hledat nove datasety, pokud prazdne, "
...


  def create_default_processor(dataset_name):
+     """
+     Creates default processor for dataset
+
+     Args:
+         dataset_name: Name of newly created dataset
+     """
      with open(PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "Processor.py", "w") as file:
+         file.write("from Utilities.CSV import CSVDataLine, CSVutils")
+         file.write("\n")
+         file.write("\n")
          file.write("def process_file(filename):\n")
+         file.write(" \"\"\"\n")
+         file.write(" Method that take path to crawled file and outputs date dictionary using method:\n")
+         file.write(" CSVutils.export_data_to_csv(filename, date_dict)\n")
+         file.write(" Date dictionary is a dictionary where keys are dates in format ddmmYYYYhh (0804201815)\n")
+         file.write(" and value is dictionary where keys devices (specified in configuration file)\n")
+         file.write(" and value is CSVDataLine.CSVDataLine with device,date and occurrence\n")
+         file.write("\n")
+         file.write(" Args:\n")
+         file.write(" filename: name of processed file\n")
+         file.write("\n")
+         file.write(" Returns:\n")
+         file.write(" False if not implemented\n")
+         file.write(" True when implemented\n")
+         file.write(" \"\"\"\n")
          file.write(" print(\"You must implements process_file method first!\")\n")
+         file.write(" #CSVutils.export_data_to_csv(filename, date_dict)\n")
+         file.write(" return False\n")


  def create_default_crawler(dataset_name):
+     """
+     Creates default crawler for dataset
+
+     Args:
+         dataset_name: Name of newly created dataset
+     """

      with open(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "Crawler.py", "w") as file:
+         file.write("# Path to crawled data\n")
+         file.write("CRAWLED_DATA_PATH = \"CrawledData/\"\n")
+         file.write("\n")
+         file.write("\n")
          file.write("def crawl(config):\n")
+         file.write(" \"\"\"\n")
+         file.write(" Implement crawl method that downloads new data to path_for_files\n")
+         file.write(" For keeping the project structure\n")
+         file.write(" url , regex, and dataset_name from config\n")
+         file.write(" You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n")
+         file.write("\n")
+         file.write(" Args:\n")
+         file.write(" config: loaded configuration file of dataset\n")
+         file.write(" \"\"\"\n")
+         file.write(" dataset_name = config[\"dataset-name\"]\n")
+         file.write(" url = config['url']\n")
+         file.write(" regex = config['regex']\n")
+         file.write(" path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n")
          file.write(" print(\"You must implements Crawl method first!\")\n")


- def create_ignore_file(path,text):
-
+ def create_ignore_file(path, text):
+     """
+     Creates ignore file
+     Args:
+         path: path to directory for creating ignore.txt
+         text: text that will be on first line of ignore.txt can be None
+     """
      with open(path + "/ignore.txt", "w") as file:
          if text is not None:
              file.write(text + "\n")


  def prepare_dataset_structure(dataset_name):
+     """
+     Prepares folders for new dataset
+     Args:
+         dataset_name: Name of newly created dataset
+     """
      jump_folder = "../"

      # create folder for crawled data
      try:
          path = CRAWLED_DATA_PATH+dataset_name
          os.mkdir(path)
-         create_ignore_file(path,"ignore.txt")
+         create_ignore_file(path, "ignore.txt")
      except os.error as e:
          print(e)
          print("Creation of the directory %s failed" % path)
...
      create_default_config_file(dataset_name)


- prepare_dataset_structure("WIFI")
+ prepare_dataset_structure("TEST")
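The generator writes the default crawler stub line by line with file.write. A hedged alternative sketch (not the committed approach) that emits the same kind of stub from a single template string; the helper name and default directory are illustrative assumptions:

```python
# Alternative sketch only: the commit itself builds the stub with many
# file.write calls; a single template string produces a similar file.
CRAWLER_TEMPLATE = '''# Path to crawled data
CRAWLED_DATA_PATH = "CrawledData/"


def crawl(config):
    dataset_name = config["dataset-name"]
    url = config['url']
    regex = config['regex']
    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'
    print("You must implements Crawl method first!")
'''


def write_default_crawler(dataset_name, crawler_dir="../DatasetCrawler"):
    # helper name and default directory are assumptions for this sketch
    with open(crawler_dir + "/" + dataset_name + "Crawler.py", "w") as file:
        file.write(CRAWLER_TEMPLATE)
```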
python-module/Utilities/CSV/CSVDataLine.py
-------------------------------------------
  class CSVDataLine:
-
-     def __init__(self, name, date, occurence):
+     """
+     Class that specifies the look of data line in processed csv file
+     prepared for database
+     """
+     def __init__(self, name, date, occurrence):
          self.name = name
          self.date = date
-         self.occurence = occurence
+         self.occurrence = occurrence

      def to_csv(self):
-         return self.name + ";" + str(self.occurence) + ";" + self.date
+         return self.name + ";" + str(self.occurrence) + ";" + self.date

python-module/Utilities/CSV/CSVutils.py
----------------------------------------
+ # Path to processed data
  PROCESSED_DATA_PATH = "ProcessedData/"

+

  def get_unique_names_from_file(filename, column_number):
+     """
+
+     Args:
+         filename:
+         column_number:
+
+     Returns:
+
+     """
      f = open(filename, "r")

      # create set of unique names
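The body of get_unique_names_from_file is cut off here and its new docstring is still empty. Based on how FolderProcessor.get_devices_set calls it (a file name plus a column index), a plausible sketch looks like this; it is an illustration, not the committed implementation:

```python
def get_unique_names_from_file_sketch(filename, column_number):
    # read a processed ";"-separated file and collect the unique values
    # found in the given column (device names live in column 0)
    unique_names = set()
    with open(filename, "r") as f:
        for line in f:
            columns = line.rstrip("\n").split(";")
            unique_names.add(columns[column_number])
    return unique_names
```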
python-module/Utilities/ConfigureFunctions.py
----------------------------------------------
  import yaml

+ # Path to dataset configuration files
+ CONFIG_FILES_PATH = "DatasetConfigs/"
+ # Config file type
+ CONFIG_FILE_TYPE = ".yaml"

- def load_configuration(configure_file_name):

-     with open(configure_file_name) as f:
+ def load_configuration(dataset_name):
+     """
+     Loads yaml configuration file into memory
+
+     Args:
+         dataset_name: name of dataset that has existing configuration file
+
+     Returns:
+         yaml configuration file as dictionary
+     """
+     with open(CONFIG_FILES_PATH + dataset_name + CONFIG_FILE_TYPE, "r") as f:
          data = yaml.load(f, Loader=yaml.FullLoader)

          devices_dic = dict()
...
      return data


- def update_configuration(configure_file_name, new_devices):
+ def update_configuration(dataset_name, new_devices):
+     """
+     Open dataset and appends new_devices to the end

-     with open(configure_file_name, "a") as file:
+     Args:
+         dataset_name: name of dataset that has existing configuration file
+         new_devices: list or set of new devices for dataset
+     """
+     with open(CONFIG_FILES_PATH + dataset_name + CONFIG_FILE_TYPE, "a") as file:
          for device in new_devices:
              file.write(" - "+device+":\n")
              file.write(" x: UNKNOWN!\n")
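After this change both functions take only the dataset name and build the DatasetConfigs/<name>.yaml path themselves. An illustrative call sequence with hypothetical device names:

```python
from Utilities import ConfigureFunctions

# hypothetical unknown devices reported by validation
unknown_devices = {"US 005 - zavora vjezd", "US 006 - brana vjezd"}

# appends each device with x/y set to UNKNOWN! to DatasetConfigs/JIS.yaml
ConfigureFunctions.update_configuration("JIS", unknown_devices)

# reloads DatasetConfigs/JIS.yaml as a dictionary
config = ConfigureFunctions.load_configuration("JIS")
print(config["dataset-name"])  # JIS
```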
python-module/Utilities/Crawler/BasicCrawler.py
------------------------------------------------
- import requests
- import re
- from Utilities import FolderProcessor
- from bs4 import BeautifulSoup
-
-
- def get_all_links(url):
-     # create response object
-     r = requests.get(url)
-
-     # create beautiful-soup object
-     soup = BeautifulSoup(r.content, 'html5lib')
-     links = []
-
-     for link in soup.findAll('a'):
-         links.append(link.get('href'))
-
-     return links
-
-
- def filter_links(links, regex):
-     fitlered_links = []
-
-     for link in links:
-         if re.search(regex,link):
-             fitlered_links.append(link)
-
-     return fitlered_links
-
-
- def create_absolute_links(links, archive):
-     absolute_links = []
-
-     for link in links:
-         absolute_links.append(archive + link)
-
-     return absolute_links
-
-
- def remove_downloaded_links(links,dataset_name):
-
-     downloaded_links = FolderProcessor.load_ignore_set("CrawlerLogs/" + dataset_name + "/")
-     final_links = set(links) - downloaded_links
-
-     return final_links
-
-
- def download_file_from_url(url,path, dataset_name):
-     r = requests.get(url, stream=True)
-
-     url_parts = url.split("/")
-     file_name = url_parts[len(url_parts)-1]
-
-     with open(path + file_name, "wb") as file:
-         for chunk in r.iter_content(chunk_size=1024):
-
-             # writing one chunk at a time to pdf file
-             if chunk:
-                 file.write(chunk)
-
-     FolderProcessor.update_ignore_set("CrawlerLogs/" + dataset_name + "/", url)
python-module/Utilities/Crawler/BasicCrawlerFunctions.py
----------------------------------------------------------
+ import requests
+ import re
+ from Utilities import FolderProcessor
+ from bs4 import BeautifulSoup
+
+ # Path to crawler logs
+ CRAWLER_LOGS_PATH = "CrawlerLogs/"
+
+
+ def get_all_links(url):
+     """
+     Sends http request to url, downloads all data,
+     extract links
+
+     Args:
+         url: url of website we want to search
+
+     Returns:
+         list of all links
+     """
+     # create response object
+     r = requests.get(url)
+
+     # create beautiful-soup object
+     soup = BeautifulSoup(r.content, 'html5lib')
+     links = []
+
+     for link in soup.findAll('a'):
+         links.append(link.get('href'))
+
+     return links
+
+
+ def filter_links(links, regex):
+     """
+     Filters list of links using regex
+
+     Args:
+         links: list of links
+         regex: regex used for filtering
+
+     Returns:
+         filtered list of links
+     """
+     filtered_links = []
+
+     for link in links:
+         if re.search(regex, link):
+             filtered_links.append(link)
+
+     return filtered_links
+
+
+ def create_absolute_links(links, archive):
+     """
+     Appends archive path to every link in links
+     Args:
+         links: list of relative links
+         archive: archive url
+
+     Returns:
+         list of absolute links
+     """
+     absolute_links = []
+
+     for link in links:
+         absolute_links.append(archive + link)
+
+     return absolute_links
+
+
+ def remove_downloaded_links(links, dataset_name):
+     """
+     Loads already downloaded links from CRAWLER_LOGS_PATH ignore.txt
+     Args:
+         links: list of links
+         dataset_name: name of dataset that has existing configuration file
+
+     Returns:
+         List of links without already downloaded links
+     """
+     downloaded_links = FolderProcessor.load_ignore_set(CRAWLER_LOGS_PATH + dataset_name + "/")
+     final_links = set(links) - downloaded_links
+
+     return final_links
+
+
+ def download_file_from_url(url, dataset_name):
+     """
+     Downloads file on provided url and saves it to path
+     Args:
+         url: url file we want to download
+         dataset_name: name of dataset that has existing configuration file
+     """
+     r = requests.get(url, stream=True)
+
+     # splits url and extract last part that contains filename
+     url_parts = url.split("/")
+     file_name = url_parts[len(url_parts)-1]
+
+     path = CRAWLER_LOGS_PATH + dataset_name + '/'
+
+     # download file chunk by chunk so we can download large files
+     with open(path + file_name, "wb") as file:
+         for chunk in r.iter_content(chunk_size=1024):
+
+             # writing one chunk at a time to file
+             if chunk:
+                 file.write(chunk)
+
+     # after successful download update list of already downloaded files
+     FolderProcessor.update_ignore_set(CRAWLER_LOGS_PATH + dataset_name + "/", url)
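A minimal sketch of the intended call chain for these helpers; the URL and regex are placeholders and the dataset name is only an example:

```python
from Utilities.Crawler import BasicCrawlerFunctions

url = "https://example.org/opendata/"  # placeholder, the real URL comes from the config

links = BasicCrawlerFunctions.get_all_links(url)
links = BasicCrawlerFunctions.filter_links(links, "^OD_ZCU")
links = BasicCrawlerFunctions.create_absolute_links(links, url)
links = BasicCrawlerFunctions.remove_downloaded_links(links, "JIS")

for link in links:
    BasicCrawlerFunctions.download_file_from_url(link, "JIS")
```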
python-module/Utilities/Database/DatabaseDataLine.py
-----------------------------------------------------
  class DatabaseDataLine:
-
-     def __init__(self, name, longitude, latitude, date, occurence):
+     """
+     Class that specifies the look of data line in database
+     """
+     def __init__(self, name, longitude, latitude, date, occurrence):
          self.name = name
          self.latitude = latitude
          self.longitude = longitude
          self.date = date
-         self.occurence = occurence
+         self.occurrence = occurrence

      def to_dictionary(self):
-         return {"place": self.name, "x": self.longitude, "y": self.latitude, "number": self.occurence, "date": self.date}
+         return {"place": self.name, "x": self.longitude, "y": self.latitude, "number": self.occurrence, "date": self.date}
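A small usage example of the renamed attribute and to_dictionary (the coordinates and count below are made up):

```python
from Utilities.Database import DatabaseDataLine

line = DatabaseDataLine.DatabaseDataLine("US 005", 13.35, 49.72, "0804201815", 12)
print(line.to_dictionary())
# {'place': 'US 005', 'x': 13.35, 'y': 49.72, 'number': 12, 'date': '0804201815'}
```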
python-module/Utilities/Database/DatabaseLoader.py
---------------------------------------------------
  from Utilities.Database import DatabaseDataLine
  import pymongo

+ # specify mongodb connection
+ MONGODB_CONNECTION = "mongodb://localhost:27017/"
+ # mongodb account name
+ MONGODB_ACC_NAME = "root"
+ # mongodb account password
+ MONGODB_ACC_PASSWORD = "root"
+ # mongodb data database
+ MONGODB_DATA_DATABASE = "DATA"
+ # mongodb collection with aviable datasets
+ MONGODB_DATASET_COLLECTION = "DATASETS"
+
+ # Path to processed data
+ PROCESSED_DATA_PATH = "ProcessedData/"
+
+
+ def create_database_connection():
+     """
+     Creates connection to mongoDB
+
+     Returns:
+         Connection to mongoDB
+     """
+     client = pymongo.MongoClient(MONGODB_CONNECTION)

- def get_data_from_file(filename, devices):
-     f = open(filename, "r")
+     # Authenticating
+     client.admin.authenticate(MONGODB_ACC_NAME, MONGODB_ACC_PASSWORD)
+
+     database = client[MONGODB_DATA_DATABASE]
+
+     return database
+
+
+ def get_data_from_file(filename, config):
+     """
+     Opens processed file, reads it line by line
+     name, ocurrence, date
+     searches name in config and adds device map coordinates
+     than creates a dictionary with date without hours as key
+     and list of data lines as value.
+     Args:
+         filename: name of processed file
+         config: loaded configuration file of dataset
+
+     Returns:
+         dictionary with date without hours as key
+         and list of Datalines as value
+     """
+     dataset_name = config["dataset-name"]
+     dataset_path = PROCESSED_DATA_PATH + dataset_name + '/'

+     f = open(dataset_path + filename, "r")
+
+     devices = config["devices"]
      date_dict = dict()

      for line in f:
-         # remove \n
          line = line[:-1]
-         # split by csv splitter ;
-
-         csv_collum = line.split(";")

-         name = csv_collum[0]
-         occurence = csv_collum[1]
-         date = csv_collum[2]
+         csv_column = line.split(";")

-         date_without_hours = date[:-2]
+         name = csv_column[0]
+         occurrence = csv_column[1]
+         date = csv_column[2]

          database_data_line = DatabaseDataLine.DatabaseDataLine(name, devices[name]["x"]
-                                                                , devices[name]["y"], date, occurence)
+                                                                , devices[name]["y"], date, occurrence)

+         # if you want to change table split by hours or months change this
+         date_without_hours = date[:-2]
          if date_without_hours not in date_dict:
              date_dict[date_without_hours] = list()

-         date_dict[date_without_hours].append(database_data_line.to_dictionary())
+         date_dict[date_without_hours].append(database_data_line.to_dictionary)

      return date_dict


  def load_data_to_database(dataset_name, data_dic):
-     myclient = pymongo.MongoClient("mongodb://localhost:27017/");
-
-     # Authenticating
-     myclient.admin.authenticate('root', 'root');
+     """
+     Takes data_dic created in method get_data_from_file
+     and loads into into database where collection name is dataset_name + data_dic key
+     and data lines are line in collection

-     # Database DATA
-     mydb = myclient["DATA"]
+     Args:
+         dataset_name: name of dataset that has existing configuration file
+         data_dic: dictionary of data lines created in get_data_from_file
+     """
+     database = create_database_connection()

-     # Collection Datasets
-     collection_datasets = mydb["DATASETS"]
+     # collection where are specified aviable datasets
+     collection_datasets = database[MONGODB_DATASET_COLLECTION]

+     # check if newly added data already have a dataset specified in collection
      dataset_present = collection_datasets.find_one({}, {'name': dataset_name})

      if dataset_present is None:
          collection_datasets.insert_one({'name': dataset_name})

      for date in data_dic:
-         dataset_collections = mydb[dataset_name]
+         dataset_collections = database[dataset_name]
          dataset_collections.insert_one({'name': dataset_name+date})
-         date_dataset = mydb[dataset_name + date]
+         date_dataset = database[dataset_name + date]
          date_dataset.insert_many(data_dic[date])
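For orientation, a sketch of reading the loaded data back with pymongo, assuming the hard-coded localhost connection and root/root credentials above, and assuming the per-day collections hold the dictionaries produced by DatabaseDataLine.to_dictionary; the dataset name and date are example values only.

```python
import pymongo

# connection parameters mirror the constants above; "JIS" + "08042018" is an
# example of the dataset_name + date collection naming used by the loader
client = pymongo.MongoClient("mongodb://localhost:27017/",
                             username="root", password="root")
database = client["DATA"]

for document in database["JIS08042018"].find():
    print(document["place"], document["date"], document["number"])
```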
python-module/Utilities/DateFormating.py
-----------------------------------------
- def date_formater(string_date):
+ def date_formatter(string_date):
+     """
+
+     Args:
+         string_date: string containing date in format 22.08.2018 12:27:00
+
+     Returns:
+         string of date in format 0804201814 ddmmYYYY
+     """
      if string_date[11].isspace():
          pos = 0
          srr = ""
...
      return return_date


- def date_time_formater(string_date):
+ def date_time_formatter(string_date):
+     """
+     Converts one type of date format "dd.mm.yyyy hh.mm.ss" to date format ddmmYYYYhh
+     Args:
+         string_date: string containing date in format 22.08.2018 12:27:00
+
+     Returns:
+         string of date in format 0804201814 ddmmYYYYhh
+     """
      if string_date[11].isspace():
          pos = 0
          srr = ""
...

      return_date = string_date[:2] + string_date[3:5] + string_date[6:10] + string_date[11:13]

-     return return_date
+     return return_date
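A worked example of the slicing on the last shown line, for an already zero-padded input string:

```python
string_date = "22.08.2018 12:27:00"

day   = string_date[:2]     # "22"
month = string_date[3:5]    # "08"
year  = string_date[6:10]   # "2018"
hour  = string_date[11:13]  # "12"

print(day + month + year + hour)  # 2208201812  (ddmmYYYYhh)
```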
python-module/Utilities/FolderProcessor.py
-------------------------------------------
  import os
  import zipfile
+ from CSV import CSVutils


  def list_of_all_files(path):
+     """
+     Get all files from directory and all files written in ignore.txt
+     and return the difference
+     Args:
+         path: path to Directory
+
+     Returns:
+         list with names of all files in directory
+     """
      files_in_dir = os.listdir(path)

      ignore_set = load_ignore_set(path)
...


  def load_ignore_set(path):
+     """
+     Reads ignore.txt line by line and add it to a set
+     Args:
+         path: Path to directory containing ignore.txt file
+
+     Returns:
+         list of names contained in ignore.txt file
+     """
      ignore_set = set()

      with open(path + "ignore.txt", "r") as file:
...
      return ignore_set


- def update_ignore_set(path,file_name):
-
+ def update_ignore_set(path, file_name):
+     """
+     Adds file_name to the ignore file
+     Args:
+         path: Path to directory containing ignore.txt file
+         file_name: name of file you want to add to ignore file
+     """
      with open(path + "ignore.txt", "a") as file:
          file.write(file_name + '\n')


- def get_devices_set(folder):
+ def get_devices_set(path):
+     """
+     Goes trough every not loaded file(not contained in ProcessedData/ignore.txt)
+     Extracts names from not loaded file which should be in first column
+     Creates set of unique devices_names
+
+     Args:
+         path: Path to Processed directory containing ignore.txt file

-     files_in_dir = list_of_all_files(folder)
+     Returns:
+         set of unique names contained in not loaded files
+     """
+     files_in_dir = list_of_all_files(path)

      unique_names = set()

      for file_path in files_in_dir:
-         with open(folder+file_path) as file:
-             for line in file:
-                 array = line.split(";")
-                 name = array[0]
-                 unique_names.add(name)
+         unique_names.add(CSVutils.get_unique_names_from_file(path+file_path, 0))

      return unique_names


- def get_unknown_devices_set(config,devices):
+ def get_unknown_devices_set(config, devices):
+     """
+     Compares config and devices a return difference
+
+     Args:
+         config: loaded configuration file of dataset
+         devices: set of unique devices contained in dataset
+
+     Returns:
+
+     """
      devices_set = set(config["devices"].keys())
      unknown_devices_set = devices.difference(devices_set)

      return unknown_devices_set


- def unzip_all_csv_zip_files_in_folder(folder):
-
-     files_in_dir = os.listdir(folder)
+ def unzip_all_csv_zip_files_in_folder(path):
+     """
+     Load all files from directory and unzip those which end by .zip
+     After unziping deletes the zip file
+     Args:
+         path: Path to CrawledData directory containing ignore.txt file
+     """
+     files_in_dir = os.listdir(path)
      zips = []

      for file in files_in_dir:
          if file.endswith(".zip"):
-             zips.append(folder + file)
+             zips.append(path + file)

      for zip_file in zips:

          with zipfile.ZipFile(zip_file, "r") as unziped_file:
-             unziped_file.extractall(folder)
+             unziped_file.extractall(path)

          os.remove(zip_file)

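The module's bookkeeping revolves around one ignore.txt file per directory: files listed there have already been handled and are skipped on the next run. A condensed sketch of that mechanism, mirroring list_of_all_files and load_ignore_set (whose bodies are partly elided above), not the committed code itself:

```python
import os

def list_of_unprocessed_files(path):
    # files already handled are listed one per line in ignore.txt;
    # ignore.txt lists itself as its first line, so it is skipped too
    ignore_set = set()
    with open(path + "ignore.txt", "r") as file:
        for line in file:
            ignore_set.add(line.rstrip("\n"))

    return [name for name in os.listdir(path) if name not in ignore_set]
```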
python-module/main.py
---------------------
  import Pipeline
  import os

+ # Path to configuration files
  CONFIG_FILES_PATH = "DatasetConfigs/"


  def run_pipeline_for_all_datasets():
+     """
+     Runs whole DataScript pipeline for every dataset that has existing configuration file
+     """
      files_in_dir = os.listdir(CONFIG_FILES_PATH)

      for file in files_in_dir:
-         Pipeline.run_full_pipeline(file)
+         name = file.split('.')
+         Pipeline.run_full_pipeline(name[0])


  def run_pipeline_for_one_dataset(dataset_name):
+     """
+     Runs whole DataScript pipeline for only one dataset
+
+     Args:
+         dataset_name: name of dataset that has existing configuration file
+     """
      Pipeline.run_full_pipeline(dataset_name)

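The changed loop strips the file extension before handing the name to the pipeline, since load_configuration now appends ".yaml" itself:

```python
file = "JIS.yaml"
name = file.split('.')
print(name[0])  # JIS -- the value passed to Pipeline.run_full_pipeline
```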
Also available: Unified diff
Re #7939
- added documentation for methods and classes
- fixed errors in variable names
- added information to the generated scripts