Projekt

Obecné

Profil

Stáhnout (5.31 KB) Statistiky
| Větev: | Revize:
import os

# Path to crawled data
CRAWLED_DATA_PATH = "CrawledData/"
# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
# Path for DatasetCrawlers implementations
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
# Path for DatasetProcessors implementations
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
# Path to dataset configuration files
CONFIG_FILES_PATH = "DatasetConfigs"
def create_default_config_file(dataset_name):
    """
    Create a default YAML configuration file for a newly created dataset.

    Writes CONFIG_FILES_PATH/<dataset_name>.yaml with placeholder values the
    user must fill in. The comments written into the YAML file are in Czech
    because they are user-facing content of the generated file.

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file:
        file.write("# jmeno datasetu, pod kterym bude zobrazen v aplikaci\n")
        file.write("display-name: " + dataset_name + "\n")
        file.write("# jednoslovný název datasetu, pod kterym bude reprezentovana v architektuře\n")
        file.write("dataset-name: " + dataset_name + "\n")
        file.write("# root slozka, ktera obsahuje odkazy na dataset\n")
        file.write("url: ZDE VLOZTE URL\n")
        # typo fixed in the generated comment: "jmrna" -> "jmena"
        file.write("# volitelny parameter, ktery specifikuje vzor jmena datasetu, ktera se budou stahovat\n")
        file.write("regex: ZDE VLOZTE REGEX\n")
        file.write("# volitelny parametr, ktery udava jak casto se budou hledat nove datasety, pokud prazdne, "
                   "tak defaultni hodnota (dny)\n")
        file.write("update-period: ZDE VLOZTE HODNOTU\n")
        file.write("# pozice jednotlivych zarizeni, ktera jsou v datasetu\n")
        file.write("devices:\n")
def create_default_processor(dataset_name):
    """
    Create a default (stub) processor script for a dataset.

    Writes PROCESSOR_PROGRAM_PATH/<dataset_name>_processor.py containing a
    process_file() stub that prints a reminder and returns None until the
    user implements it.

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "_processor.py", "w") as file:
        file.write("from Utilities.CSV import csv_data_line")
        file.write("\n")
        file.write("\n")
        file.write("def process_file(filename):\n")
        file.write("    \"\"\"\n")
        file.write("    Method that take path to crawled file and outputs date dictionary:\n")
        file.write("    Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)\n")
        file.write("    and value is dictionary where keys are devices (specified in configuration file)\n")
        file.write("    and value is CSVDataLine.csv_data_line with device,date and occurrence\n")
        file.write("\n")
        file.write("    Args:\n")
        file.write("    filename: name of processed file\n")
        file.write("\n")
        file.write("    Returns:\n")
        file.write("    None if not implemented\n")
        file.write("    date_dict when implemented\n")
        file.write("    \"\"\"\n")
        file.write("    date_dict = dict()\n")
        file.write("\n")
        file.write("    #with open(filename, \"r\") as file:\n")
        # grammar fixed in the user-facing stub message ("implements" -> "implement")
        file.write("    print(\"You must implement process_file method first!\")\n")
        file.write("    return None\n")
def create_default_crawler(dataset_name):
    """
    Create a default (stub) crawler script for a dataset.

    Writes CRAWLER_PROGRAM_PATH/<dataset_name>_crawler.py containing a
    crawl(config) stub that reads the dataset configuration and prints a
    reminder until the user implements the actual download logic.

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "_crawler.py", "w") as file:
        file.write("# Path to crawled data\n")
        file.write("CRAWLED_DATA_PATH = \"CrawledData/\"\n")
        file.write("\n")
        file.write("\n")
        file.write("def crawl(config):\n")
        file.write("    \"\"\"\n")
        file.write("    Implement crawl method that downloads new data to path_for_files\n")
        file.write("    For keeping the project structure\n")
        file.write("    url , regex, and dataset_name from config\n")
        file.write("    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n")
        file.write("\n")
        file.write("    Args:\n")
        file.write("        config: loaded configuration file of dataset\n")
        file.write("    \"\"\"\n")
        file.write("    dataset_name = config[\"dataset-name\"]\n")
        file.write("    url = config['url']\n")
        file.write("    regex = config['regex']\n")
        file.write("    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n")
        # grammar fixed in the user-facing stub message ("implements Crawl" -> "implement crawl",
        # lowercased to match the generated function's actual name)
        file.write("    print(\"You must implement crawl method first!\")\n")
def prepare_dataset_structure(dataset_name):
    """
    Prepare the folder structure and skeleton files for a new dataset.

    Creates CRAWLED_DATA_PATH/<dataset_name> and PROCESSED_DATA_PATH/<dataset_name>
    directories, then generates the default crawler, processor and config file.
    Directory-creation failures (e.g. the directory already exists) are reported
    but do not abort the rest of the setup.

    Args:
        dataset_name: Name of newly created dataset
    """
    # create folder for crawled data
    path = CRAWLED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:  # unified on OSError (os.error is just an alias)
        print(e)
        print("Creation of the directory %s failed" % path)

    # create folder for processed data
    path = PROCESSED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:  # now also reports the underlying error, like the branch above
        print(e)
        print("Creation of the directory %s failed" % path)

    create_default_crawler(dataset_name)
    create_default_processor(dataset_name)
    create_default_config_file(dataset_name)
# Interactive entry point: ask the user for a dataset name (Czech prompts are
# user-facing and kept as-is), normalize it to upper case, and build the
# dataset skeleton when the name is purely alphabetic.
print("Zadejte jméno nového datasetu:\n")

dataset_name = input().upper()

# isalpha() rejects spaces, digits and empty input, matching the error message
if dataset_name.isalpha():
    prepare_dataset_structure(dataset_name)
    print("Architektura vytvořena \n")
else:
    print("Jméno musí obsahovat pouze písmena z abecedy (bez mezer)\n")