import os

# Path to crawled data
CRAWLED_DATA_PATH = "CrawledData/"
# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
# Path for DatasetCrawlers implementations
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
# Path for DatasetProcessors implementations
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
# Path to dataset configuration files
CONFIG_FILES_PATH = "DatasetConfigs"
# Default color for dataset visualization (bubble info in map)
DEFAULT_COLOR = "#000000"


def create_default_config_file(dataset_name: str):
    """
    Creates a default config file for the dataset

    Args:
        dataset_name: Name of the newly created dataset
    """
    with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file:
        file.write("# dataset name shown in the application\n")
        file.write("display-name: " + dataset_name + "\n")
        file.write("# color for this dataset as a hexadecimal value (#000000)\n")
        file.write("display-color: " + DEFAULT_COLOR + "\n")
        file.write("# one-word dataset name used to represent it in the architecture\n")
        file.write("dataset-name: " + dataset_name + "\n")
        file.write("# root folder that contains links to the dataset\n")
        file.write("url: INSERT URL HERE\n")
        file.write("# optional parameter specifying the name pattern of the dataset files to download\n")
        file.write("regex: INSERT REGEX HERE\n")
        file.write("# optional parameter specifying how often to look for new datasets; if empty, "
                   "the default value is used (days)\n")
        file.write("update-period: INSERT VALUE HERE\n")
        file.write("# positions of the individual devices in the dataset\n")
        file.write("devices:\n")
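
# For reference, the config generated for a hypothetical dataset named "WIFI"
# would look roughly like this (generated comments omitted):
#
#   display-name: WIFI
#   display-color: #000000
#   dataset-name: WIFI
#   url: INSERT URL HERE
#   regex: INSERT REGEX HERE
#   update-period: INSERT VALUE HERE
#   devices: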


def create_default_processor(dataset_name):
    """
    Creates a default processor for the dataset

    Args:
        dataset_name: Name of the newly created dataset
    """
    with open(PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "_processor.py", "w") as file:
        file.write("from Utilities.CSV import csv_data_line\n")
        file.write("\n")
        file.write("\n")
        file.write("def process_file(filename):\n")
        file.write("    \"\"\"\n")
        file.write("    Method that takes the path to a crawled file and outputs a date dictionary:\n")
        file.write("    Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)\n")
        file.write("    and value is a dictionary where keys are devices (specified in the configuration file)\n")
        file.write("    and value is CSVDataLine.csv_data_line with device, date and occurrence\n")
        file.write("\n")
        file.write("    Args:\n")
        file.write("        filename: name of the processed file\n")
        file.write("\n")
        file.write("    Returns:\n")
        file.write("        None if not implemented\n")
        file.write("        date_dict when implemented\n")
        file.write("    \"\"\"\n")
        file.write("    date_dict = dict()\n")
        file.write("\n")
        file.write("    # with open(filename, \"r\") as file:\n")
        file.write("    print(\"You must implement the process_file method first!\")\n")
        file.write("    return None\n")
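
# For orientation, a filled-in process_file might look roughly like the sketch
# below. The input line format and the csv_data_line(device, date, occurrence)
# signature are assumptions; check Utilities/CSV and the crawled data for the
# real ones.
#
#   def process_file(filename):
#       date_dict = dict()
#       with open(filename, "r") as file:
#           for line in file:
#               device, timestamp, occurrence = line.strip().split(";")
#               date = timestamp[:13]  # YYYY-mm-dd-hh
#               date_dict.setdefault(date, {})[device] = \
#                   csv_data_line(device, date, int(occurrence))
#       return date_dict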


def create_default_crawler(dataset_name):
    """
    Creates a default crawler for the dataset

    Args:
        dataset_name: Name of the newly created dataset
    """
    with open(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "_crawler.py", "w") as file:
        file.write("# Path to crawled data\n")
        file.write("CRAWLED_DATA_PATH = \"CrawledData/\"\n")
        file.write("\n")
        file.write("\n")
        file.write("def crawl(config):\n")
        file.write("    \"\"\"\n")
        file.write("    Implement a crawl method that downloads new data to path_for_files.\n")
        file.write("    To keep the project structure, use url, regex and dataset_name from config.\n")
        file.write("    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n")
        file.write("\n")
        file.write("    Args:\n")
        file.write("        config: loaded configuration file of the dataset\n")
        file.write("    \"\"\"\n")
        file.write("    dataset_name = config[\"dataset-name\"]\n")
        file.write("    url = config['url']\n")
        file.write("    regex = config['regex']\n")
        file.write("    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n")
        file.write("    print(\"You must implement the crawl method first!\")\n")
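
# A generated crawler could be filled in along these lines. Since
# BasicCrawlerFunctions.py is not shown here, this sketch uses only the
# standard library, and the link-listing logic is an assumption:
#
#   import os
#   import re
#   import urllib.request
#
#   def crawl(config):
#       dataset_name = config["dataset-name"]
#       url = config['url']
#       regex = config['regex']
#       path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'
#       page = urllib.request.urlopen(url).read().decode("utf-8")
#       for link in re.findall(regex, page):
#           target = os.path.join(path_for_files, os.path.basename(link))
#           urllib.request.urlretrieve(link, target)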


def prepare_dataset_structure(dataset_name):
    """
    Prepares folders for the new dataset

    Args:
        dataset_name: Name of the newly created dataset
    """
    # create folder for crawled data
    path = CRAWLED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:
        print(e)
        print("Creation of the directory %s failed" % path)

    # create folder for processed data
    path = PROCESSED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError:
        print("Creation of the directory %s failed" % path)

    create_default_crawler(dataset_name)
    create_default_processor(dataset_name)
    create_default_config_file(dataset_name)
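
# Calling prepare_dataset_structure("WIFI") (dataset name hypothetical)
# creates the following files and folders:
#
#   CrawledData/WIFI/
#   ProcessedData/WIFI/
#   DatasetCrawler/WIFI_crawler.py
#   DatasetProcessing/WIFI_processor.py
#   DatasetConfigs/WIFI.yaml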


print("Enter the name of the new dataset:\n")

dataset_name = input().upper()

if dataset_name.isalpha():
    prepare_dataset_structure(dataset_name)
    print("Architecture created\n")
else:
    print("The name must contain only letters of the alphabet (no spaces)\n")
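
# Example session: entering "wifi" at the prompt creates the WIFI dataset
# skeleton listed above; a name such as "wifi 2" is rejected because
# isalpha() fails on spaces and digits.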