import os

# Path to crawled data
CRAWLED_DATA_PATH = "CrawledData/"
# Path to processed data
PROCESSED_DATA_PATH = "ProcessedData/"
# Path for DatasetCrawlers implementations
CRAWLER_PROGRAM_PATH = "DatasetCrawler"
# Path for DatasetProcessors implementations
PROCESSOR_PROGRAM_PATH = "DatasetProcessing"
# Path to dataset configuration files
CONFIG_FILES_PATH = "DatasetConfigs"
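
# Note: all of the paths above are relative to the working directory the script is
# run from; these top-level folders are expected to exist already, only the
# per-dataset subfolders and template files are created by this script.

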
def create_default_config_file(dataset_name):
    """
    Creates a default config file for the dataset

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(CONFIG_FILES_PATH + "/" + dataset_name + ".yaml", "w") as file:
        file.write("# name of the dataset as it is displayed in the application\n")
        file.write("dataset-name: " + dataset_name + "\n")
        file.write("# single-word name of the dataset used inside the architecture\n")
        file.write("dataset-name: " + dataset_name + "\n")
        file.write("# root folder that contains links to the dataset\n")
        file.write("url: INSERT URL HERE\n")
        file.write("# optional parameter that specifies a pattern of the dataset file names to download\n")
        file.write("regex: INSERT REGEX HERE\n")
        file.write("# optional parameter that sets how often to look for new datasets; if empty, "
                   "the default value is used (days)\n")
        file.write("update-period: INSERT VALUE HERE\n")
        file.write("# positions of the individual devices contained in the dataset\n")
        file.write("devices:\n")


def create_default_processor(dataset_name):
    """
    Creates a default processor for the dataset

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(PROCESSOR_PROGRAM_PATH + "/" + dataset_name + "_processor.py", "w") as file:
        file.write("from Utilities.CSV import csv_data_line")
        file.write("\n")
        file.write("\n")
        file.write("def process_file(filename):\n")
        file.write("    \"\"\"\n")
        file.write("    Method that takes the path to a crawled file and outputs a date dictionary:\n")
        file.write("    the date dictionary is a dictionary whose keys are dates in the format ddmmYYYYhh (0804201815)\n")
        file.write("    and whose values are dictionaries where keys are devices (specified in the configuration file)\n")
        file.write("    and values are CSVDataLine.csv_data_line with device, date and occurrence\n")
        file.write("\n")
        file.write("    Args:\n")
        file.write("    filename: name of the processed file\n")
        file.write("\n")
        file.write("    Returns:\n")
        file.write("    None if not implemented\n")
        file.write("    date_dict when implemented\n")
        file.write("    \"\"\"\n")
        file.write("    date_dict = dict()\n")
        file.write("\n")
        file.write("    # with open(filename, \"r\") as file:\n")
        file.write("    print(\"You must implement the process_file method first!\")\n")
        file.write("    return None\n")


def create_default_crawler(dataset_name):
    """
    Creates a default crawler for the dataset

    Args:
        dataset_name: Name of newly created dataset
    """
    with open(CRAWLER_PROGRAM_PATH + "/" + dataset_name + "_crawler.py", "w") as file:
        file.write("# Path to crawled data\n")
        file.write("CRAWLED_DATA_PATH = \"CrawledData/\"\n")
        file.write("\n")
        file.write("\n")
        file.write("def crawl(config):\n")
        file.write("    \"\"\"\n")
        file.write("    Implement a crawl method that downloads new data into path_for_files\n")
        file.write("    in order to keep the project structure,\n")
        file.write("    using url, regex and dataset_name from config.\n")
        file.write("    You can use already implemented functions from Utilities/Crawler/BasicCrawlerFunctions.py\n")
        file.write("\n")
        file.write("    Args:\n")
        file.write("        config: loaded configuration file of dataset\n")
        file.write("    \"\"\"\n")
        file.write("    dataset_name = config[\"dataset-name\"]\n")
        file.write("    url = config['url']\n")
        file.write("    regex = config['regex']\n")
        file.write("    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'\n")
        file.write("    print(\"You must implement the crawl method first!\")\n")


def prepare_dataset_structure(dataset_name):
    """
    Prepares folders for a new dataset

    Args:
        dataset_name: Name of newly created dataset
    """
    # create folder for crawled data
    path = CRAWLED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:
        print(e)
        print("Creation of the directory %s failed" % path)

    # create folder for processed data
    path = PROCESSED_DATA_PATH + dataset_name
    try:
        os.mkdir(path)
    except OSError as e:
        print(e)
        print("Creation of the directory %s failed" % path)

    create_default_crawler(dataset_name)
    create_default_processor(dataset_name)
    create_default_config_file(dataset_name)


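# Example of a full run (names are illustrative): entering "kolobezky" at the prompt
# creates CrawledData/KOLOBEZKY/ and ProcessedData/KOLOBEZKY/, plus the template files
# DatasetCrawler/KOLOBEZKY_crawler.py, DatasetProcessing/KOLOBEZKY_processor.py and
# DatasetConfigs/KOLOBEZKY.yaml, because the entered name is upper-cased below.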
print("Zadejte jméno nového datasetu:\n")
127
prepare_dataset_structure(input().upper())