Revision 43697fec
Added by Petr Hlaváč almost 5 years ago
modules/crawler/Pipeline.py

  1 |   1 |   from Utilities import FolderProcessor, ConfigureFunctions
  2 |   2 |   from Utilities.Database import DatabaseLoader
  3 |   3 |
    |   4 | + import logging
    |   5 | + from datetime import date
    |   6 | +
    |   7 | +
  4 |   8 |   # Path to crawled data
  5 |   9 |   CRAWLED_DATA_PATH = "CrawledData/"
  6 |  10 |   # Path to processed data
... | ... |
 13 |  17 |   PROCESSOR_LIB_PATH = "DatasetProcessing."
 14 |  18 |
 15 |  19 |
    |  20 | + #logger
    |  21 | + logging.basicConfig(filename=CRAWLER_LOGS_PATH + "CommonRecords/" + 'Applicationlog-' + date.today().strftime("%b-%Y") + '.log',
    |  22 | +                     level=logging.INFO,
    |  23 | +                     format='%(asctime)s %(message)s'
    |  24 | +                     )
    |  25 | +
    |  26 | +
 16 |  27 |   def check_last_update(config):
 17 |  28 |       """
 18 |  29 |       Loads integer from updated.txt in CrawlerLogs/"dataset_name"
... | ... |
 37 |  48 |       confing_update_period = int(config["update-period"])
 38 |  49 |
 39 |  50 |       if config["update-period"] <= last_update:
 40 |     | -         print("Dataset " + dataset_name + " is being updated")
    |  51 | +         logging.info("Dataset " + dataset_name + " is being updated today")
 41 |  52 |           file.write("0")
 42 |  53 |           file.truncate()
 43 |  54 |           return True
 44 |  55 |       else:
 45 |  56 |           last_update_days = last_update + 1
 46 |     | -         print("Dataset " + dataset_name + " will be updated in " + str(confing_update_period - last_update_days) + "days")
    |  57 | +         logging.info("Dataset " + dataset_name + " will be updated in " + str(confing_update_period - last_update_days) + "days")
 47 |  58 |           file.write(str(last_update_days))
 48 |  59 |           file.truncate()
 49 |  60 |           return False
... | ... |
 60 |  71 |       """
 61 |  72 |       dataset_name = config["dataset-name"]
 62 |  73 |
 63 |     | -     my_function = __import__(CRAWLER_LIB_PATH + dataset_name + "Crawler", globals(), locals(), ['crawl']).crawl
 64 |     | -     my_function(config)
    |  74 | +     crawl_func = __import__(CRAWLER_LIB_PATH + dataset_name + "Crawler", globals(), locals(), ['crawl']).crawl
    |  75 | +     crawl_func(config)
 65 |  76 |
 66 |  77 |       dataset_name += '/'
 67 |  78 |
... | ... |
 82 |  93 |                                      ['process_file']).process_file
 83 |  94 |
 84 |  95 |       not_processed_files = FolderProcessor.list_of_all_files(CRAWLED_DATA_PATH + dataset_path)
    |  96 | +     logging.info(dataset_name + " has downloaded " + str(len(not_processed_files)) + " new files")
 85 |  97 |
 86 |  98 |       for not_processed_file in not_processed_files:
 87 |  99 |           process_file_func(CRAWLED_DATA_PATH + dataset_path + not_processed_file)
 88 | 100 |           FolderProcessor.update_ignore_set(CRAWLED_DATA_PATH + dataset_path, not_processed_file)
 89 | 101 |
    | 102 | +     logging.info(dataset_name + " has processed " + str(len(not_processed_files)) + " newly crawled files")
    | 103 | +
 90 | 104 |
 91 | 105 |   def validate_process_data(config):
 92 | 106 |       """
... | ... |
105 | 119 |       unknown_devices_size = len(unknown_devices_set)
106 | 120 |
107 | 121 |       if unknown_devices_size != 0:
108 |     | -         print("There is " + str(unknown_devices_size) + " unknown devices\n")
109 |     | -         print("Adding devices to " + config["dataset-name"] + " config file\n")
    | 122 | +         logging.info("There is " + str(unknown_devices_size) + " unknown devices")
    | 123 | +         logging.info("Adding devices to " + config["dataset-name"] + " config file")
110 | 124 |           ConfigureFunctions.update_configuration(config["dataset-name"], unknown_devices_set)
111 | 125 |           return False
112 | 126 |
113 | 127 |       for device in config["devices"]:
114 | 128 |           device = config["devices"][device]
115 | 129 |           if device["x"] == "UNKNOWN!" or device["y"] == "UNKNOWN!":
116 |     | -             print(config["dataset-name"] + " Config file contains devices with UNKOWN! values please update them\n")
    | 130 | +             logging.info(config["dataset-name"] + " config file contains devices with UNKOWN! values please update them!!")
117 | 131 |               return False
118 | 132 |
119 | 133 |       return True
... | ... |
143 | 157 |           DatabaseLoader.load_data_to_database(dataset_name, processed_data)
144 | 158 |           FolderProcessor.update_ignore_set(PROCESSED_DATA_PATH + dataset_path, not_loaded_file)
145 | 159 |
    | 160 | +     logging.info(dataset_name + " has loaded to databse " + str(len(not_loaded_files)) + " newly processed files.")
    | 161 | +
146 | 162 |
147 | 163 |   def run_full_pipeline(dataset_name):
148 | 164 |       """
... | ... |
154 | 170 |       Args:
155 | 171 |           dataset_name: name of dataset that has existing configuration file
156 | 172 |       """
    | 173 | +     logging.info("Starting pipeline for dataset " + dataset_name)
    | 174 | +
157 | 175 |       config = ConfigureFunctions.load_configuration(dataset_name)
158 | 176 |       crawl_data(config)
159 | 177 |       process_data(config["dataset-name"])
... | ... |
162 | 180 |
163 | 181 |       if validation_test:
164 | 182 |           load_data_to_database(config)
165 |     | -         print("Dataset " + dataset_name + " has been sucessfully updated\n")
166 | 183 |
167 | 184 |
168 | 185 |
... | ... |
176 | 193 |       Args:
177 | 194 |           dataset_name: name of dataset that has existing configuration file
178 | 195 |       """
    | 196 | +     logging.info("Starting pipeline for dataset " + dataset_name)
    | 197 | +
179 | 198 |       config = ConfigureFunctions.load_configuration(dataset_name)
180 | 199 |       update_test = check_last_update(config)
181 | 200 |       if update_test:
... | ... |
185 | 204 |           validation_test = validate_process_data(config)
186 | 205 |
187 | 206 |           if validation_test:
188 |     | -             load_data_to_database(config)
    | 207 | +             load_data_to_database(config)
    | 208 | +
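The heart of this revision is the month-stamped application log configured when Pipeline.py is imported. Below is a minimal, runnable sketch of that setup; CRAWLER_LOGS_PATH = "CrawlerLogs/" is an assumption (the constant's definition falls outside the shown hunks), and logging.basicConfig does not create missing directories, so CrawlerLogs/CommonRecords/ has to exist beforehand.

import logging
from datetime import date

# Assumed constant; the diff only shows it being used, not defined.
CRAWLER_LOGS_PATH = "CrawlerLogs/"

# One shared application log per month, e.g. CommonRecords/Applicationlog-May-2020.log.
# basicConfig() must run before the first logging call, otherwise the default
# stderr handler is installed instead and no log file is created.
logging.basicConfig(
    filename=CRAWLER_LOGS_PATH + "CommonRecords/"
             + "Applicationlog-" + date.today().strftime("%b-%Y") + ".log",
    level=logging.INFO,
    format="%(asctime)s %(message)s",
)

logging.info("Dataset sample-dataset is being updated today")  # illustrative record only

Note that the filename is evaluated only once, at import time, so a process that keeps running past the end of a month continues writing into the file for the month it started in.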
Also available: Unified diff
Re #7937
Added logging to the CrawlerLogs/CommonRecords folder
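For context on the crawl_func rename inside crawl_data(), the crawler module for each dataset is resolved dynamically from the dataset name, mirroring how process_file is looked up via PROCESSOR_LIB_PATH. A rough sketch of that dispatch pattern follows; the CRAWLER_LIB_PATH value and the FooCrawler module name are hypothetical, chosen by analogy with PROCESSOR_LIB_PATH = "DatasetProcessing.", not values taken from the diff.

import logging

# Hypothetical prefix, by analogy with PROCESSOR_LIB_PATH = "DatasetProcessing."
CRAWLER_LIB_PATH = "DatasetCrawler."

def crawl_data(config):
    """Resolve <dataset-name>Crawler.crawl at runtime and invoke it."""
    dataset_name = config["dataset-name"]

    # With a non-empty fromlist, __import__ returns the submodule itself
    # (e.g. the hypothetical DatasetCrawler.FooCrawler), so .crawl can be read off it directly.
    crawler_module = __import__(CRAWLER_LIB_PATH + dataset_name + "Crawler",
                                globals(), locals(), ['crawl'])
    crawl_func = crawler_module.crawl
    crawl_func(config)

importlib.import_module(CRAWLER_LIB_PATH + dataset_name + "Crawler") would be the more idiomatic spelling of the same lookup and avoids the fromlist argument.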