Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 34cf65cd

Přidáno uživatelem Petr Hlaváč asi před 4 roky

Fixed issues found in code documentation

Zobrazit rozdíly:

python-module/DatasetCrawler/KOLOBEZKYCrawler.py
1 1
from Utilities import FolderProcessor
2
from Utilities.Crawler import BasicCrawler
2
from Utilities.Crawler import BasicCrawlerFunctions
3 3

  
4 4
# Path to crawled data
5 5
CRAWLED_DATA_PATH = "CrawledData/"
......
20 20
    regex = config['regex']
21 21
    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'
22 22

  
23
    first_level_links = BasicCrawler.get_all_links(url)
24
    filtered_first_level_links = BasicCrawler.filter_links(first_level_links, "^OD_ZCU")
25
    absolute_first_level_links = BasicCrawler.create_absolute_links(filtered_first_level_links, url)
23
    first_level_links = BasicCrawlerFunctions.get_all_links(url)
24
    filtered_first_level_links = BasicCrawlerFunctions.filter_links(first_level_links, "^OD_ZCU")
25
    absolute_first_level_links = BasicCrawlerFunctions.create_absolute_links(filtered_first_level_links, url)
26 26

  
27 27
    files = []
28 28

  
29 29
    for link in absolute_first_level_links:
30
        second_level_links = BasicCrawler.get_all_links(link)
31
        filtered_second_level_links = BasicCrawler.filter_links(second_level_links, regex)
32
        absolute_second_level_links = BasicCrawler.create_absolute_links(filtered_second_level_links, link)
33
        final_links = BasicCrawler.remove_downloaded_links(absolute_second_level_links, dataset_name)
30
        second_level_links = BasicCrawlerFunctions.get_all_links(link)
31
        filtered_second_level_links = BasicCrawlerFunctions.filter_links(second_level_links, regex)
32
        absolute_second_level_links = BasicCrawlerFunctions.create_absolute_links(filtered_second_level_links, link)
33
        final_links = BasicCrawlerFunctions.remove_downloaded_links(absolute_second_level_links, dataset_name)
34 34

  
35 35
        for file_link in final_links:
36 36
            files.append(file_link)
37 37

  
38 38
    for file in files:
39
        BasicCrawler.download_file_from_url(file, "CrawledData/" + dataset_name + "/", dataset_name)
39
        BasicCrawlerFunctions.download_file_from_url(file, CRAWLED_DATA_PATH + dataset_name + "/", dataset_name)
40 40

  
41
    FolderProcessor.unzip_all_csv_zip_files_in_folder("CrawledData/" + dataset_name + "/")
41
    FolderProcessor.unzip_all_csv_zip_files_in_folder(CRAWLED_DATA_PATH + dataset_name + "/")

Také k dispozici: Unified diff