Projekt

Obecné

Profil

Stáhnout (1.91 KB) Statistiky
| Větev: | Revize:
1
from Utilities.CSV import csv_data_line
2
from Utilities import date_formating
3
import logging
4
from datetime import date
5
import time
6
import datetime
7

    
8
from shared_types import DateDict
9

    
10
# Configure the root logger when this module is imported: records of level
# INFO and above are written to a log file whose name carries the current
# abbreviated month and year (strftime "%b-%Y"), each record prefixed with
# its timestamp. The path is relative to the process working directory.
# NOTE(review): the two path literals are concatenated without a separator,
# so the resulting name is "../../CrawlerLogsCrawlerlog-<Mon>-<YYYY>.log" --
# confirm whether "../../CrawlerLogs/" was intended before changing it.
logging.basicConfig(filename='../../CrawlerLogs' + 'Crawlerlog-' +
                    date.today().strftime("%b-%Y") + '.log',
                    level=logging.INFO,
                    format='%(asctime)s %(message)s')
14

    
15

    
16
def process_file(filename: str) -> DateDict:
    """
    Parse one crawled file into a date dictionary.

    The file is read line by line and every line is split on ';'. Fields are
    addressed by position; fields 2, 3 and 10 have their first and last
    character dropped before use (presumably surrounding quotes).

    For every accepted line:
      * the later of the two "HH:MM" times in fields 2 and 3 is chosen,
      * characters 1..10 of field 14 plus that time are passed to
        date_formating.date_time_formatter to obtain the date key
        (described by the original author as YYYY-mm-dd-hh,
        e.g. 2018-04-08-15),
      * field 10 is the name (described by the original author as a device
        specified in the configuration file),
      * field 12, converted to int, is the occurrence.

    Lines whose field 2 or whose name is empty are skipped. When a name
    appears again for the same date key, the stored occurrence is replaced
    by the value of the later line (it is not summed).

    Args:
        filename: path of the crawled file to process

    Returns:
        Dictionary mapping date key -> {name -> csv_data_line.CSVDataLine};
        empty when the file contains no accepted line.

    Raises:
        IndexError: a line has fewer fields than the positions read here.
        ValueError: a time is not in "%H:%M" format or field 12 is not an
            integer.
    """
    # Slice bounds of the date portion inside field 14.
    YEAR_START = 1
    YEAR_END = 11
    TIME_FORMAT = "%H:%M"

    date_dict = {}

    with open(filename, "r") as file:
        for line in file:
            fields = line.split(";")

            first_time = fields[2][1:-1]
            if first_time == "":
                continue

            second_time = fields[3][1:-1]

            # Pick the later time. The parsed datetimes are compared
            # directly: strptime fills in 1900-01-01 for the missing date,
            # and converting such a pre-epoch value with time.mktime is
            # platform-dependent (it can raise OverflowError).
            later_time = max(
                first_time,
                second_time,
                key=lambda value: datetime.datetime.strptime(
                    value, TIME_FORMAT))

            date_key = date_formating.date_time_formatter(
                fields[14][YEAR_START:YEAR_END] + " " + later_time)

            name = fields[10][1:-1]
            if name == "":
                continue

            occurrence = int(fields[12])
            lines_for_date = date_dict.setdefault(date_key, {})

            if name in lines_for_date:
                # A repeated name overwrites the previous occurrence.
                lines_for_date[name].occurrence = occurrence
            else:
                lines_for_date[name] = csv_data_line.CSVDataLine(
                    name, date_key, occurrence)

    return date_dict
(3-3/4)