Projekt

Obecné

Profil

Stáhnout (1.91 KB) Statistiky
| Větev: | Revize:
1 81980e82 ballakt
from Utilities.CSV import csv_data_line
2
from Utilities import date_formating
3
import logging
4
from datetime import date
5
import time
6
import datetime
7
8 af7609b5 Tomáš Ballák
from shared_types import DateDict
9
10 81980e82 ballakt
# Module-wide logging setup: INFO level and above, one timestamped message
# per line, written to a file whose name carries the current month and year
# (strftime "%b-%Y").
# NOTE(review): the two path literals below are joined without a separator,
# so the resulting file name starts with '../../CrawlerLogsCrawlerlog-' and
# the log is not placed inside a 'CrawlerLogs' directory - confirm intended.
logging.basicConfig(
    format='%(asctime)s %(message)s',
    level=logging.INFO,
    filename='../../CrawlerLogs' + 'Crawlerlog-' +
             date.today().strftime("%b-%Y") + '.log',
)
14
15
16 af7609b5 Tomáš Ballák
def process_file(filename: str) -> DateDict:
    """
    Parse a crawled semicolon-separated file into a date dictionary.

    The date dictionary maps a date key, as returned by
    date_formating.date_time_formatter (documented format YYYY-mm-dd-hh,
    e.g. 2018-04-08-15), to a dictionary whose keys are device names and
    whose values are csv_data_line.CSVDataLine objects created from the
    device name, the date key and the occurrence.

    Lines are skipped when they have too few fields, when the first time
    field is empty, or when the device name is empty. When the same device
    appears more than once under one date key, the occurrence of the last
    such line replaces the earlier one.

    Args:
        filename: path of the crawled file to process

    Returns:
        date_dict as described above (empty if no line was usable)
    """
    # Column positions inside one ';'-separated line.
    FIRST_TIME = 2
    SECOND_TIME = 3
    NAME = 10
    OCCURRENCE = 12
    DATE = 14
    # Character range of the date column that is handed to the formatter.
    YEAR_START = 1
    YEAR_END = 11

    date_dict = {}

    with open(filename, "r") as file:
        for line in file:
            array = line.split(";")

            # Blank or truncated lines would raise IndexError below.
            if len(array) <= DATE:
                if line.strip():
                    logging.warning(
                        "Skipping malformed line in %s: %r", filename, line)
                continue

            # [1:-1] drops the first and last character of a field
            # (presumably the surrounding quotes - verify against the data).
            first_time = array[FIRST_TIME][1:-1]
            if first_time == "":
                continue

            # Check the name before any parsing so discarded lines cost
            # nothing and cannot fail on their time/date columns.
            name = array[NAME][1:-1]
            if name == "":
                continue

            # Pick the later of the two times. The parsed datetime objects
            # are compared directly: going through time.mktime() on the
            # year-1900 value produced by "%H:%M" is platform dependent and
            # can raise OverflowError.
            time_ = max(
                first_time,
                array[SECOND_TIME][1:-1],
                key=lambda x: datetime.datetime.strptime(x, "%H:%M"))

            date_key = date_formating.date_time_formatter(
                array[DATE][YEAR_START:YEAR_END] + " " + time_)

            occurrence = int(array[OCCURRENCE])

            devices = date_dict.setdefault(date_key, {})
            if name in devices:
                # Same device seen again for this date key: last line wins.
                devices[name].occurrence = occurrence
            else:
                devices[name] = csv_data_line.CSVDataLine(
                    name, date_key, occurrence)

    return date_dict