1 |
81980e82
|
ballakt
|
from Utilities.CSV import csv_data_line
|
2 |
|
|
from Utilities import date_formating
|
3 |
|
|
import logging
|
4 |
|
|
from datetime import date
|
5 |
|
|
import time
|
6 |
|
|
import datetime
|
7 |
|
|
|
8 |
af7609b5
|
Tomáš Ballák
|
from shared_types import DateDict
|
9 |
|
|
|
10 |
81980e82
|
ballakt
|
# Configure the root logger once, at import time: INFO level, every record
# prefixed with its timestamp, written to a log file whose name carries the
# current month and year (strftime "%b-%Y").
# NOTE(review): the first two path pieces are joined without a separator, so
# the resulting file name starts with "CrawlerLogsCrawlerlog-" and is placed
# directly in "../../" - confirm whether a "CrawlerLogs/" directory was
# intended before changing it.
logging.basicConfig(
    format='%(asctime)s %(message)s',
    level=logging.INFO,
    filename=''.join((
        '../../CrawlerLogs',
        'Crawlerlog-',
        date.today().strftime("%b-%Y"),
        '.log',
    )),
)
|
14 |
|
|
|
15 |
|
|
|
16 |
af7609b5
|
Tomáš Ballák
|
def process_file(filename: str) -> DateDict:
    """
    Parse a crawled, semicolon-separated file into a date dictionary.

    The date dictionary maps a date key, as returned by
    date_formating.date_time_formatter (described by the original author
    as YYYY-mm-dd-hh, e.g. 2018-04-08-15), to an inner dictionary that
    maps a device name to a csv_data_line.CSVDataLine built from the
    device name, the date key and the occurrence.

    Columns read from every line (0-based, after splitting on ";"):
        2, 3 - two "%H:%M" times; the later one is used
        10   - device name
        12   - occurrence, parsed with int()
        14   - date; characters 1 to 10 are used
    Columns 2, 3 and 10 are read with their first and last character
    removed (presumably surrounding quotes - TODO confirm against the
    crawled data).

    Lines whose column 2 or column 10 is empty after that stripping are
    skipped. When one device appears more than once under the same date
    key, the occurrence of the last such line wins (it is overwritten,
    not summed).

    Args:
        filename: path of the crawled file to process

    Returns:
        date_dict as described above; empty when no line qualifies

    Raises:
        IndexError: a line has fewer columns than are read
        ValueError: a time is not "%H:%M" or the occurrence is not an int
    """
    # Column 14 is sliced [1:11], i.e. ten characters after the first one.
    # NOTE(review): presumably a quoted date followed by more text - confirm.
    date_start = 1
    date_end = 11

    date_dict = {}

    with open(filename, "r") as file:
        for line in file:
            fields = line.split(";")

            first_time = fields[2][1:-1]
            if first_time == "":
                continue

            # Drop nameless lines before doing any parsing work for them.
            name = fields[10][1:-1]
            if name == "":
                continue

            # pick later time
            # The parsed datetimes are compared directly. strptime with only
            # "%H:%M" yields a value dated 1900-01-01, and converting that
            # through time.mktime is platform-dependent (it may raise
            # OverflowError); the ordering of the two values is the same.
            later_time = max(
                first_time,
                fields[3][1:-1],
                key=lambda value: datetime.datetime.strptime(value, "%H:%M"))

            # Named date_key so the `date` class imported at module level is
            # not shadowed inside this function.
            date_key = date_formating.date_time_formatter(
                fields[14][date_start:date_end] + " " + later_time)

            occurrence = int(fields[12])
            devices = date_dict.setdefault(date_key, {})
            if name in devices:
                devices[name].occurrence = occurrence
            else:
                devices[name] = csv_data_line.CSVDataLine(
                    name, date_key, occurrence)

    return date_dict
|