Revize 81980e82
Přidáno uživatelem Tomáš Ballák před více než 4 roky(ů)
modules/crawler/.devcontainer/devcontainer.json | ||
---|---|---|
1 | 1 |
{ |
2 | 2 |
//TODO: dodat extensionu pro yaml soubory!!! |
3 | 3 |
"forwardPorts": [ |
4 |
8000 |
|
4 |
8000, |
|
5 |
443, |
|
6 |
80 |
|
5 | 7 |
], |
6 | 8 |
"extensions": [ |
7 | 9 |
"vscode-icons-team.vscode-icons", |
... | ... | |
16 | 18 |
"../../../docker-compose.yml", |
17 | 19 |
"../../../docker-compose-dev.yml" |
18 | 20 |
], |
21 |
"settings": { |
|
22 |
"terminal.integrated.shell.linux": "/bin/sh" |
|
23 |
}, |
|
19 | 24 |
"mounts": [ |
20 | 25 |
"source=vscode-extensions-python,target=/src/.vscode-server/extensions,type=volume" |
21 | 26 |
] |
modules/crawler/DatasetConfigs/JIS.yaml | ||
---|---|---|
131 | 131 |
y: 13.3586728 |
132 | 132 |
|
133 | 133 |
- STUD_UB211: |
134 |
x: 49.7251661
|
|
135 |
y: 13.3530436
|
|
134 |
x: 49.7256172
|
|
135 |
y: 13.3532294
|
|
136 | 136 |
|
137 | 137 |
- KL-Posilovna: |
138 | 138 |
x: 49.7368708 |
... | ... | |
179 | 179 |
y: 13.37270 |
180 | 180 |
|
181 | 181 |
- STUD_UB113: |
182 |
x: 49.7253256
|
|
183 |
y: 13.3531456
|
|
182 |
x: 49.7256406
|
|
183 |
y: 13.3531944
|
|
184 | 184 |
|
185 | 185 |
- Menza1-kasa-p: |
186 | 186 |
x: 49.7473178 |
modules/crawler/DatasetConfigs/OBSAZENIMISTNOSTI.yaml | ||
---|---|---|
1 |
# jmeno datasetu, pod kterym bude zobrazen v aplikaci |
|
2 |
display-name: Obsazení místností |
|
3 |
# jednoslovný název datasetu, pod kterym bude reprezentovana v architektuře |
|
4 |
dataset-name: OBSAZENIMISTNOSTI |
|
5 |
# root slozka, ktera obsahuje odkazy na dataset |
|
6 |
url: https://openstore.zcu.cz/ |
|
7 |
# volitelny parametr, ktery specifikuje vzor jmena datasetu, ktera se budou stahovat |
|
8 |
regex: OD_ZCU_OBSAZENI_[0-9][0-9]_[0-9][0-9][0-9][0-9]_CSV.zip |
|
9 |
# volitelny parametr, ktery udava jednou za kolik dní se budou nová data vyhledávat |
|
10 |
update-period: 24 |
|
11 |
# pozice jednotlivych zarizeni, ktera jsou v datasetu |
|
12 |
devices: |
|
13 |
- EP-110: |
|
14 |
x: 49.7240758 |
|
15 |
y: 13.3503422 |
|
16 |
- EP-120: |
|
17 |
x: 49.7242717 |
|
18 |
y: 13.3502872 |
|
19 |
- EP-130: |
|
20 |
x: 49.7242311 |
|
21 |
y: 13.3506561 |
|
22 |
- EP-206: |
|
23 |
x: 49.7240550 |
|
24 |
y: 13.3506817 |
|
25 |
- EP-208: |
|
26 |
x: 49.7239544 |
|
27 |
y: 13.3507031 |
|
28 |
- UF-124: |
|
29 |
x: 49.7253672 |
|
30 |
y: 13.3522111 |
|
31 |
- UP-101: |
|
32 |
x: 49.7247431 |
|
33 |
y: 13.3504881 |
|
34 |
- UP-104: |
|
35 |
x: 49.7247206 |
|
36 |
y: 13.3502172 |
|
37 |
- UP-108: |
|
38 |
x: 49.7249114 |
|
39 |
y: 13.3501636 |
|
40 |
- UP-112: |
|
41 |
x: 49.7249600 |
|
42 |
y: 13.3504022 |
|
43 |
- UP-115: |
|
44 |
x: 49.7249669 |
|
45 |
y: 13.3504667 |
|
46 |
- US-207: |
|
47 |
x: 49.7268203 |
|
48 |
y: 13.3517944 |
|
49 |
- US-217: |
|
50 |
x: 49.7268950 |
|
51 |
y: 13.3515047 |
|
52 |
- UU-104: |
|
53 |
x: 49.7248192 |
|
54 |
y: 13.3521414 |
|
55 |
- UU-106: |
|
56 |
x: 49.7248089 |
|
57 |
y: 13.3519550 |
|
58 |
- UU-206: |
|
59 |
x: 49.7248089 |
|
60 |
y: 13.3519550 |
|
61 |
- UU-305b: |
|
62 |
x: 49.7248072 |
|
63 |
y: 13.3520450 |
|
64 |
- UU-305: |
|
65 |
x: 49.7248089 |
|
66 |
y: 13.3519550 |
|
67 |
- UU-306: |
|
68 |
x: 49.7249978 |
|
69 |
y: 13.3518961 |
|
70 |
- UU-307: |
|
71 |
x: 49.7246900 |
|
72 |
y: 13.3518664 |
|
73 |
- UU-308: |
|
74 |
x: 49.7248919 |
|
75 |
y: 13.3518222 |
|
76 |
- UU-309: |
|
77 |
x: 49.7248764 |
|
78 |
y: 13.3516278 |
|
79 |
|
|
80 |
- SP-107: |
|
81 |
x: UNKNOWN! |
|
82 |
y: UNKNOWN! |
|
83 |
|
|
84 |
- JJ-302: |
|
85 |
x: UNKNOWN! |
|
86 |
y: UNKNOWN! |
|
87 |
|
|
88 |
- VC-333: |
|
89 |
x: UNKNOWN! |
|
90 |
y: UNKNOWN! |
|
91 |
|
|
92 |
- UL-303: |
|
93 |
x: UNKNOWN! |
|
94 |
y: UNKNOWN! |
|
95 |
|
|
96 |
- JJ-310: |
|
97 |
x: UNKNOWN! |
|
98 |
y: UNKNOWN! |
|
99 |
|
|
100 |
- JJ-215: |
|
101 |
x: UNKNOWN! |
|
102 |
y: UNKNOWN! |
|
103 |
|
|
104 |
- UL-608: |
|
105 |
x: UNKNOWN! |
|
106 |
y: UNKNOWN! |
|
107 |
|
|
108 |
- RJ-208: |
|
109 |
x: UNKNOWN! |
|
110 |
y: UNKNOWN! |
|
111 |
|
|
112 |
- KL-311: |
|
113 |
x: UNKNOWN! |
|
114 |
y: UNKNOWN! |
|
115 |
|
|
116 |
- CH-118: |
|
117 |
x: UNKNOWN! |
|
118 |
y: UNKNOWN! |
|
119 |
|
|
120 |
- UC-234: |
|
121 |
x: UNKNOWN! |
|
122 |
y: UNKNOWN! |
|
123 |
|
|
124 |
- SP-108: |
|
125 |
x: UNKNOWN! |
|
126 |
y: UNKNOWN! |
|
127 |
|
|
128 |
- JJ-207: |
|
129 |
x: UNKNOWN! |
|
130 |
y: UNKNOWN! |
|
131 |
|
|
132 |
- UC-452: |
|
133 |
x: UNKNOWN! |
|
134 |
y: UNKNOWN! |
|
135 |
|
|
136 |
- CH-214: |
|
137 |
x: UNKNOWN! |
|
138 |
y: UNKNOWN! |
|
139 |
|
|
140 |
- CH-105: |
|
141 |
x: UNKNOWN! |
|
142 |
y: UNKNOWN! |
|
143 |
|
|
144 |
- UC-334: |
|
145 |
x: 49.7268169 |
|
146 |
y: 13.3525456 |
|
147 |
|
|
148 |
- KL-310: |
|
149 |
x: UNKNOWN! |
|
150 |
y: UNKNOWN! |
|
151 |
|
|
152 |
- PC-124: |
|
153 |
x: UNKNOWN! |
|
154 |
y: UNKNOWN! |
|
155 |
|
|
156 |
- CD-121: |
|
157 |
x: UNKNOWN! |
|
158 |
y: UNKNOWN! |
|
159 |
|
|
160 |
- UB-106: |
|
161 |
x: UNKNOWN! |
|
162 |
y: UNKNOWN! |
|
163 |
|
|
164 |
- KL-117: |
|
165 |
x: UNKNOWN! |
|
166 |
y: UNKNOWN! |
|
167 |
|
|
168 |
- UL-109: |
|
169 |
x: UNKNOWN! |
|
170 |
y: UNKNOWN! |
|
171 |
|
|
172 |
- UC-411: |
|
173 |
x: UNKNOWN! |
|
174 |
y: UNKNOWN! |
|
175 |
|
|
176 |
- UL-610: |
|
177 |
x: UNKNOWN! |
|
178 |
y: UNKNOWN! |
|
179 |
|
|
180 |
- CH-112: |
|
181 |
x: UNKNOWN! |
|
182 |
y: UNKNOWN! |
|
183 |
|
|
184 |
- VC-222: |
|
185 |
x: UNKNOWN! |
|
186 |
y: UNKNOWN! |
|
187 |
|
|
188 |
- UC-439: |
|
189 |
x: UNKNOWN! |
|
190 |
y: UNKNOWN! |
|
191 |
|
|
192 |
- JJ-304: |
|
193 |
x: UNKNOWN! |
|
194 |
y: UNKNOWN! |
|
195 |
|
|
196 |
- JJ-202: |
|
197 |
x: UNKNOWN! |
|
198 |
y: UNKNOWN! |
|
199 |
|
|
200 |
- KL-137: |
|
201 |
x: UNKNOWN! |
|
202 |
y: UNKNOWN! |
|
203 |
|
|
204 |
- KL-308: |
|
205 |
x: UNKNOWN! |
|
206 |
y: UNKNOWN! |
|
207 |
|
|
208 |
- UC-410: |
|
209 |
x: UNKNOWN! |
|
210 |
y: UNKNOWN! |
|
211 |
|
|
212 |
- UX-236: |
|
213 |
x: UNKNOWN! |
|
214 |
y: UNKNOWN! |
|
215 |
|
|
216 |
- KL-018: |
|
217 |
x: UNKNOWN! |
|
218 |
y: UNKNOWN! |
|
219 |
|
|
220 |
- KL-321: |
|
221 |
x: UNKNOWN! |
|
222 |
y: UNKNOWN! |
|
223 |
|
|
224 |
- HJ-308: |
|
225 |
x: UNKNOWN! |
|
226 |
y: UNKNOWN! |
|
227 |
|
|
228 |
- JJ-203: |
|
229 |
x: UNKNOWN! |
|
230 |
y: UNKNOWN! |
|
231 |
|
|
232 |
- JJ-126: |
|
233 |
x: UNKNOWN! |
|
234 |
y: UNKNOWN! |
|
235 |
|
|
236 |
- KL-210: |
|
237 |
x: UNKNOWN! |
|
238 |
y: UNKNOWN! |
|
239 |
|
|
240 |
- CH-210: |
|
241 |
x: UNKNOWN! |
|
242 |
y: UNKNOWN! |
|
243 |
|
|
244 |
- VC-331: |
|
245 |
x: UNKNOWN! |
|
246 |
y: UNKNOWN! |
|
247 |
|
|
248 |
- CH-213: |
|
249 |
x: UNKNOWN! |
|
250 |
y: UNKNOWN! |
|
251 |
|
|
252 |
- EK-715: |
|
253 |
x: UNKNOWN! |
|
254 |
y: UNKNOWN! |
|
255 |
|
|
256 |
- UC-328: |
|
257 |
x: UNKNOWN! |
|
258 |
y: UNKNOWN! |
|
259 |
|
|
260 |
- SP-312: |
|
261 |
x: UNKNOWN! |
|
262 |
y: UNKNOWN! |
|
263 |
|
|
264 |
- TS-330: |
|
265 |
x: UNKNOWN! |
|
266 |
y: UNKNOWN! |
|
267 |
|
|
268 |
- UL-210: |
|
269 |
x: UNKNOWN! |
|
270 |
y: UNKNOWN! |
|
271 |
|
|
272 |
- EL-209: |
|
273 |
x: UNKNOWN! |
|
274 |
y: UNKNOWN! |
|
275 |
|
|
276 |
- KL-326: |
|
277 |
x: UNKNOWN! |
|
278 |
y: UNKNOWN! |
|
279 |
|
|
280 |
- UL-305: |
|
281 |
x: UNKNOWN! |
|
282 |
y: UNKNOWN! |
|
283 |
|
|
284 |
- CH-120: |
|
285 |
x: UNKNOWN! |
|
286 |
y: UNKNOWN! |
|
287 |
|
|
288 |
- SD-200: |
|
289 |
x: UNKNOWN! |
|
290 |
y: UNKNOWN! |
|
291 |
|
|
292 |
- UL-607: |
|
293 |
x: UNKNOWN! |
|
294 |
y: UNKNOWN! |
|
295 |
|
|
296 |
- EU-303: |
|
297 |
x: UNKNOWN! |
|
298 |
y: UNKNOWN! |
|
299 |
|
|
300 |
- JJ-305: |
|
301 |
x: UNKNOWN! |
|
302 |
y: UNKNOWN! |
|
303 |
|
|
304 |
- UL-307: |
|
305 |
x: UNKNOWN! |
|
306 |
y: UNKNOWN! |
|
307 |
|
|
308 |
- UL-211: |
|
309 |
x: UNKNOWN! |
|
310 |
y: UNKNOWN! |
|
311 |
|
|
312 |
- UF-024: |
|
313 |
x: UNKNOWN! |
|
314 |
y: UNKNOWN! |
|
315 |
|
|
316 |
- UF-106: |
|
317 |
x: UNKNOWN! |
|
318 |
y: UNKNOWN! |
|
319 |
|
|
320 |
- UC-233: |
|
321 |
x: UNKNOWN! |
|
322 |
y: UNKNOWN! |
|
323 |
|
|
324 |
- CH-110: |
|
325 |
x: UNKNOWN! |
|
326 |
y: UNKNOWN! |
|
327 |
|
|
328 |
- UK-311: |
|
329 |
x: UNKNOWN! |
|
330 |
y: UNKNOWN! |
|
331 |
|
|
332 |
- EU-108: |
|
333 |
x: UNKNOWN! |
|
334 |
y: UNKNOWN! |
|
335 |
|
|
336 |
- HJ-117: |
|
337 |
x: UNKNOWN! |
|
338 |
y: UNKNOWN! |
|
339 |
|
|
340 |
- UF-140: |
|
341 |
x: UNKNOWN! |
|
342 |
y: UNKNOWN! |
|
343 |
|
|
344 |
- CH-211: |
|
345 |
x: UNKNOWN! |
|
346 |
y: UNKNOWN! |
|
347 |
|
|
348 |
- KL-416: |
|
349 |
x: UNKNOWN! |
|
350 |
y: UNKNOWN! |
|
351 |
|
|
352 |
- JJ-303: |
|
353 |
x: UNKNOWN! |
|
354 |
y: UNKNOWN! |
|
355 |
|
|
356 |
- UF-207: |
|
357 |
x: UNKNOWN! |
|
358 |
y: UNKNOWN! |
|
359 |
|
|
360 |
- UC-409: |
|
361 |
x: UNKNOWN! |
|
362 |
y: UNKNOWN! |
|
363 |
|
|
364 |
- CH-207: |
|
365 |
x: UNKNOWN! |
|
366 |
y: UNKNOWN! |
|
367 |
|
|
368 |
- UL-202: |
|
369 |
x: UNKNOWN! |
|
370 |
y: UNKNOWN! |
|
371 |
|
|
372 |
- UL-410: |
|
373 |
x: UNKNOWN! |
|
374 |
y: UNKNOWN! |
|
375 |
|
|
376 |
- SD-325: |
|
377 |
x: UNKNOWN! |
|
378 |
y: UNKNOWN! |
|
379 |
|
|
380 |
- EU-305: |
|
381 |
x: UNKNOWN! |
|
382 |
y: UNKNOWN! |
|
383 |
|
|
384 |
- UL-105: |
|
385 |
x: UNKNOWN! |
|
386 |
y: UNKNOWN! |
|
387 |
|
|
388 |
- UL-112: |
|
389 |
x: UNKNOWN! |
|
390 |
y: UNKNOWN! |
|
391 |
|
|
392 |
- CH-104: |
|
393 |
x: UNKNOWN! |
|
394 |
y: UNKNOWN! |
|
395 |
|
|
396 |
- KO-106: |
|
397 |
x: UNKNOWN! |
|
398 |
y: UNKNOWN! |
|
399 |
|
|
400 |
- CH-309: |
|
401 |
x: UNKNOWN! |
|
402 |
y: UNKNOWN! |
|
403 |
|
|
404 |
- UC-422: |
|
405 |
x: UNKNOWN! |
|
406 |
y: UNKNOWN! |
|
407 |
|
|
408 |
- UL-208: |
|
409 |
x: UNKNOWN! |
|
410 |
y: UNKNOWN! |
|
411 |
|
|
412 |
- UL-103: |
|
413 |
x: UNKNOWN! |
|
414 |
y: UNKNOWN! |
|
415 |
|
|
416 |
- VC-105: |
|
417 |
x: UNKNOWN! |
|
418 |
y: UNKNOWN! |
|
419 |
|
|
420 |
- VC-211: |
|
421 |
x: UNKNOWN! |
|
422 |
y: UNKNOWN! |
|
423 |
|
|
424 |
- JJ-219: |
|
425 |
x: UNKNOWN! |
|
426 |
y: UNKNOWN! |
|
427 |
|
|
428 |
- ST-210: |
|
429 |
x: UNKNOWN! |
|
430 |
y: UNKNOWN! |
|
431 |
|
|
432 |
- EU-311: |
|
433 |
x: UNKNOWN! |
|
434 |
y: UNKNOWN! |
|
435 |
|
|
436 |
- EU-109: |
|
437 |
x: UNKNOWN! |
|
438 |
y: UNKNOWN! |
|
439 |
|
|
440 |
- UL-309: |
|
441 |
x: UNKNOWN! |
|
442 |
y: UNKNOWN! |
|
443 |
|
|
444 |
- UL-104: |
|
445 |
x: UNKNOWN! |
|
446 |
y: UNKNOWN! |
|
447 |
|
|
448 |
- SP-503: |
|
449 |
x: UNKNOWN! |
|
450 |
y: UNKNOWN! |
|
451 |
|
|
452 |
- CH-212: |
|
453 |
x: UNKNOWN! |
|
454 |
y: UNKNOWN! |
|
455 |
|
|
456 |
- UF-142: |
|
457 |
x: UNKNOWN! |
|
458 |
y: UNKNOWN! |
|
459 |
|
|
460 |
- UU-208: |
|
461 |
x: UNKNOWN! |
|
462 |
y: UNKNOWN! |
|
463 |
|
|
464 |
- KL-220: |
|
465 |
x: UNKNOWN! |
|
466 |
y: UNKNOWN! |
|
467 |
|
|
468 |
- UF-026: |
|
469 |
x: UNKNOWN! |
|
470 |
y: UNKNOWN! |
|
471 |
|
|
472 |
- CD-201: |
|
473 |
x: UNKNOWN! |
|
474 |
y: UNKNOWN! |
|
475 |
|
|
476 |
- VC-329: |
|
477 |
x: UNKNOWN! |
|
478 |
y: UNKNOWN! |
|
479 |
|
|
480 |
- SP-210: |
|
481 |
x: UNKNOWN! |
|
482 |
y: UNKNOWN! |
|
483 |
|
|
484 |
- UC-107: |
|
485 |
x: UNKNOWN! |
|
486 |
y: UNKNOWN! |
|
487 |
|
|
488 |
- EL-311: |
|
489 |
x: UNKNOWN! |
|
490 |
y: UNKNOWN! |
|
491 |
|
|
492 |
- SP-319: |
|
493 |
x: UNKNOWN! |
|
494 |
y: UNKNOWN! |
|
495 |
|
|
496 |
- HJ-103: |
|
497 |
x: UNKNOWN! |
|
498 |
y: UNKNOWN! |
|
499 |
|
|
500 |
- UU-105: |
|
501 |
x: UNKNOWN! |
|
502 |
y: UNKNOWN! |
|
503 |
|
|
504 |
- HJ-200: |
|
505 |
x: UNKNOWN! |
|
506 |
y: UNKNOWN! |
|
507 |
|
|
508 |
- KL-236: |
|
509 |
x: UNKNOWN! |
|
510 |
y: UNKNOWN! |
|
511 |
|
|
512 |
- ST-304: |
|
513 |
x: UNKNOWN! |
|
514 |
y: UNKNOWN! |
|
515 |
|
|
516 |
- JJ-221: |
|
517 |
x: UNKNOWN! |
|
518 |
y: UNKNOWN! |
|
519 |
|
|
520 |
- EU-111: |
|
521 |
x: UNKNOWN! |
|
522 |
y: UNKNOWN! |
|
523 |
|
|
524 |
- SO-118: |
|
525 |
x: UNKNOWN! |
|
526 |
y: UNKNOWN! |
|
527 |
|
|
528 |
- UU-108: |
|
529 |
x: UNKNOWN! |
|
530 |
y: UNKNOWN! |
|
531 |
|
|
532 |
- EU-106: |
|
533 |
x: UNKNOWN! |
|
534 |
y: UNKNOWN! |
|
535 |
|
|
536 |
- UC-236: |
|
537 |
x: UNKNOWN! |
|
538 |
y: UNKNOWN! |
|
539 |
|
|
540 |
- UL-311: |
|
541 |
x: UNKNOWN! |
|
542 |
y: UNKNOWN! |
|
543 |
|
|
544 |
- CH-303: |
|
545 |
x: UNKNOWN! |
|
546 |
y: UNKNOWN! |
|
547 |
|
|
548 |
- UF-114: |
|
549 |
x: UNKNOWN! |
|
550 |
y: UNKNOWN! |
|
551 |
|
|
552 |
- UL-602: |
|
553 |
x: UNKNOWN! |
|
554 |
y: UNKNOWN! |
|
555 |
|
|
556 |
- SD-206a: |
|
557 |
x: UNKNOWN! |
|
558 |
y: UNKNOWN! |
|
559 |
|
|
560 |
- ST-306: |
|
561 |
x: UNKNOWN! |
|
562 |
y: UNKNOWN! |
|
563 |
|
|
564 |
- UL-205: |
|
565 |
x: UNKNOWN! |
|
566 |
y: UNKNOWN! |
|
567 |
|
|
568 |
- VC-330: |
|
569 |
x: UNKNOWN! |
|
570 |
y: UNKNOWN! |
|
571 |
|
|
572 |
- UL-203: |
|
573 |
x: UNKNOWN! |
|
574 |
y: UNKNOWN! |
|
575 |
|
|
576 |
- UL-207: |
|
577 |
x: UNKNOWN! |
|
578 |
y: UNKNOWN! |
|
579 |
|
|
580 |
- EU-506: |
|
581 |
x: UNKNOWN! |
|
582 |
y: UNKNOWN! |
|
583 |
|
|
584 |
- VC-208: |
|
585 |
x: UNKNOWN! |
|
586 |
y: UNKNOWN! |
|
587 |
|
|
588 |
- SP-309: |
|
589 |
x: UNKNOWN! |
|
590 |
y: UNKNOWN! |
|
591 |
|
|
592 |
- KL-228: |
|
593 |
x: UNKNOWN! |
|
594 |
y: UNKNOWN! |
|
595 |
|
|
596 |
- EU-302: |
|
597 |
x: UNKNOWN! |
|
598 |
y: UNKNOWN! |
|
599 |
|
|
600 |
- VC-224: |
|
601 |
x: UNKNOWN! |
|
602 |
y: UNKNOWN! |
|
603 |
|
|
604 |
- CH-307: |
|
605 |
x: UNKNOWN! |
|
606 |
y: UNKNOWN! |
|
607 |
|
|
608 |
- UL-108: |
|
609 |
x: UNKNOWN! |
|
610 |
y: UNKNOWN! |
|
611 |
|
|
612 |
- EU-104: |
|
613 |
x: UNKNOWN! |
|
614 |
y: UNKNOWN! |
|
615 |
|
|
616 |
- UK-221: |
|
617 |
x: UNKNOWN! |
|
618 |
y: UNKNOWN! |
|
619 |
|
|
620 |
- VC-223: |
|
621 |
x: UNKNOWN! |
|
622 |
y: UNKNOWN! |
|
623 |
|
|
624 |
- SP-504: |
|
625 |
x: UNKNOWN! |
|
626 |
y: UNKNOWN! |
|
627 |
|
|
628 |
- UX-127: |
|
629 |
x: UNKNOWN! |
|
630 |
y: UNKNOWN! |
|
631 |
|
|
632 |
- SP-208: |
|
633 |
x: UNKNOWN! |
|
634 |
y: UNKNOWN! |
|
635 |
|
|
636 |
- UC-453: |
|
637 |
x: UNKNOWN! |
|
638 |
y: UNKNOWN! |
|
639 |
|
|
640 |
- CH-209: |
|
641 |
x: UNKNOWN! |
|
642 |
y: UNKNOWN! |
|
643 |
|
|
644 |
- CD-154: |
|
645 |
x: UNKNOWN! |
|
646 |
y: UNKNOWN! |
|
647 |
|
|
648 |
- UL-302: |
|
649 |
x: UNKNOWN! |
|
650 |
y: UNKNOWN! |
|
651 |
|
|
652 |
- EU-505: |
|
653 |
x: UNKNOWN! |
|
654 |
y: UNKNOWN! |
|
655 |
|
|
656 |
- UC-241: |
|
657 |
x: UNKNOWN! |
|
658 |
y: UNKNOWN! |
|
659 |
|
|
660 |
- HJ-301: |
|
661 |
x: UNKNOWN! |
|
662 |
y: UNKNOWN! |
|
663 |
|
|
664 |
- UF-122: |
|
665 |
x: UNKNOWN! |
|
666 |
y: UNKNOWN! |
|
667 |
|
|
668 |
- CH-208: |
|
669 |
x: UNKNOWN! |
|
670 |
y: UNKNOWN! |
|
671 |
|
|
672 |
- UK-226: |
|
673 |
x: UNKNOWN! |
|
674 |
y: UNKNOWN! |
|
675 |
|
|
676 |
- UC-432: |
|
677 |
x: UNKNOWN! |
|
678 |
y: UNKNOWN! |
|
679 |
|
|
680 |
- EU-102: |
|
681 |
x: UNKNOWN! |
|
682 |
y: UNKNOWN! |
|
683 |
|
|
684 |
- PC-116: |
|
685 |
x: UNKNOWN! |
|
686 |
y: UNKNOWN! |
|
687 |
|
|
688 |
- CH-308: |
|
689 |
x: UNKNOWN! |
|
690 |
y: UNKNOWN! |
|
691 |
|
|
692 |
- CD-254: |
|
693 |
x: UNKNOWN! |
|
694 |
y: UNKNOWN! |
|
695 |
|
|
696 |
- VC-302: |
|
697 |
x: UNKNOWN! |
|
698 |
y: UNKNOWN! |
|
699 |
|
|
700 |
- RJ-209: |
|
701 |
x: UNKNOWN! |
|
702 |
y: UNKNOWN! |
|
703 |
|
|
704 |
- HJ-306: |
|
705 |
x: UNKNOWN! |
|
706 |
y: UNKNOWN! |
|
707 |
|
|
708 |
- HJ-100: |
|
709 |
x: UNKNOWN! |
|
710 |
y: UNKNOWN! |
|
711 |
|
|
712 |
- RJ-317: |
|
713 |
x: UNKNOWN! |
|
714 |
y: UNKNOWN! |
|
715 |
|
|
716 |
- RJ-315: |
|
717 |
x: UNKNOWN! |
|
718 |
y: UNKNOWN! |
|
719 |
|
|
720 |
- KL-206: |
|
721 |
x: UNKNOWN! |
|
722 |
y: UNKNOWN! |
|
723 |
|
|
724 |
- SP-111: |
|
725 |
x: UNKNOWN! |
|
726 |
y: UNKNOWN! |
|
727 |
|
|
728 |
- CH-203: |
|
729 |
x: UNKNOWN! |
|
730 |
y: UNKNOWN! |
|
731 |
|
|
732 |
- ST-201: |
|
733 |
x: UNKNOWN! |
|
734 |
y: UNKNOWN! |
|
735 |
|
|
736 |
- KL-323: |
|
737 |
x: UNKNOWN! |
|
738 |
y: UNKNOWN! |
|
739 |
|
|
740 |
- UL-206: |
|
741 |
x: UNKNOWN! |
|
742 |
y: UNKNOWN! |
|
743 |
|
|
744 |
- UC-210: |
|
745 |
x: UNKNOWN! |
|
746 |
y: UNKNOWN! |
|
747 |
|
|
748 |
- UL-110: |
|
749 |
x: UNKNOWN! |
|
750 |
y: UNKNOWN! |
|
751 |
|
|
752 |
- HJ-300: |
|
753 |
x: UNKNOWN! |
|
754 |
y: UNKNOWN! |
|
755 |
|
|
756 |
- UL-209: |
|
757 |
x: UNKNOWN! |
|
758 |
y: UNKNOWN! |
|
759 |
|
|
760 |
- JJ-325: |
|
761 |
x: UNKNOWN! |
|
762 |
y: UNKNOWN! |
|
763 |
|
|
764 |
- VC-332: |
|
765 |
x: UNKNOWN! |
|
766 |
y: UNKNOWN! |
|
767 |
|
|
768 |
- KL-302: |
|
769 |
x: UNKNOWN! |
|
770 |
y: UNKNOWN! |
|
771 |
|
|
772 |
- HJ-309: |
|
773 |
x: UNKNOWN! |
|
774 |
y: UNKNOWN! |
|
775 |
|
|
776 |
- UL-107: |
|
777 |
x: UNKNOWN! |
|
778 |
y: UNKNOWN! |
|
779 |
|
|
780 |
- EL-510: |
|
781 |
x: UNKNOWN! |
|
782 |
y: UNKNOWN! |
|
783 |
|
|
784 |
- HJ-310: |
|
785 |
x: UNKNOWN! |
|
786 |
y: UNKNOWN! |
|
787 |
|
|
788 |
- VC-221: |
|
789 |
x: UNKNOWN! |
|
790 |
y: UNKNOWN! |
|
791 |
|
|
792 |
- UF-223: |
|
793 |
x: UNKNOWN! |
|
794 |
y: UNKNOWN! |
|
795 |
|
|
796 |
- RJ-205: |
|
797 |
x: UNKNOWN! |
|
798 |
y: UNKNOWN! |
|
799 |
|
|
800 |
- KL-201: |
|
801 |
x: UNKNOWN! |
|
802 |
y: UNKNOWN! |
|
803 |
|
|
804 |
- CH-304: |
|
805 |
x: UNKNOWN! |
|
806 |
y: UNKNOWN! |
|
807 |
|
|
808 |
- SP-320: |
|
809 |
x: UNKNOWN! |
|
810 |
y: UNKNOWN! |
|
811 |
|
|
812 |
- HJ-302: |
|
813 |
x: UNKNOWN! |
|
814 |
y: UNKNOWN! |
|
815 |
|
|
816 |
- SD-203: |
|
817 |
x: UNKNOWN! |
|
818 |
y: UNKNOWN! |
|
819 |
|
|
820 |
- CD-304: |
|
821 |
x: UNKNOWN! |
|
822 |
y: UNKNOWN! |
|
823 |
|
|
824 |
- SP-417: |
|
825 |
x: UNKNOWN! |
|
826 |
y: UNKNOWN! |
|
827 |
|
|
828 |
- SP-420: |
|
829 |
x: UNKNOWN! |
|
830 |
y: UNKNOWN! |
|
831 |
|
|
832 |
- UL-409: |
|
833 |
x: UNKNOWN! |
|
834 |
y: UNKNOWN! |
|
835 |
|
|
836 |
- UU-209: |
|
837 |
x: UNKNOWN! |
|
838 |
y: UNKNOWN! |
|
839 |
|
|
840 |
- PC-423: |
|
841 |
x: UNKNOWN! |
|
842 |
y: UNKNOWN! |
|
843 |
|
|
844 |
- UX-234a: |
|
845 |
x: UNKNOWN! |
|
846 |
y: UNKNOWN! |
|
847 |
|
|
848 |
- KL-213: |
|
849 |
x: UNKNOWN! |
|
850 |
y: UNKNOWN! |
|
851 |
|
|
852 |
- KL-147: |
|
853 |
x: UNKNOWN! |
|
854 |
y: UNKNOWN! |
|
855 |
|
|
856 |
- UC-311: |
|
857 |
x: UNKNOWN! |
|
858 |
y: UNKNOWN! |
|
859 |
|
|
860 |
- UC-237: |
|
861 |
x: UNKNOWN! |
|
862 |
y: UNKNOWN! |
|
863 |
|
|
864 |
- VC-202: |
|
865 |
x: UNKNOWN! |
|
866 |
y: UNKNOWN! |
|
867 |
|
|
868 |
- JJ-324: |
|
869 |
x: UNKNOWN! |
|
870 |
y: UNKNOWN! |
|
871 |
|
|
872 |
- VC-107: |
|
873 |
x: UNKNOWN! |
|
874 |
y: UNKNOWN! |
|
875 |
|
|
876 |
- UC-235: |
|
877 |
x: UNKNOWN! |
|
878 |
y: UNKNOWN! |
|
879 |
|
|
880 |
- VC-301: |
|
881 |
x: UNKNOWN! |
|
882 |
y: UNKNOWN! |
|
883 |
|
|
884 |
- UF-138: |
|
885 |
x: UNKNOWN! |
|
886 |
y: UNKNOWN! |
|
887 |
|
|
888 |
- KL-305: |
|
889 |
x: UNKNOWN! |
|
890 |
y: UNKNOWN! |
|
891 |
|
|
892 |
- HJ-008: |
|
893 |
x: UNKNOWN! |
|
894 |
y: UNKNOWN! |
|
895 |
|
|
896 |
- HJ-303: |
|
897 |
x: UNKNOWN! |
|
898 |
y: UNKNOWN! |
|
899 |
|
|
900 |
- KL-242: |
|
901 |
x: UNKNOWN! |
|
902 |
y: UNKNOWN! |
|
903 |
|
|
904 |
- JJ-331: |
|
905 |
x: UNKNOWN! |
|
906 |
y: UNKNOWN! |
|
907 |
|
|
908 |
- CH-311: |
|
909 |
x: UNKNOWN! |
|
910 |
y: UNKNOWN! |
|
911 |
|
|
912 |
- EU-304: |
|
913 |
x: UNKNOWN! |
|
914 |
y: UNKNOWN! |
|
915 |
|
|
916 |
- ST-309: |
|
917 |
x: UNKNOWN! |
|
918 |
y: UNKNOWN! |
|
919 |
|
|
920 |
- UC-332: |
|
921 |
x: UNKNOWN! |
|
922 |
y: UNKNOWN! |
|
923 |
|
|
924 |
- RJ-211: |
|
925 |
x: UNKNOWN! |
|
926 |
y: UNKNOWN! |
|
927 |
|
|
928 |
- SD-204: |
|
929 |
x: UNKNOWN! |
|
930 |
y: UNKNOWN! |
|
931 |
|
|
932 |
- UC-333: |
|
933 |
x: UNKNOWN! |
|
934 |
y: UNKNOWN! |
|
935 |
|
|
936 |
- KL-317: |
|
937 |
x: UNKNOWN! |
|
938 |
y: UNKNOWN! |
|
939 |
|
|
940 |
- CH-310: |
|
941 |
x: UNKNOWN! |
|
942 |
y: UNKNOWN! |
|
943 |
|
|
944 |
- EU-503: |
|
945 |
x: UNKNOWN! |
|
946 |
y: UNKNOWN! |
|
947 |
|
|
948 |
- UC-329: |
|
949 |
x: UNKNOWN! |
|
950 |
y: UNKNOWN! |
|
951 |
|
|
952 |
- PC-408: |
|
953 |
x: UNKNOWN! |
|
954 |
y: UNKNOWN! |
|
955 |
|
|
956 |
- UC-326: |
|
957 |
x: UNKNOWN! |
|
958 |
y: UNKNOWN! |
|
959 |
|
|
960 |
- HJ-102: |
|
961 |
x: UNKNOWN! |
|
962 |
y: UNKNOWN! |
|
963 |
|
|
964 |
- CH-107: |
|
965 |
x: UNKNOWN! |
|
966 |
y: UNKNOWN! |
|
967 |
|
|
968 |
- UC-327: |
|
969 |
x: UNKNOWN! |
|
970 |
y: UNKNOWN! |
|
971 |
|
|
972 |
- HJ-305: |
|
973 |
x: UNKNOWN! |
|
974 |
y: UNKNOWN! |
|
975 |
|
|
976 |
- CH-204: |
|
977 |
x: UNKNOWN! |
|
978 |
y: UNKNOWN! |
|
979 |
|
|
980 |
- ST-408: |
|
981 |
x: UNKNOWN! |
|
982 |
y: UNKNOWN! |
|
983 |
|
|
984 |
- KL-315: |
|
985 |
x: UNKNOWN! |
|
986 |
y: UNKNOWN! |
|
987 |
|
|
988 |
- KL-303: |
|
989 |
x: UNKNOWN! |
|
990 |
y: UNKNOWN! |
|
991 |
|
|
992 |
- UN-657: |
|
993 |
x: UNKNOWN! |
|
994 |
y: UNKNOWN! |
|
995 |
|
|
996 |
- HJ-218: |
|
997 |
x: UNKNOWN! |
|
998 |
y: UNKNOWN! |
modules/crawler/DatasetCrawler/OBSAZENIMISTNOSTI_crawler.py | ||
---|---|---|
1 |
from Utilities import folder_processor |
|
2 |
from Utilities.Crawler import basic_crawler_functions |
|
3 |
|
|
4 |
# Path to crawled data |
|
5 |
CRAWLED_DATA_PATH = "CrawledData/" |
|
6 |
|
|
7 |
|
|
8 |
def crawl(config):
    """
    Downloads new data of the dataset into CRAWLED_DATA_PATH + dataset name
    and unzips the downloaded archives.

    Links found on the root url that match "^OD_ZCU" are grouped by year and
    only the link with the highest month number of every year is followed.
    On each followed page the links matching the configured regex are
    collected, the already downloaded ones are filtered out by
    basic_crawler_functions.remove_downloaded_links and the rest is downloaded.

    Args:
        config: loaded configuration file of dataset
                (keys "dataset-name", "url" and "regex" are read)
    """
    dataset_name = config["dataset-name"]
    url = config['url']
    regex = config['regex']
    path_for_files = CRAWLED_DATA_PATH + dataset_name + '/'

    first_level_links = basic_crawler_functions.get_all_links(url)

    filtered_first_level_links = basic_crawler_functions.filter_links(
        first_level_links, "^OD_ZCU")

    # Positions counted from the end of a first level link.
    # NOTE(review): assumes the link ends with "<MM>_<YYYY>" followed by one
    # trailing character - confirm against the real link format.
    OFFSET_YEAR_START = -5
    OFFSET_YEAR_END = -1
    MONTH_SIZE = 2

    # Separate links by year
    # Every link is stored, including the first one seen for a year. The
    # previous version only created an empty list for the first link, which
    # dropped that link and made max() fail for a year with a single link.
    links_by_year = {}
    for item in filtered_first_level_links:
        year = item[OFFSET_YEAR_START:OFFSET_YEAR_END]
        links_by_year.setdefault(year, []).append(item)

    def month_of(link):
        # Two characters in front of the year separator, read as a number
        return int(link[OFFSET_YEAR_START - MONTH_SIZE - 1:
                        OFFSET_YEAR_START - 1])

    # Latest links of years to array
    links = [max(year_links, key=month_of)
             for year_links in links_by_year.values()]

    absolute_first_level_links = basic_crawler_functions.create_absolute_links(
        links, url)

    files = []

    for link in absolute_first_level_links:
        second_level_links = basic_crawler_functions.get_all_links(link)
        filtered_second_level_links = basic_crawler_functions.filter_links(
            second_level_links, regex)
        absolute_second_level_links = basic_crawler_functions.create_absolute_links(
            filtered_second_level_links, link)

        files.extend(absolute_second_level_links)

    files = basic_crawler_functions.remove_downloaded_links(
        files, dataset_name)

    for file in files:
        basic_crawler_functions.download_file_from_url(file, dataset_name)

    folder_processor.unzip_all_csv_zip_files_in_folder(path_for_files)
modules/crawler/DatasetProcessing/OBSAZENIMISTNOSTI_processor.py | ||
---|---|---|
1 |
from Utilities.CSV import csv_data_line |
|
2 |
from Utilities import date_formating |
|
3 |
import logging |
|
4 |
from datetime import date |
|
5 |
import time |
|
6 |
import datetime |
|
7 |
|
|
8 |
logging.basicConfig(filename='../../CrawlerLogs' + 'Crawlerlog-' + |
|
9 |
date.today().strftime("%b-%Y") + '.log', |
|
10 |
level=logging.INFO, |
|
11 |
format='%(asctime)s %(message)s') |
|
12 |
|
|
13 |
|
|
14 |
def process_file(filename):
    """
    Method that takes path to crawled file and outputs date dictionary:
    Date dictionary is a dictionary where keys are dates in format YYYY-mm-dd-hh (2018-04-08-15)
    and value is dictionary where keys are devices (specified in configuration file)
    and value is CSVDataLine.csv_data_line with device, date and occurrence

    Every line of the file is split by ";". Lines with an empty name and
    blank lines are skipped. When the same name appears again for the same
    date key, its occurrence is replaced by the value of the later line.

    Args:
        filename: name of processed file

    Returns:
        date_dict
    """
    date_dict = dict()

    # Character range of the date inside column 14
    # NOTE(review): the [1:-1] / [1:11] slices presumably strip surrounding
    # quotes from the CSV fields - confirm against a real data file.
    YEAR_START = 1
    YEAR_END = 11

    with open(filename, "r") as file:
        for line in file:
            # a blank line has no columns to index into
            if not line.strip():
                continue

            array = line.split(";")

            # pick later time
            # The parsed datetimes are compared directly. Going through
            # time.mktime is unnecessary and can raise OverflowError for the
            # year 1900 that strptime fills in when only a time is parsed.
            time_ = max(
                array[2][1:-1],
                array[3][1:-1],
                key=lambda x: datetime.datetime.strptime(x, "%H:%M"))

            # named date_key so the imported datetime.date is not shadowed
            date_key = date_formating.date_time_formatter(
                array[14][YEAR_START:YEAR_END] + " " + time_)

            name = array[10][1:-1]
            if name == "":
                continue

            if date_key not in date_dict:
                date_dict[date_key] = {}

            if name in date_dict[date_key]:
                date_dict[date_key][name].occurrence = int(array[12])
            else:
                date_dict[date_key][name] = csv_data_line.CSVDataLine(
                    name, date_key, int(array[12]))

    return date_dict
modules/crawler/Utilities/Crawler/basic_crawler_functions.py | ||
---|---|---|
99 | 99 |
|
100 | 100 |
# splits url and extract last part that contains filename |
101 | 101 |
url_parts = url.split("/") |
102 |
file_name = url_parts[len(url_parts)-1]
|
|
102 |
file_name = url_parts[len(url_parts) - 1]
|
|
103 | 103 |
|
104 | 104 |
data_path = CRAWLED_DATA_PATH + dataset_name + '/' |
105 | 105 |
|
modules/crawler/Utilities/Database/database_loader.py | ||
---|---|---|
1 | 1 |
from Utilities.Database import database_data_line, database_record_logs |
2 | 2 |
from Utilities import configure_functions |
3 |
from Utilities.helpers import should_skip |
|
3 | 4 |
import pymongo |
4 | 5 |
import re |
5 | 6 |
|
... | ... | |
67 | 68 |
|
68 | 69 |
name = csv_column[0] |
69 | 70 |
|
70 |
if devices[name]["x"] == "SKIP" or devices[name]["y"] == "SKIP":
|
|
71 |
if should_skip(devices[name]):
|
|
71 | 72 |
continue |
72 | 73 |
|
73 | 74 |
occurrence = csv_column[1] |
... | ... | |
80 | 81 |
if date_without_hours not in date_dict: |
81 | 82 |
date_dict[date_without_hours] = list() |
82 | 83 |
|
83 |
date_dict[date_without_hours].append( |
|
84 |
data_line.to_dictionary()) |
|
84 |
date_dict[date_without_hours].append(data_line.to_dictionary()) |
|
85 | 85 |
|
86 | 86 |
return date_dict |
87 | 87 |
|
88 | 88 |
|
89 |
def load_data_to_database(database_connection,dataset_name, data_dic, file_name): |
|
89 |
def load_data_to_database(database_connection, dataset_name, data_dic, |
|
90 |
file_name): |
|
90 | 91 |
""" |
91 | 92 |
Takes data_dic created in method get_data_from_file |
92 | 93 |
and loads into into database where collection name is dataset_name + data_dic key |
... | ... | |
106 | 107 |
date_dataset.insert_many(data_dic[date]) |
107 | 108 |
|
108 | 109 |
|
109 |
|
|
110 |
def check_or_update_datasets_collection(database_connection,config): |
|
110 |
def check_or_update_datasets_collection(database_connection, config): |
|
111 | 111 |
""" |
112 | 112 |
Checks if DATASETS collection contains dataset and if display name was not updated |
113 | 113 |
|
... | ... | |
127 | 127 |
dataset_present = collection_datasets.find_one(query) |
128 | 128 |
|
129 | 129 |
if dataset_present is None: |
130 |
collection_datasets.insert_one({'key-name': dataset_name, 'display-name': display_name,'updated': 0}) |
|
130 |
collection_datasets.insert_one({ |
|
131 |
'key-name': dataset_name, |
|
132 |
'display-name': display_name, |
|
133 |
'updated': 0 |
|
134 |
}) |
|
131 | 135 |
elif dataset_present['display-name'] != display_name: |
132 |
newvalues = { "$set": { 'display-name': display_name } }
|
|
136 |
newvalues = {"$set": {'display-name': display_name}}
|
|
133 | 137 |
collection_datasets.update_one(query, newvalues) |
134 | 138 |
|
135 | 139 |
|
... | ... | |
153 | 157 |
|
154 | 158 |
change_in_devices = False |
155 | 159 |
|
156 |
collection_devices = database_connection[dataset_name + MONGODB_DATASET_DEVICES_COLLECTION] |
|
160 |
collection_devices = database_connection[ |
|
161 |
dataset_name + MONGODB_DATASET_DEVICES_COLLECTION] |
|
157 | 162 |
|
158 | 163 |
devices_cursor = collection_devices.find() |
159 | 164 |
|
... | ... | |
161 | 166 |
|
162 | 167 |
for device in devices_cursor: |
163 | 168 |
name = device['name'] |
164 |
db_device_dict[name] = {'name': name, 'x': device['x'] , 'y': device['y']} |
|
165 |
|
|
169 |
db_device_dict[name] = { |
|
170 |
'name': name, |
|
171 |
'x': device['x'], |
|
172 |
'y': device['y'] |
|
173 |
} |
|
166 | 174 |
|
167 |
valid_devices = configure_functions.return_dictionary_of_valid_devices(devices) |
|
175 |
valid_devices = configure_functions.return_dictionary_of_valid_devices( |
|
176 |
devices) |
|
168 | 177 |
|
169 | 178 |
if len(valid_devices.keys()) != len(db_device_dict.keys()): |
170 | 179 |
change_in_devices = True |
171 |
|
|
180 |
|
|
172 | 181 |
if change_in_devices == False: |
173 | 182 |
for device in valid_devices.keys(): |
174 | 183 |
if device in db_device_dict: |
... | ... | |
180 | 189 |
change_in_devices = True |
181 | 190 |
break |
182 | 191 |
|
183 |
|
|
184 | 192 |
if change_in_devices == True: |
185 | 193 |
collection_devices.delete_many({}) |
186 | 194 |
devices_list = list() |
187 | 195 |
|
188 | 196 |
for device in devices.keys(): |
189 |
x = devices[device]['x'] |
|
190 |
y = devices[device]['y'] |
|
191 |
if not (x == "SKIP" or x == "UNKNOWN!" or y == "SKIP" or y == "UNKNOWN!"): |
|
192 |
devices_list.append({'name': device , 'x': x , 'y': y }) |
|
197 |
if not (should_skip(devices[device])): |
|
198 |
devices_list.append({ |
|
199 |
'name': device, |
|
200 |
'x': devices[device]['x'], |
|
201 |
'y': devices[device]['y'] |
|
202 |
}) |
|
193 | 203 |
|
194 | 204 |
collection_devices.insert_many(devices_list) |
195 | 205 |
|
196 | 206 |
return change_in_devices |
197 | 207 |
|
198 |
|
|
208 |
|
|
199 | 209 |
def remove_dataset_database(dataset_name): |
200 | 210 |
""" |
201 | 211 |
Removes dataset entries from database |
... | ... | |
211 | 221 |
collection_datasets.delete_one({"key-name": dataset_name}) |
212 | 222 |
print("Removing record from DATASETS collection") |
213 | 223 |
|
214 |
|
|
215 | 224 |
# Retrieve list of all collections |
216 | 225 |
collections = mydb.list_collection_names() |
217 | 226 |
|
... | ... | |
232 | 241 |
# Creating connection |
233 | 242 |
mydb = create_database_connection() |
234 | 243 |
|
235 |
pattern = re.compile(dataset_name+'[0-9]+-[0-9]+-+[0-9]+')
|
|
244 |
pattern = re.compile(dataset_name + '[0-9]+-[0-9]+-+[0-9]+')
|
|
236 | 245 |
|
237 | 246 |
# Retrieve list of all collections |
238 | 247 |
collections = mydb.list_collection_names() |
modules/crawler/Utilities/configure_functions.py | ||
---|---|---|
1 | 1 |
import yaml |
2 | 2 |
import os |
3 | 3 |
from Utilities.Database import database_record_logs |
4 |
from Utilities.helpers import should_skip |
|
4 | 5 |
|
5 | 6 |
# Path to dataset configuration files |
6 | 7 |
CONFIG_FILES_PATH = "DatasetConfigs/" |
... | ... | |
41 | 42 |
new_devices: list or set of new devices for dataset |
42 | 43 |
""" |
43 | 44 |
|
44 |
with open(CONFIG_FILES_PATH + dataset_name + CONFIG_FILE_TYPE, "a") as file: |
|
45 |
with open(CONFIG_FILES_PATH + dataset_name + CONFIG_FILE_TYPE, |
|
46 |
"a") as file: |
|
45 | 47 |
for device in new_devices: |
46 |
file.write(" - "+device+":\n") |
|
48 |
if device == "": |
|
49 |
continue |
|
50 |
file.write(" - " + device + ":\n") |
|
47 | 51 |
file.write(" x: UNKNOWN!\n") |
48 | 52 |
file.write(" y: UNKNOWN!\n") |
49 | 53 |
file.write("\n") |
... | ... | |
65 | 69 |
|
66 | 70 |
for dataset in datasets: |
67 | 71 |
name = dataset.split('.') |
68 |
if name[0] == dataset_name :
|
|
72 |
if name[0] == dataset_name: |
|
69 | 73 |
return True |
70 | 74 |
|
71 | 75 |
return False |
72 | 76 |
|
77 |
|
|
73 | 78 |
def return_dictionary_of_valid_devices(devices): |
74 | 79 |
""" |
75 | 80 |
Iterates over all devices specified in config file |
... | ... | |
85 | 90 |
valid_devices = dict() |
86 | 91 |
|
87 | 92 |
for device in devices.keys(): |
88 |
x = devices[device]['x'] |
|
89 |
y = devices[device]['y'] |
|
90 |
if not (x == "SKIP" or x == "UNKNOWN!" or y == "SKIP" or y == "UNKNOWN!"): |
|
91 |
valid_devices[device] = {'name': device, 'x': x , 'y': y} |
|
93 |
if not should_skip(devices[device]): |
|
94 |
valid_devices[device] = { |
|
95 |
'name': device, |
|
96 |
'x': devices[device]['x'], |
|
97 |
'y': devices[device]['y'] |
|
98 |
} |
|
92 | 99 |
|
93 | 100 |
return valid_devices |
modules/crawler/Utilities/helpers.py | ||
---|---|---|
1 |
# Placeholder values that can stand in a device's 'x' / 'y' entry
SKIP = "SKIP"
UNKNOWN = "UNKNOWN!"


def should_skip(device) -> bool:
    """
    Tells whether a device carries a placeholder instead of a coordinate

    Args:
        device: mapping with 'x' and 'y' entries

    Returns:
        True if 'x' or 'y' equals SKIP or UNKNOWN, False otherwise
    """
    # checked in the order x/SKIP, y/SKIP, x/UNKNOWN, y/UNKNOWN
    return any(device[axis] == marker
               for marker in (SKIP, UNKNOWN)
               for axis in ('x', 'y'))
modules/crawler/fully_clean_database.py | ||
---|---|---|
1 | 1 |
from Utilities.Database import database_loader |
2 | 2 |
|
3 |
|
|
4 |
#TODO: smazat vsechny pomocny soubory po cisteni databaze + prejmenovat |
|
3 | 5 |
def clean_database(): |
4 | 6 |
""" |
5 | 7 |
Drops every collection in database |
modules/crawler/pipeline.py | ||
---|---|---|
7 | 7 |
import logging |
8 | 8 |
from datetime import date |
9 | 9 |
|
10 |
|
|
11 | 10 |
# Path to crawled data |
12 | 11 |
CRAWLED_DATA_PATH = "CrawledData/" |
13 | 12 |
# Path to processed data |
... | ... | |
19 | 18 |
# Path to dataset processor implementations |
20 | 19 |
PROCESSOR_LIB_PATH = "DatasetProcessing." |
21 | 20 |
|
22 |
|
|
23 | 21 |
#logger |
24 |
logging.basicConfig(filename=CRAWLER_LOGS_PATH + 'Applicationlog-' + date.today().strftime("%b-%Y") + '.log',
|
|
25 |
level=logging.INFO,
|
|
26 |
format='%(asctime)s %(message)s'
|
|
27 |
) |
|
22 |
logging.basicConfig(filename=CRAWLER_LOGS_PATH + 'Applicationlog-' + |
|
23 |
date.today().strftime("%b-%Y") + '.log',
|
|
24 |
level=logging.INFO,
|
|
25 |
format='%(asctime)s %(message)s')
|
|
28 | 26 |
|
29 | 27 |
|
30 | 28 |
def check_last_update(config): |
... | ... | |
47 | 45 |
|
48 | 46 |
if config["update-period"] <= last_update: |
49 | 47 |
logging.info("Dataset " + dataset_name + " is being updated today") |
50 |
database_record_logs.update_updated(dataset_name,0) |
|
48 |
database_record_logs.update_updated(dataset_name, 0)
|
|
51 | 49 |
return True |
52 | 50 |
else: |
53 | 51 |
last_update_days = last_update + 1 |
54 |
logging.info("Dataset " + dataset_name + " will be updated in " + str(int(config["update-period"]) - last_update_days) + "days") |
|
55 |
database_record_logs.update_updated(dataset_name,last_update + 1) |
|
52 |
logging.info("Dataset " + dataset_name + " will be updated in " + |
|
53 |
str(int(config["update-period"]) - last_update_days) + |
|
54 |
"days") |
|
55 |
database_record_logs.update_updated(dataset_name, last_update + 1) |
|
56 | 56 |
return False |
57 | 57 |
|
58 | 58 |
|
59 |
|
|
60 | 59 |
def crawl_data(config): |
61 | 60 |
""" |
62 | 61 |
Imports dataset crawler in DatasetCrawler/"dataset_name"_crawler.py |
... | ... | |
67 | 66 |
""" |
68 | 67 |
dataset_name = config["dataset-name"] |
69 | 68 |
|
70 |
crawl_func = __import__(CRAWLER_LIB_PATH + dataset_name + "_crawler", globals(), locals(), ['crawl']).crawl |
|
69 |
crawl_func = __import__(CRAWLER_LIB_PATH + dataset_name + "_crawler", |
|
70 |
globals(), locals(), ['crawl']).crawl |
Také k dispozici: Unified diff
Re #8160 new dataset