1
|
//
|
2
|
// Author: Eliska Mourycova
|
3
|
//
|
4
|
|
5
|
using System;
|
6
|
using System.Collections.Generic;
|
7
|
using System.IO;
|
8
|
using System.IO.Compression;
|
9
|
using System.Net;
|
10
|
|
11
|
namespace ServerApp.DataDownload
|
12
|
{
|
13
|
/// <summary>
/// All data types that are available (not all of them will be used in this project).
/// The member names are in Czech because they are inserted into file names as they are.
/// TBD: They might be translated to English later.
/// </summary>
public enum DataType
{
    POCASI,
    ENERGO,
    STROJE,
    EMAIL,
    OBSAZENI_MISTNOSTI,
    JIS,
    KOLOBEZKY,
    WIFI
}
|
22
|
|
23
|
/// <summary>
/// All data formats that are available.
/// </summary>
public enum DataFormat
{
    XML,
    JSON,
    CSV
}
|
30
|
|
31
|
/// <summary>
|
32
|
/// Takes care of downloading data. The download site is passed to the constructor (e.g. http://openstore.zcu.cz/).
|
33
|
/// </summary>
|
34
|
public class DataDownloader : IDataDownloader
|
35
|
{
|
36
|
/// <summary>
/// The root directory containing all downloaded data
/// </summary>
public string RootDataDirectory { get; }

/// <summary>
/// For a DataType key returns full (absolute) path to a directory, where this type of data is stored
/// </summary>
public Dictionary<DataType, string> DataSubDirectories { get; }

/// <summary>
/// Flag stating whether files which already exist should be overwritten when downloaded again
/// </summary>
public bool OverwriteExisting { get; set; }

// the main site where the data can be downloaded from
private readonly string site;

// weather prediction site
private readonly string weatherPredictionSite;

// prefix of the remote folder name used when the download URL is built
private readonly string dataStr;

// WebClient instance used for the actual download
// NOTE(review): WebClient is IDisposable and is never disposed by this class - confirm whether that is acceptable
private readonly WebClient webClient;

// a shortcut to writing Path.DirectorySeparatorChar
private readonly char sep = Path.DirectorySeparatorChar;

// lists used for parsing the file names (they are assigned in ParseNaming, so they cannot be readonly):
// literal parts of the naming convention, in order of appearance
private List<string> separatedFileName;
// variable names found between '{' and '}', in order of appearance
private List<string> variablesInsertions;
// order of the parts: 'n' for a literal part, 'v' for a variable
private List<char> nameTurns;
|
70
|
|
71
|
/// <summary>
/// Creates a downloader: stores the configuration, parses the naming convention and
/// creates one subdirectory per data type under the root data directory.
/// </summary>
/// <param name="rootDataDir">The root directory for all downloaded data</param>
/// <param name="website">The main site the data is downloaded from (e.g. http://openstore.zcu.cz/)</param>
/// <param name="namingConvention">The naming convention of the downloaded archives; variables are written in curly braces</param>
/// <param name="weatherPredSite">The site the weather prediction is downloaded from</param>
/// <exception cref="ArgumentNullException">If rootDataDir or namingConvention is null</exception>
public DataDownloader(string rootDataDir, string website, string namingConvention, string weatherPredSite)
{
    // a null root would silently turn into paths like "/POCASI" at the file system root
    if (rootDataDir == null)
        throw new ArgumentNullException(nameof(rootDataDir));

    // a null convention would otherwise fail with a NullReferenceException while parsing
    if (namingConvention == null)
        throw new ArgumentNullException(nameof(namingConvention));

    // initialize all needed variables:
    site = website;
    weatherPredictionSite = weatherPredSite;

    ParseNaming(namingConvention);
    dataStr = "OD_ZCU_";

    RootDataDirectory = rootDataDir;
    OverwriteExisting = false;

    DataSubDirectories = new Dictionary<DataType, string>();
    CreateSubdirectories(); // todo should we do it like this?

    webClient = new WebClient();
}
|
90
|
|
91
|
/// <summary>
/// Downloads the weather prediction from the configured weather prediction site.
/// The content is presumably JSON (the original comment mentions a json file) - not verified here.
/// </summary>
/// <returns>The downloaded content as a string, or null if the download failed</returns>
public string DownloadWeatherPrediction()
{
    try
    {
        return webClient.DownloadString(weatherPredictionSite);
    }
    catch (Exception e)
    {
        // this shouldn't happen; the download stays best-effort (null is returned),
        // but the reason is printed so that the failure can be diagnosed
        Console.WriteLine("Weather prediction download failed!");
        Console.WriteLine(e.Message);
        return null;
    }
}
|
116
|
|
117
|
/// <summary>
/// Makes sure that every data type has its own subdirectory under the root data directory
/// and remembers the full path of each of them in DataSubDirectories.
/// TBD if we want to do it this way
/// </summary>
private void CreateSubdirectories()
{
    foreach (DataType dataType in Enum.GetValues(typeof(DataType)))
    {
        string directory = RootDataDirectory + sep + dataType;
        Directory.CreateDirectory(directory);

        // a record which already exists is left untouched:
        if (DataSubDirectories.ContainsKey(dataType))
            continue;

        string fullPath = Path.GetFullPath(directory);
        DataSubDirectories.Add(dataType, fullPath);
    }
}
|
133
|
|
134
|
/// <summary>
/// Parses the naming convention to be later used for determining URLs for data download.
/// Text outside of curly braces is stored as a literal name part, text between '{' and '}'
/// is stored as a variable name. The order of both kinds of parts is recorded in nameTurns.
/// </summary>
/// <param name="namingConvention">The configured naming convention</param>
private void ParseNaming(string namingConvention)
{
    separatedFileName = new List<string>();
    variablesInsertions = new List<string>();
    nameTurns = new List<char>();

    string currPart = "";
    string currVar = "";
    bool readingNormal = true;

    foreach (char c in namingConvention)
    {
        switch (c)
        {
            case '{':
                // a variable starts, the literal part read so far is finished:
                AddToNameParts(currPart);
                currPart = "";
                readingNormal = false;
                break;

            case '}':
                // a variable ends:
                AddToVariables(currVar);
                currVar = "";
                readingNormal = true;
                break;

            default:
                // normal char
                if (readingNormal)
                    currPart += c;
                else
                    currVar += c;
                break;
        }
    }

    // add the rest if there is any (an unclosed '{' leaves the rest in the variable):
    if (readingNormal)
        AddToNameParts(currPart);
    else
        AddToVariables(currVar);
}
|
179
|
|
180
|
// Stores a literal part of the naming convention and records an 'n' turn for it.
// Empty strings are ignored.
private void AddToNameParts(string s)
{
    if (s.Length <= 0)
        return;

    separatedFileName.Add(s);
    nameTurns.Add('n');
}
|
190
|
|
191
|
// Stores a variable name of the naming convention and records a 'v' turn for it.
// Empty strings are ignored.
private void AddToVariables(string s)
{
    if (s.Length <= 0)
        return;

    variablesInsertions.Add(s);
    nameTurns.Add('v');
}
|
201
|
|
202
|
/// <summary>
/// Builds the name of the downloaded file. Takes naming convention into account:
/// the literal parts and the variables parsed by ParseNaming are put together in their original order.
/// </summary>
/// <param name="type">The type of data</param>
/// <param name="format">The data format</param>
/// <param name="year">The year</param>
/// <param name="month">The month; values lower than 10 are padded with a leading zero</param>
/// <returns>The file name with all variables replaced by their values</returns>
/// <exception cref="InvalidOperationException">If the naming convention contains an unknown variable</exception>
private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
{
    string nameZip = "";

    // indices of the next unused literal part / variable:
    int partInd = 0;
    int varInd = 0;

    foreach (char turn in nameTurns)
    {
        if (turn == 'n')
        {
            nameZip += separatedFileName[partInd];
            partInd++;
        }
        else if (turn == 'v')
        {
            switch (variablesInsertions[varInd])
            {
                case "type":
                    nameZip += type.ToString();
                    break;
                case "month":
                    nameZip += month < 10 ? "0" + month : "" + month;
                    break;
                case "year":
                    nameZip += year;
                    break;
                case "format":
                    nameZip += format.ToString();
                    break;
                default:
                    // a specific exception type instead of the bare Exception; the message is unchanged
                    throw new InvalidOperationException("Config file error - naming conventions can only contain variables with the following names: type, month, year, format");
            }
            varInd++;
        }
    }

    return nameZip;
}
|
249
|
|
250
|
|
251
|
/// <summary>
/// Downloads a specific archive and extracts it into the subdirectory of the given data type.
/// Every entry of the archive is extracted under the same name "{month}-{year}.{format}",
/// so the archive is expected to contain a single file.
/// </summary>
/// <param name="type">The type of data</param>
/// <param name="format">The format of the data</param>
/// <param name="year">The year</param>
/// <param name="month">The month</param>
/// <returns>
/// A list of full paths of all extracted files (should be only one); files which already existed
/// and were not overwritten are included. The list is empty if the download failed.
/// </returns>
private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
{
    // the list of all files potentially relevant to the caller
    List<string> extractedFiles = new List<string>();

    // Prepare the url string to be downloaded from:
    string monthStr = month < 10 ? "0" + month : "" + month;
    string monthYr = monthStr + "_" + year;

    string nameZip = BuildDownloadedName(type, format, year, month);
    string url = site + "/" + dataStr + monthYr + "/" + nameZip;

    string nameFolder = RootDataDirectory + sep + type + sep;

    try
    {
        // Download the zip file (it is saved under a relative path, i.e. into the current directory):
        webClient.DownloadFile(url, nameZip);
    }
    catch (WebException)
    {
        // download fails, if the specified url is invalid;
        // this is deliberately not an error, the caller gets an empty list
        return extractedFiles;
    }

    try
    {
        // 'using' closes the archive even if the extraction throws
        using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
        {
            foreach (ZipArchiveEntry entry in zipArchive.Entries)
            {
                // the month is not padded here, the name is "{month}-{year}.{format}"
                string extractedFile = nameFolder + $"{month}-{year}.{format}";

                // add full path to the list:
                extractedFiles.Add(Path.GetFullPath(extractedFile));

                // an existing file is replaced only if overwrite is desired:
                if (OverwriteExisting || !File.Exists(extractedFile))
                    entry.ExtractToFile(extractedFile, OverwriteExisting);
            }
        }
    }
    finally
    {
        // delete the previously downloaded zip file, also when the extraction failed
        File.Delete(nameZip);
    }

    return extractedFiles;
}
|
338
|
|
339
|
|
340
|
|
341
|
/// <summary>
/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
/// Besides the file of every month, the file with month 0 is requested once for every year of the time span.
/// </summary>
/// <param name="type">The requested data type</param>
/// <param name="format">The data format</param>
/// <param name="startDate">The start date</param>
/// <param name="endDate">The end date</param>
/// <returns>A list of full paths to all saved files</returns>
/// <exception cref="ArgumentException">If startDate is after endDate</exception>
public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate)
{
    if (startDate > endDate)
        throw new ArgumentException("startDate must be the same as or before the endDate.");

    // initialize:
    List<string> savedFiles = new List<string>();

    // the year whose 00 file was requested last; null until the first request
    int? lastZeroFileYear = null;

    Date currentDate = startDate;
    do
    {
        int currentYear = (int)currentDate.Year;
        savedFiles.AddRange(DownloadData(type, format, currentYear, (int)currentDate.Month));

        // also try to find the 00 file for each year - exactly once per year,
        // so that no year is skipped and nothing is downloaded twice:
        if (lastZeroFileYear != currentYear)
        {
            savedFiles.AddRange(DownloadData(type, format, currentYear, 0));
            lastZeroFileYear = currentYear;
        }

        // move on to the next month:
        // NOTE(review): assumes IncreaseMonthByOne returns the following month - confirm in Date
        currentDate = currentDate.IncreaseMonthByOne();

    } while (currentDate <= endDate);

    return savedFiles;
}
|
401
|
|
402
|
/// <summary>
/// Not implemented yet. Presumably meant to tell whether new data is available (judging by the name only).
/// </summary>
/// <returns>Nothing at the moment, the method always throws</returns>
/// <exception cref="NotImplementedException">Always</exception>
public bool CheckForNewData()
{
    throw new NotImplementedException();
}
|
406
|
|
407
|
|
408
|
/// <summary>
/// Retrieves all data files with dates falling within the specified range. If not all data for the specified range is found
/// then returns also file/s with month 0 if exists.
/// If startDate and/or endDate are null, then returns all files from the given subdirectory.
/// Files whose names do not have the form "month-year.extension" are skipped.
/// </summary>
/// <param name="subDirectory">The subdirectory to search</param>
/// <param name="startDate">The start date</param>
/// <param name="endDate">The end date</param>
/// <returns>A list of all retrieved data files from the requested time span</returns>
public List<string> GetData(string subDirectory, Date startDate, Date endDate)
{
    if (startDate == null || endDate == null)
        return GetData(subDirectory);

    string[] files = Directory.GetFiles(subDirectory);
    // the year and the path of every file with month 0:
    List<KeyValuePair<int, string>> found00Files = new List<KeyValuePair<int, string>>();
    List<string> relevantFiles = new List<string>();
    List<Date> requestedDates = new List<Date>();

    // prepare a list of requested dates:
    Date currentDate = startDate;
    do
    {
        requestedDates.Add(currentDate);
        currentDate = currentDate.IncreaseMonthByOne();

    } while (currentDate <= endDate);

    foreach (string file in files)
    {
        Console.WriteLine("curr file: " + Path.GetFileName(file));

        int month;
        int year;
        if (!TryParseFileDate(file, out month, out year))
            continue; // not a data file, its name carries no date

        if (month == 0)
        {
            found00Files.Add(new KeyValuePair<int, string>(year, file));
            continue;
        }

        Date d = new Date((uint)month, (uint)year);

        if (d >= startDate && d <= endDate)
        {
            // we want to add this
            relevantFiles.Add(file);
            // NOTE(review): relies on Date comparing by value in List.Remove - confirm in Date
            requestedDates.Remove(d);
        }
    }

    // 00 will only appear once for every year?
    // dates not found in the directory now remain in requested dates
    foreach (KeyValuePair<int, string> file00 in found00Files)
    {
        int year = file00.Key;

        // if the year of this 00 file remains in the list:
        if (requestedDates.Exists(d => d.Year == year))
            relevantFiles.Add(file00.Value);
    }

    return relevantFiles;
}

// Reads the month and the year from a file name of the form "month-year.extension".
// Returns false if the name does not have this form.
private static bool TryParseFileDate(string filePath, out int month, out int year)
{
    month = 0;
    year = 0;

    string[] splits = Path.GetFileName(filePath).Split(new char[] { '-', '.' });

    return splits.Length >= 2
        && int.TryParse(splits[0], out month)
        && int.TryParse(splits[1], out year);
}
|
491
|
|
492
|
/// <summary>
/// Returns the paths of all files found in the given directory
/// </summary>
/// <param name="subDirectory">The directory to search</param>
/// <returns>A list with the path of every file returned by Directory.GetFiles</returns>
private List<string> GetData(string subDirectory)
{
    // the list is created directly from the array of found paths
    return new List<string>(Directory.GetFiles(subDirectory));
}
|
507
|
}
|
508
|
}
|