1
|
//
|
2
|
// Author: Eliska Mourycova
|
3
|
//
|
4
|
|
5
|
using System;
|
6
|
using System.Collections.Generic;
|
7
|
using System.IO;
|
8
|
using System.IO.Compression;
|
9
|
using System.Net;
|
10
|
|
11
|
namespace ServerApp.DataDownload
|
12
|
{
|
13
|
/// <summary>
/// Lists every kind of data that is available (this project does not necessarily use all of them).
/// The member names are Czech because that makes working with the file names easier.
/// TBD: the names might be translated to English later.
/// </summary>
public enum DataType
{
    POCASI,
    ENERGO,
    STROJE,
    EMAIL,
    OBSAZENI_MISTNOSTI,
    JIS,
    KOLOBEZKY,
    WIFI
}
|
22
|
|
23
|
/// <summary>
/// Lists every format in which the data is available.
/// </summary>
public enum DataFormat
{
    XML,
    JSON,
    CSV
}
|
30
|
|
31
|
/// <summary>
/// This class takes care of downloading of data. The archives are downloaded from the site
/// passed to the constructor (for example http://openstore.zcu.cz/), extracted into one
/// subdirectory of <see cref="RootDataDirectory"/> per data type and then deleted again.
/// </summary>
public class DataDownloader
{
    /// <summary>
    /// The root directory containing all downloaded data
    /// </summary>
    public string RootDataDirectory { get; }

    /// <summary>
    /// For a DataType key returns the full (absolute) path to the directory where this type of data is stored.
    /// An entry is added the first time data of the given type is requested through
    /// <see cref="DownloadData(DataType, DataFormat, Date, Date)"/>.
    /// </summary>
    public Dictionary<DataType, string> DataSubDirectories { get; }

    /// <summary>
    /// Flag stating whether files which already exist should be overwritten when downloaded again.
    /// Set to false by the constructor.
    /// </summary>
    public bool OverwriteExisting { get; set; }

    // the prefix of the per-month folder on the site (the folder is "OD_ZCU_" + MM + "_" + YYYY)
    private const string dataStr = "OD_ZCU_";

    // the main site where the data can be downloaded from
    private readonly string site;

    // WebClient instance reused for every download
    private readonly WebClient webClient;

    // a shortcut to writing Path.DirectorySeparatorChar
    private readonly char sep = Path.DirectorySeparatorChar;

    // the parsed naming convention: literal texts and variable names in the order in which they appear
    private readonly List<NameSegment> nameSegments = new List<NameSegment>();

    /// <summary>
    /// One piece of the parsed naming convention - either a literal text or the name of a variable.
    /// </summary>
    private sealed class NameSegment
    {
        public NameSegment(string text, bool isVariable)
        {
            Text = text;
            IsVariable = isVariable;
        }

        /// <summary>The literal text, or the variable name when <see cref="IsVariable"/> is true.</summary>
        public string Text { get; }

        /// <summary>True when the segment was written inside curly braces in the naming convention.</summary>
        public bool IsVariable { get; }
    }

    /// <summary>
    /// Creates a downloader which stores data under <paramref name="rootDataDir"/> and downloads it from <paramref name="website"/>.
    /// </summary>
    /// <param name="rootDataDir">The root directory for all downloaded data</param>
    /// <param name="website">The address of the site the archives are downloaded from</param>
    /// <param name="namingConvention">
    /// The pattern of an archive file name. Text inside curly braces is a variable (type, month, year or format),
    /// all other text is copied as it is, e.g. OD_ZCU_{type}_{month}_{year}_{format}.zip
    /// </param>
    /// <exception cref="ArgumentNullException">If <paramref name="namingConvention"/> is null</exception>
    public DataDownloader(string rootDataDir, string website, string namingConvention)
    {
        if (namingConvention == null)
        {
            throw new ArgumentNullException(nameof(namingConvention));
        }

        DataSubDirectories = new Dictionary<DataType, string>();
        site = website;
        RootDataDirectory = rootDataDir;
        OverwriteExisting = false;
        webClient = new WebClient();

        ParseNaming(namingConvention);
    }

    /// <summary>
    /// Splits the naming convention into literal texts and variable names and stores them in order.
    /// A '{' starts a variable name, a '}' ends it; empty literals and empty variable names are dropped.
    /// The variable names themselves are validated only when a file name is built.
    /// </summary>
    /// <param name="namingConvention">The naming convention to parse</param>
    private void ParseNaming(string namingConvention)
    {
        nameSegments.Clear();

        var literal = new System.Text.StringBuilder();
        var variable = new System.Text.StringBuilder();
        bool readingLiteral = true;

        foreach (char c in namingConvention)
        {
            if (c == '{')
            {
                // a variable starts - store the literal text read so far
                AddSegment(literal, false);
                readingLiteral = false;
            }
            else if (c == '}')
            {
                // a variable ends - store its name
                AddSegment(variable, true);
                readingLiteral = true;
            }
            else if (readingLiteral)
            {
                literal.Append(c);
            }
            else
            {
                variable.Append(c);
            }
        }

        // add the rest if there is any (an unclosed '{' still yields a variable):
        if (readingLiteral)
        {
            AddSegment(literal, false);
        }
        else
        {
            AddSegment(variable, true);
        }
    }

    /// <summary>
    /// Stores the content of the buffer as a new segment (unless the buffer is empty) and empties the buffer.
    /// </summary>
    private void AddSegment(System.Text.StringBuilder buffer, bool isVariable)
    {
        if (buffer.Length > 0)
        {
            nameSegments.Add(new NameSegment(buffer.ToString(), isVariable));
            buffer.Clear();
        }
    }

    /// <summary>
    /// Formats the month the way it is used in file and folder names: months below 10 get a leading zero.
    /// </summary>
    private static string FormatMonth(int month)
    {
        return month < 10 ? "0" + month : "" + month;
    }

    /// <summary>
    /// Builds the name of the archive to download by filling the variables of the naming convention.
    /// </summary>
    /// <param name="type">The type of data</param>
    /// <param name="format">The format of the data</param>
    /// <param name="year">The year</param>
    /// <param name="month">The month</param>
    /// <returns>The archive file name</returns>
    /// <exception cref="FormatException">If the naming convention contains a variable other than type, month, year or format</exception>
    private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
    {
        var nameZip = new System.Text.StringBuilder();

        foreach (NameSegment segment in nameSegments)
        {
            if (!segment.IsVariable)
            {
                nameZip.Append(segment.Text);
                continue;
            }

            switch (segment.Text)
            {
                case "type":
                    nameZip.Append(type.ToString());
                    break;
                case "month":
                    nameZip.Append(FormatMonth(month));
                    break;
                case "year":
                    nameZip.Append(year);
                    break;
                case "format":
                    nameZip.Append(format.ToString());
                    break;
                default:
                    throw new FormatException("Config file error - naming conventions can only contain variables with following names: type, month, year, format");
            }
        }

        return nameZip.ToString();
    }

    // Placeholder: not implemented yet and not called anywhere in this class; both ref arguments are left untouched.
    private void ExtractDateFromFileName(string fileName, ref int month, ref int year)
    {

    }

    /// <summary>
    /// Downloads a specific archive into the current working directory, extracts it into the
    /// subdirectory of the given data type as "{month}-{year}.{format}" and deletes the archive again.
    /// The subdirectory has to exist already.
    /// </summary>
    /// <param name="type">The type of data</param>
    /// <param name="format">The format of the data</param>
    /// <param name="year">The year</param>
    /// <param name="month">The month</param>
    /// <returns>
    /// A list with the full path of the extracted file. The path is also returned when the file already
    /// existed and was not overwritten. The list is empty when the download failed or the archive contained no file.
    /// </returns>
    private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
    {
        // the list of all files potentially relevant to the caller
        List<string> extractedFiles = new List<string>();

        // Prepare the url string to be downloaded from:
        string monthYr = FormatMonth(month) + "_" + year;
        string nameZip = BuildDownloadedName(type, format, year, month);

        // a trailing slash of the site must not produce "//" in the url
        string baseUrl = site?.TrimEnd('/');
        string url = baseUrl + "/" + dataStr + monthYr + "/" + nameZip;

        // every file of the archive is stored under this single name
        string extractedFile = RootDataDirectory + sep + type + sep + $"{month}-{year}.{format}";

        try
        {
            // Download the zip file:
            webClient.DownloadFile(url, nameZip);

            // the using block releases the archive even if the extraction throws
            using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
            {
                string extractedFullPath = Path.GetFullPath(extractedFile);

                foreach (ZipArchiveEntry entry in zipArchive.Entries)
                {
                    // directory entries have an empty name; extracting one would only create an empty file
                    if (entry.Name.Length == 0)
                    {
                        continue;
                    }

                    // report the file once, also if it already exists and is not overwritten
                    if (!extractedFiles.Contains(extractedFullPath))
                    {
                        extractedFiles.Add(extractedFullPath);
                    }

                    if (OverwriteExisting || !File.Exists(extractedFile))
                    {
                        entry.ExtractToFile(extractedFile, OverwriteExisting);
                    }
                }
            }
        }
        catch (WebException)
        {
            // download fails if the specified url is invalid, e.g. there is no archive for the month;
            // this is expected, the caller simply gets an empty list
        }
        finally
        {
            // delete the downloaded zip file, no matter whether the extraction succeeded
            if (File.Exists(nameZip))
            {
                File.Delete(nameZip);
            }
        }

        return extractedFiles;
    }

    /// <summary>
    /// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
    /// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
    /// Besides the file of every month, the "00" file of every year in the time span is requested once.
    /// </summary>
    /// <param name="type">The type of data</param>
    /// <param name="format">The format of the data</param>
    /// <param name="startDate">The first month to download</param>
    /// <param name="endDate">The last month to download</param>
    /// <returns>Full paths to the saved (or already existing) files</returns>
    /// <exception cref="ArgumentException">If <paramref name="startDate"/> is after <paramref name="endDate"/></exception>
    public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate)
    {
        if (startDate > endDate)
        {
            throw new ArgumentException("startDate must be the same as or before the endDate.");
        }

        // initialize:
        List<string> savedFiles = new List<string>();
        string subDirectory = RootDataDirectory + sep + type;
        Directory.CreateDirectory(subDirectory);

        // create subdirectory record if it doesn't exist:
        if (!DataSubDirectories.ContainsKey(type))
        {
            DataSubDirectories.Add(type, Path.GetFullPath(subDirectory));
        }

        Date currentDate = startDate;
        int? yearOfLastYearlyFile = null;
        do
        {
            Console.WriteLine("current date: " + currentDate);

            int currentYear = (int)currentDate.Year;
            savedFiles.AddRange(DownloadData(type, format, currentYear, (int)currentDate.Month));

            // also try to find the 00 file, exactly once for each year of the time span:
            if (yearOfLastYearlyFile != currentYear)
            {
                savedFiles.AddRange(DownloadData(type, format, currentYear, 0));
                yearOfLastYearlyFile = currentYear;
            }

            currentDate = currentDate.IncreaseMonthByOne();

        } while (currentDate <= endDate);

        return savedFiles;
    }

    /// <summary>
    /// Not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always</exception>
    public bool CheckForNewData()
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Not implemented yet. The method currently only parses the month and the year out of the names
    /// of the files in the directory (expected as "month-year.extension") and then throws.
    /// </summary>
    /// <param name="subDirectory">The directory whose files are inspected</param>
    /// <param name="startDate">Not used yet</param>
    /// <param name="endDate">Not used yet</param>
    /// <exception cref="NotImplementedException">Whenever all file names were parsed successfully</exception>
    public List<string> GetData(string subDirectory, Date startDate, Date endDate)
    {
        string[] files = Directory.GetFiles(subDirectory);
        for (int i = 0; i < files.Length; i++)
        {
            string currFileName = Path.GetFileName(files[i]);
            Console.WriteLine("curr file: " + currFileName);
            string[] splits = currFileName.Split(new char[] { '-', '.' });

            int month = int.Parse(splits[0]);
            int year = int.Parse(splits[1]);

            Date d = new Date((uint)month, (uint)year);
        }

        throw new NotImplementedException();
    }
}
|
413
|
}
|