Projekt

Obecné

Profil

Stáhnout (14.5 KB) Statistiky
| Větev: | Tag: | Revize:
1 34bf7aa2 Eliška Mourycová
//
2
// Author: Eliska Mourycova
3
//
4
5 8800fb76 Eliška Mourycová
using log4net;
6 34bf7aa2 Eliška Mourycová
using System;
7 d2d1c86a Eliška Mourycová
using System.Collections.Generic;
8
using System.IO;
9
using System.IO.Compression;
10
using System.Net;
11
12 7a998d66 Eliška Mourycová
namespace ServerApp.DataDownload
13 d2d1c86a Eliška Mourycová
{
14 34bf7aa2 Eliška Mourycová
	/// <summary>
	/// Lists all of the available data types (not all of them are used in this project).
	/// The member names are in Czech because they are inserted as-is into file and directory names.
	/// TBD: They might be translated to English later.
	/// </summary>
	public enum DataType
	{
		POCASI,
		ENERGO,
		STROJE,
		EMAIL,
		OBSAZENI_MISTNOSTI,
		JIS,
		KOLOBEZKY,
		WIFI
	}
23 34bf7aa2 Eliška Mourycová
24
	/// <summary>
	/// Lists all available data formats.
	/// </summary>
	public enum DataFormat
	{
		XML,
		JSON,
		CSV
	}
31
32 34bf7aa2 Eliška Mourycová
	/// <summary>
33
	/// This class takes care of downloading of data. Download happens from http://openstore.zcu.cz/.
34
	/// </summary>
35 9547fd4a Eliška Mourycová
	public class DataDownloader : IDataDownloader
36 d2d1c86a Eliška Mourycová
	{
37 34bf7aa2 Eliška Mourycová
		/// <summary>
		/// The root directory containing all downloaded data
		/// </summary>
		public string RootDataDirectory { get; }

		/// <summary>
		/// For a DataType key returns the full (absolute) path to the directory where this type of data is stored
		/// </summary>
		public Dictionary<DataType, string> DataSubDirectories { get; }

		/// <summary>
		/// Flag stating whether files which already exist should be overwritten when downloaded again
		/// </summary>
		public bool OverwriteExisting { get; set; }

		// the main site where the data can be downloaded from (assigned only in the constructor)
		private readonly string site;

		// weather prediction site (assigned only in the constructor)
		private readonly string weatherPredictionSite;

		// the substring at the start of every remote folder name (assigned only in the constructor)
		private readonly string dataStr;

		// WebClient instance used for the actual download (created once in the constructor)
		private readonly WebClient webClient;

		// a shortcut to writing Path.DirectorySeparatorChar
		private readonly char sep = Path.DirectorySeparatorChar;

		// lists used for parsing the file names; they are (re)created by ParseNaming,
		// which is why they cannot be readonly:
		// literal pieces of the naming convention, in order of appearance
		private List<string> separatedFileName;
		// variable names found between curly braces, in order of appearance
		private List<string> variablesInsertions;
		// order of the pieces: 'n' = next literal piece, 'v' = next variable
		private List<char> nameTurns;

		// logger class instance
		private static readonly ILog logger = LogManager.GetLogger(typeof(DataDownloader));
74
75 38a18391 Eliška Mourycová
		/// <summary>
		/// Creates the downloader: stores the configured sites, parses the naming convention,
		/// creates a subdirectory for every data type under the root directory and prepares the web client.
		/// </summary>
		/// <param name="rootDataDir">The root directory under which all downloaded data is stored</param>
		/// <param name="website">The main site the data archives are downloaded from</param>
		/// <param name="namingConvention">The configured archive naming convention; text in curly braces is a variable (type, month, year, format)</param>
		/// <param name="weatherPredSite">The site the weather prediction is downloaded from</param>
		/// <exception cref="ArgumentNullException">Thrown when rootDataDir or namingConvention is null</exception>
		public DataDownloader(string rootDataDir, string website, string namingConvention, string weatherPredSite)
		{
			// Fail early with a descriptive exception: a null naming convention would otherwise surface as a
			// NullReferenceException inside ParseNaming, and a null root would silently build paths
			// starting at the file system root.
			if (rootDataDir == null)
				throw new ArgumentNullException(nameof(rootDataDir));
			if (namingConvention == null)
				throw new ArgumentNullException(nameof(namingConvention));

			// initialize all needed variables:
			site = website;
			weatherPredictionSite = weatherPredSite;

			ParseNaming(namingConvention);
			dataStr = "OD_ZCU_";

			RootDataDirectory = rootDataDir;
			OverwriteExisting = false;

			// the dictionary must exist before CreateSubdirectories fills it
			DataSubDirectories = new Dictionary<DataType, string>();
			CreateSubdirectories(); // todo should we do it like this?

			webClient = new WebClient();

			logger.Info("Data downlader ready.");
		}
96
97 4b847de5 A-Konig
		/// <summary>
		/// Downloads the weather prediction from the configured weather prediction site
		/// and returns the contents of the response.
		/// </summary>
		/// <returns> String containing the weather prediction, or null when the download fails </returns>
		public string DownloadWeatherPrediction()
		{
			try
			{
				return webClient.DownloadString(weatherPredictionSite);
			}
			catch (Exception e)
			{
				// This shouldn't happen. The download is best effort: the caller receives null instead of
				// an exception. The exception object is passed to the logger so that its type, stack trace
				// and inner exceptions are recorded, not only its message.
				logger.Error("Weather prediction download failed!", e);
				return null;
			}
		}
124
125 55d35561 Eliška Mourycová
		/// <summary>
		/// Makes sure that a subdirectory of the root data directory exists for every DataType value
		/// and records its absolute path in DataSubDirectories.
		/// TBD if we want to do it this way
		/// </summary>
		private void CreateSubdirectories()
		{
			foreach (DataType dataType in Enum.GetValues(typeof(DataType)))
			{
				string typeDirectory = $"{RootDataDirectory}{sep}{dataType}";
				Directory.CreateDirectory(typeDirectory);

				// a record that is already present is left untouched:
				if (DataSubDirectories.ContainsKey(dataType))
					continue;

				DataSubDirectories.Add(dataType, Path.GetFullPath(typeDirectory));
			}
		}
141
142
		/// <summary>
		/// Splits the naming convention into literal pieces and variable names (the text between
		/// curly braces) and remembers the order in which they appear. The result is later used
		/// for building the names of the downloaded archives.
		/// </summary>
		/// <param name="namingConvention">The configured naming convention</param>
		private void ParseNaming(string namingConvention)
		{
			separatedFileName = new List<string>();
			variablesInsertions = new List<string>();
			nameTurns = new List<char>();

			string literalText = "";
			string variableName = "";
			bool insideBraces = false;

			foreach (char symbol in namingConvention)
			{
				switch (symbol)
				{
					case '{':
						// a variable starts here, so the literal text read so far is complete
						AddToNameParts(literalText);
						literalText = "";
						insideBraces = true;
						break;

					case '}':
						// the variable name is complete
						AddToVariables(variableName);
						variableName = "";
						insideBraces = false;
						break;

					default:
						// ordinary character: append it to whatever is being read right now
						if (insideBraces)
							variableName += symbol;
						else
							literalText += symbol;
						break;
				}
			}

			// store whatever was still being read when the input ended:
			if (insideBraces)
				AddToVariables(variableName);
			else
				AddToNameParts(literalText);
		}
187
188 55d35561 Eliška Mourycová
		// Stores a literal piece of the file name and marks its position with 'n'; an empty piece is ignored
		private void AddToNameParts(string s)
		{
			if (s.Length == 0)
				return;

			separatedFileName.Add(s);
			nameTurns.Add('n');
		}
198
199 55d35561 Eliška Mourycová
		// Stores a variable name of the naming convention and marks its position with 'v'; an empty name is ignored
		private void AddToVariables(string s)
		{
			if (s.Length == 0)
				return;

			variablesInsertions.Add(s);
			nameTurns.Add('v');
		}
209
210 55d35561 Eliška Mourycová
		/// <summary>
		/// Builds the name of the archive to download by walking through the parsed naming convention:
		/// literal pieces are copied, variables are replaced by the given values.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The data format</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month; values below 10 are prefixed with "0"</param>
		/// <returns>The archive name with all variables substituted</returns>
		/// <exception cref="Exception">Thrown when the naming convention contains an unknown variable name</exception>
		private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
		{
			string builtName = "";
			int nextLiteral = 0;
			int nextVariable = 0;

			foreach (char turn in nameTurns)
			{
				if (turn == 'n')
				{
					// literal piece: copy it unchanged
					builtName += separatedFileName[nextLiteral++];
					continue;
				}

				if (turn != 'v')
					continue;

				// variable: substitute the matching value
				string variable = variablesInsertions[nextVariable++];
				string value;
				if (variable == "type")
					value = type.ToString();
				else if (variable == "month")
					value = (month < 10 ? "0" : "") + month;
				else if (variable == "year")
					value = year.ToString();
				else if (variable == "format")
					value = format.ToString();
				else
					throw new Exception("Config file error - naming conventions can only contain variables with the following names: type, month, year, format");

				builtName += value;
			}

			return builtName;
		}
257
258 085453be Eliška Mourycová
259 34bf7aa2 Eliška Mourycová
		/// <summary>
		/// Downloads a specific archive, extracts it into the subdirectory of the given data type
		/// and deletes the downloaded archive afterwards.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month</param>
		/// <returns>
		/// A list of full paths, one per archive entry (should be only one). Paths of files which already
		/// existed and were not overwritten are included. The list is empty when the download fails
		/// with a WebException.
		/// </returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthStr = month < 10 ? "0" + month : "" + month;
			string monthYr = monthStr + "_" + year;

			string nameZip = BuildDownloadedName(type, format, year, month);
			string url = site + "/" + dataStr + monthYr + "/" + nameZip;

			string nameFolder = RootDataDirectory + sep + type + sep;

			// every entry of the archive is stored under this single name:
			string extractedFile = nameFolder + $"{month}-{year}.{format}";

			try
			{
				logger.Info("Trying to download " + url);

				// Download the zip file:
				webClient.DownloadFile(url, nameZip);

				try
				{
					// the using statement releases the archive (and the lock on the zip file)
					// even when the extraction throws
					using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
					{
						foreach (ZipArchiveEntry entry in zipArchive.Entries)
						{
							// add full path to the list:
							extractedFiles.Add(Path.GetFullPath(extractedFile));

							// an existing file is replaced only if overwriting is desired:
							if (OverwriteExisting || !File.Exists(extractedFile))
								entry.ExtractToFile(extractedFile, OverwriteExisting);
						}
					}
				}
				finally
				{
					// delete the downloaded zip file whether or not the extraction succeeded,
					// so that a failed extraction does not leave the archive behind
					File.Delete(nameZip);
				}
			}
			catch (System.Net.WebException we)
			{
				// download fails, if the specified url is invalid
				logger.Info("Download of " + url + " was not succesful. That is no problem if the given time span does not contain respective open data.");
				logger.Info(we.Message);
			}

			return extractedFiles;
		}
348
349
350
351
		/// <summary>
		/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
		/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
		/// For every year visited the archive with month 0 is requested as well, exactly once per year.
		/// </summary>
		/// <param name="type">The requested data type</param>
		/// <param name="format">The data format</param>
		/// <param name="startDate">The start date</param>
		/// <param name="endDate">The end date</param>
		/// <returns>A list of full paths to all saved files</returns>
		/// <exception cref="ArgumentException">Thrown when startDate is after endDate</exception>
		public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate)
		{
			if (startDate > endDate)
				throw new ArgumentException("startDate must be the same as or before the endDate.");

			List<string> savedFiles = new List<string>();

			// The year whose 00 archive was requested most recently; null before the first iteration.
			// Comparing against it requests the 00 archive once for every year in the span. The previous
			// December-rollover check fetched the first year's 00 archive twice when the span crossed a
			// year boundary, and skipped the 00 archive of any later year not left through December.
			int? lastYearlyArchive = null;

			Date currentDate = startDate;
			do
			{
				// read the year before advancing the date
				int currentYear = (int)currentDate.Year;

				savedFiles.AddRange(DownloadData(type, format, currentYear, (int)currentDate.Month));

				// also try to find the 00 file for each year:
				if (lastYearlyArchive != currentYear)
				{
					savedFiles.AddRange(DownloadData(type, format, currentYear, 0));
					lastYearlyArchive = currentYear;
				}

				// NOTE(review): IncreaseMonthByOne is declared elsewhere; presumably it returns the following month - confirm
				currentDate = currentDate.IncreaseMonthByOne();

			} while (currentDate <= endDate);

			return savedFiles;
		}
411
412 085453be Eliška Mourycová
413 55d35561 Eliška Mourycová
		/// <summary>
		/// Retrieves all data files with dates falling within the specified range. If not all data for the specified range is found
		/// then returns also file/s with month 0 if exists.
		/// If startDate and/or endDate are null, then returns all files from the given subdirectory.
		/// Files whose names do not start with "month-year" (two integers separated by '-') are skipped and a warning is logged.
		/// </summary>
		/// <param name="subDirectory">The subdirectory to search</param>
		/// <param name="startDate">The start date</param>
		/// <param name="endDate">The end date</param>
		/// <returns>A list of all retrieved data files from the requested time span</returns>
		public List<string> GetData(string subDirectory, Date startDate, Date endDate)
		{
			if (startDate == null || endDate == null)
				return GetData(subDirectory);

			string[] files = Directory.GetFiles(subDirectory);
			// month 0 files together with their year (key = path, value = year)
			List<KeyValuePair<string, int>> found00Files = new List<KeyValuePair<string, int>>();
			List<string> relevantFiles = new List<string>();
			List<Date> requestedDates = new List<Date>();

			// prepare a list of requested dates:
			Date currentDate = startDate;
			do
			{
				requestedDates.Add(currentDate);
				currentDate = currentDate.IncreaseMonthByOne();

			} while (currentDate <= endDate);

			foreach (string file in files)
			{
				int month;
				int year;
				if (!TryParseMonthAndYear(file, out month, out year))
				{
					// a stray file must not make the whole listing fail
					logger.Warn("Skipping file with unexpected name: " + file);
					continue;
				}

				if (month == 0)
				{
					found00Files.Add(new KeyValuePair<string, int>(file, year));
					continue;
				}

				Date fileDate = new Date((uint)month, (uint)year);

				if (fileDate >= startDate && fileDate <= endDate)
				{
					// we want to add this
					relevantFiles.Add(file);
					// NOTE(review): relies on Date equality to match the requested entry - confirm Date implements it
					requestedDates.Remove(fileDate);
				}
			}

			// dates not found in the directory now remain in requested dates;
			// a 00 file is added if some month of its year is still missing:
			foreach (KeyValuePair<string, int> file00 in found00Files)
			{
				int year = file00.Value;
				if (requestedDates.Exists(missing => missing.Year == year))
					relevantFiles.Add(file00.Key);
			}

			return relevantFiles;
		}

		// Reads month and year from a file name of the form "month-year.extension";
		// returns false when the name does not have that form.
		private static bool TryParseMonthAndYear(string filePath, out int month, out int year)
		{
			month = 0;
			year = 0;

			string[] splits = Path.GetFileName(filePath).Split(new char[] { '-', '.' });

			return splits.Length >= 2
				&& int.TryParse(splits[0], out month) && month >= 0
				&& int.TryParse(splits[1], out year) && year >= 0;
		}
496
497 4129ce12 Eliška Mourycová
		/// <summary>
		/// Lists the paths of all files located directly in the given directory.
		/// </summary>
		/// <param name="subDirectory">The directory to list</param>
		/// <returns>A new list holding the paths returned by Directory.GetFiles</returns>
		private List<string> GetData(string subDirectory)
		{
			return new List<string>(Directory.GetFiles(subDirectory));
		}
512 d2d1c86a Eliška Mourycová
	}
513
}