Projekt

Obecné

Profil

Stáhnout (14.4 KB) Statistiky
| Větev: | Tag: | Revize:
1
//
2
// Author: Eliska Mourycova
3
//
4

    
5
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Text;
using log4net;
11

    
12
namespace ServerApp.DataDownload
13
{
14
	/// <summary>
	/// All data types that can be requested (not all of them are used in this project).
	/// The member names are Czech on purpose: they are inserted verbatim into file and directory names.
	/// TBD: They might be translated to English later.
	/// </summary>
	public enum DataType
	{
		/// <summary>Czech for "weather".</summary>
		POCASI,

		/// <summary>Energy-related data (Czech abbreviation).</summary>
		ENERGO,

		/// <summary>Czech for "machines".</summary>
		STROJE,

		/// <summary>E-mail data.</summary>
		EMAIL,

		/// <summary>Czech for "room occupancy".</summary>
		OBSAZENI_MISTNOSTI,

		/// <summary>JIS data (the abbreviation is not expanded anywhere in this file).</summary>
		JIS,

		/// <summary>Czech for "scooters".</summary>
		KOLOBEZKY,

		/// <summary>Wi-Fi data.</summary>
		WIFI
	}
23

    
24
	/// <summary>
	/// The file formats in which data can be requested.
	/// The member name is used verbatim as the extension of stored files.
	/// </summary>
	public enum DataFormat
	{
		/// <summary>XML files.</summary>
		XML,

		/// <summary>JSON files.</summary>
		JSON,

		/// <summary>CSV files.</summary>
		CSV
	}
31

    
32
	/// <summary>
	/// Takes care of downloading data. Monthly archives are downloaded from the configured site
	/// (e.g. http://openstore.zcu.cz/), extracted into one subdirectory per <see cref="DataType"/>
	/// and can later be looked up by date range.
	/// </summary>
	/// <remarks>
	/// Extracted files are stored as "{month}-{year}.{format}" inside the subdirectory of their data type.
	/// Month 0 denotes the "00" archive that may exist once per year.
	/// NOTE(review): the WebClient owned by this class is never disposed; disposing it would require
	/// widening the public surface of the class, which is deliberately left unchanged here.
	/// </remarks>
	public class DataDownloader : IDataDownloader
	{
		/// <summary>
		/// The root directory containing all downloaded data
		/// </summary>
		public string RootDataDirectory { get; }

		/// <summary>
		/// For a DataType key returns the full (absolute) path to the directory where this type of data is stored
		/// </summary>
		public Dictionary<DataType, string> DataSubDirectories { get; }

		/// <summary>
		/// Flag stating whether files which already exist should be overwritten when downloaded again
		/// </summary>
		public bool OverwriteExisting { get; set; }

		// logger class instance
		private static readonly ILog logger = LogManager.GetLogger(typeof(DataDownloader));

		// prefix of the per-month directory in the download URL (followed by "MM_YYYY")
		private const string DataPrefix = "OD_ZCU_";

		// characters separating month, year and extension in stored file names ("{month}-{year}.{format}")
		private static readonly char[] storedNameSeparators = { '-', '.' };

		// the main site where the data can be downloaded from
		private readonly string site;

		// weather prediction site
		private readonly string weatherPredictionSite;

		// WebClient instance used for the actual download
		private readonly WebClient webClient;

		// the parsed naming convention: literal text and {variable} placeholders in their original order
		private readonly List<NameSegment> nameSegments = new List<NameSegment>();

		/// <summary>
		/// One piece of the parsed naming convention: either literal text or the name of a variable.
		/// </summary>
		private struct NameSegment
		{
			public readonly bool IsVariable;
			public readonly string Text;

			public NameSegment(bool isVariable, string text)
			{
				IsVariable = isVariable;
				Text = text;
			}
		}

		/// <summary>
		/// Creates the downloader, parses the naming convention and creates one subdirectory
		/// per data type under the root data directory.
		/// </summary>
		/// <param name="rootDataDir">The root directory for all downloaded data</param>
		/// <param name="website">The site the data archives are downloaded from</param>
		/// <param name="namingConvention">Archive naming convention; text in braces is a variable (type, month, year, format)</param>
		/// <param name="weatherPredSite">The URL the weather prediction is downloaded from</param>
		/// <exception cref="ArgumentNullException">If rootDataDir or namingConvention is null</exception>
		public DataDownloader(string rootDataDir, string website, string namingConvention, string weatherPredSite)
		{
			if (rootDataDir == null)
				throw new ArgumentNullException(nameof(rootDataDir));
			if (namingConvention == null)
				throw new ArgumentNullException(nameof(namingConvention));

			site = website;
			weatherPredictionSite = weatherPredSite;

			ParseNaming(namingConvention);

			RootDataDirectory = rootDataDir;
			OverwriteExisting = false;

			DataSubDirectories = new Dictionary<DataType, string>();
			CreateSubdirectories(); // todo should we do it like this?

			webClient = new WebClient();
		}

		/// <summary>
		/// Downloads the weather prediction and returns it as a string.
		/// </summary>
		/// <returns>String containing the weather prediction, or null if the download failed</returns>
		public string DownloadWeatherPrediction()
		{
			try
			{
				return webClient.DownloadString(weatherPredictionSite);
			}
			catch (Exception e)
			{
				// this shouldn't happen; keep the "null on failure" contract but do not lose the cause
				logger.Error("Weather prediction download failed!", e);
				return null;
			}
		}

		/// <summary>
		/// Creates subdirectories for all data types and records their absolute paths.
		/// TBD if we want to do it this way
		/// </summary>
		private void CreateSubdirectories()
		{
			foreach (DataType type in (DataType[])Enum.GetValues(typeof(DataType)))
			{
				string subDirectory = Path.Combine(RootDataDirectory, type.ToString());
				Directory.CreateDirectory(subDirectory);
				DataSubDirectories[type] = Path.GetFullPath(subDirectory);
			}
		}

		/// <summary>
		/// Parses the naming convention to be later used for determining URLs for data download.
		/// Text between '{' and '}' is a variable name, everything else is literal text.
		/// </summary>
		/// <param name="namingConvention">The configured naming convention</param>
		private void ParseNaming(string namingConvention)
		{
			nameSegments.Clear();

			StringBuilder literal = new StringBuilder();
			StringBuilder variable = new StringBuilder();
			bool insideBraces = false;

			foreach (char c in namingConvention)
			{
				switch (c)
				{
					case '{':
						AddSegment(false, literal.ToString());
						literal.Clear();
						insideBraces = true;
						break;
					case '}':
						AddSegment(true, variable.ToString());
						variable.Clear();
						insideBraces = false;
						break;
					default:
						(insideBraces ? variable : literal).Append(c);
						break;
				}
			}

			// add the rest if there is any:
			if (insideBraces)
				AddSegment(true, variable.ToString());
			else
				AddSegment(false, literal.ToString());
		}

		// Appends a segment of the naming convention; empty segments carry no information and are dropped
		private void AddSegment(bool isVariable, string text)
		{
			if (text.Length > 0)
				nameSegments.Add(new NameSegment(isVariable, text));
		}

		/// <summary>
		/// Builds the name of the downloaded file. Takes naming convention into account
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The data format</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month (written with at least two digits)</param>
		/// <returns>The archive file name with all variables substituted</returns>
		/// <exception cref="FormatException">If the naming convention contains an unknown variable</exception>
		private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
		{
			StringBuilder name = new StringBuilder();

			foreach (NameSegment segment in nameSegments)
			{
				name.Append(segment.IsVariable
					? ResolveVariable(segment.Text, type, format, year, month)
					: segment.Text);
			}

			return name.ToString();
		}

		// Returns the text substituted for one variable of the naming convention
		private static string ResolveVariable(string variable, DataType type, DataFormat format, int year, int month)
		{
			switch (variable)
			{
				case "type":
					return type.ToString();
				case "month":
					return month.ToString("D2", CultureInfo.InvariantCulture);
				case "year":
					return year.ToString(CultureInfo.InvariantCulture);
				case "format":
					return format.ToString();
				default:
					throw new FormatException("Config file error - naming conventions can only contain variables with the following names: type, month, year, format");
			}
		}

		/// <summary>
		/// Downloads a specific archive and extracts it as "{month}-{year}.{format}" into the
		/// subdirectory of the given data type.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month</param>
		/// <returns>
		/// A list with the full path of the extracted file; empty if the download failed
		/// or the archive contained no file
		/// </returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthYr = month.ToString("D2", CultureInfo.InvariantCulture) + "_" + year.ToString(CultureInfo.InvariantCulture);
			string nameZip = BuildDownloadedName(type, format, year, month);
			string url = site + "/" + DataPrefix + monthYr + "/" + nameZip;

			// every entry of the archive is stored under this one name
			string targetFolder = Path.Combine(RootDataDirectory, type.ToString());
			string extractedFile = Path.Combine(targetFolder, $"{month}-{year}.{format}");

			try
			{
				// Download the zip file:
				webClient.DownloadFile(url, nameZip);
			}
			catch (WebException we)
			{
				// download fails, if the specified url is invalid (e.g. no archive exists for this month)
				logger.Warn("Download from " + url + " failed.", we);
				return extractedFiles;
			}

			try
			{
				using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
				{
					bool pathRecorded = false;

					foreach (ZipArchiveEntry entry in zipArchive.Entries)
					{
						// directory entries have an empty Name; extracting them would create an empty file
						if (entry.Name.Length == 0)
							continue;

						// report the file once, no matter how many entries map onto it:
						if (!pathRecorded)
						{
							extractedFiles.Add(Path.GetFullPath(extractedFile));
							pathRecorded = true;
						}

						// if overwrite is not desired, keep a file that already exists:
						if (!OverwriteExisting && File.Exists(extractedFile))
							continue;

						entry.ExtractToFile(extractedFile, OverwriteExisting);
					}
				}
			}
			finally
			{
				// delete the downloaded zip file even if the extraction failed
				File.Delete(nameZip);
			}

			return extractedFiles;
		}

		/// <summary>
		/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
		/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
		/// For every year visited the "00" archive of that year is requested once as well.
		/// </summary>
		/// <param name="type">The requested data type</param>
		/// <param name="format">The data format</param>
		/// <param name="startDate">The start date</param>
		/// <param name="endDate">The end date</param>
		/// <returns>A list of full paths to all saved files</returns>
		/// <exception cref="ArgumentNullException">If startDate or endDate is null</exception>
		/// <exception cref="ArgumentException">If startDate is after endDate</exception>
		public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate)
		{
			// ReferenceEquals keeps the null checks independent of Date's own operators
			if (ReferenceEquals(startDate, null))
				throw new ArgumentNullException(nameof(startDate));
			if (ReferenceEquals(endDate, null))
				throw new ArgumentNullException(nameof(endDate));
			if (startDate > endDate)
				throw new ArgumentException("startDate must be the same as or before the endDate.");

			List<string> savedFiles = new List<string>();

			// the year whose 00 archive was requested last; null until the first iteration
			int? yearOfLastYearlyArchive = null;

			Date currentDate = startDate;
			do
			{
				int year = (int)currentDate.Year;
				savedFiles.AddRange(DownloadData(type, format, year, (int)currentDate.Month));

				// also try to find the 00 file, exactly once for each year in the range:
				if (yearOfLastYearlyArchive != year)
				{
					savedFiles.AddRange(DownloadData(type, format, year, 0));
					yearOfLastYearlyArchive = year;
				}

				currentDate = currentDate.IncreaseMonthByOne();
			} while (currentDate <= endDate);

			return savedFiles;
		}

		/// <summary>
		/// Not implemented yet.
		/// </summary>
		/// <exception cref="NotImplementedException">Always</exception>
		public bool CheckForNewData()
		{
			throw new NotImplementedException();
		}

		/// <summary>
		/// Retrieves all data files with dates falling within the specified range. If not all data for the specified range is found
		/// then returns also file/s with month 0 if exists.
		/// If startDate and/or endDate are null, then returns all files from the given subdirectory.
		/// Files whose names do not follow the "{month}-{year}.{format}" pattern are ignored.
		/// </summary>
		/// <param name="subDirectory">The subdirectory to search</param>
		/// <param name="startDate">The start date</param>
		/// <param name="endDate">The end date</param>
		/// <returns>A list of all retrieved data files from the requested time span</returns>
		public List<string> GetData(string subDirectory, Date startDate, Date endDate)
		{
			if (ReferenceEquals(startDate, null) || ReferenceEquals(endDate, null))
				return GetData(subDirectory);

			List<string> relevantFiles = new List<string>();

			// 00 files found in the directory: year -> path
			List<KeyValuePair<int, string>> yearlyFiles = new List<KeyValuePair<int, string>>();

			// prepare a list of requested dates; dates found in the directory are removed again below:
			List<Date> missingDates = new List<Date>();
			Date currentDate = startDate;
			do
			{
				missingDates.Add(currentDate);
				currentDate = currentDate.IncreaseMonthByOne();
			} while (currentDate <= endDate);

			foreach (string file in Directory.GetFiles(subDirectory))
			{
				int month;
				int year;
				if (!TryParseStoredName(file, out month, out year))
				{
					logger.Warn("Ignoring file with an unexpected name: " + file);
					continue;
				}

				if (month == 0)
				{
					yearlyFiles.Add(new KeyValuePair<int, string>(year, file));
					continue;
				}

				Date fileDate = new Date((uint)month, (uint)year);
				if (fileDate >= startDate && fileDate <= endDate)
				{
					relevantFiles.Add(file);
					missingDates.Remove(fileDate);
				}
			}

			// 00 will only appear once for every year?
			foreach (KeyValuePair<int, string> yearlyFile in yearlyFiles)
			{
				int year = yearlyFile.Key;

				// a 00 file is relevant only if some requested month of its year was not found:
				if (missingDates.Exists(d => d.Year == year))
					relevantFiles.Add(yearlyFile.Value);
			}

			return relevantFiles;
		}

		/// <summary>
		/// Reads month and year from a stored file name of the form "{month}-{year}.{format}".
		/// </summary>
		/// <param name="path">Path to the stored file</param>
		/// <param name="month">The parsed month; 0 if parsing failed</param>
		/// <param name="year">The parsed year; 0 if parsing failed</param>
		/// <returns>True if both numbers could be parsed, false otherwise</returns>
		private static bool TryParseStoredName(string path, out int month, out int year)
		{
			month = 0;
			year = 0;

			string[] parts = Path.GetFileName(path).Split(storedNameSeparators);
			return parts.Length >= 2
				&& int.TryParse(parts[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out month)
				&& int.TryParse(parts[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out year);
		}

		/// <summary>
		/// Returns all file paths in a given directory
		/// </summary>
		/// <param name="subDirectory">The directory to list</param>
		/// <returns>A list of paths of all files in the directory</returns>
		private List<string> GetData(string subDirectory)
		{
			return new List<string>(Directory.GetFiles(subDirectory));
		}
	}
513
}
(1-1/3)