Projekt

Obecné

Profil

Stáhnout (14.2 KB) Statistiky
| Větev: | Tag: | Revize:
1
//
2
// Author: Eliska Mourycova
3
//
4

    
5
using System;
6
using System.Collections.Generic;
7
using System.IO;
8
using System.IO.Compression;
9
using System.Net;
10

    
11
namespace ServerApp.DataDownload
12
{
13
	/// <summary>
	/// Enumerates every available kind of data (this project does not have to use all of them).
	/// The member names are deliberately Czech, which makes working with the file names easier.
	/// TBD: They might be translated to English later.
	/// </summary>
	public enum DataType
	{
		POCASI,
		ENERGO,
		STROJE,
		EMAIL,
		OBSAZENI_MISTNOSTI,
		JIS,
		KOLOBEZKY,
		WIFI
	}
22

    
23
	/// <summary>
	/// Enumerates the formats in which data is available.
	/// </summary>
	public enum DataFormat
	{
		XML,
		JSON,
		CSV
	}
30

    
31
	/// <summary>
32
	/// This class takes care of downloading of data. Download happens from http://openstore.zcu.cz/.
33
	/// </summary>
34
	public class DataDownloader : IDataDownloader
35
	{
36
		/// <summary>
37
		/// The root directory containing all downloaded data
38
		/// </summary>
39
		public string RootDataDirectory { get; }
40

    
41
		/// <summary>
42
		/// For a DataType key returns the full (absolute) path to the directory where this type of data is stored
43
		/// </summary>
44
		public Dictionary<DataType, string> DataSubDirectories { get; }
45

    
46
		/// <summary>
47
		/// Flag stating whether files which already exist should be overwritten when downloaded again
48
		/// </summary>
49
		public bool OverwriteExisting { get; set; }
50

    
51
		// the main site where the data can be downloaded from
52
		private string site;
53

    
54
		// weather prediction site
55
		private string weatherPredictionSite;
56

    
57
		// the substring at the start of every file name
58
		private string dataStr;
59

    
60
		// WebClient instance used for the actual download
61
		private WebClient webClient;
62

    
63
		// a shortcut to writing Path.DirectorySeparatorChar
64
		private char sep = Path.DirectorySeparatorChar;
65

    
66
		// lists used for parsing the file names:
67
		private List<string> separatedFileName;
68
		private List<string> variablesInsertions;
69
		private List<char> nameTurns;
70

    
71
		/// <summary>
		/// Creates the downloader: stores the configured addresses, parses the naming convention
		/// and creates one data subdirectory per <see cref="DataType"/>.
		/// </summary>
		/// <param name="rootDataDir">Root directory for all downloaded data</param>
		/// <param name="website">Address of the main site the data is downloaded from</param>
		/// <param name="namingConvention">Naming convention of the downloaded archives (literal text with {variable} parts)</param>
		/// <param name="weatherPredSite">Address the weather prediction is downloaded from</param>
		public DataDownloader(string rootDataDir, string website, string namingConvention, string weatherPredSite)
		{
			// remote side:
			this.site = website;
			this.weatherPredictionSite = weatherPredSite;
			this.webClient = new WebClient();

			// archive naming:
			this.dataStr = "OD_ZCU_";
			ParseNaming(namingConvention);

			// local storage:
			this.OverwriteExisting = false;
			this.RootDataDirectory = rootDataDir;
			this.DataSubDirectories = new Dictionary<DataType, string>();

			// TODO: decide whether the subdirectories should really be created eagerly here
			CreateSubdirectories();
		}
90

    
91
		/// <summary>
		/// Downloads the weather prediction from the configured weather prediction site
		/// and returns the downloaded content itself; nothing is written to disk.
		/// </summary>
		/// <returns>The downloaded content as a string, or null if the download failed</returns>
		public string DownloadWeatherPrediction()
		{
			try
			{
				return webClient.DownloadString(weatherPredictionSite);
			}
			catch (Exception e)
			{
				// Deliberately best effort: report the failure together with its cause
				// and signal it to the caller with a null result instead of throwing.
				Console.WriteLine("Weather prediction download failed!");
				Console.WriteLine(e.Message);
				return null;
			}
		}
116

    
117
		/// <summary>
		/// Makes sure a subdirectory of the root data directory exists for every data type
		/// and registers its absolute path in <see cref="DataSubDirectories"/>.
		/// TBD if we want to do it this way
		/// </summary>
		private void CreateSubdirectories()
		{
			DataType[] allTypes = (DataType[])Enum.GetValues(typeof(DataType));

			for (int i = 0; i < allTypes.Length; i++)
			{
				DataType current = allTypes[i];
				string directory = $"{RootDataDirectory}{sep}{current}";

				Directory.CreateDirectory(directory);

				// keep a record that is already there, add only the missing ones:
				if (DataSubDirectories.ContainsKey(current))
					continue;

				DataSubDirectories.Add(current, Path.GetFullPath(directory));
			}
		}
133

    
134
		/// <summary>
		/// Parses the naming convention to be later used for determining URLs for data download.
		/// Text outside curly braces is stored as a literal name part, text inside curly braces
		/// as a variable name; the order in which they appeared is recorded as well.
		/// </summary>
		/// <param name="namingConvention">The configured naming convention</param>
		/// <exception cref="ArgumentNullException">If <paramref name="namingConvention"/> is null</exception>
		private void ParseNaming(string namingConvention)
		{
			if (namingConvention == null)
				throw new ArgumentNullException(nameof(namingConvention));

			separatedFileName = new List<string>();
			variablesInsertions = new List<string>();
			nameTurns = new List<char>();

			// Two independent buffers, exactly one of which receives the current character.
			System.Text.StringBuilder literal = new System.Text.StringBuilder();
			System.Text.StringBuilder variable = new System.Text.StringBuilder();
			bool readingLiteral = true;

			foreach (char c in namingConvention)
			{
				switch (c)
				{
					case '{':
						// a variable starts - flush the literal text read so far
						AddToNameParts(literal.ToString());
						literal.Clear();
						readingLiteral = false;
						break;

					case '}':
						// a variable ends - flush its name
						AddToVariables(variable.ToString());
						variable.Clear();
						readingLiteral = true;
						break;

					default:
						(readingLiteral ? literal : variable).Append(c);
						break;
				}
			}

			// add the rest if there is any (an unterminated variable is accepted as a variable):
			if (readingLiteral)
				AddToNameParts(literal.ToString());
			else
				AddToVariables(variable.ToString());
		}
179

    
180
		/// <summary>
		/// Registers a literal part of the file name and marks its position in the name
		/// with 'n'. Empty strings are ignored.
		/// </summary>
		/// <param name="s">The literal part to register</param>
		private void AddToNameParts(string s)
		{
			if (s.Length == 0)
				return;

			separatedFileName.Add(s);
			nameTurns.Add('n');
		}
190

    
191
		/// <summary>
		/// Registers the name of a variable used in the file name and marks its position
		/// in the name with 'v'. Empty strings are ignored.
		/// </summary>
		/// <param name="s">The variable name to register</param>
		private void AddToVariables(string s)
		{
			if (s.Length == 0)
				return;

			variablesInsertions.Add(s);
			nameTurns.Add('v');
		}
201

    
202
		/// <summary>
		/// Builds the name of the downloaded file. Takes naming convention into account:
		/// literal parts are copied as they are, variables are replaced by the given values.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The data format</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month (values below 10 are prefixed with "0")</param>
		/// <returns>The file name put together according to the parsed naming convention</returns>
		/// <exception cref="FormatException">
		/// If the naming convention contains a variable other than type, month, year or format
		/// </exception>
		private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
		{
			System.Text.StringBuilder name = new System.Text.StringBuilder();

			// literal parts and variables live in separate lists, each with its own cursor:
			int partInd = 0;
			int varInd = 0;

			foreach (char turn in nameTurns)
			{
				if (turn == 'n')
				{
					name.Append(separatedFileName[partInd]);
					partInd++;
				}
				else if (turn == 'v')
				{
					string variable = variablesInsertions[varInd];
					varInd++;

					switch (variable)
					{
						case "type":
							name.Append(type.ToString());
							break;
						case "month":
							name.Append(month < 10 ? "0" + month : month.ToString());
							break;
						case "year":
							name.Append(year.ToString());
							break;
						case "format":
							name.Append(format.ToString());
							break;
						default:
							// FormatException derives from Exception, so existing handlers still catch it
							throw new FormatException(
								"Config file error - naming conventions can only contain variables with the following names: type, month, year, format"
								+ " (found: \"" + variable + "\")");
					}
				}
			}

			return name.ToString();
		}
249

    
250

    
251
		/// <summary>
		/// Downloads a specific archive, extracts it into the subdirectory of the given data type
		/// as "{month}-{year}.{format}" and deletes the temporary archive afterwards.
		/// A failed download (<see cref="WebException"/>) is not an error - an empty list is returned;
		/// any other exception is propagated to the caller.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month</param>
		/// <returns>A list of all extracted file names (should be only one)</returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthStr = month < 10 ? "0" + month : "" + month;
			string monthYr = monthStr + "_" + year;

			string nameZip = BuildDownloadedName(type, format, year, month);
			string url = site + "/" + dataStr + monthYr + "/" + nameZip;

			// Where the archive content ends up:
			string nameFolder = RootDataDirectory + sep + type + sep;
			string extractedFile = nameFolder + $"{month}-{year}.{format}";

			try
			{
				// Download the zip file (into the current working directory):
				webClient.DownloadFile(url, nameZip);

				// 'using' releases the file handle even if the extraction throws,
				// which is also required for the clean-up below to succeed.
				using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
				{
					// NOTE(review): every entry is written to the same target file, so an archive
					// with several entries yields the same path more than once - confirm that
					// the archives really contain a single file.
					foreach (ZipArchiveEntry entry in zipArchive.Entries)
					{
						// the path is reported even if the existing file is kept:
						extractedFiles.Add(Path.GetFullPath(extractedFile));

						if (!OverwriteExisting && File.Exists(extractedFile))
							continue;

						entry.ExtractToFile(extractedFile, OverwriteExisting);
					}
				}
			}
			catch (WebException)
			{
				// The download fails if the specified url is invalid, which is expected
				// for months that have not been published - nothing was downloaded.
			}
			finally
			{
				// the zip is only a temporary file; remove it no matter how far we got:
				if (File.Exists(nameZip))
					File.Delete(nameZip);
			}

			return extractedFiles;
		}
338

    
339

    
340

    
341
		/// <summary>
		/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
		/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
		/// Besides the monthly archives, the archive with month 0 is requested exactly once for every year
		/// the time span touches.
		/// </summary>
		/// <param name="type">The requested data type</param>
		/// <param name="format">The data format</param>
		/// <param name="startDate">The start date</param>
		/// <param name="endDate">The end date</param>
		/// <returns>A list of full paths to all saved files</returns>
		/// <exception cref="ArgumentException">If <paramref name="startDate"/> is after <paramref name="endDate"/></exception>
		public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate)
		{
			if (startDate > endDate)
				throw new ArgumentException("startDate must be the same as or before the endDate.");

			List<string> savedFiles = new List<string>();

			// The year whose month-0 archive was requested last. Remembering it (instead of
			// watching for the December -> January step) guarantees one request per year:
			// none is repeated for the first year and none is skipped for the later ones.
			int? lastZeroFileYear = null;

			Date currentDate = startDate;
			do
			{
				int year = (int)currentDate.Year;
				int month = (int)currentDate.Month;

				savedFiles.AddRange(DownloadData(type, format, year, month));

				// also try to find the 00 file for each year:
				if (lastZeroFileYear != year)
				{
					savedFiles.AddRange(DownloadData(type, format, year, 0));
					lastZeroFileYear = year;
				}

				currentDate = currentDate.IncreaseMonthByOne();

			} while (currentDate <= endDate);

			return savedFiles;
		}
401

    
402
		/// <summary>
		/// Not implemented yet - every call throws.
		/// </summary>
		/// <returns>Nothing at the moment, the method never returns normally</returns>
		/// <exception cref="NotImplementedException">Always</exception>
		public bool CheckForNewData()
		{
			throw new NotImplementedException();
		}
406

    
407

    
408
		/// <summary>
		/// Retrieves all data files with dates falling within the specified range. If not all data for the specified range is found
		/// then returns also file/s with month 0 if exists.
		/// If startDate and/or endDate are null, then returns all files from the given subdirectory.
		/// Files whose name does not start with "month-year" (two numbers separated by '-') are skipped.
		/// </summary>
		/// <param name="subDirectory">The subdirectory to search</param>
		/// <param name="startDate">The start date</param>
		/// <param name="endDate">The end date</param>
		/// <returns>A list of all retrieved data files from the requested time span</returns>
		public List<string> GetData(string subDirectory, Date startDate, Date endDate)
		{
			if (startDate == null || endDate == null)
				return GetData(subDirectory);

			string[] files = Directory.GetFiles(subDirectory);

			// prepare a list of requested dates; a date is removed once its file is found,
			// so whatever remains at the end is missing in the directory:
			List<Date> requestedDates = new List<Date>();
			Date currentDate = startDate;
			do
			{
				requestedDates.Add(currentDate);
				currentDate = currentDate.IncreaseMonthByOne();

			} while (currentDate <= endDate);

			List<string> relevantFiles = new List<string>();

			// month-0 files together with their year (key = year, value = path):
			List<KeyValuePair<int, string>> zeroMonthFiles = new List<KeyValuePair<int, string>>();

			foreach (string file in files)
			{
				int month;
				int year;
				if (!TryParseFileDate(file, out month, out year))
					continue; // a foreign file must not break the whole retrieval

				if (month == 0)
				{
					zeroMonthFiles.Add(new KeyValuePair<int, string>(year, file));
					continue;
				}

				Date fileDate = new Date((uint)month, (uint)year);

				if (fileDate >= startDate && fileDate <= endDate)
				{
					// we want to add this
					relevantFiles.Add(file);
					requestedDates.Remove(fileDate);
				}
			}

			// a month-0 file is relevant only if some month of its year is still missing:
			foreach (KeyValuePair<int, string> zeroFile in zeroMonthFiles)
			{
				if (requestedDates.Exists(missing => missing.Year == zeroFile.Key))
					relevantFiles.Add(zeroFile.Value);
			}

			return relevantFiles;
		}

		// Reads month and year from a file name of the form "month-year.extension";
		// returns false (and zeros) if the name does not have that form.
		private static bool TryParseFileDate(string path, out int month, out int year)
		{
			month = 0;
			year = 0;

			string[] parts = Path.GetFileName(path).Split('-', '.');

			return parts.Length >= 2
				&& int.TryParse(parts[0], out month)
				&& int.TryParse(parts[1], out year);
		}
491

    
492
		/// <summary>
		/// Lists the paths of all files located directly in the given directory.
		/// </summary>
		/// <param name="subDirectory">The directory to list</param>
		/// <returns>A new list holding the paths of all files in the directory</returns>
		private List<string> GetData(string subDirectory)
		{
			return new List<string>(Directory.GetFiles(subDirectory));
		}
507
	}
508
}
(1-1/3)