Projekt

Obecné

Profil

Stáhnout (10.6 KB) Statistiky
| Větev: | Tag: | Revize:
1
//
2
// Author: Eliska Mourycova
3
//
4

    
5
using System;
6
using System.Collections.Generic;
7
using System.IO;
8
using System.IO.Compression;
9
using System.Net;
10

    
11
namespace ServerApp.DataDownload
12
{
13
	/// <summary>
	/// All data types that can be requested (this project does not use every one of them).
	/// The member names are kept in Czech because that makes working with the file names easier.
	/// TBD: they may be translated to English later.
	/// </summary>
	public enum DataType
	{
		POCASI,
		ENERGO,
		STROJE,
		EMAIL,
		OBSAZENI_MISTNOSTI,
		JIS,
		KOLOBEZKY,
		WIFI
	}
22

    
23
	/// <summary>
	/// The file formats in which data can be requested.
	/// </summary>
	public enum DataFormat
	{
		XML,
		JSON,
		CSV
	}
30

    
31
	/// <summary>
32
	/// This class takes care of downloading of data. Download happens from http://openstore.zcu.cz/.
33
	/// </summary>
34
	public class DataDownloader
35
	{
36
		/// <summary>
37
		/// The root directory containing all downloaded data
38
		/// </summary>
39
		public string RootDataDirectory { get; }
40

    
41
		/// <summary>
42
		/// For a DataType key, returns the full (absolute) path to the directory where this type of data is stored
43
		/// </summary>
44
		public Dictionary<DataType, string> DataSubDirectories { get; }
45

    
46
		/// <summary>
47
		/// Flag stating whether files which already exist should be overwritten when downloaded again
48
		/// </summary>
49
		public bool OverwriteExisting { get; set; }
50

    
51
		// the main site where the data can be downloaded from
52
		private string site;
53

    
54
		// the substring at the start of every file name
55
		private string dataStr;
56

    
57
		// WebClient instance used for the actual download
58
		private WebClient webClient;
59

    
60
		// a shortcut to writing Path.DirectorySeparatorChar
61
		private char sep = Path.DirectorySeparatorChar;
62

    
63
		private List<string> separatedFileName;
64
		private List<string> variablesInsertions;
65
		private List<char> nameTurns;
66

    
67
		/// <summary>
		/// Creates a downloader that fetches archives from <paramref name="website"/> and stores
		/// the extracted files under <paramref name="rootDataDir"/>.
		/// </summary>
		/// <param name="rootDataDir">Root directory for downloaded data; one sub-directory per data type is placed inside it.</param>
		/// <param name="website">Base address the archives are downloaded from.</param>
		/// <param name="namingConvention">
		/// Pattern of the archive file name. Text inside curly braces is treated as a variable;
		/// the variables understood when a name is built are type, month, year and format.
		/// </param>
		/// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
		public DataDownloader(string rootDataDir, string website, string namingConvention)
		{
			// Fail fast with a descriptive exception instead of a NullReferenceException from
			// ParseNaming or a malformed path/URL at download time.
			if (rootDataDir == null)
				throw new ArgumentNullException(nameof(rootDataDir));
			if (website == null)
				throw new ArgumentNullException(nameof(website));
			if (namingConvention == null)
				throw new ArgumentNullException(nameof(namingConvention));

			DataSubDirectories = new Dictionary<DataType, string>();

			site = website;
			dataStr = "OD_ZCU_";
			ParseNaming(namingConvention);

			RootDataDirectory = rootDataDir;
			OverwriteExisting = false;

			webClient = new WebClient();
		}
84

    
85
		/// <summary>
		/// Splits the naming convention into literal pieces and variable names and records their order.
		/// </summary>
		/// <remarks>
		/// Text outside curly braces is a literal piece, text between '{' and '}' is a variable name.
		/// For example "a_{type}_b" yields the literal "a_", the variable "type" and the literal "_b".
		/// Empty pieces and empty variable names are not recorded. Parsing is tolerant: a '{' met while
		/// already inside a variable, or a '}' met outside of one, is dropped without splitting the
		/// segment being read, and an unterminated variable at the end is still recorded.
		/// </remarks>
		/// <param name="namingConvention">The naming pattern to parse.</param>
		private void ParseNaming(string namingConvention)
		{
			separatedFileName = new List<string>();
			variablesInsertions = new List<string>();
			nameTurns = new List<char>();

			System.Text.StringBuilder literal = new System.Text.StringBuilder();
			System.Text.StringBuilder variable = new System.Text.StringBuilder();
			bool insideVariable = false;

			foreach (char c in namingConvention)
			{
				switch (c)
				{
					case '{':
						// a variable starts: flush the literal collected so far
						AddToNameParts(literal.ToString());
						literal.Clear();
						insideVariable = true;
						break;

					case '}':
						// a variable ends: flush its name
						AddToVariables(variable.ToString());
						variable.Clear();
						insideVariable = false;
						break;

					default:
						(insideVariable ? variable : literal).Append(c);
						break;
				}
			}

			// flush whatever is left after the last brace
			if (insideVariable)
				AddToVariables(variable.ToString());
			else
				AddToNameParts(literal.ToString());
		}
126

    
127
		/// <summary>
		/// Stores a literal piece of the file name and marks its position with 'n' in the segment order.
		/// Empty strings are ignored.
		/// </summary>
		/// <param name="s">The literal text to store.</param>
		private void AddToNameParts(string s)
		{
			if (s.Length == 0)
				return;

			separatedFileName.Add(s);
			nameTurns.Add('n');
		}
136

    
137
		/// <summary>
		/// Stores a variable name of the file name and marks its position with 'v' in the segment order.
		/// Empty strings are ignored.
		/// </summary>
		/// <param name="s">The variable name to store.</param>
		private void AddToVariables(string s)
		{
			if (s.Length == 0)
				return;

			variablesInsertions.Add(s);
			nameTurns.Add('v');
		}
146

    
147

    
148
		/// <summary>
		/// Builds the name of the archive to download by expanding the parsed naming convention.
		/// </summary>
		/// <param name="type">Inserted for the variable "type".</param>
		/// <param name="format">Inserted for the variable "format".</param>
		/// <param name="year">Inserted for the variable "year".</param>
		/// <param name="month">Inserted for the variable "month"; values below 10 get a leading zero.</param>
		/// <returns>The literal pieces and the expanded variables joined in their original order.</returns>
		/// <exception cref="FormatException">The naming convention contains a variable other than type, month, year or format.</exception>
		private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
		{
			System.Text.StringBuilder name = new System.Text.StringBuilder();

			// nameTurns holds the order of the segments; literals and variables live in
			// separate lists, so each list is walked with its own cursor
			int literalIndex = 0;
			int variableIndex = 0;

			foreach (char turn in nameTurns)
			{
				if (turn == 'n')
				{
					name.Append(separatedFileName[literalIndex]);
					literalIndex++;
				}
				else if (turn == 'v')
				{
					switch (variablesInsertions[variableIndex])
					{
						case "type":
							name.Append(type.ToString());
							break;
						case "month":
							name.Append(month < 10 ? "0" + month : "" + month);
							break;
						case "year":
							name.Append(year.ToString());
							break;
						case "format":
							name.Append(format.ToString());
							break;
						default:
							// a specific exception type instead of the bare System.Exception
							throw new FormatException("Config file error - naming conventions can only contain variables with following names: type, month, year, format");
					}
					variableIndex++;
				}
			}

			return name.ToString();
		}
187

    
188

    
189
		/// <summary>
		/// Placeholder for reading a month and a year out of a file name.
		/// Not implemented yet: the body is empty, so <paramref name="month"/> and
		/// <paramref name="year"/> are left exactly as the caller passed them.
		/// </summary>
		/// <param name="fileName">The file name (currently unused).</param>
		/// <param name="month">Left unchanged.</param>
		/// <param name="year">Left unchanged.</param>
		private void ExtractDateFromFileName(string fileName, ref int month, ref int year)
		{
			// intentionally empty - no extraction logic exists yet
		}
193

    
194
		/// <summary>
		/// Downloads one specific archive, extracts its content into the sub-directory of the
		/// given data type and deletes the downloaded archive again.
		/// </summary>
		/// <remarks>
		/// The archive is fetched from "site/dataStr + MM_YYYY/archive name" and saved temporarily
		/// under its own name relative to the current working directory. Every file entry is
		/// extracted as "{month}-{year}.{format}" into RootDataDirectory/type/, so the archive is
		/// expected to hold a single file. That directory is not created here; the public overload
		/// creates it before calling this method.
		/// A failed download (WebException) is not reported to the caller - an empty list is
		/// returned instead. Other failures, e.g. a corrupted archive, propagate.
		/// </remarks>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month</param>
		/// <returns>
		/// Full paths of the extracted files (should be only one). A file that already existed and
		/// was not overwritten is listed as well.
		/// </returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthStr = month < 10 ? "0" + month : "" + month;
			string monthYr = monthStr + "_" + year;

			string nameZip = BuildDownloadedName(type, format, year, month);
			string url = site + "/" + dataStr + monthYr + "/" + nameZip;
			string nameFolder = RootDataDirectory + sep + type + sep;

			try
			{
				// Download the zip file:
				webClient.DownloadFile(url, nameZip);

				// 'using' releases the archive handle even when extraction throws
				using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
				{
					foreach (ZipArchiveEntry entry in zipArchive.Entries)
					{
						// Directory entries have an empty Name. Extracting one would create an
						// empty file under the target name and block the real data file.
						if (entry.Name.Length == 0)
							continue;

						string extractedFile = nameFolder + $"{month}-{year}.{format}";

						// the path is reported even if the existing file is kept below
						extractedFiles.Add(Path.GetFullPath(extractedFile));

						// keep an existing file unless overwriting was requested
						if (OverwriteExisting || !File.Exists(extractedFile))
							entry.ExtractToFile(extractedFile, OverwriteExisting);
					}
				}
			}
			catch (WebException)
			{
				// Deliberately ignored: the download fails when the requested archive is not
				// available at the url, and callers probe for archives that may not exist.
			}
			finally
			{
				// Remove the temporary archive on every path, including failed extraction.
				// File.Exists never throws, so this cannot mask an exception in flight.
				if (File.Exists(nameZip))
					File.Delete(nameZip);
			}

			return extractedFiles;
		}
281

    
282

    
283

    
284
		/// <summary>
		/// Downloads the selected type of data in the desired format for every month from
		/// <paramref name="startDate"/> to <paramref name="endDate"/> (both inclusive) and returns
		/// a list of full paths to all saved files.
		/// If some of the files already existed and were not overwritten, the returned list contains paths to these files as well.
		/// </summary>
		/// <remarks>
		/// Besides the monthly archives, the archive with month number 0 is requested once for
		/// every year that the time span touches. Archives that cannot be downloaded are skipped.
		/// The sub-directory for the data type is created if needed and recorded in
		/// <see cref="DataSubDirectories"/>.
		/// </remarks>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="startDate">First month to download</param>
		/// <param name="endDate">Last month to download</param>
		/// <returns>Full paths of all files saved (or already present) for the time span.</returns>
		/// <exception cref="ArgumentException"><paramref name="startDate"/> is after <paramref name="endDate"/>.</exception>
		public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate/*int startYear, int endYear, int startMonth, int endMonth*/)
		{
			if (startDate > endDate)
				throw new ArgumentException("startDate must be the same as or before the endDate.");

			// initialize:
			List<string> savedFiles = new List<string>();
			string subDirectory = RootDataDirectory + sep + type;
			Directory.CreateDirectory(subDirectory);

			// create subdirectory record if it doesn't exist:
			if (!DataSubDirectories.ContainsKey(type))
				DataSubDirectories.Add(type, Path.GetFullPath(subDirectory));

			// the year whose 00 file has been requested most recently (none yet)
			int? yearOfLastZeroFile = null;

			Date currentDate = startDate;
			do
			{
				Console.WriteLine("current date: " + currentDate);

				int currentYear = (int)currentDate.Year;
				savedFiles.AddRange(DownloadData(type, format, currentYear, (int)currentDate.Month));

				// Also try to find the 00 file - exactly once for each year of the time span.
				// Triggering on the December roll-over instead would request the first year
				// twice and miss the last year whenever the span ends before its December.
				if (yearOfLastZeroFile != currentYear)
				{
					savedFiles.AddRange(DownloadData(type, format, currentYear, 0));
					yearOfLastZeroFile = currentYear;
				}

				currentDate = currentDate.IncreaseMonthByOne();

			} while (currentDate <= endDate);

			return savedFiles;
		}
339

    
340
		/// <summary>
		/// Not implemented yet.
		/// </summary>
		/// <returns>Never returns a value at the moment.</returns>
		/// <exception cref="NotImplementedException">Always thrown.</exception>
		public bool CheckForNewData()
		{
			// placeholder until the check is written
			throw new NotImplementedException();
		}
344

    
345

    
346

    
347
		/// <summary>
		/// Unfinished: walks the files of <paramref name="subDirectory"/>, prints each file name and
		/// parses it as "month-year.extension", then throws <see cref="NotImplementedException"/>.
		/// <paramref name="startDate"/> and <paramref name="endDate"/> are not used yet and nothing is returned.
		/// </summary>
		/// <param name="subDirectory">Directory whose files are inspected.</param>
		/// <param name="startDate">Currently unused.</param>
		/// <param name="endDate">Currently unused.</param>
		/// <returns>Never returns a value at the moment.</returns>
		/// <exception cref="NotImplementedException">Thrown once all files have been inspected.</exception>
		public List<string> GetData(string subDirectory, Date startDate, Date endDate)
		{
			foreach (string filePath in Directory.GetFiles(subDirectory))
			{
				string currFileName = Path.GetFileName(filePath);
				Console.WriteLine("curr file: " + currFileName);

				// expected shape of the name: <month>-<year>.<extension>
				string[] nameParts = currFileName.Split('-', '.');
				int month = int.Parse(nameParts[0]);
				int year = int.Parse(nameParts[1]);

				// the date is built but not used for anything yet
				Date fileDate = new Date((uint)month, (uint)year);
			}

			throw new NotImplementedException();
		}
367

    
368

    
369
		#region UNUSED
370
		//public string GetDirectoryListingRegexForUrl(string url)
371
		//{
372
		//	if (url.Equals(site))
373
		//	{
374
		//		//return "\\\"([^\"]*)\\\"";
375
		//		return @"\bOD_ZCU_\w*\b";
376
		//		//return @"\A[OD_ZCU_]";
377
		//	}
378
		//	else return null;
379
		//}
380
		//public void ListDirectory()
381
		//{
382
		//	string url = site;
383
		//	HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
384
		//	using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
385
		//	{
386
		//		using (StreamReader reader = new StreamReader(response.GetResponseStream()))
387
		//		{
388
		//			string html = reader.ReadToEnd();
389

    
390

    
391

    
392
		//			Regex regex = new Regex(GetDirectoryListingRegexForUrl(url));
393
		//			MatchCollection matches = regex.Matches(html);
394
		//			Console.WriteLine(matches.Count);
395

    
396
		//			if (matches.Count > 0)
397
		//			{
398
		//				foreach (Match match in matches)
399
		//				{
400
		//					//if (match.Success)
401
		//					//{
402
		//						Console.WriteLine(match.ToString());
403
		//					//}
404
		//				}
405
		//			}
406
		//		}
407
		//		Console.ReadLine();
408
		//	}
409
		//}
410

    
411
		#endregion
412
	}
413
}
(1-1/2)