Projekt

Obecné

Profil

Stáhnout (10.6 KB) Statistiky
| Větev: | Tag: | Revize:
1 34bf7aa2 Eliška Mourycová
//
2
// Author: Eliska Mourycova
3
//
4
5
using System;
6 d2d1c86a Eliška Mourycová
using System.Collections.Generic;
7
using System.IO;
8
using System.IO.Compression;
9
using System.Net;
10
11 7a998d66 Eliška Mourycová
namespace ServerApp.DataDownload
12 d2d1c86a Eliška Mourycová
{
13 34bf7aa2 Eliška Mourycová
	/// <summary>
	/// All data types offered by the data source (not all of them are used in this project).
	/// The member names are in Czech, which makes handling of the file names easier.
	/// TBD: They might be translated to English later.
	/// </summary>
	public enum DataType
	{
		POCASI,
		ENERGO,
		STROJE,
		EMAIL,
		OBSAZENI_MISTNOSTI,
		JIS,
		KOLOBEZKY,
		WIFI
	}
22 34bf7aa2 Eliška Mourycová
23
	/// <summary>
	/// All data formats the data can be downloaded in.
	/// </summary>
	public enum DataFormat
	{
		XML,
		JSON,
		CSV
	}
30
31 34bf7aa2 Eliška Mourycová
	/// <summary>
32
	/// This class takes care of downloading data archives from the configured website (originally http://openstore.zcu.cz/) and extracting them.
33
	/// </summary>
34 d2d1c86a Eliška Mourycová
	public class DataDownloader
35
	{
36 34bf7aa2 Eliška Mourycová
		/// <summary>
		/// The root directory containing all downloaded data
		/// </summary>
		public string RootDataDirectory { get; }

		/// <summary>
		/// Maps a DataType to the full (absolute) path of the directory where data of that type is stored
		/// </summary>
		public Dictionary<DataType, string> DataSubDirectories { get; }

		/// <summary>
		/// Flag stating whether files which already exist should be overwritten when downloaded again
		/// </summary>
		public bool OverwriteExisting { get; set; }

		/// <summary>The main site where the data can be downloaded from</summary>
		private readonly string site;

		/// <summary>The substring used as the prefix of the remote directory names</summary>
		private readonly string dataStr;

		/// <summary>WebClient instance used for the actual download</summary>
		private readonly WebClient webClient;

		/// <summary>A shortcut to writing Path.DirectorySeparatorChar</summary>
		private readonly char sep = Path.DirectorySeparatorChar;

		/// <summary>Literal parts of the archive file name, in the order they appear in the naming convention</summary>
		private List<string> separatedFileName;

		/// <summary>Names of the variables found in the naming convention, in the order they appear</summary>
		private List<string> variablesInsertions;

		/// <summary>Order of the name pieces: 'n' stands for a literal part, 'v' for a variable</summary>
		private List<char> nameTurns;
66
67
		/// <summary>
		/// Creates a downloader which stores data under <paramref name="rootDataDir"/> and downloads it from <paramref name="website"/>.
		/// Overwriting of existing files is switched off by default.
		/// </summary>
		/// <param name="rootDataDir">The root directory for all downloaded data</param>
		/// <param name="website">The site the archives are downloaded from</param>
		/// <param name="namingConvention">Template of the archive file name; text in curly braces is treated as a variable name</param>
		public DataDownloader(string rootDataDir, string website, string namingConvention)
		{
			// plain settings first:
			this.RootDataDirectory = rootDataDir;
			this.site = website;
			this.dataStr = "OD_ZCU_";
			this.OverwriteExisting = false;
			this.DataSubDirectories = new Dictionary<DataType, string>();

			// split the naming convention into literal parts and variables:
			ParseNaming(namingConvention);

			this.webClient = new WebClient();
		}
84
85 4a417b8b Eliška Mourycová
		/// <summary>
		/// Splits the naming convention into literal text parts and variable names.
		/// Characters between '{' and '}' are collected as a variable name, all other characters as literal text.
		/// Unbalanced braces are not reported as an error.
		/// </summary>
		/// <param name="namingConvention">The naming convention to parse</param>
		private void ParseNaming(string namingConvention)
		{
			separatedFileName = new List<string>();
			variablesInsertions = new List<string>();
			nameTurns = new List<char>();

			string literal = "";
			string variable = "";
			bool insideBraces = false;

			foreach (char c in namingConvention)
			{
				switch (c)
				{
					case '{':
						// a variable starts: flush the literal text read so far
						AddToNameParts(literal);
						literal = "";
						insideBraces = true;
						break;

					case '}':
						// a variable ends: store its name
						AddToVariables(variable);
						variable = "";
						insideBraces = false;
						break;

					default:
						// normal char, append it to whatever is being read right now
						if (insideBraces)
						{
							variable += c;
						}
						else
						{
							literal += c;
						}
						break;
				}
			}

			// store the rest if there is any:
			if (insideBraces)
			{
				AddToVariables(variable);
			}
			else
			{
				AddToNameParts(literal);
			}

			Console.WriteLine();
		}
126
127
		/// <summary>
		/// Stores a literal part of the file name and records its position with an 'n' mark. Empty strings are ignored.
		/// </summary>
		/// <param name="s">The literal text to store</param>
		private void AddToNameParts(string s)
		{
			if (s.Length == 0)
			{
				return;
			}

			separatedFileName.Add(s);
			nameTurns.Add('n');
		}
136
137
		/// <summary>
		/// Stores a variable name found in the naming convention and records its position with a 'v' mark. Empty strings are ignored.
		/// </summary>
		/// <param name="s">The variable name to store</param>
		private void AddToVariables(string s)
		{
			if (s.Length == 0)
			{
				return;
			}

			variablesInsertions.Add(s);
			nameTurns.Add('v');
		}
146
147
148
		/// <summary>
		/// Builds the name of the archive to download by walking through the parsed naming convention
		/// and replacing every variable with the corresponding value.
		/// </summary>
		/// <param name="type">Inserted for the "type" variable</param>
		/// <param name="format">Inserted for the "format" variable</param>
		/// <param name="year">Inserted for the "year" variable</param>
		/// <param name="month">Inserted for the "month" variable, padded with a leading zero when lower than 10</param>
		/// <returns>The assembled file name</returns>
		/// <exception cref="FormatException">The naming convention contains a variable other than type, month, year or format</exception>
		private string BuildDownloadedName(DataType type, DataFormat format, int year, int month)
		{
			string nameZip = "";

			// literal parts and variables are stored in separate lists, nameTurns says which one comes next
			int partInd = 0;
			int varInd = 0;
			foreach (char turn in nameTurns)
			{
				if (turn == 'n')
				{
					nameZip += separatedFileName[partInd];
					partInd++;
				}
				else if (turn == 'v')
				{
					switch (variablesInsertions[varInd])
					{
						case "type":
							nameZip += type;
							break;
						case "month":
							nameZip += month < 10 ? "0" + month : "" + month;
							break;
						case "year":
							nameZip += year;
							break;
						case "format":
							nameZip += format;
							break;
						default:
							// a specific exception type instead of the bare System.Exception;
							// it still derives from Exception, so existing handlers keep working
							throw new FormatException("Config file error - naming conventions can only contain variables with following names: type, month, year, format");
					}
					varInd++;
				}
			}

			return nameZip;
		}
187
188 085453be Eliška Mourycová
189
		/// <summary>
		/// Placeholder without an implementation: the body is empty, so <paramref name="month"/> and
		/// <paramref name="year"/> are left unchanged.
		/// </summary>
		/// <param name="fileName">Currently unused</param>
		/// <param name="month">Currently not modified</param>
		/// <param name="year">Currently not modified</param>
		private void ExtractDateFromFileName(string fileName, ref int month, ref int year)
		{
			// not implemented yet
		}
193
194 34bf7aa2 Eliška Mourycová
		/// <summary>
		/// Downloads a specific archive and extracts its content into the directory of the given data type.
		/// The downloaded .zip file is removed again, no matter whether the extraction succeeded.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month</param>
		/// <returns>
		/// A list of full paths to all extracted files (should be only one), including files which already
		/// existed and were not overwritten. The list is empty when the download fails.
		/// </returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthStr = month < 10 ? "0" + month : "" + month;
			string monthYr = monthStr + "_" + year;

			string nameZip = BuildDownloadedName(type, format, year, month);
			string url = site + "/" + dataStr + monthYr + "/" + nameZip;

			string nameFolder = RootDataDirectory + sep + type + sep;

			try
			{
				// Download the zip file:
				webClient.DownloadFile(url, nameZip);

				// the using block closes the archive even if the extraction throws,
				// otherwise the open handle would prevent deleting the zip file below
				using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
				{
					foreach (ZipArchiveEntry entry in zipArchive.Entries)
					{
						// NOTE(review): every entry gets the same target name, so an archive with
						// more than one entry would map all of them to a single file - confirm
						// that the archives really contain only one file
						string newFileName = $"{month}-{year}.{format}";
						string extractedFile = nameFolder + newFileName;

						// add full path to the list (also for files which are kept as they are):
						extractedFiles.Add(Path.GetFullPath(extractedFile));

						// existing files are replaced only if overwriting is desired:
						if (OverwriteExisting || !File.Exists(extractedFile))
						{
							entry.ExtractToFile(extractedFile, OverwriteExisting);
						}
					}
				}
			}
			catch (WebException)
			{
				// download fails, if the specified url is invalid; this is deliberately not
				// propagated - the caller just gets an empty list
			}
			finally
			{
				// delete the downloaded zip file, also when the extraction failed;
				// the existence check keeps the cleanup from throwing when nothing was downloaded
				if (File.Exists(nameZip))
				{
					File.Delete(nameZip);
				}
			}

			return extractedFiles;
		}
281
282
283
284
		/// <summary>
		/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
		/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
		/// Besides the monthly archives, the "00" archive is requested exactly once for every year in the time span.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="startDate">The first month to download</param>
		/// <param name="endDate">The last month to download</param>
		/// <returns>Full paths to all files which were saved or which already existed</returns>
		/// <exception cref="ArgumentException"><paramref name="startDate"/> is after <paramref name="endDate"/></exception>
		public List<string> DownloadData(DataType type, DataFormat format, Date startDate, Date endDate)
		{
			if (startDate > endDate)
			{
				throw new ArgumentException("startDate must be the same as or before the endDate.");
			}

			// initialize:
			List<string> savedFiles = new List<string>();
			string subDirectory = RootDataDirectory + sep + type;
			Directory.CreateDirectory(subDirectory);

			// create subdirectory record if it doesn't exist:
			if (!DataSubDirectories.ContainsKey(type))
			{
				DataSubDirectories.Add(type, Path.GetFullPath(subDirectory));
			}

			Date currentDate = startDate;

			// remembers the year whose 00 file was requested last, so that every year is handled
			// exactly once (previously the first year could be requested twice and a year which
			// the time span did not finish was never requested)
			bool yearlyFileRequested = false;
			int yearlyFileYear = 0;

			do
			{
				Console.WriteLine("current date: " + currentDate);

				// read the values before moving on to the next month
				int year = (int)currentDate.Year;
				int month = (int)currentDate.Month;

				savedFiles.AddRange(DownloadData(type, format, year, month));

				// also try to find the 00 file for each year:
				if (!yearlyFileRequested || year != yearlyFileYear)
				{
					savedFiles.AddRange(DownloadData(type, format, year, 0));
					yearlyFileRequested = true;
					yearlyFileYear = year;
				}

				currentDate = currentDate.IncreaseMonthByOne();

			} while (currentDate <= endDate);

			return savedFiles;
		}
339
340 7a998d66 Eliška Mourycová
		/// <summary>
		/// Not implemented yet.
		/// </summary>
		/// <exception cref="NotImplementedException">Always</exception>
		public bool CheckForNewData()
		{
			// no implementation so far
			throw new NotImplementedException();
		}
344
345 085453be Eliška Mourycová
346
347 4a417b8b Eliška Mourycová
		/// <summary>
		/// Not implemented yet. For now it only goes through the files in <paramref name="subDirectory"/>,
		/// prints their names, parses the month and the year from the beginning of each name and then throws.
		/// </summary>
		/// <param name="subDirectory">The directory whose files are listed</param>
		/// <param name="startDate">Currently unused</param>
		/// <param name="endDate">Currently unused</param>
		/// <exception cref="NotImplementedException">Thrown after all file names were processed</exception>
		public List<string> GetData(string subDirectory, Date startDate, Date endDate)
		{
			foreach (string filePath in Directory.GetFiles(subDirectory))
			{
				string currFileName = Path.GetFileName(filePath);
				Console.WriteLine("curr file: " + currFileName);

				// the first two pieces separated by '-' or '.' are read as the month and the year
				string[] nameParts = currFileName.Split(new[] { '-', '.' });
				int month = int.Parse(nameParts[0]);
				int year = int.Parse(nameParts[1]);

				// the date is not used for anything yet
				Date fileDate = new Date((uint)month, (uint)year);
			}

			throw new NotImplementedException();
		}
367
368 d2d1c86a Eliška Mourycová
369
		#region UNUSED
370
		//public string GetDirectoryListingRegexForUrl(string url)
371
		//{
372
		//	if (url.Equals(site))
373
		//	{
374
		//		//return "\\\"([^\"]*)\\\"";
375
		//		return @"\bOD_ZCU_\w*\b";
376
		//		//return @"\A[OD_ZCU_]";
377
		//	}
378
		//	else return null;
379
		//}
380
		//public void ListDirectory()
381
		//{
382
		//	string url = site;
383
		//	HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
384
		//	using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
385
		//	{
386
		//		using (StreamReader reader = new StreamReader(response.GetResponseStream()))
387
		//		{
388
		//			string html = reader.ReadToEnd();
389
390
391
392
		//			Regex regex = new Regex(GetDirectoryListingRegexForUrl(url));
393
		//			MatchCollection matches = regex.Matches(html);
394
		//			Console.WriteLine(matches.Count);
395
396
		//			if (matches.Count > 0)
397
		//			{
398
		//				foreach (Match match in matches)
399
		//				{
400
		//					//if (match.Success)
401
		//					//{
402
		//						Console.WriteLine(match.ToString());
403
		//					//}
404
		//				}
405
		//			}
406
		//		}
407
		//		Console.ReadLine();
408
		//	}
409
		//}
410
411
		#endregion
412
	}
413
}