Projekt

Obecné

Profil

Stáhnout (6.76 KB) Statistiky
| Větev: | Tag: | Revize:
1 34bf7aa2 Eliška Mourycová
//
2
// Author: Eliska Mourycova
3
//
4
5
using System;
6 d2d1c86a Eliška Mourycová
using System.Collections.Generic;
7
using System.IO;
8
using System.IO.Compression;
9
using System.Net;
10
11
namespace DataDownload
12
{
13 34bf7aa2 Eliška Mourycová
	/// <summary>
	/// All data types that can be requested from the data source
	/// (not every one of them is used in this project).
	/// The member names are kept in Czech because they are inserted verbatim
	/// into the names of the downloaded files.
	/// TBD: They might be translated to English later.
	/// </summary>
	public enum DataType
	{
		/// <summary>Czech for "weather".</summary>
		POCASI,

		/// <summary>Energy-related data.</summary>
		ENERGO,

		/// <summary>Czech for "machines".</summary>
		STROJE,

		/// <summary>E-mail data.</summary>
		EMAIL,

		/// <summary>Czech for "room occupancy".</summary>
		OBSAZENI_MISTNOSTI,

		/// <summary>JIS data (the abbreviation is not expanded anywhere in this file).</summary>
		JIS,

		/// <summary>Czech for "scooters".</summary>
		KOLOBEZKY,

		/// <summary>Wi-Fi data.</summary>
		WIFI
	}
22 34bf7aa2 Eliška Mourycová
23
	/// <summary>
	/// File formats in which the data archives are offered.
	/// The member name is used verbatim as part of the archive file name.
	/// </summary>
	public enum DataFormat
	{
		/// <summary>XML files.</summary>
		XML,

		/// <summary>JSON files.</summary>
		JSON,

		/// <summary>CSV files.</summary>
		CSV
	}
30
31 34bf7aa2 Eliška Mourycová
	/// <summary>
	/// This class takes care of downloading of data. Download happens from http://openstore.zcu.cz/.
	/// Archives are fetched one month at a time, their entries are extracted into
	/// <c>DataDirectory/&lt;type&gt;/</c> and the temporary .zip file is removed afterwards.
	/// </summary>
	public class DataDownloader
	{
		/// <summary>
		/// The root directory containing all downloaded data
		/// </summary>
		public string DataDirectory { get; }

		/// <summary>
		/// Flag stating whether files which already exist should be overwritten when downloaded again
		/// </summary>
		public bool OverwriteExisting { get; set; }

		// the main site where the data can be downloaded from
		private readonly string site;

		// the substring at the start of every file name
		private readonly string dataStr;

		// WebClient instance used for the actual download
		private readonly WebClient webClient;

		// a shortcut to writing Path.DirectorySeparatorChar
		private readonly char sep = Path.DirectorySeparatorChar;

		/// <summary>
		/// Creates a downloader that stores data under <c>../../data/auto</c> (relative to the
		/// current working directory) and does not overwrite existing files by default.
		/// The current working directory is printed to the console.
		/// </summary>
		public DataDownloader()
		{
			// initialize all needed variables:
			Console.WriteLine(Directory.GetCurrentDirectory());
			site = "http://openstore.zcu.cz/";
			dataStr = "OD_ZCU_";

			DataDirectory = $"..{sep}..{sep}data{sep}auto";
			OverwriteExisting = false;

			webClient = new WebClient();
		}

		/// <summary>
		/// Downloads a specific archive and extracts its files into the folder of the given data type.
		/// A failed download (<see cref="WebException"/>) is reported on the console and results in an
		/// empty list; any other exception propagates to the caller. In both cases the temporary
		/// .zip file is removed from the current working directory.
		/// </summary>
		/// <param name="type">The type of data</param>
		/// <param name="format">The format of the data</param>
		/// <param name="year">The year</param>
		/// <param name="month">The month</param>
		/// <returns>
		/// Full paths of all files from the archive that are now present in the target folder,
		/// including files that already existed and were not overwritten (should be only one)
		/// </returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthStr = month < 10 ? "0" + month : "" + month;
			string monthYr = monthStr + "_" + year;

			string nameZip = dataStr + type + "_" + monthYr + "_" + format + ".zip";
			// the base address may already end with '/'; normalise it so the path never contains "//"
			string url = site.TrimEnd('/') + "/" + dataStr + monthYr + "/" + nameZip;
			string nameFolder = DataDirectory + sep + type + sep;

			try
			{
				Console.WriteLine("Downloading .zip to " + Path.GetFullPath(nameZip) + "...");

				// Download the zip file:
				webClient.DownloadFile(url, nameZip);

				// 'using' guarantees the archive handle is released even if extraction throws,
				// which is required for the .zip to be deletable in the finally block
				using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
				{
					foreach (ZipArchiveEntry entry in zipArchive.Entries)
					{
						// directory entries have an empty Name and cannot be extracted to a file
						if (string.IsNullOrEmpty(entry.Name))
						{
							continue;
						}

						// the archive's inner folder structure is deliberately flattened (Name, not FullName)
						string extractedFile = Path.Combine(nameFolder, entry.Name);

						// extract unless the file is already there and must be kept
						if (OverwriteExisting || !File.Exists(extractedFile))
						{
							entry.ExtractToFile(extractedFile, OverwriteExisting);
						}

						// add full path to the list (also for files that already existed):
						extractedFiles.Add(Path.GetFullPath(extractedFile));
					}
				}

				Console.WriteLine("Extracted to " + Path.GetFullPath(nameFolder));
				Console.WriteLine("Deleting .zip from " + Path.GetFullPath(nameZip) + "...");
				Console.WriteLine("Finished downloading " + nameZip);
			}
			catch (WebException we)
			{
				// download fails, if the specified url is invalid
				Console.WriteLine("Download from " + url + " failed.");
				Console.WriteLine(we.Message);
			}
			finally
			{
				// remove the temporary archive on every path; File.Delete does not throw
				// when the file does not exist
				File.Delete(nameZip);
			}

			return extractedFiles;
		}

		/// <summary>
		/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
		/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
		/// Note that the month window <paramref name="startMonth"/>..<paramref name="endMonth"/> is applied to
		/// every year in the range separately; if <paramref name="startMonth"/> is greater than
		/// <paramref name="endMonth"/> nothing is downloaded.
		/// </summary>
		/// <param name="type">The type of data, e.g. the jis data</param>
		/// <param name="format">The desired format of data files, available are CSV, XML and JSON formats</param>
		/// <param name="startYear">The first year to download, inclusive</param>
		/// <param name="endYear">The last year to download, inclusive</param>
		/// <param name="startMonth">The first month to download within each year, inclusive</param>
		/// <param name="endMonth">The last month to download within each year, inclusive</param>
		/// <returns>A list of full paths to all successfully saved files</returns>
		public List<string> DownloadData(DataType type, DataFormat format, int startYear, int endYear, int startMonth, int endMonth)
		{
			List<string> savedFiles = new List<string>();

			// make sure the target folder for this data type exists before anything is extracted into it
			Directory.CreateDirectory(DataDirectory + sep + type);

			for (int y = startYear; y <= endYear; y++)
			{
				for (int m = startMonth; m <= endMonth; m++)
				{
					savedFiles.AddRange(DownloadData(type, format, y, m));
				}
			}

			return savedFiles;
		}

		#region UNUSED
		//public string GetDirectoryListingRegexForUrl(string url)
		//{
		//	if (url.Equals(site))
		//	{
		//		//return "\\\"([^\"]*)\\\"";
		//		return @"\bOD_ZCU_\w*\b";
		//		//return @"\A[OD_ZCU_]";
		//	}
		//	else return null;
		//}
		//public void ListDirectory()
		//{
		//	string url = site;
		//	HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
		//	using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
		//	{
		//		using (StreamReader reader = new StreamReader(response.GetResponseStream()))
		//		{
		//			string html = reader.ReadToEnd();

		//			Regex regex = new Regex(GetDirectoryListingRegexForUrl(url));
		//			MatchCollection matches = regex.Matches(html);
		//			Console.WriteLine(matches.Count);

		//			if (matches.Count > 0)
		//			{
		//				foreach (Match match in matches)
		//				{
		//					//if (match.Success)
		//					//{
		//						Console.WriteLine(match.ToString());
		//					//}
		//				}
		//			}
		//		}
		//		Console.ReadLine();
		//	}
		//}

		#endregion
	}
231
}