Projekt

Obecné

Profil

Stáhnout (5.64 KB) Statistiky
| Větev: | Tag: | Revize:
1
using System;
2
using System.Collections.Generic;
3
using System.IO;
4
using System.IO.Compression;
5
using System.Linq;
6
using System.Net;
7
using System.Text;
8
using System.Text.RegularExpressions;
9
using System.Threading.Tasks;
10
using static System.Net.Mime.MediaTypeNames;
11

    
12
namespace DataDownload
13
{
14
	/// <summary>
	/// Kinds of data sets that can be requested from the downloader.
	/// </summary>
	/// <remarks>
	/// <see cref="DataDownloader"/> concatenates the member name verbatim into the
	/// download URL and into the target folder name, so members must not be renamed.
	/// </remarks>
	public enum DataType
	{
		POCASI,
		ENERGO,
		STROJE,
		EMAIL,
		OBSAZENI_MISTNOSTI,
		JIS,
		KOLOBEZKY,
		WIFI,
	}
18
	/// <summary>
	/// File formats in which a data set can be requested.
	/// </summary>
	/// <remarks>
	/// <see cref="DataDownloader"/> concatenates the member name verbatim into the
	/// archive file name and URL, so members must not be renamed.
	/// </remarks>
	public enum DataFormat
	{
		XML,
		JSON,
		CSV,
	}
22

    
23
	/// <summary>
	/// Downloads zipped monthly data sets from the site configured in the constructor
	/// and extracts their files into a per-type subfolder of <see cref="DataDirectory"/>.
	/// </summary>
	public class DataDownloader
	{
		/// <summary>
		/// Root directory for extracted data; each data type gets its own subfolder.
		/// The path is relative to the current working directory.
		/// </summary>
		public string DataDirectory { get; }

		/// <summary>
		/// When <c>true</c>, files that already exist in the target folder are overwritten;
		/// when <c>false</c> (the default), existing files are left untouched.
		/// </summary>
		public bool OverwriteExisting { get; set; }

		private readonly string site;
		private readonly string dataStr;
		private readonly WebClient webClient;
		private readonly char sep = Path.DirectorySeparatorChar;

		/// <summary>
		/// Creates a downloader with the default site, file-name prefix and data directory.
		/// </summary>
		public DataDownloader()
		{
			Console.WriteLine(Directory.GetCurrentDirectory());
			site = "http://openstore.zcu.cz/";
			dataStr = "OD_ZCU_";

			DataDirectory = $"..{sep}..{sep}data{sep}auto";
			OverwriteExisting = false;

			webClient = new WebClient();
		}

		/// <summary>
		/// Downloads the archive for a single month, extracts its files (flattened, without
		/// the archive's internal folder structure) into the folder of the given data type
		/// and removes the temporary archive afterwards.
		/// </summary>
		/// <param name="type">The type of data to download</param>
		/// <param name="format">The desired format of the data files</param>
		/// <param name="year">The year of the requested archive</param>
		/// <param name="month">The month of the requested archive</param>
		/// <returns>
		/// Full paths of all files from the archive that are now present in the target folder,
		/// including files that already existed and were not overwritten. The list is empty
		/// when the download fails with a <see cref="WebException"/>.
		/// </returns>
		private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
		{
			// the list of all files potentially relevant to the caller
			List<string> extractedFiles = new List<string>();

			// Prepare the url string to be downloaded from:
			string monthStr = month < 10 ? "0" + month : "" + month;
			string monthYr = monthStr + "_" + year;

			string nameZip = dataStr + type + "_" + monthYr + "_" + format + ".zip";
			// The configured site may already end with '/', trim it to avoid "//" in the URL.
			string url = site.TrimEnd('/') + "/" + dataStr + monthYr + "/" + nameZip;
			string nameFolder = DataDirectory + sep + type + sep;

			try
			{
				Console.WriteLine("Downloading .zip to " + Path.GetFullPath(nameZip) + "...");

				// Download the zip file into the current working directory:
				webClient.DownloadFile(url, nameZip);

				// 'using' guarantees the archive handle is released even if extraction throws,
				// otherwise the zip file could not be deleted below.
				using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
				{
					foreach (ZipArchiveEntry entry in zipArchive.Entries)
					{
						// Directory entries have an empty Name; there is nothing to extract for them.
						if (entry.Name.Length == 0)
						{
							continue;
						}

						string extractedFile = nameFolder + entry.Name;

						// Extract unless the file is already there and must be kept.
						if (OverwriteExisting || !File.Exists(extractedFile))
						{
							entry.ExtractToFile(extractedFile, OverwriteExisting);
						}

						// Reported either way: freshly extracted or kept from an earlier run.
						extractedFiles.Add(Path.GetFullPath(extractedFile));
					}
				}

				Console.WriteLine("Extracted to " + Path.GetFullPath(nameFolder));
				Console.WriteLine("Finished downloading " + nameZip);
			}
			catch (WebException we)
			{
				// download fails, if the specified url is invalid
				Console.WriteLine("Download from " + url + " failed.");
				Console.WriteLine(we.Message);
			}
			finally
			{
				// The zip is only a temporary artifact; remove it on success and on failure alike
				// (a failed download may leave a partial file behind).
				if (File.Exists(nameZip))
				{
					Console.WriteLine("Deleting .zip from " + Path.GetFullPath(nameZip) + "...");
					File.Delete(nameZip);
				}
			}

			return extractedFiles;
		}

		/// <summary>
		/// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
		/// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
		/// </summary>
		/// <remarks>
		/// The span is walked chronologically from <paramref name="startMonth"/>/<paramref name="startYear"/>
		/// to <paramref name="endMonth"/>/<paramref name="endYear"/>: the first year starts at
		/// <paramref name="startMonth"/>, the last year ends at <paramref name="endMonth"/> and any
		/// years in between are covered from January to December.
		/// </remarks>
		/// <param name="type">The type of data, e.g. the jis data</param>
		/// <param name="format">The desired format of data files, available are CSV, XML and JSON formats</param>
		/// <param name="startYear">The first year of the span, inclusive</param>
		/// <param name="endYear">The last year of the span, inclusive</param>
		/// <param name="startMonth">The first month (within <paramref name="startYear"/>) of the span, inclusive</param>
		/// <param name="endMonth">The last month (within <paramref name="endYear"/>) of the span, inclusive</param>
		/// <returns>A list of full paths to all successfully saved files</returns>
		public List<string> DownloadData(DataType type, DataFormat format, int startYear, int endYear, int startMonth, int endMonth)
		{
			List<string> savedFiles = new List<string>();

			// Make sure the target folder for this data type exists before extracting into it.
			Directory.CreateDirectory(DataDirectory + sep + type);

			for (int y = startYear; y <= endYear; y++)
			{
				// Only the boundary years are clipped by the month arguments; a span such as
				// 11/2019 - 02/2020 would otherwise produce an empty month range.
				int firstMonth = y == startYear ? startMonth : 1;
				int lastMonth = y == endYear ? endMonth : 12;

				for (int m = firstMonth; m <= lastMonth; m++)
				{
					savedFiles.AddRange(DownloadData(type, format, y, m));
				}
			}

			return savedFiles;
		}

		#region UNUSED
		//public string GetDirectoryListingRegexForUrl(string url)
		//{
		//	if (url.Equals(site))
		//	{
		//		//return "\\\"([^\"]*)\\\"";
		//		return @"\bOD_ZCU_\w*\b";
		//		//return @"\A[OD_ZCU_]";
		//	}
		//	else return null;
		//}
		//public void ListDirectory()
		//{
		//	string url = site;
		//	HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
		//	using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
		//	{
		//		using (StreamReader reader = new StreamReader(response.GetResponseStream()))
		//		{
		//			string html = reader.ReadToEnd();

		//			Regex regex = new Regex(GetDirectoryListingRegexForUrl(url));
		//			MatchCollection matches = regex.Matches(html);
		//			Console.WriteLine(matches.Count);

		//			if (matches.Count > 0)
		//			{
		//				foreach (Match match in matches)
		//				{
		//					//if (match.Success)
		//					//{
		//						Console.WriteLine(match.ToString());
		//					//}
		//				}
		//			}
		//		}
		//		Console.ReadLine();
		//	}
		//}

		#endregion
	}
196
}
    (1-1/1)