1
|
using System;
|
2
|
using System.Collections.Generic;
|
3
|
using System.IO;
|
4
|
using System.IO.Compression;
|
5
|
using System.Linq;
|
6
|
using System.Net;
|
7
|
using System.Text;
|
8
|
using System.Text.RegularExpressions;
|
9
|
using System.Threading.Tasks;
|
10
|
using static System.Net.Mime.MediaTypeNames;
|
11
|
|
12
|
namespace DataDownload
|
13
|
{
|
14
|
/// <summary>
/// Data sets that can be requested from the downloader.
/// The member name is inserted verbatim into the download URL, the archive
/// file name and the target folder name, so the identifiers must not be renamed.
/// </summary>
public enum DataType
{
    /// <summary>"Pocasi" is Czech for weather.</summary>
    POCASI,

    /// <summary>The ENERGO data set.</summary>
    ENERGO,

    /// <summary>"Stroje" is Czech for machines.</summary>
    STROJE,

    /// <summary>The EMAIL data set.</summary>
    EMAIL,

    /// <summary>"Obsazeni mistnosti" is Czech for room occupancy.</summary>
    OBSAZENI_MISTNOSTI,

    /// <summary>The JIS data set.</summary>
    JIS,

    /// <summary>"Kolobezky" is Czech for scooters.</summary>
    KOLOBEZKY,

    /// <summary>The WIFI data set.</summary>
    WIFI
}
|
18
|
/// <summary>
/// File formats in which a data set can be requested.
/// The member name is inserted verbatim into the archive file name,
/// so the identifiers must not be renamed.
/// </summary>
public enum DataFormat
{
    /// <summary>Request the XML archive.</summary>
    XML,

    /// <summary>Request the JSON archive.</summary>
    JSON,

    /// <summary>Request the CSV archive.</summary>
    CSV
}
|
22
|
|
23
|
/// <summary>
/// Downloads monthly .zip archives from the configured site, extracts the files
/// they contain into <see cref="DataDirectory"/> (one sub-folder per
/// <see cref="DataType"/>) and reports the paths of the extracted files.
/// </summary>
public class DataDownloader : IDisposable
{
    /// <summary>Root folder (relative to the working directory) that receives the extracted files.</summary>
    public string DataDirectory { get; }

    /// <summary>
    /// When <c>true</c>, files that already exist in the target folder are overwritten;
    /// when <c>false</c> (the default), existing files are left untouched.
    /// </summary>
    public bool OverwriteExisting { get; set; }

    // Base address of the server hosting the archives.
    private readonly string site;

    // Prefix shared by every remote folder and archive name.
    private readonly string dataStr;

    // Owned by this instance; released in Dispose().
    private readonly WebClient webClient;

    private readonly char sep = Path.DirectorySeparatorChar;

    /// <summary>
    /// Creates a downloader that targets <c>..{sep}..{sep}data{sep}auto</c> and does not
    /// overwrite existing files. Prints the current working directory to the console.
    /// </summary>
    public DataDownloader()
    {
        Console.WriteLine(Directory.GetCurrentDirectory());
        site = "http://openstore.zcu.cz/";
        dataStr = "OD_ZCU_";

        DataDirectory = $"..{sep}..{sep}data{sep}auto";
        OverwriteExisting = false;

        webClient = new WebClient();
    }

    /// <summary>
    /// Downloads and extracts the archive of one data set for a single month.
    /// </summary>
    /// <param name="type">The data set to download.</param>
    /// <param name="format">The file format of the archive to download.</param>
    /// <param name="year">The year, used as-is in the archive name.</param>
    /// <param name="month">The month; values below 10 are zero-padded to two digits.</param>
    /// <returns>
    /// Full paths of all file entries found in the archive, including files that already
    /// existed and were not overwritten. Empty when the download fails with a
    /// <see cref="WebException"/>.
    /// </returns>
    /// <remarks>
    /// Only <see cref="WebException"/> is handled here (it is logged to the console);
    /// any other exception propagates to the caller. The target folder is expected to exist.
    /// </remarks>
    private List<string> DownloadData(DataType type, DataFormat format, int year, int month)
    {
        // the list of all files potentially relevant to the caller
        List<string> extractedFiles = new List<string>();

        // Prepare the url string to be downloaded from:
        string monthYr = month.ToString("D2") + "_" + year;
        string nameZip = dataStr + type + "_" + monthYr + "_" + format + ".zip";
        // site already ends with a slash; trim it so the url does not contain "//" in its path.
        string url = site.TrimEnd('/') + "/" + dataStr + monthYr + "/" + nameZip;
        string nameFolder = DataDirectory + sep + type + sep;

        try
        {
            Console.WriteLine("Downloading .zip to " + Path.GetFullPath(nameZip) + "...");

            // Download the zip file:
            webClient.DownloadFile(url, nameZip);

            // 'using' guarantees the archive handle is released even if extraction throws,
            // which in turn allows the zip file to be deleted in the finally block.
            using (ZipArchive zipArchive = ZipFile.OpenRead(nameZip))
            {
                foreach (ZipArchiveEntry entry in zipArchive.Entries)
                {
                    // Directory entries have an empty Name and hold no data to extract.
                    if (string.IsNullOrEmpty(entry.Name))
                    {
                        continue;
                    }

                    // relative path of the extracted file:
                    string extractedFile = nameFolder + entry.Name;

                    // add full path to the list, whether or not the file gets (re)written:
                    extractedFiles.Add(Path.GetFullPath(extractedFile));

                    // write the file when overwriting is desired or when it does not exist yet:
                    if (OverwriteExisting || !File.Exists(extractedFile))
                    {
                        entry.ExtractToFile(extractedFile, OverwriteExisting);
                    }
                }
            }

            Console.WriteLine("Extracted to " + Path.GetFullPath(nameFolder));
            Console.WriteLine("Deleting .zip from " + Path.GetFullPath(nameZip) + "...");
            Console.WriteLine("Finished downloading " + nameZip);
        }
        catch (WebException we)
        {
            // download fails, if the specified url is invalid
            Console.WriteLine("Download from " + url + " failed.");
            Console.WriteLine(we.Message);
        }
        finally
        {
            // The zip is only a temporary artifact: remove it after a successful extraction
            // as well as after a failed download or extraction. File.Delete does nothing
            // when the file does not exist.
            File.Delete(nameZip);
        }

        return extractedFiles;
    }

    /// <summary>
    /// Downloads selected type and time span of data in the desired format, returns a list of full paths to all successfully saved files.
    /// If some of the files already existed and were not overwritten, then the returned List contains paths to these files also.
    /// </summary>
    /// <remarks>
    /// The span is continuous: it runs from <paramref name="startMonth"/> of
    /// <paramref name="startYear"/> to <paramref name="endMonth"/> of <paramref name="endYear"/>,
    /// covering months 1 to 12 of every year in between. Months whose download fails are
    /// reported on the console and skipped.
    /// </remarks>
    /// <param name="type">The type of data, e.g. the jis data</param>
    /// <param name="format">The desired format of data files, available are CSV, XML and JSON formats</param>
    /// <param name="startYear">The first year of the span, inclusive</param>
    /// <param name="endYear">The last year of the span, inclusive</param>
    /// <param name="startMonth">The first month (within <paramref name="startYear"/>) of the span, inclusive</param>
    /// <param name="endMonth">The last month (within <paramref name="endYear"/>) of the span, inclusive</param>
    /// <returns>A list of full paths to all successfully saved files</returns>
    public List<string> DownloadData(DataType type, DataFormat format, int startYear, int endYear, int startMonth, int endMonth)
    {
        List<string> savedFiles = new List<string>();

        // Make sure the target folder exists before anything is extracted into it.
        Directory.CreateDirectory(DataDirectory + sep + type);

        for (int y = startYear; y <= endYear; y++)
        {
            // Only the first and the last year are partial; the years in between are complete.
            int firstMonth = (y == startYear) ? startMonth : 1;
            int lastMonth = (y == endYear) ? endMonth : 12;

            for (int m = firstMonth; m <= lastMonth; m++)
            {
                savedFiles.AddRange(DownloadData(type, format, y, m));
            }
        }

        return savedFiles;
    }

    /// <summary>
    /// Releases the <see cref="WebClient"/> owned by this instance.
    /// </summary>
    public void Dispose()
    {
        webClient.Dispose();
    }
}
|
196
|
}
|