1
|
//
|
2
|
// Author: Roman Kalivoda
|
3
|
//
|
4
|
|
5
|
using System.Collections.Generic;
|
6
|
using System;
|
7
|
using ServerApp.Parser.Parsers;
|
8
|
using ServerApp.Parser.OutputInfo;
|
9
|
using System.Linq;
|
10
|
using Microsoft.ML;
|
11
|
using log4net;
|
12
|
|
13
|
namespace ServerApp.Predictor
|
14
|
{
|
15
|
/// <summary>
|
16
|
/// A class responsible for preparation of features for classifiers.
|
17
|
/// </summary>
|
18
|
class FeatureExtractor
|
19
|
{
|
20
|
private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));
|
21
|
|
22
|
/// <summary>
|
23
|
/// A DataParser instance used to access info objects.
|
24
|
/// </summary>
|
25
|
private readonly IDataParser DataParser;
|
26
|
|
27
|
/// <summary>
|
28
|
/// A configuration object of the <c>Predictor</c> package.
|
29
|
/// </summary>
|
30
|
private PredictorConfiguration Configuration;
|
31
|
|
32
|
/// <summary>
/// Creates a new <c>FeatureExtractor</c> that works with the supplied data parser and configuration.
/// </summary>
/// <param name="dataParser">Data parser used to access training data.</param>
/// <param name="configuration">Configuration object of the <c>Predictor</c> package kept by this instance.</param>
public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
{
    // Both arguments are stored as given; no validation is performed here.
    Configuration = configuration;
    DataParser = dataParser;
}
|
41
|
|
42
|
/// <summary>
/// Builds the labelled training inputs for a single area.
/// </summary>
/// <remarks>
/// Attendance records of every building mapped to <paramref name="area"/> in
/// <c>Configuration.BuildingsToAreas</c> are summed per start time, filtered through
/// <c>RejectOutliers</c>, joined with <c>DataParser.WeatherList</c> on start time and then
/// labelled by the ratio of their amount to the largest remaining amount.
/// </remarks>
/// <param name="area">Area identifier as used for the values of <c>Configuration.BuildingsToAreas</c>.</param>
/// <returns>
/// One <c>ModelInput</c> per start time that has both attendance and weather data.
/// The list is empty when the area has no attendance records or none of them matches a weather record.
/// </returns>
public List<ModelInput> PrepareTrainingInput(int area)
{
    // Collect all buildings in the area; a hash set keeps the membership test
    // in the attendance filter below at O(1) per record.
    HashSet<string> buildings = new HashSet<string>();
    foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
    {
        if (kvp.Value == area)
        {
            buildings.Add(kvp.Key);
        }
    }

    // Sum the amounts of all buildings of the area that share a start time.
    List<ActivityInfo> activities = DataParser.AttendanceList
        .Where(e => buildings.Contains(e.building))
        .GroupBy(e => e.startTime)
        .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
        .OrderBy(e => e.startTime)
        .ToList();

    // Nothing to train on: skip outlier detection and labelling altogether.
    if (activities.Count == 0)
    {
        return new List<ModelInput>();
    }

    activities = RejectOutliers(activities);

    // Pair every remaining activity with the weather record of the same start time.
    var inputs = activities.Join(
        DataParser.WeatherList,
        activity => activity.startTime,
        weatherInfo => weatherInfo.startTime,
        (activity, weatherInfo) => new
        {
            amount = activity.amount,
            modelInput = new ModelInput
            {
                Hour = activity.startTime.Hour,
                Temp = (float)weatherInfo.temp,
                Rain = (float)weatherInfo.rain,
                Time = activity.startTime,
                Wind = (float)weatherInfo.wind
            }
        }).ToList();

    // Max() throws on an empty sequence, so bail out when the join produced no rows.
    if (inputs.Count == 0)
    {
        return new List<ModelInput>();
    }

    int max = inputs.Select(e => e.amount).Max();
    foreach (var input in inputs)
    {
        // With max == 0 the division would be 0/0 = NaN, which fails every "<" test in
        // RatioToLabel and ends up labelled "100%"; treat that case as a ratio of zero.
        double ratio = max == 0 ? 0.0 : input.amount / (double)max;
        input.modelInput.Label = RatioToLabel(ratio);
    }

    return inputs.Select(e => e.modelInput).ToList();
}
|
88
|
|
89
|
/// <summary>
/// Removes activities whose <c>amount</c> is flagged by the ML.NET IID spike detector.
/// </summary>
/// <param name="data">Activities to examine, in the order in which they should be analysed.</param>
/// <returns>
/// A new list holding the entries of <paramref name="data"/> that were not flagged,
/// in their original order. Every rejected entry is reported on the debug log.
/// </returns>
private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
{
    MLContext context = new MLContext();
    IDataView source = context.Data.LoadFromEnumerable(data);

    // Convert the amount column with ConvertType's defaults, then run spike detection on it
    // with confidence 99.0 and a p-value history of a quarter of the input size.
    var amountConversion = context.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount));
    var spikeDetection = context.Transforms.DetectIidSpike(
        nameof(AnomalyDetectionResult.Prediction),
        nameof(ActivityInfo.amount),
        99.0,
        data.Count / 4);
    var pipeline = amountConversion.Append(spikeDetection);

    // The pipeline is fitted on an empty data view; the real data is only passed to Transform.
    IDataView emptyView = context.Data.LoadFromEnumerable(new List<ActivityInfo>());
    ITransformer model = pipeline.Fit(emptyView);
    IDataView scored = model.Transform(source);
    List<AnomalyDetectionResult> results = context.Data.CreateEnumerable<AnomalyDetectionResult>(scored, false).ToList();

    List<ActivityInfo> kept = new List<ActivityInfo>();
    for (int index = 0; index < results.Count; index++)
    {
        var prediction = results[index].Prediction;

        // Element 0 equal to 1 marks the row as an outlier; everything else is kept.
        if (prediction[0] != 1)
        {
            kept.Add(data[index]);
            continue;
        }

        _log.Debug($"Rejecting an outlier activity: {prediction[1]}, p-value: {prediction[2]}, from: {data[index].startTime}");
    }

    return kept;
}
|
111
|
|
112
|
/// <summary>
/// Exclusive upper bounds of the first nine ratio buckets used by <c>RatioToLabel</c>.
/// </summary>
/// <remarks>
/// These are <c>double</c> values on purpose. Comparing a <c>double</c> ratio against
/// <c>float</c> literals promotes the literals to values slightly above or below the decimal
/// bound (e.g. 0.1f is greater than 0.1 while 0.7f is less than 0.7), which puts exact
/// boundary ratios into inconsistent buckets.
/// </remarks>
private static readonly double[] RatioBucketUpperBounds = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };

/// <summary>
/// Labels of the buckets delimited by <c>RatioBucketUpperBounds</c>, in the same order.
/// </summary>
private static readonly string[] RatioBucketLabels = { "10%", "20%", "30%", "40%", "50%", "60%", "70%", "80%", "90%" };

/// <summary>
/// Maps a ratio to the label of the ten-percent bucket it falls into.
/// </summary>
/// <param name="ratio">Ratio to classify.</param>
/// <returns>
/// <c>"10%"</c> for ratios below 0.1, <c>"20%"</c> for ratios in [0.1, 0.2), and so on up to
/// <c>"90%"</c> for ratios in [0.8, 0.9). Every other value, including NaN, yields <c>"100%"</c>.
/// </returns>
private string RatioToLabel(double ratio)
{
    for (int i = 0; i < RatioBucketUpperBounds.Length; i++)
    {
        // Upper bounds are exclusive: a ratio equal to a bound belongs to the next bucket.
        if (ratio < RatioBucketUpperBounds[i])
        {
            return RatioBucketLabels[i];
        }
    }

    // Reached for ratios of 0.9 and above, and for NaN (which fails every "<" comparison).
    return "100%";
}
|
155
|
|
156
|
/// <summary>
/// Converts a bucket label such as <c>"30%"</c> back to a number.
/// </summary>
/// <param name="label">Label to convert; may be <c>null</c>.</param>
/// <returns>
/// -1 when <paramref name="label"/> is <c>null</c>; 10, 20, ... 90 for the labels
/// <c>"10%"</c> to <c>"90%"</c>; 100 for any other string. Note that the result is on a
/// 0-100 scale, not a 0-1 ratio.
/// </returns>
internal double LabelToRatio(string label)
{
    if (label is null)
    {
        return -1f;
    }

    switch (label)
    {
        case "10%":
            return 10f;
        case "20%":
            return 20f;
        case "30%":
            return 30f;
        case "40%":
            return 40f;
        case "50%":
            return 50f;
        case "60%":
            return 60f;
        case "70%":
            return 70f;
        case "80%":
            return 80f;
        case "90%":
            return 90f;
        default:
            // Any label that is not recognised above is reported as 100.
            return 100f;
    }
}
|
203
|
}
|
204
|
}
|