1
|
//
|
2
|
// Author: Roman Kalivoda
|
3
|
//
|
4
|
|
5
|
using System.Collections.Generic;
|
6
|
using System;
|
7
|
using ServerApp.Parser.Parsers;
|
8
|
using ServerApp.Parser.OutputInfo;
|
9
|
using System.Linq;
|
10
|
using Microsoft.ML;
|
11
|
using log4net;
|
12
|
|
13
|
namespace ServerApp.Predictor
|
14
|
{
|
15
|
/// <summary>
/// A class responsible for preparation of features for classifiers.
/// </summary>
class FeatureExtractor
{
    /// <summary>
    /// Confidence passed to the IID spike detector, expressed in percent.
    /// The matching p-value threshold is <c>1 - Confidence / 100</c>.
    /// </summary>
    private const double Confidence = 99.0;

    private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));

    /// <summary>
    /// A DataParser instance used to access info objects.
    /// </summary>
    private readonly IDataParser DataParser;

    /// <summary>
    /// A configuration object of the <c>Predictor</c> package
    /// </summary>
    private readonly PredictorConfiguration Configuration;

    /// <summary>
    /// Instantiates new FeatureExtractor class.
    /// </summary>
    /// <param name="dataParser">Data parser used to access training data.</param>
    /// <param name="configuration">Predictor configuration; its <c>BuildingsToAreas</c> mapping is used to select the buildings of an area.</param>
    public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
    {
        this.DataParser = dataParser;
        this.Configuration = configuration;
    }

    /// <summary>
    /// Prepares labelled training inputs for one area.
    /// Attendance records of all buildings mapped to the area are summed per start time,
    /// outliers are rejected, the remaining records are joined with the weather records
    /// that have the same start time, and every input is labelled with the ratio of its
    /// amount to the largest amount in the joined set.
    /// </summary>
    /// <param name="area">Identifier of the area, as used in the values of <c>Configuration.BuildingsToAreas</c>.</param>
    /// <returns>
    /// The list of labelled model inputs. The list is empty when the area has no attendance
    /// records or when none of them has a weather record with the same start time.
    /// </returns>
    public List<ModelInput> PrepareTrainingInput(int area)
    {
        List<ActivityInfo> attendance = DataParser.AttendanceList;

        // find all buildings in area; a set keeps the membership test below cheap
        HashSet<string> buildings = new HashSet<string>();
        foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
        {
            if (kvp.Value == area)
            {
                buildings.Add(kvp.Key);
            }
        }

        // sum the amounts of all selected buildings that share a start time
        List<ActivityInfo> activities = attendance
            .Where(e => buildings.Contains(e.building))
            .GroupBy(e => e.startTime)
            .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
            .OrderBy(e => e.startTime)
            .ToList();

        if (activities.Count == 0)
        {
            _log.Warn($"No attendance records found for area {area}; returning no training inputs.");
            return new List<ModelInput>();
        }

        activities = RejectOutliers(activities);

        var inputs = activities.Join(DataParser.WeatherList, activity => activity.startTime, weatherInfo => weatherInfo.startTime, (activity, weatherInfo) => new
        {
            amount = activity.amount,
            modelInput = new ModelInput
            {
                Hour = activity.startTime.Hour,
                Temp = (float)weatherInfo.temp,
                Rain = (float)weatherInfo.rain,
                Time = activity.startTime,
                Wind = (float)weatherInfo.wind
            }
        }).ToList();

        // Max() throws on an empty sequence, so bail out before computing it
        if (inputs.Count == 0)
        {
            _log.Warn($"No weather records match the attendance of area {area}; returning no training inputs.");
            return new List<ModelInput>();
        }

        int max = inputs.Select(e => e.amount).Max();
        foreach (var input in inputs)
        {
            // a zero maximum would yield NaN (0 / 0.0), which would be labelled "100%"
            double ratio = max == 0 ? 0.0 : input.amount / (double)max;
            input.modelInput.Label = RatioToLabel(ratio);
        }

        return inputs.Select(e => e.modelInput).ToList();
    }

    /// <summary>
    /// Removes activities whose amount is flagged as a spike by the ML.NET IID spike detector.
    /// </summary>
    /// <param name="data">Activities to filter; the list itself is not modified.</param>
    /// <returns>A new list with the activities that were not rejected, in their original order.</returns>
    private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
    {
        MLContext mlContext = new MLContext();
        IDataView input = mlContext.Data.LoadFromEnumerable(data);
        var pipeline = mlContext.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount)).Append(mlContext.Transforms.DetectIidSpike(nameof(AnomalyDetectionResult.Prediction), nameof(ActivityInfo.amount), Confidence, data.Count / 4));
        // the pipeline is fitted on an empty data set; the real data is passed to Transform below
        ITransformer transformer = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<ActivityInfo>()));
        IDataView transformedData = transformer.Transform(input);
        List<AnomalyDetectionResult> predictions = mlContext.Data.CreateEnumerable<AnomalyDetectionResult>(transformedData, false).ToList();
        List<ActivityInfo> result = new List<ActivityInfo>();

        // Confidence is a percentage, so the p-value threshold is 1 - Confidence / 100.
        // Comparing against (1 - Confidence) gives -98, which no p-value can be below.
        double pValueThreshold = 1.0 - Confidence / 100.0;

        for (int i = 0; i < predictions.Count; i++)
        {
            // the log message below reads Prediction[1] as the score and Prediction[2] as the p-value
            if (predictions[i].Prediction[2] < pValueThreshold)
            {
                _log.Debug($"Rejecting an outlier activity: {predictions[i].Prediction[1]}, p-value: {predictions[i].Prediction[2]}, from: {data[i].startTime}");
            }
            else
            {
                result.Add(data[i]);
            }
        }
        return result;
    }

    /// <summary>
    /// Converts a ratio to the label of the ten-percent bucket it falls into.
    /// </summary>
    /// <param name="ratio">Ratio of an amount to the maximum amount.</param>
    /// <returns>
    /// One of "10%", "20%", ... "100%". Ratios below 0.1 map to "10%"; ratios of 0.9 and above,
    /// as well as NaN, map to "100%".
    /// </returns>
    private string RatioToLabel(double ratio)
    {
        // The thresholds are deliberately kept as float literals: they are widened to double
        // for the comparison, which decides the bucket of ratios lying exactly on a boundary.
        if (ratio < 0.1f)
        {
            return "10%";
        }
        if (ratio < 0.2f)
        {
            return "20%";
        }
        if (ratio < 0.3f)
        {
            return "30%";
        }
        if (ratio < 0.4f)
        {
            return "40%";
        }
        if (ratio < 0.5f)
        {
            return "50%";
        }
        if (ratio < 0.6f)
        {
            return "60%";
        }
        if (ratio < 0.7f)
        {
            return "70%";
        }
        if (ratio < 0.8f)
        {
            return "80%";
        }
        if (ratio < 0.9f)
        {
            return "90%";
        }
        return "100%";
    }

    /// <summary>
    /// Converts a bucket label back to a number.
    /// </summary>
    /// <param name="label">A label such as "10%" or "90%"; may be null.</param>
    /// <returns>
    /// The percentage named by the label (10 for "10%", ... 90 for "90%"), -1 when the label
    /// is null, and 100 for every other label.
    /// </returns>
    internal double LabelToRatio(string label)
    {
        if (label is null)
        {
            return -1f;
        }

        switch (label)
        {
            case "10%":
                return 10f;
            case "20%":
                return 20f;
            case "30%":
                return 30f;
            case "40%":
                return 40f;
            case "50%":
                return 50f;
            case "60%":
                return 60f;
            case "70%":
                return 70f;
            case "80%":
                return 80f;
            case "90%":
                return 90f;
            default:
                // every unrecognised label, including "100%", maps to 100
                return 100f;
        }
    }
}
|
207
|
}
|