1 |
ebe96ca4
|
Roman Kalivoda
|
//
|
2 |
|
|
// Author: Roman Kalivoda
|
3 |
|
|
//
|
4 |
|
|
|
5 |
|
|
using System.Collections.Generic;
|
6 |
|
|
using System;
|
7 |
|
|
using ServerApp.Parser.Parsers;
|
8 |
d358b79e
|
Roman Kalivoda
|
using ServerApp.Parser.OutputInfo;
|
9 |
|
|
using System.Linq;
|
10 |
0060a0ae
|
Roman Kalivoda
|
using Microsoft.ML;
|
11 |
|
|
using log4net;
|
12 |
ebe96ca4
|
Roman Kalivoda
|
|
13 |
|
|
namespace ServerApp.Predictor
|
14 |
|
|
{
|
15 |
|
|
/// <summary>
|
16 |
|
|
/// A class responsible for preparation of features for classifiers.
|
17 |
|
|
/// </summary>
|
18 |
ce0940b5
|
Roman Kalivoda
|
class FeatureExtractor
|
19 |
ebe96ca4
|
Roman Kalivoda
|
{
|
20 |
0e7b6b11
|
Roman Kalivoda
|
|
21 |
|
|
/// <summary>
/// Confidence level handed to the IID spike detector in <c>RejectOutliers</c>.
/// </summary>
/// <remarks>
/// NOTE(review): ML.NET documents the spike-detection confidence as a percentage in [0, 100],
/// whereas the p-value reported by the detector lies in [0, 1]. Any comparison between this
/// constant and a p-value has to convert between the two scales - confirm at every use site.
/// </remarks>
private const double Confidence = 99.0;
|
22 |
|
|
|
23 |
0060a0ae
|
Roman Kalivoda
|
private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));
|
24 |
|
|
|
25 |
ebe96ca4
|
Roman Kalivoda
|
/// <summary>
|
26 |
|
|
/// A DataParser instance used to access info objects.
|
27 |
|
|
/// </summary>
|
28 |
ce0940b5
|
Roman Kalivoda
|
private readonly IDataParser DataParser;
|
29 |
ebe96ca4
|
Roman Kalivoda
|
|
30 |
ce0940b5
|
Roman Kalivoda
|
/// <summary>
|
31 |
|
|
/// A configuration object of the <c>Predictor</c> package
|
32 |
|
|
/// </summary>
|
33 |
|
|
private PredictorConfiguration Configuration;
|
34 |
ebe96ca4
|
Roman Kalivoda
|
|
35 |
|
|
/// <summary>
/// Creates a feature extractor bound to a data source and to the predictor settings.
/// </summary>
/// <param name="dataParser">Parser whose attendance and weather lists supply the training data.</param>
/// <param name="configuration">Predictor configuration; its building-to-area map selects the buildings of an area.</param>
public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
{
    Configuration = configuration;
    DataParser = dataParser;
}
|
44 |
|
|
|
45 |
|
|
/// <summary>
/// Builds the labelled training set for a single area.
/// </summary>
/// <remarks>
/// Attendance records of every building mapped to <paramref name="area"/> are summed per start time,
/// passed through <c>RejectOutliers</c>, joined with the weather records on start time and finally
/// labelled with the activity amount relative to the largest amount that survived the join.
/// </remarks>
/// <param name="area">Area identifier as used by the values of <c>Configuration.BuildingsToAreas</c>.</param>
/// <returns>
/// One <c>ModelInput</c> per start time that has both attendance and weather data;
/// an empty list when there is no such start time.
/// </returns>
public List<ModelInput> PrepareTrainingInput(int area)
{
    List<ActivityInfo> attendance = DataParser.AttendanceList;

    // find all buildings in area; a set keeps the membership test in the query below O(1)
    HashSet<string> buildings = new HashSet<string>();
    foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
    {
        if (kvp.Value == area)
        {
            buildings.Add(kvp.Key);
        }
    }

    // sum the amounts of all selected buildings that share a start time, ordered chronologically
    List<ActivityInfo> activities = attendance
        .Where(e => buildings.Contains(e.building))
        .GroupBy(e => e.startTime)
        .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
        .OrderBy(e => e.startTime)
        .ToList();

    if (activities.Count == 0)
    {
        _log.Warn($"No attendance data found for area {area}; returning an empty training set.");
        return new List<ModelInput>();
    }

    activities = RejectOutliers(activities);

    // keep only the start times for which a weather record exists as well
    var inputs = activities.Join(DataParser.WeatherList, activity => activity.startTime, weatherInfo => weatherInfo.startTime, (activity, weatherInfo) => new
    {
        amount = activity.amount,
        modelInput = new ModelInput
        {
            Hour = activity.startTime.Hour,
            Temp = (float)weatherInfo.temp,
            Rain = (float)weatherInfo.rain,
            Time = activity.startTime,
            Wind = (float)weatherInfo.wind
        }
    }).ToList();

    // Max() throws on an empty sequence, so bail out before computing it
    if (inputs.Count == 0)
    {
        _log.Warn($"No attendance record of area {area} has a matching weather record; returning an empty training set.");
        return new List<ModelInput>();
    }

    int max = inputs.Max(e => e.amount);
    foreach (var input in inputs)
    {
        // a zero maximum would turn the division into 0 / 0 = NaN, which RatioToLabel maps to "100%"
        double ratio = max == 0 ? 0.0 : input.amount / (double)max;
        input.modelInput.Label = RatioToLabel(ratio);
    }

    return inputs.Select(e => e.modelInput).ToList();
}
|
91 |
|
|
|
92 |
|
|
/// <summary>
/// Filters out activities whose amount is reported as a spike by the ML.NET IID spike detector.
/// </summary>
/// <param name="data">Activities to examine; they are fed to the detector in list order.</param>
/// <returns>
/// A new list holding the entries of <paramref name="data"/> that were not rejected, in their original order.
/// </returns>
private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
{
    List<ActivityInfo> result = new List<ActivityInfo>();
    if (data.Count == 0)
    {
        // nothing to examine, so skip building the ML pipeline altogether
        return result;
    }

    // Confidence is a percentage (the scale DetectIidSpike is given below), while Prediction[2]
    // is a p-value in [0, 1]. Comparing against (1 - Confidence) = -98 could never be true, so the
    // percentage is converted to a fraction first.
    double pValueThreshold = 1 - Confidence / 100.0;

    MLContext mlContext = new MLContext();
    IDataView input = mlContext.Data.LoadFromEnumerable(data);
    // NOTE(review): data.Count / 4 is 0 for fewer than four items - confirm the detector accepts
    // a p-value history length of 0.
    var pipeline = mlContext.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount)).Append(mlContext.Transforms.DetectIidSpike(nameof(AnomalyDetectionResult.Prediction), nameof(ActivityInfo.amount), Confidence, data.Count / 4));
    // the pipeline is fitted on an empty list; presumably the detector needs no training data, only the schema
    ITransformer transformer = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<ActivityInfo>()));
    IDataView transformedData = transformer.Transform(input);
    List<AnomalyDetectionResult> predictions = mlContext.Data.CreateEnumerable<AnomalyDetectionResult>(transformedData, false).ToList();

    // predictions[i] belongs to data[i]; Prediction[1] is logged as the value, Prediction[2] as the p-value
    for (int i = 0; i < predictions.Count; i++)
    {
        if (predictions[i].Prediction[2] < pValueThreshold)
        {
            _log.Debug($"Rejecting an outlier activity: {predictions[i].Prediction[1]}, p-value: {predictions[i].Prediction[2]}, from: {data[i].startTime}");
        }
        else
        {
            result.Add(data[i]);
        }
    }
    return result;
}
|
114 |
d358b79e
|
Roman Kalivoda
|
|
115 |
|
|
/// <summary>
/// Maps a ratio to one of the ten class labels "10%" ... "100%".
/// </summary>
/// <remarks>
/// The label of the first threshold that is strictly greater than <paramref name="ratio"/> is returned.
/// The thresholds are single-precision values that get widened to double for the comparison, so each
/// boundary is the float rounding of 0.1, 0.2, ... rather than the double literal.
/// </remarks>
/// <param name="ratio">Ratio to classify.</param>
/// <returns>
/// The matching label; "100%" when <paramref name="ratio"/> is below none of the thresholds,
/// which includes NaN.
/// </returns>
private string RatioToLabel(double ratio)
{
    float[] upperBounds = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f };
    string[] labels = { "10%", "20%", "30%", "40%", "50%", "60%", "70%", "80%", "90%" };

    for (int bucket = 0; bucket < upperBounds.Length; bucket++)
    {
        if (ratio < upperBounds[bucket])
        {
            return labels[bucket];
        }
    }

    return "100%";
}
|
158 |
|
|
|
159 |
ce0940b5
|
Roman Kalivoda
|
/// <summary>
/// Converts a class label back into a number.
/// </summary>
/// <remarks>
/// Despite the method name, the returned value is on the percent scale (10 ... 100), not a 0-1 ratio.
/// </remarks>
/// <param name="label">A label such as "10%"; may be null.</param>
/// <returns>
/// -1 for a null label, 10 to 90 for the labels "10%" to "90%", and 100 for every other string.
/// </returns>
internal double LabelToRatio(string label)
{
    switch (label)
    {
        case null:
            return -1.0;
        case "10%":
            return 10.0;
        case "20%":
            return 20.0;
        case "30%":
            return 30.0;
        case "40%":
            return 40.0;
        case "50%":
            return 50.0;
        case "60%":
            return 60.0;
        case "70%":
            return 70.0;
        case "80%":
            return 80.0;
        case "90%":
            return 90.0;
        default:
            // "100%" and any unrecognised label end up here
            return 100.0;
    }
}
|
206 |
ebe96ca4
|
Roman Kalivoda
|
}
|
207 |
|
|
}
|