Revize 0060a0ae
Přidáno uživatelem Roman Kalivoda před téměř 4 roky
Server/ServerApp/Predictor/FeatureExtractor.cs | ||
---|---|---|
7 | 7 |
using ServerApp.Parser.Parsers; |
8 | 8 |
using ServerApp.Parser.OutputInfo; |
9 | 9 |
using System.Linq; |
10 |
using Microsoft.ML; |
|
11 |
using log4net; |
|
10 | 12 |
|
11 | 13 |
namespace ServerApp.Predictor |
12 | 14 |
{ |
... | ... | |
15 | 17 |
/// </summary> |
16 | 18 |
class FeatureExtractor |
17 | 19 |
{ |
20 |
private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor)); |
|
21 |
|
|
18 | 22 |
/// <summary> |
19 | 23 |
/// A DataParser instance used to access info objects. |
20 | 24 |
/// </summary> |
... | ... | |
46 | 50 |
/// <returns></returns> |
47 | 51 |
public List<ModelInput> PrepareTrainingInput(int area) |
48 | 52 |
{ |
49 |
List<string> buildings = new List<string>(); |
|
53 |
List<string> buildings = new List<string>(); |
|
54 |
List<ActivityInfo> attendance = DataParser.AttendanceList; |
|
50 | 55 |
|
51 | 56 |
// find all buildings in area |
52 | 57 |
foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas) |
... | ... | |
56 | 61 |
buildings.Add(kvp.Key); |
57 | 62 |
} |
58 | 63 |
} |
59 |
|
|
60 |
var res = new List<ModelInput>();
|
|
61 |
foreach (WeatherInfo val in DataParser.WeatherList)
|
|
64 |
List<ActivityInfo> activities = attendance.Where(e => buildings.Contains(e.building)).GroupBy(e => e.startTime).Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1))).OrderBy(e => e.startTime).ToList(); |
|
65 |
activities = RejectOutliers(activities);
|
|
66 |
var inputs = activities.Join(DataParser.WeatherList, activity => activity.startTime, weatherInfo => weatherInfo.startTime, (activity, weatherInfo) => new
|
|
62 | 67 |
{ |
63 |
res.Add(new ModelInput |
|
68 |
amount = activity.amount, |
|
69 |
modelInput = new ModelInput |
|
64 | 70 |
{ |
65 |
Time = val.startTime, |
|
66 |
Temp = (float)val.temp, |
|
67 |
Hour = val.startTime.Hour, |
|
68 |
Wind = (float)val.wind, |
|
69 |
Rain = (float)val.rain, |
|
70 |
}); |
|
71 |
Hour = activity.startTime.Hour, |
|
72 |
Temp = (float)weatherInfo.temp, |
|
73 |
Rain = (float)weatherInfo.rain, |
|
74 |
Time = activity.startTime, |
|
75 |
Wind = (float)weatherInfo.wind |
|
76 |
} |
|
77 |
}).ToList(); |
|
78 |
|
|
79 |
int max = inputs.Select(e => e.amount).Max(); |
|
80 |
foreach (var input in inputs) |
|
81 |
{ |
|
82 |
double ratio = input.amount / (double)max; |
|
83 |
input.modelInput.Label = RatioToLabel(ratio); |
|
71 | 84 |
} |
72 | 85 |
|
73 |
List<ActivityInfo> attendance = DataParser.AttendanceList; |
|
74 |
foreach (ModelInput input in res) |
|
86 |
return inputs.Select(e => e.modelInput).ToList(); |
|
87 |
} |
|
88 |
|
|
89 |
private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data) |
|
90 |
{ |
|
91 |
MLContext mlContext = new MLContext(); |
|
92 |
IDataView input = mlContext.Data.LoadFromEnumerable(data); |
|
93 |
var pipeline = mlContext.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount)).Append(mlContext.Transforms.DetectIidSpike(nameof(AnomalyDetectionResult.Prediction), nameof(ActivityInfo.amount), 99.0, data.Count / 4)); |
|
94 |
ITransformer transformer = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<ActivityInfo>())); |
|
95 |
IDataView transformedData = transformer.Transform(input); |
|
96 |
List<AnomalyDetectionResult> predictions = mlContext.Data.CreateEnumerable<AnomalyDetectionResult>(transformedData, false).ToList(); |
|
97 |
List<ActivityInfo> result = new List<ActivityInfo>(); |
|
98 |
|
|
99 |
for (int i=0; i<predictions.Count; i++) |
|
75 | 100 |
{ |
76 |
List<int> amounts = new List<int>(); |
|
77 |
List<int> maxima = new List<int>(); |
|
78 |
foreach (string building in buildings) |
|
101 |
if(predictions[i].Prediction[0] == 1) |
|
102 |
{ |
|
103 |
_log.Debug($"Rejecting an outlier activity: {predictions[i].Prediction[1]}, p-value: {predictions[i].Prediction[2]}, from: {data[i].startTime}"); |
|
104 |
} else |
|
79 | 105 |
{ |
80 |
List<ActivityInfo> temp = attendance.Where(activity => activity.building.Equals(building)).ToList(); |
|
81 |
amounts.Add(temp.Where(activity => activity.startTime.Equals(input.Time)).Select(activity => activity.amount).FirstOrDefault()); |
|
82 |
maxima.Add(temp.Select(activity => activity.amount).Max()); |
|
106 |
result.Add(data[i]); |
|
83 | 107 |
} |
84 |
double ratio = amounts.Sum() / (double)maxima.Sum(); |
|
85 |
input.Label = RatioToLabel(ratio); |
|
86 | 108 |
} |
87 |
|
|
88 |
return res; |
|
109 |
return result; |
|
89 | 110 |
} |
90 | 111 |
|
91 | 112 |
private string RatioToLabel(double ratio) |
... | ... | |
134 | 155 |
|
135 | 156 |
internal double LabelToRatio(string label) |
136 | 157 |
{ |
137 |
if (label.Equals("10%")) |
|
158 |
if (label is null) |
|
159 |
{ |
|
160 |
return -1f; |
|
161 |
} |
|
162 |
else if (label.Equals("10%")) |
|
138 | 163 |
{ |
139 |
return 0.1f;
|
|
164 |
return 10f;
|
|
140 | 165 |
} |
141 | 166 |
else if (label.Equals("20%")) |
142 | 167 |
{ |
143 |
return 0.2f;
|
|
168 |
return 20f;
|
|
144 | 169 |
} |
145 | 170 |
else if (label.Equals("30%")) |
146 | 171 |
{ |
147 |
return 0.3f;
|
|
172 |
return 30f;
|
|
148 | 173 |
} |
149 | 174 |
else if (label.Equals("40%")) |
150 | 175 |
{ |
151 |
return 0.4f;
|
|
176 |
return 40f;
|
|
152 | 177 |
} |
153 | 178 |
else if (label.Equals("50%")) |
154 | 179 |
{ |
155 |
return 0.5f;
|
|
180 |
return 50f;
|
|
156 | 181 |
} |
157 | 182 |
else if (label.Equals("60%")) |
158 | 183 |
{ |
159 |
return 0.6f;
|
|
184 |
return 60f;
|
|
160 | 185 |
} |
161 | 186 |
else if (label.Equals("70%")) |
162 | 187 |
{ |
163 |
return 0.7f;
|
|
188 |
return 70f;
|
|
164 | 189 |
} |
165 | 190 |
else if (label.Equals("80%")) |
166 | 191 |
{ |
167 |
return 0.8f;
|
|
192 |
return 80f;
|
|
168 | 193 |
} |
169 | 194 |
else if (label.Equals("90%")) |
170 | 195 |
{ |
171 |
return 0.9f;
|
|
196 |
return 90f;
|
|
172 | 197 |
} |
173 | 198 |
else |
174 | 199 |
{ |
175 |
return 1.0f;
|
|
200 |
return 100f;
|
|
176 | 201 |
} |
177 | 202 |
} |
178 | 203 |
} |
Také k dispozici: Unified diff
Re #9056 refactoring