Projekt

Obecné

Profil

Stáhnout (6.52 KB) Statistiky
| Větev: | Tag: | Revize:
1
//
2
// Author: Roman Kalivoda
3
//
4

    
5
using System.Collections.Generic;
6
using System;
7
using ServerApp.Parser.Parsers;
8
using ServerApp.Parser.OutputInfo;
9
using System.Linq;
10
using Microsoft.ML;
11
using log4net;
12

    
13
namespace ServerApp.Predictor
14
{
15
    /// <summary>
16
    /// A class responsible for preparation of features for classifiers.
17
    /// </summary>
18
    class FeatureExtractor
19
    {
20

    
21
        /// <summary>
        /// Confidence level handed to the ML.NET spike detector in <c>RejectOutliers</c>.
        /// The detector expects a percentage in the range [0, 100], not a fraction.
        /// </summary>
        private const double Confidence = 99.0;

        /// <summary>
        /// Logger shared by all instances of this class.
        /// </summary>
        private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));

        /// <summary>
        /// A DataParser instance used to access info objects.
        /// </summary>
        private readonly IDataParser DataParser;

        /// <summary>
        /// A configuration object of the <c>Predictor</c> package.
        /// Assigned once in the constructor and never replaced afterwards.
        /// </summary>
        private readonly PredictorConfiguration Configuration;
34

    
35
        /// <summary>
        /// Creates a feature extractor bound to a data source and to the predictor settings.
        /// </summary>
        /// <param name="dataParser">Data parser that supplies the attendance and weather records.</param>
        /// <param name="configuration">Configuration of the <c>Predictor</c> package, including the building-to-area mapping.</param>
        public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
        {
            Configuration = configuration;
            DataParser = dataParser;
        }
44

    
45
        /// <summary>
        /// Builds the labelled training samples for a single area.
        /// Attendance of all buildings mapped to the area is summed per start time, outliers are
        /// removed, the remaining records are joined with the weather records that share the same
        /// start time, and every sample is labelled with its amount relative to the largest amount
        /// among the joined samples.
        /// </summary>
        /// <param name="area">Identifier of the area, matched against the values of <c>Configuration.BuildingsToAreas</c>.</param>
        /// <returns>
        /// The labelled samples ordered by start time; an empty list when the area has no attendance
        /// records or none of them has a weather record with the same start time.
        /// </returns>
        public List<ModelInput> PrepareTrainingInput(int area)
        {
            // Collect the buildings of the requested area. A set keeps the membership test
            // in the attendance filter below at O(1) per record.
            HashSet<string> buildings = new HashSet<string>();
            foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
            {
                if (kvp.Value == area)
                {
                    buildings.Add(kvp.Key);
                }
            }

            // Sum the attendance of all selected buildings per start time.
            List<ActivityInfo> activities = DataParser.AttendanceList
                .Where(e => buildings.Contains(e.building))
                .GroupBy(e => e.startTime)
                .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
                .OrderBy(e => e.startTime)
                .ToList();

            if (activities.Count == 0)
            {
                // No attendance for this area: there is nothing to score or to label.
                return new List<ModelInput>();
            }

            activities = RejectOutliers(activities);

            // Inner join: only start times present in both attendance and weather data survive.
            var inputs = activities.Join(DataParser.WeatherList, activity => activity.startTime, weatherInfo => weatherInfo.startTime, (activity, weatherInfo) => new
            {
                amount = activity.amount,
                modelInput = new ModelInput
                {
                    Hour = activity.startTime.Hour,
                    Temp = (float)weatherInfo.temp,
                    Rain = (float)weatherInfo.rain,
                    Time = activity.startTime,
                    Wind = (float)weatherInfo.wind
                }
            }).ToList();

            if (inputs.Count == 0)
            {
                // Max() would throw on an empty sequence.
                return new List<ModelInput>();
            }

            int max = inputs.Max(e => e.amount);
            foreach (var input in inputs)
            {
                // A zero maximum would make the division produce NaN/Infinity and push the
                // sample into the top class; treat it as the lowest ratio instead.
                double ratio = max == 0 ? 0.0 : input.amount / (double)max;
                input.modelInput.Label = RatioToLabel(ratio);
            }

            return inputs.Select(e => e.modelInput).ToList();
        }
91

    
92
        /// <summary>
        /// Filters out activity records whose amount is scored as a spike by ML.NET's
        /// IID spike detector.
        /// </summary>
        /// <param name="data">Activity records to filter, in the order in which they should be scored.</param>
        /// <returns>A new list with the records of <paramref name="data"/> that were not rejected, in their original order.</returns>
        private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
        {
            List<ActivityInfo> result = new List<ActivityInfo>();
            if (data.Count == 0)
            {
                // Nothing to score; this also avoids configuring the detector with a zero-length history.
                return result;
            }

            // Confidence is a percentage (the unit DetectIidSpike expects) while p-values are
            // fractions, so it has to be scaled before the two are compared. Comparing against
            // "1 - Confidence" directly yields a negative threshold that no p-value can undercut.
            double pValueThreshold = 1 - Confidence / 100.0;

            MLContext mlContext = new MLContext();
            IDataView input = mlContext.Data.LoadFromEnumerable(data);
            var pipeline = mlContext.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount)).Append(mlContext.Transforms.DetectIidSpike(nameof(AnomalyDetectionResult.Prediction), nameof(ActivityInfo.amount), Confidence, data.Count / 4));
            // The pipeline is fitted on an empty data view; the scores are produced by Transform below.
            ITransformer transformer = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<ActivityInfo>()));
            IDataView transformedData = transformer.Transform(input);
            List<AnomalyDetectionResult> predictions = mlContext.Data.CreateEnumerable<AnomalyDetectionResult>(transformedData, false).ToList();

            for (int i = 0; i < predictions.Count; i++)
            {
                // Prediction[2] carries the p-value of the i-th record.
                if (predictions[i].Prediction[2] < pValueThreshold)
                {
                    _log.Debug($"Rejecting an outlier activity: {predictions[i].Prediction[1]}, p-value: {predictions[i].Prediction[2]}, from: {data[i].startTime}");
                }
                else
                {
                    result.Add(data[i]);
                }
            }
            return result;
        }
114

    
115
        /// <summary>
        /// Maps a ratio to one of ten textual percentage classes ("10%" … "100%").
        /// </summary>
        /// <param name="ratio">Ratio to classify.</param>
        /// <returns>
        /// The label of the first class whose exclusive upper bound is greater than
        /// <paramref name="ratio"/>; "100%" when no bound is greater (including NaN).
        /// </returns>
        private string RatioToLabel(double ratio)
        {
            // Exclusive upper bounds of the first nine classes. They are float literals on
            // purpose: each is widened to double for the comparison with the ratio.
            float[] upperBounds = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f };
            string[] labels = { "10%", "20%", "30%", "40%", "50%", "60%", "70%", "80%", "90%" };

            for (int bucket = 0; bucket < upperBounds.Length; bucket++)
            {
                if (ratio < upperBounds[bucket])
                {
                    return labels[bucket];
                }
            }

            return "100%";
        }
158

    
159
        /// <summary>
        /// Converts a textual percentage class back to its numeric value.
        /// </summary>
        /// <param name="label">Class label such as "10%"; may be null.</param>
        /// <returns>
        /// -1 for a null label, 10 through 90 for the labels "10%" through "90%",
        /// and 100 for every other label.
        /// </returns>
        internal double LabelToRatio(string label)
        {
            switch (label)
            {
                case null:
                    return -1.0;
                case "10%":
                    return 10.0;
                case "20%":
                    return 20.0;
                case "30%":
                    return 30.0;
                case "40%":
                    return 40.0;
                case "50%":
                    return 50.0;
                case "60%":
                    return 60.0;
                case "70%":
                    return 70.0;
                case "80%":
                    return 80.0;
                case "90%":
                    return 90.0;
                default:
                    // "100%" and any unrecognised label share the top value.
                    return 100.0;
            }
        }
206
    }
207
}
(3-3/11)