Projekt

Obecné

Profil

Stáhnout (6.52 KB) Statistiky
| Větev: | Tag: | Revize:
1 ebe96ca4 Roman Kalivoda
//
2
// Author: Roman Kalivoda
3
//
4
5
using System.Collections.Generic;
6
using System;
7
using ServerApp.Parser.Parsers;
8 d358b79e Roman Kalivoda
using ServerApp.Parser.OutputInfo;
9
using System.Linq;
10 0060a0ae Roman Kalivoda
using Microsoft.ML;
11
using log4net;
12 ebe96ca4 Roman Kalivoda
13
namespace ServerApp.Predictor
14
{
15
    /// <summary>
16
    /// A class responsible for preparation of features for classifiers.
17
    /// </summary>
18 ce0940b5 Roman Kalivoda
    class FeatureExtractor
19 ebe96ca4 Roman Kalivoda
    {
20 0e7b6b11 Roman Kalivoda
21
        private const double Confidence = 99.0;
22
23 0060a0ae Roman Kalivoda
        private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));
24
25 ebe96ca4 Roman Kalivoda
        /// <summary>
26
        /// A DataParser instance used to access info objects.
27
        /// </summary>
28 ce0940b5 Roman Kalivoda
        private readonly IDataParser DataParser;
29 ebe96ca4 Roman Kalivoda
30 ce0940b5 Roman Kalivoda
        /// <summary>
31
        /// A configuration object of the <c>Predictor</c> package
32
        /// </summary>
33
        private PredictorConfiguration Configuration;
34 ebe96ca4 Roman Kalivoda
35
        /// <summary>
        /// Creates a new <c>FeatureExtractor</c> and stores the supplied collaborators.
        /// </summary>
        /// <param name="dataParser">Data parser used to access training data.</param>
        /// <param name="configuration">Configuration object of the <c>Predictor</c> package.</param>
        public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
        {
            Configuration = configuration;
            DataParser = dataParser;
        }
44
45
        /// <summary>
        /// Builds the labelled training set for one area.
        /// Attendance records of all buildings mapped to the area are summed per start time,
        /// passed through <c>RejectOutliers</c>, joined with the weather records that have
        /// the same start time, and labelled with the activity level relative to the busiest
        /// joined record.
        /// </summary>
        /// <param name="area">Identifier of the area, as used in <c>Configuration.BuildingsToAreas</c>.</param>
        /// <returns>
        /// One <c>ModelInput</c> per start time that has both attendance and weather data;
        /// an empty list when there is no such start time.
        /// </returns>
        public List<ModelInput> PrepareTrainingInput(int area)
        {
            // find all buildings in area; a set keeps the membership test in the query below cheap
            HashSet<string> buildings = new HashSet<string>();
            foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
            {
                if (kvp.Value == area)
                {
                    buildings.Add(kvp.Key);
                }
            }

            // sum the attendance of all buildings of the area for each start time
            List<ActivityInfo> activities = DataParser.AttendanceList
                .Where(e => buildings.Contains(e.building))
                .GroupBy(e => e.startTime)
                .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
                .OrderBy(e => e.startTime)
                .ToList();
            activities = RejectOutliers(activities);

            // pair every activity with the weather record of the same start time
            var inputs = activities.Join(
                DataParser.WeatherList,
                activity => activity.startTime,
                weatherInfo => weatherInfo.startTime,
                (activity, weatherInfo) => new
                {
                    amount = activity.amount,
                    modelInput = new ModelInput
                    {
                        Hour = activity.startTime.Hour,
                        Temp = (float)weatherInfo.temp,
                        Rain = (float)weatherInfo.rain,
                        Time = activity.startTime,
                        Wind = (float)weatherInfo.wind
                    }
                }).ToList();

            // Max() throws on an empty sequence, so stop here when nothing could be joined
            if (inputs.Count == 0)
            {
                return new List<ModelInput>();
            }

            int max = inputs.Max(e => e.amount);
            foreach (var input in inputs)
            {
                // dividing by a zero maximum would yield NaN, which ends up labelled "100%";
                // treat "no activity at all" as ratio 0 instead
                double ratio = max > 0 ? input.amount / (double)max : 0.0;
                input.modelInput.Label = RatioToLabel(ratio);
            }

            return inputs.Select(e => e.modelInput).ToList();
        }
91
92
        /// <summary>
        /// Removes activities whose amount is reported as a spike by the ML.NET IID spike detector.
        /// </summary>
        /// <param name="data">Activities to filter; their relative order is kept in the result.</param>
        /// <returns>A new list with the activities that were not rejected.</returns>
        private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
        {
            // Confidence is a percentage (that is what DetectIidSpike expects), whereas the
            // p-value is a fraction, so the percentage has to be scaled before the comparison.
            // Comparing against (1 - Confidence) directly gives -98 and never rejects anything.
            double pValueThreshold = 1.0 - Confidence / 100.0;

            MLContext mlContext = new MLContext();
            IDataView input = mlContext.Data.LoadFromEnumerable(data);
            var pipeline = mlContext.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount))
                .Append(mlContext.Transforms.DetectIidSpike(
                    nameof(AnomalyDetectionResult.Prediction),
                    nameof(ActivityInfo.amount),
                    Confidence,
                    data.Count / 4));

            // the pipeline is fitted on an empty data set and then applied to the real data
            ITransformer transformer = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<ActivityInfo>()));
            IDataView transformedData = transformer.Transform(input);
            List<AnomalyDetectionResult> predictions = mlContext.Data.CreateEnumerable<AnomalyDetectionResult>(transformedData, false).ToList();
            List<ActivityInfo> result = new List<ActivityInfo>();

            for (int i = 0; i < predictions.Count; i++)
            {
                // Prediction[1] is logged as the activity value and Prediction[2] as the p-value
                if (predictions[i].Prediction[2] < pValueThreshold)
                {
                    _log.Debug($"Rejecting an outlier activity: {predictions[i].Prediction[1]}, p-value: {predictions[i].Prediction[2]}, from: {data[i].startTime}");
                }
                else
                {
                    result.Add(data[i]);
                }
            }
            return result;
        }
114 d358b79e Roman Kalivoda
115
        /// <summary>
        /// Maps a ratio to one of the ten class labels "10%" … "100%".
        /// The first label whose upper bound is strictly greater than the ratio is chosen;
        /// anything not below the last bound (including NaN) is labelled "100%".
        /// </summary>
        /// <param name="ratio">Value to classify.</param>
        /// <returns>The label of the bucket the ratio falls into.</returns>
        private string RatioToLabel(double ratio)
        {
            // the bounds are single-precision values on purpose: they are widened to double
            // for the comparison exactly like the float literals they replace
            float[] upperBounds = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f };
            string[] labels = { "10%", "20%", "30%", "40%", "50%", "60%", "70%", "80%", "90%" };

            for (int bucket = 0; bucket < upperBounds.Length; bucket++)
            {
                if (ratio < upperBounds[bucket])
                {
                    return labels[bucket];
                }
            }

            return "100%";
        }
158
159 ce0940b5 Roman Kalivoda
        /// <summary>
        /// Converts a class label back to the percentage it stands for.
        /// </summary>
        /// <param name="label">Label such as "10%" or "90%"; may be null.</param>
        /// <returns>
        /// -1 for a null label, 10 to 90 for the labels "10%" to "90%",
        /// and 100 for every other label.
        /// </returns>
        internal double LabelToRatio(string label)
        {
            if (label is null)
            {
                return -1.0;
            }

            switch (label)
            {
                case "10%": return 10.0;
                case "20%": return 20.0;
                case "30%": return 30.0;
                case "40%": return 40.0;
                case "50%": return 50.0;
                case "60%": return 60.0;
                case "70%": return 70.0;
                case "80%": return 80.0;
                case "90%": return 90.0;
                default: return 100.0;
            }
        }
206 ebe96ca4 Roman Kalivoda
    }
207
}