Projekt

Obecné

Profil

Stáhnout (6.45 KB) Statistiky
| Větev: | Tag: | Revize:
1 ebe96ca4 Roman Kalivoda
//
2
// Author: Roman Kalivoda
3
//
4
5
using System.Collections.Generic;
6
using System;
7
using ServerApp.Parser.Parsers;
8 d358b79e Roman Kalivoda
using ServerApp.Parser.OutputInfo;
9
using System.Linq;
10 0060a0ae Roman Kalivoda
using Microsoft.ML;
11
using log4net;
12 ebe96ca4 Roman Kalivoda
13
namespace ServerApp.Predictor
14
{
15
    /// <summary>
16
    /// A class responsible for preparation of features for classifiers.
17
    /// </summary>
18 ce0940b5 Roman Kalivoda
    class FeatureExtractor
19 ebe96ca4 Roman Kalivoda
    {
20 0060a0ae Roman Kalivoda
        /// <summary>
        /// Logger shared by all instances of this class.
        /// </summary>
        private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));

        /// <summary>
        /// A DataParser instance used to access info objects.
        /// </summary>
        private readonly IDataParser DataParser;

        /// <summary>
        /// A configuration object of the <c>Predictor</c> package.
        /// Assigned once in the constructor, hence read-only like <see cref="DataParser"/>.
        /// </summary>
        private readonly PredictorConfiguration Configuration;
31 ebe96ca4 Roman Kalivoda
32
        /// <summary>
        /// Creates a feature extractor bound to the given data source and configuration.
        /// </summary>
        /// <param name="dataParser">Data parser used to access training data.</param>
        /// <param name="configuration">Configuration object of the <c>Predictor</c> package; stored as passed in.</param>
        public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
        {
            // Both arguments are kept by reference; no validation or copying takes place here.
            Configuration = configuration;
            DataParser = dataParser;
        }
41
42
        /// <summary>
        /// Builds the labelled training inputs for one area from the attendance and weather records.
        /// </summary>
        /// <remarks>
        /// Steps: collect the buildings mapped to <paramref name="area"/>, sum the attendance amounts of
        /// those buildings per start time, drop the records flagged by <see cref="RejectOutliers"/>,
        /// inner-join the rest with the weather records on start time, and label every joined row by
        /// its amount relative to the largest joined amount.
        /// </remarks>
        /// <param name="area">Area identifier, matched against the values of <c>Configuration.BuildingsToAreas</c>.</param>
        /// <returns>
        /// The labelled model inputs ordered by start time; an empty list when no attendance record of the
        /// area exists or none of them can be paired with a weather record.
        /// </returns>
        public List<ModelInput> PrepareTrainingInput(int area)
        {
            List<ActivityInfo> attendance = DataParser.AttendanceList;

            // find all buildings in area; a set keeps the membership test in the filter below O(1)
            HashSet<string> buildings = new HashSet<string>();
            foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
            {
                if (kvp.Value == area)
                {
                    buildings.Add(kvp.Key);
                }
            }

            // One record per start time: amounts of all matching buildings are summed.
            // A group with a single record is passed through unchanged by Aggregate.
            List<ActivityInfo> activities = attendance
                .Where(e => buildings.Contains(e.building))
                .GroupBy(e => e.startTime)
                .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
                .OrderBy(e => e.startTime)
                .ToList();

            // Nothing to train on; previously this case ended in an exception further down.
            if (activities.Count == 0)
            {
                return new List<ModelInput>();
            }

            activities = RejectOutliers(activities);

            var inputs = activities.Join(DataParser.WeatherList, activity => activity.startTime, weatherInfo => weatherInfo.startTime, (activity, weatherInfo) => new
            {
                amount = activity.amount,
                modelInput = new ModelInput
                {
                    Hour = activity.startTime.Hour,
                    Temp = (float)weatherInfo.temp,
                    Rain = (float)weatherInfo.rain,
                    Time = activity.startTime,
                    Wind = (float)weatherInfo.wind
                }
            }).ToList();

            // Max() throws InvalidOperationException on an empty sequence, so bail out first.
            if (inputs.Count == 0)
            {
                return new List<ModelInput>();
            }

            int max = inputs.Max(e => e.amount);
            foreach (var input in inputs)
            {
                // With max == 0 the division would give NaN (0 / 0.0), which RatioToLabel maps to "100%";
                // treat that case as ratio 0 instead.
                double ratio = max == 0 ? 0.0 : input.amount / (double)max;
                input.modelInput.Label = RatioToLabel(ratio);
            }

            return inputs.Select(e => e.modelInput).ToList();
        }
88
89
        /// <summary>
        /// Returns the activities that the ML.NET IID spike detector does not flag as a spike.
        /// </summary>
        /// <param name="data">Activities to screen; the detector runs over their <c>amount</c> column in list order.</param>
        /// <returns>A new list holding the non-flagged activities in their original order.</returns>
        private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
        {
            MLContext context = new MLContext();
            IDataView activityView = context.Data.LoadFromEnumerable(data);

            // Convert the amount column first, then run spike detection on it, writing into the
            // Prediction column. 99.0 and data.Count / 4 (integer division) are passed positionally;
            // NOTE(review): per the ML.NET API these are the confidence and the p-value history length - confirm.
            var amountConversion = context.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount));
            var spikeDetection = context.Transforms.DetectIidSpike(
                nameof(AnomalyDetectionResult.Prediction),
                nameof(ActivityInfo.amount),
                99.0,
                data.Count / 4);
            var pipeline = amountConversion.Append(spikeDetection);

            // The pipeline is fitted on an empty list; the real data is only pushed through Transform.
            ITransformer model = pipeline.Fit(context.Data.LoadFromEnumerable(new List<ActivityInfo>()));
            IDataView scored = model.Transform(activityView);
            List<AnomalyDetectionResult> predictions = context.Data
                .CreateEnumerable<AnomalyDetectionResult>(scored, false)
                .ToList();

            List<ActivityInfo> kept = new List<ActivityInfo>();
            int index = 0;
            foreach (AnomalyDetectionResult verdict in predictions)
            {
                // Prediction[0] == 1 marks a rejected row; the remaining two elements are only logged.
                if (verdict.Prediction[0] != 1)
                {
                    kept.Add(data[index]);
                }
                else
                {
                    _log.Debug($"Rejecting an outlier activity: {verdict.Prediction[1]}, p-value: {verdict.Prediction[2]}, from: {data[index].startTime}");
                }

                index++;
            }

            return kept;
        }
111 d358b79e Roman Kalivoda
112
        /// <summary>
        /// Maps a ratio to the label of the ten-percent bucket it falls into.
        /// </summary>
        /// <remarks>
        /// Buckets are half-open: [0.1, 0.2) yields "20%", [0.2, 0.3) yields "30%" and so on.
        /// The thresholds are <c>double</c> literals on purpose. With <c>float</c> literals the widened
        /// thresholds lie slightly above or below the decimal value depending on the digit, so exact
        /// boundary ratios (0.1, 0.7, 0.9, ...) ended up in inconsistent buckets.
        /// </remarks>
        /// <param name="ratio">
        /// Value to classify. Anything below 0.1 (including negative values) yields "10%";
        /// 0.9 and above, as well as NaN, yields "100%".
        /// </param>
        /// <returns>One of the labels "10%", "20%", ..., "100%".</returns>
        private string RatioToLabel(double ratio)
        {
            if (ratio < 0.1)
            {
                return "10%";
            }

            if (ratio < 0.2)
            {
                return "20%";
            }

            if (ratio < 0.3)
            {
                return "30%";
            }

            if (ratio < 0.4)
            {
                return "40%";
            }

            if (ratio < 0.5)
            {
                return "50%";
            }

            if (ratio < 0.6)
            {
                return "60%";
            }

            if (ratio < 0.7)
            {
                return "70%";
            }

            if (ratio < 0.8)
            {
                return "80%";
            }

            if (ratio < 0.9)
            {
                return "90%";
            }

            // Every comparison above is false for NaN, so NaN also lands here.
            return "100%";
        }
155
156 ce0940b5 Roman Kalivoda
        /// <summary>
        /// Converts a bucket label back to a number.
        /// </summary>
        /// <param name="label">A label such as "10%"; may be <c>null</c>.</param>
        /// <returns>
        /// -1 for <c>null</c>; 10, 20, ..., 90 for the labels "10%" to "90%";
        /// 100 for every other string, "100%" included.
        /// </returns>
        internal double LabelToRatio(string label)
        {
            switch (label)
            {
                case null:
                    return -1d;
                case "10%":
                    return 10d;
                case "20%":
                    return 20d;
                case "30%":
                    return 30d;
                case "40%":
                    return 40d;
                case "50%":
                    return 50d;
                case "60%":
                    return 60d;
                case "70%":
                    return 70d;
                case "80%":
                    return 80d;
                case "90%":
                    return 90d;
                default:
                    // Unrecognised labels are treated like "100%".
                    return 100d;
            }
        }
203 ebe96ca4 Roman Kalivoda
    }
204
}