Projekt

Obecné

Profil

Stáhnout (39.3 KB) Statistiky
| Větev: | Tag: | Revize:
1 eff8ec56 Vojtěch Bartička
using Core.Contexts;
2
using Core.Entities;
3
using Models.Annotations;
4
using Models.Enums;
5
using Serilog;
6
using System;
7
using System.Collections.Generic;
8
using System.Linq;
9
using System.Text;
10
using System.Threading.Tasks;
11 6bdb3d95 Vojtěch Bartička
using Microsoft.EntityFrameworkCore;
12 a6675a6d Vojtěch Bartička
using AutoMapper;
13
using Models.Tags;
14 3c185841 Vojtěch Bartička
using Ganss.XSS;
15
using HtmlAgilityPack;
16
using System.Text.RegularExpressions;
17 553486ad Vojtěch Bartička
using Newtonsoft.Json;
18 8dc25caa Vojtěch Bartička
using Core.GraphUtils;
19 eff8ec56 Vojtěch Bartička
20
namespace Core.Services.AnnotationService
21
{
22
    public class AnnotationServiceEF : IAnnotationService
23
    {
24
        // Name of the HTML attribute that stores an element's index into the tag position lists.
        private const string TAG_ID_ATTRIBUTE_NAME = "aswi-tag-id";
        // Name of the HTML attribute that stores the instance ID of the annotation tag a span renders.
        private const string TAG_INSTANCE_ATTRIBUTE_NAME = "aswi-tag-instance";
        // Name of the HTML attribute that stores the entity ID of the annotation tag a span renders.
        private const string TAG_EF_ID_ATTRIBUTE_NAME = "aswi-tag-ef-id";

        // Collaborators handed over through the constructor.
        private readonly IMapper mapper;
        private readonly ILogger logger;
        private readonly DatabaseContext context;
31 0a9f9349 Vojtěch Bartička
32 a6675a6d Vojtěch Bartička
        /// <summary>
        /// Creates the service on top of the given database context, logger and object mapper.
        /// </summary>
        /// <param name="context">Database context used for all queries and updates.</param>
        /// <param name="logger">Logger used for informational messages.</param>
        /// <param name="mapper">Mapper used to convert entities to response models.</param>
        public AnnotationServiceEF(DatabaseContext context, ILogger logger, IMapper mapper)
        {
            (this.context, this.logger, this.mapper) = (context, logger, mapper);
        }
38
39
        /// <summary>
        /// Assigns every requested document to every requested user by creating a new
        /// <see cref="Annotation"/> for each (user, document) pair that does not exist yet.
        /// </summary>
        /// <param name="request">Request carrying the document IDs and user IDs to combine.</param>
        /// <param name="clientUserId">ID of the user performing the assignment.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the assigning user cannot be resolved to exactly one user, or when at
        /// least one of the requested documents does not exist.
        /// </exception>
        public void CreateDocumentAnnotations(AnnotationsAddRequest request, Guid clientUserId)
        {
            User addingUser = context.Users.Single(u => u.Id == clientUserId);

            // Check the documents exist. The IDs are de-duplicated first so that a request
            // repeating the same document is not mistaken for a missing document.
            var requestedDocumentIds = request.DocumentIdList.Distinct().ToList();
            var documents = context.Documents.Where(d => requestedDocumentIds.Contains(d.Id)).ToList();
            if (documents.Count != requestedDocumentIds.Count)
            {
                logger.Information($"Received a non-existent Document ID when assigning documents to users");
                throw new InvalidOperationException($"{requestedDocumentIds.Count - documents.Count} of the received documents do not exist");
            }

            // One timestamp for the whole batch keeps DateAssigned and DateLastChanged identical.
            DateTime assignedAt = DateTime.Now;

            var users = context.Users.Where(u => request.UserIdList.Contains(u.Id)).ToList();
            foreach (var user in users)
            {
                // Hash set gives constant-time membership checks in the inner loop.
                var userAnnotatedDocuments = context.Annotations
                    .Where(a => a.User == user)
                    .Select(a => a.Document)
                    .ToHashSet();

                foreach (var doc in documents)
                {
                    if (userAnnotatedDocuments.Contains(doc))
                    {
                        logger.Information($"User {user.Username} has already been assigned the document {doc.Id}, ignoring");
                        continue;
                    }

                    context.Annotations.Add(new Annotation()
                    {
                        User = user,
                        UserAssigned = addingUser,
                        DateAssigned = assignedAt,
                        DateLastChanged = assignedAt,
                        Document = doc,
                        State = EState.NEW,
                        Note = ""
                    });
                }
            }

            context.SaveChanges();
        }
78 6bdb3d95 Vojtěch Bartička
79
        /// <summary>
        /// Lists all annotations assigned to the given user together with the name of the
        /// annotated document and the annotation state.
        /// </summary>
        /// <param name="userId">ID of the user whose annotations are listed.</param>
        /// <returns>Response containing one entry per annotation of the user.</returns>
        public AnnotationListResponse GetUserAnnotations(Guid userId)
        {
            // The document is eagerly loaded with each annotation, so the name is read from the
            // annotation's own navigation property. Pairing annotations with a separately
            // queried (and unordered) document list could attach the wrong document name.
            var annotations = context.Annotations
                .Where(a => a.User.Id == userId)
                .Include(a => a.Document)
                .ToList();

            var infos = annotations
                .Select(a => new AnnotationListInfo()
                {
                    AnnotationId = a.Id,
                    DocumentName = a.Document.Name,
                    State = a.State
                })
                .ToList();

            return new AnnotationListResponse()
            {
                Annotations = infos
            };
        }
102 a6675a6d Vojtěch Bartička
103 f2275185 Vojtěch Bartička
        /// <summary>
        /// Replaces the note of an annotation.
        /// </summary>
        /// <param name="annotationId">ID of the annotation to update.</param>
        /// <param name="userId">ID of the calling user.</param>
        /// <param name="userRole">Role of the calling user; roles below administrator may only edit their own annotations.</param>
        /// <param name="request">Request carrying the new note text.</param>
        /// <exception cref="InvalidOperationException">Thrown when no annotation with the given ID exists.</exception>
        /// <exception cref="UnauthorizedAccessException">Thrown when a non-administrator edits an annotation assigned to someone else.</exception>
        public void AddNoteToAnnotation(Guid annotationId, Guid userId, ERole userRole, AddNoteToAnnotationRequest request)
        {
            // FirstOrDefault + null check instead of catching every exception: a database
            // failure is no longer reported as a missing annotation.
            Annotation annotation = context.Annotations
                .Include(a => a.User)
                .FirstOrDefault(a => a.Id == annotationId);
            if (annotation is null)
            {
                throw new InvalidOperationException("Annotation not found");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException("User does not have access to this annotation");
            }

            annotation.Note = request.Note;
            context.SaveChanges();
        }
123
124
125 a6675a6d Vojtěch Bartička
        /// <summary>
        /// Loads an annotation with its document and tags and returns everything needed to
        /// render it. The rendered HTML and the tag position lists are cached on the annotation:
        /// when no cache exists the document is fully preprocessed; when the annotation was
        /// modified since the last render the cached HTML is patched with the last added or
        /// removed tag; otherwise the cache is used as-is.
        /// </summary>
        /// <param name="annotationId">ID of the annotation to load.</param>
        /// <param name="userId">ID of the calling user.</param>
        /// <param name="userRole">Role of the calling user; roles below administrator may only read their own annotations.</param>
        /// <returns>Annotation data including source content, HTML to render, tag positions and CSS info.</returns>
        /// <exception cref="InvalidOperationException">Thrown when no annotation with the given ID exists.</exception>
        /// <exception cref="UnauthorizedAccessException">Thrown when a non-administrator requests an annotation assigned to someone else.</exception>
        public AnnotationInfo GetAnnotation(Guid annotationId, Guid userId, ERole userRole)
        {
            var annotation = context.Annotations
                .Include(a => a.User)
                .Include(a => a.Document).ThenInclude(d => d.Content)
                .FirstOrDefault(a => a.Id == annotationId);
            if (annotation is null)
            {
                throw new InvalidOperationException("Annotation not found");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException($"User {userId} does not have assigned annotation {annotationId}");
            }

            // The content was eagerly loaded above, so no second query is needed.
            var documentContent = annotation.Document.Content;

            var tags = context.AnnotationTags.Where(at => at.Annotation.Id == annotationId)
                .Include(at => at.Tag)
                .ThenInclude(t => t.Category)
                .Include(at => at.SubTag)
                .OrderBy(at => at.Position)
                .ToList();

            List<TagInstanceInfo> tagInstanceInfos = tags
                .Select(tag => mapper.Map<TagInstanceInfo>(tag))
                .ToList();

            // Persists the current render state (position lists, CSS, HTML) on the annotation.
            void StoreRenderCache(string html)
            {
                annotation.CachedStartPositions = JsonConvert.SerializeObject(TagStartPositions);
                annotation.CachedLengths = JsonConvert.SerializeObject(TagStartLengths);
                annotation.CachedClosingPositions = JsonConvert.SerializeObject(TagClosingPositions);
                annotation.CachedClosingLengths = JsonConvert.SerializeObject(TagClosingLengths);
                annotation.CachedCSS = JsonConvert.SerializeObject(TagInstanceCSS);
                annotation.ModifiedType = EModified.NONE;
                annotation.CachedDocumentHTML = html;
                context.SaveChanges();
            }

            string docToRender;
            if (string.IsNullOrEmpty(annotation.CachedDocumentHTML))
            {
                // Start from fresh render state so nothing left over from an earlier call on
                // this service instance leaks into the result. New lists are assigned (rather
                // than cleared) because a previously returned AnnotationInfo may still
                // reference the old CSS list.
                TagStartPositions = new();
                TagStartLengths = new();
                TagClosingPositions = new();
                TagClosingLengths = new();
                TagInstanceCSS = new();
                NodeDict.Clear();

                docToRender = FullPreprocessHTML(documentContent.Content, tags);
                StoreRenderCache(docToRender);
            }
            else
            {
                docToRender = annotation.CachedDocumentHTML;
                TagStartPositions = JsonConvert.DeserializeObject<List<int>>(annotation.CachedStartPositions);
                TagStartLengths = JsonConvert.DeserializeObject<List<int>>(annotation.CachedLengths);
                TagClosingPositions = JsonConvert.DeserializeObject<List<int>>(annotation.CachedClosingPositions);
                TagClosingLengths = JsonConvert.DeserializeObject<List<int>>(annotation.CachedClosingLengths);
                TagInstanceCSS = JsonConvert.DeserializeObject<List<TagInstanceCSSInfo>>(annotation.CachedCSS);

                // The annotation has been modified and we need to either add the new tag or remove the tag
                if (annotation.ModifiedType != EModified.NONE)
                {
                    if (annotation.ModifiedType == EModified.ADDED)
                    {
                        var lastModifiedTag = context.AnnotationTags.First(at => at.Id == annotation.LastModifiedTagId);
                        docToRender = PartialPreprocessHTMLAddTag(docToRender, documentContent.Content, lastModifiedTag, tags);
                    }
                    else if (annotation.ModifiedType == EModified.REMOVED)
                    {
                        // The removed tag no longer exists in the database, so a stub carrying only its ID is passed on.
                        docToRender = PartialPreprocessHTMLRemoveTag(docToRender, documentContent.Content, new AnnotationTag() { Id = annotation.LastModifiedTagId.Value }, tags);
                    }

                    StoreRenderCache(docToRender);
                }
            }

            // We probably cannot use AutoMapper since we are dealing with too many different entities
            AnnotationInfo annotationInfo = new()
            {
                SourceDocumentContent = documentContent.Content,
                DocumentToRender = docToRender,
                TagStartPositions = TagStartPositions.ToArray(),
                TagLengths = TagStartLengths.ToArray(),
                Note = annotation.Note,
                State = annotation.State,
                Type = IsHtml(documentContent.Content) ? EDocumentType.HTML : EDocumentType.TEXT,
                TagInstances = tagInstanceInfos,
                CSSInfo = TagInstanceCSS
            };

            NodeDict.Clear();

            return annotationInfo;
        }
221
222 0a9f9349 Vojtěch Bartička
        // Mutable render state shared by the preprocessing helpers. The four position lists are
        // parallel: the value of an element's tag-id attribute is its index into each of them.

        // Start offset of each element.
        private List<int> TagStartPositions = new List<int>();
        // Length of each element's opening tag (0 for generated spans).
        private List<int> TagStartLengths = new List<int>();
        // Offset at which each element's closing tag starts.
        private List<int> TagClosingPositions = new List<int>();
        // Length of each element's closing tag (0 for generated spans).
        private List<int> TagClosingLengths = new List<int>();
        // Maps a node of the original document to its counterpart in the document being edited.
        private Dictionary<HtmlNode, HtmlNode> NodeDict = new Dictionary<HtmlNode, HtmlNode>();
        // CSS information (instance, color, padding) collected for the rendered tags.
        private List<TagInstanceCSSInfo> TagInstanceCSS = new List<TagInstanceCSSInfo>();
228 0a9f9349 Vojtěch Bartička
229 553486ad Vojtěch Bartička
        /*
230
         *      Full HTML Preprocessing -------------------------------------------------------------------------------
231
         */
232
233
234
        /// <summary>
        /// Builds the HTML to render from scratch: assigns an id attribute to every element,
        /// wraps loose text in spans, splits text nodes so that every annotation tag is covered
        /// by its own span(s), rewrites links, sanitizes the result and collects the tag CSS.
        /// Appends to the position lists and CSS list held in the service's fields.
        /// </summary>
        /// <param name="htmlSource">Source content of the document.</param>
        /// <param name="tags">Annotation tags to render into the document.</param>
        /// <returns>Sanitized HTML with annotation spans.</returns>
        private string FullPreprocessHTML(string htmlSource, List<AnnotationTag> tags)
        {
            var docOriginal = new HtmlDocument();
            docOriginal.LoadHtml(htmlSource);
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlSource);

            // The original document is never modified, so its node list can be materialized
            // once instead of re-walking the tree on every enumeration.
            var descendantsOriginal = docOriginal.DocumentNode.DescendantsAndSelf().ToList();
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();

            int currentId = 0;

            FillNodeDict(descendantsOriginal, descendantsToEdit);
            AssignIdsToOriginalDocument(descendantsOriginal, ref currentId);

            WrapTextInSpan(descendantsOriginal, docToEdit);

            // WrapTextInSpan replaces text nodes with spans, so the snapshot taken above is
            // stale: it still holds the detached text nodes and lacks the new spans.
            descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
            int descCount = descendantsToEdit.Count;

            foreach (var tag in tags)
            {
                int selectionStart = tag.Position;
                int selectionEnd = tag.Position + tag.Length;
                List<HtmlNode> addedForSelection = new();

                int i = 0;
                while (i < descCount)
                {
                    var node = descendantsToEdit[i];
                    if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
                        node.ParentNode.Name == "style")
                    {
                        i++;
                        continue;
                    }

                    int nodeId = node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);
                    if (nodeId < 0)
                    {
                        // The parent carries no id (e.g. text directly under the document
                        // root), so there are no recorded positions to compare against.
                        i++;
                        continue;
                    }

                    var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                    var end = TagClosingPositions[nodeId];

                    if (selectionStart >= end || selectionEnd <= start)
                    {
                        // The tag does not overlap this text node.
                        i++;
                        continue;
                    }

                    if (selectionStart <= start && selectionEnd >= end)
                    {
                        addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else if (selectionStart <= start)
                    {
                        addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else if (selectionEnd >= end)
                    {
                        addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else
                    {
                        addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }

                    // The tree changed; refresh the snapshot and continue from the same index.
                    // The nodes created above are skipped through addedForSelection.
                    descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
                    descCount = descendantsToEdit.Count;
                }
            }

            ModifyLinks(descendantsToEdit);
            string docToRender = docToEdit.DocumentNode.OuterHtml;

            // Only the attributes listed below survive sanitization.
            // NOTE(review): TAG_EF_ID_ATTRIBUTE_NAME and "target" are set on nodes elsewhere in
            // this class but are not allow-listed here — confirm whether dropping them is intended.
            HtmlSanitizer sanitizer = new HtmlSanitizer();
            sanitizer.AllowedAttributes.Clear();
            sanitizer.AllowedAttributes.Add(TAG_ID_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add("href");
            sanitizer.AllowedAttributes.Add("end");
            sanitizer.AllowedAttributes.Add("start");
            sanitizer.AllowedAttributes.Add("class");
            sanitizer.AllowedTags.Remove("script");
            sanitizer.AllowedTags.Add("style");
            sanitizer.AllowedTags.Add("a");
            docToRender = sanitizer.Sanitize(docToRender);

            GenerateCSS(tags);
            return docToRender;
        }
325
326
        /// <summary>
        /// Handles a text node that lies completely inside the selection: the node is replaced
        /// by a single annotated span holding all of its text.
        /// </summary>
        /// <returns>The span that replaced the text node.</returns>
        private HtmlNode SolveFullFill(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit, AnnotationTag tag)
        {
            string coveredText = node.InnerText;

            // Detach the text node, remembering where it was.
            var parent = node.ParentNode;
            int slot = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(slot);

            // Both edges are marked only when the selection matches the node exactly.
            // NOTE(review): a selection aligned with just one edge gets no marker here — confirm this is intended.
            bool exactMatch = selectionEnd == end && selectionStart == start;
            EPosition marker = exactMatch ? EPosition.MARK_LEFT_RIGHT : EPosition.MARK_NONE;

            HtmlNode annotated = CreateSpan(docToEdit, coveredText, TagStartPositions.Count, tag.Instance, tag.Id, start, marker);
            parent.ChildNodes.Insert(slot, annotated);

            return annotated;
        }
346
347
        /// <summary>
        /// Handles a text node whose beginning is selected but whose end is not: the node is
        /// replaced by an annotated span (marked on the right) followed by a plain span with
        /// the remaining text.
        /// </summary>
        /// <returns>The created spans: the annotated one first, then the trailing one.</returns>
        private List<HtmlNode> SolveRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTag tag)
        {
            string content = node.InnerText;
            int tailOffset = Math.Min(selectionStart - start + tag.Length, content.Length);
            string tail = content.Substring(tailOffset);
            string covered = content.Substring(0, selectionEnd - start);

            // Detach the text node, remembering where it was.
            var parent = node.ParentNode;
            int slot = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(slot);

            // Spans are created in document order because each one takes the next free id.
            HtmlNode annotated = CreateSpan(docToEdit, covered, TagStartPositions.Count, tag.Instance, tag.Id, start, EPosition.MARK_RIGHT);
            parent.ChildNodes.Insert(slot, annotated);

            HtmlNode trailing = CreateSpan(docToEdit, tail, TagStartPositions.Count, null, null, start + covered.Length);
            parent.ChildNodes.Insert(slot + 1, trailing);

            return new List<HtmlNode> { annotated, trailing };
        }
370
371
        /// <summary>
        /// Handles a text node whose end is selected but whose beginning is not: the node is
        /// replaced by a plain span with the leading text followed by an annotated span
        /// (marked on the left).
        /// </summary>
        /// <returns>The created spans: the annotated one first, then the leading one.</returns>
        private List<HtmlNode> SolveLeftGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTag tag)
        {
            string content = node.InnerText;
            int gap = selectionStart - start;
            string head = content.Substring(0, gap);
            string covered = content.Substring(gap, Math.Min(tag.Length, content.Length - head.Length));

            // Detach the text node, remembering where it was.
            var parent = node.ParentNode;
            int slot = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(slot);

            // Spans are created in document order because each one takes the next free id.
            HtmlNode leading = CreateSpan(docToEdit, head, TagStartPositions.Count, null, null, start);
            parent.ChildNodes.Insert(slot, leading);

            HtmlNode annotated = CreateSpan(docToEdit, covered, TagStartPositions.Count, tag.Instance, tag.Id, start + head.Length, EPosition.MARK_LEFT);
            parent.ChildNodes.Insert(slot + 1, annotated);

            return new List<HtmlNode> { annotated, leading };
        }
394
395
        /// <summary>
        /// Handles a selection that lies strictly inside a text node: the node is replaced by
        /// a plain leading span, an annotated span marked on both sides, and a plain trailing span.
        /// </summary>
        /// <returns>The created spans: annotated, leading, trailing.</returns>
        private List<HtmlNode> SolveLeftRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                                 AnnotationTag tag)
        {
            string content = node.InnerText;
            int gap = selectionStart - start;
            string head = content.Substring(0, gap);
            string tail = content.Substring(gap + tag.Length);
            string covered = content.Substring(gap, tag.Length);

            // Detach the text node, remembering where it was.
            var parent = node.ParentNode;
            int slot = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(slot);

            int coveredStart = start + head.Length;
            int tailStart = coveredStart + covered.Length;

            // Spans are created in document order because each one takes the next free id.
            HtmlNode leading = CreateSpan(docToEdit, head, TagStartPositions.Count, null, null, start);
            parent.ChildNodes.Insert(slot, leading);

            HtmlNode annotated = CreateSpan(docToEdit, covered, TagStartPositions.Count, tag.Instance, tag.Id, coveredStart, EPosition.MARK_LEFT_RIGHT);
            parent.ChildNodes.Insert(slot + 1, annotated);

            HtmlNode trailing = CreateSpan(docToEdit, tail, TagStartPositions.Count, null, null, tailStart);
            parent.ChildNodes.Insert(slot + 2, trailing);

            return new List<HtmlNode> { annotated, leading, trailing };
        }
423
424 553486ad Vojtěch Bartička
        /// <summary>
        /// Creates a span with the given text, records its positions in the position lists and
        /// stamps it with the given tag id. When an instance ID is supplied the span is also
        /// marked as an annotation and gets the instance/entity attributes plus optional
        /// "start"/"end" edge markers.
        /// </summary>
        /// <param name="doc">Document that creates the element.</param>
        /// <param name="text">Content of the span.</param>
        /// <param name="tagId">Value written to the tag-id attribute.</param>
        /// <param name="instanceId">Tag instance ID, or null for a plain (non-annotated) span.</param>
        /// <param name="entityId">Tag entity ID written next to the instance ID.</param>
        /// <param name="startPosition">Start offset recorded for the span.</param>
        /// <param name="position">Which edges of the span receive a marker attribute.</param>
        /// <returns>The new span; it is not attached to any parent yet.</returns>
        private HtmlNode CreateSpan(HtmlDocument doc, string text, int tagId, Guid? instanceId, Guid? entityId, int startPosition, EPosition position = EPosition.MARK_NONE)
        {
            HtmlNode span = doc.CreateElement("span");
            span.InnerHtml = text;

            // Generated spans have no opening/closing markup in the source, hence zero lengths.
            TagStartPositions.Add(startPosition);
            TagStartLengths.Add(0);
            TagClosingPositions.Add(startPosition + text.Length);
            TagClosingLengths.Add(0);
            span.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, tagId.ToString());

            if (!instanceId.HasValue)
            {
                return span;
            }

            span.AddClass("annotation");
            span.Attributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME, instanceId.Value.ToString());
            span.Attributes.Add(TAG_EF_ID_ATTRIBUTE_NAME, entityId.ToString());

            bool markLeft = position is EPosition.MARK_LEFT or EPosition.MARK_LEFT_RIGHT;
            bool markRight = position is EPosition.MARK_RIGHT or EPosition.MARK_LEFT_RIGHT;
            if (markLeft)
            {
                span.Attributes.Add("start", "1");
            }
            if (markRight)
            {
                span.Attributes.Add("end", "1");
            }

            return span;
        }
452
453
        /// <summary>
        /// Tells <c>CreateSpan</c> which edges of an annotated span get a marker attribute.
        /// </summary>
        private enum EPosition
        {
            /// <summary>No marker attribute is added.</summary>
            MARK_NONE = 0,

            /// <summary>Both the "start" and the "end" attribute are added.</summary>
            MARK_LEFT_RIGHT = 2,

            /// <summary>Only the "end" attribute is added.</summary>
            MARK_RIGHT = 3,

            /// <summary>Only the "start" attribute is added.</summary>
            MARK_LEFT = 5
        }
460
461 179a097d Vojtěch Bartička
        /// <summary>
        /// Rewrites every anchor with an href so that it points to the application's
        /// "/link" route, passing the original target in the "url" query parameter.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes to scan; matching anchors are modified in place.</param>
        private void ModifyLinks(IEnumerable<HtmlNode> descendantsOriginal)
        {
            foreach (var anchor in descendantsOriginal.Where(n => n.Name == "a" && n.Attributes.Contains("href")))
            {
                // The attribute value comes from HTML markup, so entities such as "&amp;" are
                // decoded first. The target is then URL-encoded so that characters like '&'
                // or '#' stay part of the "url" parameter instead of ending it early.
                string target = HtmlEntity.DeEntitize(anchor.Attributes["href"].Value ?? string.Empty);
                anchor.SetAttributeValue("href", "/link?url=" + System.Net.WebUtility.UrlEncode(target));

                // NOTE(review): FullPreprocessHTML's sanitizer does not allow-list "target",
                // so this attribute presumably does not reach the rendered output — confirm.
                anchor.SetAttributeValue("target", "_blank");
            }
        }
475
476 0a9f9349 Vojtěch Bartička
        /// <summary>
        /// Wraps loose text of the document being edited in spans so that every piece of text
        /// has a parent element with a tag id. Two situations are handled: a document that
        /// consists of a single text node (non-HTML content), and elements that mix text with
        /// child elements. Elements containing only text or only child elements are left alone.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes of the unmodified document; positions are read from them.</param>
        /// <param name="docToEdit">Document whose tree receives the covering spans.</param>
        private void WrapTextInSpan(IEnumerable<HtmlNode> descendantsOriginal, HtmlDocument docToEdit)
        {
            // Materialize once: the sequence may be lazy and would otherwise re-walk the tree
            // for every Count()/ElementAt() call.
            var originalNodes = descendantsOriginal as IList<HtmlNode> ?? descendantsOriginal.ToList();

            // Creates the span covering an original text node and records its positions under a new id.
            HtmlNode CreateCoveringSpan(HtmlNode textNode)
            {
                HtmlNode coveringSpan = docToEdit.CreateElement("span");
                coveringSpan.InnerHtml = textNode.InnerHtml;
                TagStartPositions.Add(textNode.InnerStartIndex);
                TagStartLengths.Add(0);
                TagClosingPositions.Add(textNode.InnerStartIndex + textNode.InnerLength);
                TagClosingLengths.Add(0);
                coveringSpan.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, (TagStartPositions.Count - 1).ToString());
                return coveringSpan;
            }

            // Special case for non-html documents
            if (originalNodes.Count == 2)
            {
                var documentNode = originalNodes[0];
                var childNode = originalNodes[1];
                if (documentNode.Name == "#document" && childNode.Name == "#text")
                {
                    HtmlNode coveringSpan = CreateCoveringSpan(childNode);

                    var parent = NodeDict[documentNode];
                    parent.ChildNodes.RemoveAt(0);
                    parent.ChildNodes.Add(coveringSpan);

                    return;
                }
            }

            foreach (var node in originalNodes)
            {
                // Names containing '#' (#document, #text, ...) are not elements.
                if (node.Name.Contains("#"))
                {
                    continue;
                }

                bool hasNonElementChild = node.ChildNodes.Any(child => child.Name.Contains("#"));
                bool hasElementChild = node.ChildNodes.Any(child => !child.Name.Contains("#"));
                if (!hasNonElementChild || !hasElementChild)
                {
                    // Only text, only child elements, or no children at all: nothing to wrap.
                    continue;
                }

                foreach (var child in node.ChildNodes)
                {
                    if (!child.Name.Contains("#text"))
                    {
                        continue;
                    }

                    HtmlNode coveringSpan = CreateCoveringSpan(child);

                    // Swap the text node's counterpart in the edited document for the span.
                    var parent = NodeDict[node];
                    var index = parent.ChildNodes.IndexOf(NodeDict[child]);

                    parent.ChildNodes.RemoveAt(index);
                    parent.ChildNodes.Insert(index, coveringSpan);
                }
            }
        }
558
559 4e12f0cc Vojtěch Bartička
        /// <summary>
        /// Appends one CSS info entry (instance, color, padding) per tag to the collected CSS list.
        /// The padding is derived from the value that the intersection graph coloring assigns to the tag.
        /// </summary>
        /// <param name="tags">Tags to generate CSS information for.</param>
        private void GenerateCSS(List<AnnotationTag> tags)
        {
            var intersections = Intersections.FindIntersections(tags);
            var paddingLookup = Intersections.ColorGraph(intersections);

            TagInstanceCSS.AddRange(tags.Select(tag => new TagInstanceCSSInfo
            {
                InstanceId = tag.Instance,
                Color = tag.Tag.Color,
                Padding = (paddingLookup[tag] + 1) * 2
            }));
        }
576
577
        /// <summary>
        /// Gives every element of the original document an id: its opening/closing tag positions
        /// and lengths are appended to the position lists and the index is written as the tag-id
        /// attribute onto the element's counterpart in the document being edited.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes of the unmodified document.</param>
        /// <param name="currentId">Receives the id assigned last; left untouched when no element is found.</param>
        private void AssignIdsToOriginalDocument(IEnumerable<HtmlNode> descendantsOriginal, ref int currentId)
        {
            foreach (var source in descendantsOriginal)
            {
                var counterpart = NodeDict[source];

                // Names containing '#' (#document, #text, ...) are not elements and get no id.
                if (source.Name.Contains("#"))
                {
                    continue;
                }

                int outerStart = source.OuterStartIndex;
                int innerStart = source.InnerStartIndex;
                int innerEnd = innerStart + source.InnerLength;
                int outerEnd = outerStart + source.OuterLength;

                TagStartPositions.Add(outerStart);
                TagStartLengths.Add(innerStart - outerStart);

                currentId = TagStartPositions.Count - 1;
                counterpart.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, currentId.ToString());

                TagClosingPositions.Add(innerEnd);
                TagClosingLengths.Add(outerEnd - innerEnd);
            }
        }
600
601
        /// <summary>
        /// Pairs the nodes of the two sequences position by position and stores each pair in
        /// <c>NodeDict</c>, keyed by the node from <paramref name="descendantsOriginal"/>.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes used as dictionary keys.</param>
        /// <param name="descendantsToEdit">Nodes used as dictionary values.</param>
        private void FillNodeDict(IEnumerable<HtmlNode> descendantsOriginal, IEnumerable<HtmlNode> descendantsToEdit)
        {
            // Zip stops as soon as the shorter of the two sequences is exhausted.
            var pairs = descendantsOriginal.Zip(descendantsToEdit, (original, editable) => (original, editable));

            foreach (var (original, editable) in pairs)
            {
                NodeDict.Add(original, editable);
            }
        }
615
616 553486ad Vojtěch Bartička
617
        /*
618
         *      Full HTML Preprocessing -------------------------------------------------------------------------------
619
         */
620
621
        /*
622
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
623
         */
624
625
        /// <summary>
        /// Inserts the markup for a single newly added tag into an already preprocessed document and
        /// regenerates the per-instance CSS data.
        /// </summary>
        /// <param name="htmlToEdit">Preprocessed HTML that is modified.</param>
        /// <param name="htmlOriginal">Original HTML. Kept for signature compatibility; it is not read here.</param>
        /// <param name="tagToAdd">Tag whose <c>Position</c> and <c>Length</c> define the selection to mark.</param>
        /// <param name="tags">All tags passed on to <c>GenerateCSS</c>.</param>
        /// <returns>The outer HTML of the modified document.</returns>
        private string PartialPreprocessHTMLAddTag(string htmlToEdit, string htmlOriginal, AnnotationTag tagToAdd, List<AnnotationTag> tags)
        {
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
            List<HtmlNode> addedForSelection = new();

            // The selection is the same for every node, so compute it once.
            int selectionStart = tagToAdd.Position;
            int selectionEnd = tagToAdd.Position + tagToAdd.Length;

            int i = 0;
            while (i < descendantsToEdit.Count)
            {
                // Each Solve* call changes the tree, so after a hit the node list is rebuilt and the
                // scan resumes from the same index (the break deliberately skips the i++).
                for (; i < descendantsToEdit.Count; i++)
                {
                    var node = descendantsToEdit[i];
                    if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
                        node.ParentNode.Name == "style" || node.ParentNode.Name.StartsWith("#"))
                    {
                        continue;
                    }

                    int nodeId = node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);
                    if (nodeId < 0 || nodeId >= TagStartPositions.Count)
                    {
                        // The parent carries no usable id, so there is no recorded position to
                        // compare against; skip instead of indexing out of range.
                        continue;
                    }

                    var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                    var end = TagClosingPositions[nodeId];

                    if (selectionStart < end && selectionEnd > start)
                    {
                        if (selectionStart <= start && selectionEnd >= end)
                        {
                            // Selection covers the whole node.
                            addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                        }
                        else if (selectionStart <= start)
                        {
                            // Selection ends inside the node.
                            addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                        }
                        else if (selectionEnd >= end)
                        {
                            // Selection starts inside the node.
                            addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                        }
                        else
                        {
                            // Selection lies strictly inside the node.
                            addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                        }

                        descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
                        break;
                    }
                }
            }

            GenerateCSS(tags);
            return docToEdit.DocumentNode.OuterHtml;
        }
685
686
687
        /// <summary>
        /// Strips the tag-related attributes from every node that belongs to the removed tag and
        /// regenerates the per-instance CSS data. The nodes themselves stay in the document.
        /// </summary>
        /// <param name="htmlToEdit">Preprocessed HTML that is modified.</param>
        /// <param name="htmlOriginal">Original HTML. Kept for signature compatibility; it is not read here.</param>
        /// <param name="tagToRemove">Tag whose <c>Id</c> is matched against the <c>aswi-tag-ef-id</c> attribute.</param>
        /// <param name="tags">All tags passed on to <c>GenerateCSS</c>.</param>
        /// <returns>The outer HTML of the modified document.</returns>
        private string PartialPreprocessHTMLRemoveTag(string htmlToEdit, string htmlOriginal, AnnotationTag tagToRemove, List<AnnotationTag> tags)
        {
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            string removedTagId = tagToRemove.Id.ToString();

            // Only attributes are removed, so the tree structure is stable and a single pass over a
            // snapshot of the nodes visits every match.
            foreach (var node in docToEdit.DocumentNode.DescendantsAndSelf().ToList())
            {
                if (!node.Attributes.Contains(TAG_EF_ID_ATTRIBUTE_NAME) ||
                    node.Attributes[TAG_EF_ID_ATTRIBUTE_NAME].Value != removedTagId)
                {
                    continue;
                }

                node.Attributes.Remove(TAG_EF_ID_ATTRIBUTE_NAME);
                node.Attributes.Remove(TAG_INSTANCE_ATTRIBUTE_NAME);
                node.Attributes.Remove("class");
                node.Attributes.Remove("start");
                node.Attributes.Remove("end");
            }

            GenerateCSS(tags);
            return docToEdit.DocumentNode.OuterHtml;
        }
752
753
        /*
754
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
755
         */
756
757
758 a6675a6d Vojtěch Bartička
        // TODO temporary: heuristic only, replace with a proper content-type check.
        /// <summary>
        /// Tells whether <paramref name="text"/> contains an opening <c>html</c> element.
        /// </summary>
        /// <param name="text">Document content to inspect; may be null.</param>
        /// <returns>
        /// True when an opening <c>&lt;html&gt;</c> tag is present, in any letter case and with or
        /// without attributes; false otherwise, including for null input.
        /// </returns>
        private bool IsHtml(string text)
        {
            if (text == null)
            {
                return false;
            }

            // "<html" must be followed by '>' or by whitespace plus attributes, so that other
            // element names that merely start with "html" are not matched.
            return Regex.IsMatch(text, @"<html(\s[^>]*)?>", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        }
763 be4deff8 Vojtěch Bartička
764
        /// <summary>
        /// Adds a new tag or sub-tag occurrence to an annotation and saves it.
        /// </summary>
        /// <param name="annotationId">Id of the annotation that receives the occurrence.</param>
        /// <param name="userId">Id of the user performing the operation.</param>
        /// <param name="userRole">Role of that user; roles below ADMINISTRATOR may only edit their own annotations.</param>
        /// <param name="request">Position, length, selected text, tag type/id and optional instance id.</param>
        /// <exception cref="InvalidOperationException">
        /// The annotation does not exist, the referenced tag or sub-tag cannot be resolved to exactly one
        /// row, or an identical occurrence already exists.
        /// </exception>
        /// <exception cref="UnauthorizedAccessException">A non-administrator edits an annotation assigned to someone else.</exception>
        /// <exception cref="ArgumentException">The request type is neither TAG nor SUBTAG.</exception>
        public void AddAnnotationInstance(Guid annotationId, Guid userId, ERole userRole, AnnotationInstanceAddRequest request)
        {
            var annotation = context.Annotations
               .Where(a => a.Id == annotationId)
               .Include(a => a.User)
               .Include(a => a.Document).ThenInclude(d => d.Content)
               .FirstOrDefault();

            // Same message as the other methods of this service use for a missing annotation.
            if (annotation is null)
            {
                throw new InvalidOperationException("Could not find annotation");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException($"User {userId} does not have assigned annotation {annotationId}");
            }

            AnnotationTag annotationTag = new()
            {
                Id = Guid.NewGuid(),
                Annotation = annotation,
                // Reuse the caller-supplied instance when given, otherwise start a new one.
                Instance = request.InstanceId ?? Guid.NewGuid(),
                Length = request.Length,
                Position = request.Position,
                SelectedText = request.SelectedText,
                Note = ""
            };

            if (request.Type == ETagType.TAG)
            {
                annotationTag.Tag = context.Tags.Where(t => t.Id == request.Id).Single();
                annotationTag.SubTag = null;

                if (annotationTag.Tag.SentimentEnabled)
                {
                    annotationTag.Sentiment = ETagSentiment.NEUTRAL;
                }

                // Reject an occurrence of the same tag at the same position and length in this annotation.
                if (context.AnnotationTags.Any(at =>
                    at.Position == annotationTag.Position &&
                    at.Length == annotationTag.Length &&
                    at.Annotation == annotation &&
                    at.Tag == annotationTag.Tag))
                {
                    throw new InvalidOperationException("Duplicate tag");
                }
            }
            else if (request.Type == ETagType.SUBTAG)
            {
                var subTag = context.SubTags.Where(st => st.Id == request.Id).Include(st => st.Tag).Single();
                annotationTag.SubTag = subTag;
                annotationTag.Tag = subTag.Tag;

                if (annotationTag.SubTag.SentimentEnabled)
                {
                    annotationTag.Sentiment = ETagSentiment.NEUTRAL;
                }

                // Reject an occurrence of the same tag and sub-tag at the same position and length.
                if (context.AnnotationTags.Any(at =>
                    at.Position == annotationTag.Position &&
                    at.Length == annotationTag.Length &&
                    at.Annotation == annotation &&
                    at.Tag == annotationTag.Tag &&
                    at.SubTag == annotationTag.SubTag))
                {
                    throw new InvalidOperationException("Duplicate tag");
                }
            }
            else
            {
                throw new ArgumentException($"Unknown tag type {request.Type}");
            }

            // The tracked annotation is only touched once validation has passed, so a rejected
            // request leaves no pending change behind in the context.
            if (annotation.State == EState.NEW)
            {
                annotation.State = EState.IN_PROGRESS;
            }

            annotation.LastModifiedTagId = annotationTag.Id;
            annotation.ModifiedType = EModified.ADDED;

            context.AnnotationTags.Add(annotationTag);
            context.SaveChanges();
        }
849 0f8d6304 Vojtěch Bartička
850
        /// <summary>
        /// Deletes one tag occurrence from an annotation and records the removal on the annotation.
        /// </summary>
        /// <param name="annotationId">Id of the annotation the occurrence belongs to.</param>
        /// <param name="tagInstanceId">Id of the tag occurrence (<c>AnnotationTag.Id</c>) to delete.</param>
        /// <param name="loggedUserId">Id of the user performing the operation.</param>
        /// <param name="userRole">Role of that user; roles below ADMINISTRATOR may only edit their own annotations.</param>
        /// <exception cref="InvalidOperationException">
        /// The annotation does not exist, or it has no tag occurrence with the given id.
        /// </exception>
        /// <exception cref="UnauthorizedAccessException">A non-administrator edits an annotation assigned to someone else.</exception>
        public void DeleteAnnotationInstance(Guid annotationId, Guid tagInstanceId, Guid loggedUserId, ERole userRole)
        {
            // FirstOrDefault plus a null check reports only "not found" as such; database failures
            // propagate with their real exception instead of being relabelled.
            var annotation = context.Annotations
               .Where(a => a.Id == annotationId)
               .Include(a => a.User)
               .Include(a => a.Document).ThenInclude(d => d.Content)
               .FirstOrDefault();

            if (annotation is null)
            {
                throw new InvalidOperationException("Could not find annotation");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != loggedUserId)
            {
                throw new UnauthorizedAccessException($"User {loggedUserId} does not have assigned annotation {annotationId}");
            }

            // The occurrence must belong to the annotation that was just authorised; otherwise a user
            // could delete occurrences from somebody else's annotation by passing their own annotation id.
            var annotationTag = context.AnnotationTags
                .FirstOrDefault(at => at.Id == tagInstanceId && at.Annotation.Id == annotationId);

            if (annotationTag is null)
            {
                throw new InvalidOperationException("Could not find tag instance");
            }

            annotation.LastModifiedTagId = annotationTag.Id;
            annotation.ModifiedType = EModified.REMOVED;

            context.AnnotationTags.Remove(annotationTag);
            context.SaveChanges();
        }
890 15c88dc1 Vojtěch Bartička
891
        /// <summary>
        /// Sets the sentiment of every tag occurrence of the given instance within an annotation.
        /// </summary>
        /// <param name="annotationId">Id of the annotation the instance belongs to.</param>
        /// <param name="instanceId">Instance id (<c>AnnotationTag.Instance</c>) shared by the occurrences.</param>
        /// <param name="userId">Id of the user performing the operation.</param>
        /// <param name="userRole">Role of that user; roles below ADMINISTRATOR may only edit their own annotations.</param>
        /// <param name="sentiment">Sentiment value to store.</param>
        /// <exception cref="InvalidOperationException">
        /// The annotation does not exist, or it has no occurrence with the given instance id.
        /// </exception>
        /// <exception cref="UnauthorizedAccessException">A non-administrator edits an annotation assigned to someone else.</exception>
        public void SetTagInstanceSentiment(Guid annotationId, Guid instanceId, Guid userId, ERole userRole, ETagSentiment sentiment)
        {
            // FirstOrDefault plus a null check reports only "not found" as such; database failures
            // propagate with their real exception instead of being relabelled.
            var annotation = context.Annotations
               .Where(a => a.Id == annotationId)
               .Include(a => a.User)
               .Include(a => a.Document).ThenInclude(d => d.Content)
               .FirstOrDefault();

            if (annotation is null)
            {
                throw new InvalidOperationException("Could not find annotation");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException($"User {userId} does not have assigned annotation {annotationId}");
            }

            // Restrict the update to the authorised annotation so occurrences of other annotations
            // that happen to carry the same instance id are never touched.
            var tagInstances = context.AnnotationTags
                .Where(at => at.Instance == instanceId && at.Annotation.Id == annotationId)
                .ToList();

            if (tagInstances.Count == 0)
            {
                throw new InvalidOperationException("No such instance found");
            }

            foreach (var tagInstance in tagInstances)
            {
                tagInstance.Sentiment = sentiment;
            }

            context.SaveChanges();
        }
931 0ea30313 Vojtěch Bartička
932
        /// <summary>
        /// Switches an annotation between the DONE and IN_PROGRESS states.
        /// </summary>
        /// <param name="annotationId">Id of the annotation to update.</param>
        /// <param name="userId">Id of the user performing the operation.</param>
        /// <param name="userRole">Role of that user; roles below ADMINISTRATOR may only edit their own annotations.</param>
        /// <param name="done">True stores DONE, false stores IN_PROGRESS.</param>
        /// <exception cref="InvalidOperationException">The annotation does not exist.</exception>
        /// <exception cref="UnauthorizedAccessException">A non-administrator edits an annotation assigned to someone else.</exception>
        public void MarkAnnotationAsDone(Guid annotationId, Guid userId, ERole userRole, bool done)
        {
            // FirstOrDefault plus a null check reports only "not found" as such; database failures
            // propagate with their real exception instead of being relabelled.
            var annotation = context.Annotations
               .Where(a => a.Id == annotationId)
               .Include(a => a.User)
               .Include(a => a.Document).ThenInclude(d => d.Content)
               .FirstOrDefault();

            if (annotation is null)
            {
                throw new InvalidOperationException("Could not find annotation");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException($"User {userId} does not have assigned annotation {annotationId}");
            }

            annotation.State = done ? EState.DONE : EState.IN_PROGRESS;
            context.SaveChanges();
        }
961 eff8ec56 Vojtěch Bartička
    }
962
}