Projekt

Obecné

Profil

Stáhnout (35.7 KB) Statistiky
| Větev: | Tag: | Revize:
1 eff8ec56 Vojtěch Bartička
using Core.Contexts;
2
using Core.Entities;
3
using Models.Annotations;
4
using Models.Enums;
5
using Serilog;
6
using System;
7
using System.Collections.Generic;
8
using System.Linq;
9
using System.Text;
10
using System.Threading.Tasks;
11 6bdb3d95 Vojtěch Bartička
using Microsoft.EntityFrameworkCore;
12 a6675a6d Vojtěch Bartička
using AutoMapper;
13
using Models.Tags;
14 3c185841 Vojtěch Bartička
using Ganss.XSS;
15
using HtmlAgilityPack;
16
using System.Text.RegularExpressions;
17 553486ad Vojtěch Bartička
using Newtonsoft.Json;
18 eff8ec56 Vojtěch Bartička
19
namespace Core.Services.AnnotationService
20
{
21
    public class AnnotationServiceEF : IAnnotationService
22
    {
23
        private readonly DatabaseContext context;
24
        private readonly ILogger logger;
25 a6675a6d Vojtěch Bartička
        private readonly IMapper mapper;
26 eff8ec56 Vojtěch Bartička
27 0a9f9349 Vojtěch Bartička
        private const string TAG_ID_ATTRIBUTE_NAME = "aswi-tag-id";
28 c9762683 Vojtěch Bartička
        private const string TAG_INSTANCE_ATTRIBUTE_NAME = "aswi-tag-instance";
29 553486ad Vojtěch Bartička
        private const string TAG_EF_ID_ATTRIBUTE_NAME = "aswi-tag-ef-id";
30 0a9f9349 Vojtěch Bartička
31 a6675a6d Vojtěch Bartička
        /// <summary>
        /// Creates the Entity Framework backed annotation service.
        /// </summary>
        /// <param name="context">Database context used for all queries and updates.</param>
        /// <param name="logger">Logger used for informational messages.</param>
        /// <param name="mapper">Mapper used to convert annotation tag entities into their response models.</param>
        public AnnotationServiceEF(DatabaseContext context, ILogger logger, IMapper mapper)
        {
            (this.context, this.logger, this.mapper) = (context, logger, mapper);
        }
37
38
        /// <summary>
        /// Assigns every requested document to every requested user by creating a new
        /// <see cref="Annotation"/> for each (user, document) pair that does not exist yet.
        /// Pairs that already exist are logged and skipped.
        /// </summary>
        /// <param name="request">Request carrying the document IDs and the user IDs to combine.</param>
        /// <param name="clientUserId">ID of the user who performs the assignment.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when at least one requested document ID does not exist, or when
        /// <paramref name="clientUserId"/> does not identify exactly one user.
        /// </exception>
        public void CreateDocumentAnnotations(AnnotationsAddRequest request, Guid clientUserId)
        {
            User addingUser = context.Users.Single(u => u.Id == clientUserId);

            // De-duplicate the requested IDs so that a repeated ID is not reported as a missing document
            var requestedDocumentIds = request.DocumentIdList.Distinct().ToList();

            // Check the documents exist
            var documents = context.Documents.Where(d => requestedDocumentIds.Contains(d.Id)).ToList();
            if (documents.Count != requestedDocumentIds.Count)
            {
                logger.Information("Received a non-existent Document ID when assigning documents to users");
                throw new InvalidOperationException($"{requestedDocumentIds.Count - documents.Count} of the received documents do not exist");
            }

            var users = context.Users.Where(u => request.UserIdList.Contains(u.Id)).ToList();

            // One timestamp for the whole batch so DateAssigned and DateLastChanged are identical
            DateTime now = DateTime.Now;

            foreach (var user in users)
            {
                // Only the document IDs are needed to detect existing assignments
                var alreadyAssignedDocumentIds = context.Annotations
                    .Where(a => a.User == user)
                    .Select(a => a.Document.Id)
                    .ToHashSet();

                foreach (var doc in documents)
                {
                    if (alreadyAssignedDocumentIds.Contains(doc.Id))
                    {
                        logger.Information($"User {user.Username} has already been assigned the document {doc.Id}, ignoring");
                        continue;
                    }

                    context.Annotations.Add(new Annotation()
                    {
                        User = user,
                        UserAssigned = addingUser,
                        DateAssigned = now,
                        DateLastChanged = now,
                        Document = doc,
                        State = EState.NEW,
                        Note = ""
                    });
                }
            }

            context.SaveChanges();
        }
77 6bdb3d95 Vojtěch Bartička
78
        /// <summary>
        /// Lists all annotations assigned to the given user together with the name of the
        /// annotated document and the annotation state.
        /// </summary>
        /// <param name="userId">ID of the user whose annotations are listed.</param>
        /// <returns>Response containing one entry per annotation of the user.</returns>
        public AnnotationListResponse GetUserAnnotations(Guid userId)
        {
            var annotations = context.Annotations
                .Where(a => a.User.Id == userId)
                .Include(a => a.Document)
                .ToList();

            // The document is eager-loaded with each annotation, so its name is read directly.
            // Pairing the annotations positionally with a second, independently ordered document
            // query could attach a document name to the wrong annotation.
            var infos = annotations
                .Select(a => new AnnotationListInfo()
                {
                    AnnotationId = a.Id,
                    DocumentName = a.Document.Name,
                    State = a.State
                })
                .ToList();

            return new AnnotationListResponse()
            {
                Annotations = infos
            };
        }
101 a6675a6d Vojtěch Bartička
102
        /// <summary>
        /// Loads an annotation together with its document, tag instances and the HTML that
        /// should be rendered for it. The rendered HTML and the tag position lists are cached
        /// on the annotation; the cache is built on first access and patched when the
        /// annotation reports an added or removed tag.
        /// </summary>
        /// <param name="annotationId">ID of the annotation to load.</param>
        /// <param name="userId">ID of the requesting user.</param>
        /// <param name="userRole">Role of the requesting user; roles below ADMINISTRATOR may only read their own annotations.</param>
        /// <returns>Annotation details including source content, rendered document and tag instances.</returns>
        /// <exception cref="InvalidOperationException">Thrown when no annotation with <paramref name="annotationId"/> exists.</exception>
        /// <exception cref="UnauthorizedAccessException">
        /// Thrown when a user below ADMINISTRATOR requests an annotation assigned to somebody else.
        /// </exception>
        public AnnotationInfo GetAnnotation(Guid annotationId, Guid userId, ERole userRole)
        {
            var annotation = context.Annotations
                .Where(a => a.Id == annotationId)
                .Include(a => a.User)
                .Include(a => a.Document).ThenInclude(d => d.Content)
                .First();

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException($"User {userId} does not have assigned annotation {annotationId}");
            }

            // Already eager-loaded by the ThenInclude above, no second query needed
            var documentContent = annotation.Document.Content;

            var tags = context.AnnotationTags.Where(at => at.Annotation.Id == annotationId)
                .Include(at => at.Tag)
                .ThenInclude(t => t.Category)
                .Include(at => at.SubTag)
                .OrderBy(at => at.Position)
                .ToList();

            List<TagInstanceInfo> tagInstanceInfos = tags
                .Select(tag => mapper.Map<TagInstanceInfo>(tag))
                .ToList();

            // Writes the current position lists and the rendered HTML into the annotation cache
            void SaveRenderCache(string renderedHtml)
            {
                annotation.ModifiedType = EModified.NONE;
                annotation.CachedStartPositions = JsonConvert.SerializeObject(TagStartPositions);
                annotation.CachedLengths = JsonConvert.SerializeObject(TagStartLengths);
                annotation.CachedClosingPositions = JsonConvert.SerializeObject(TagClosingPositions);
                annotation.CachedClosingLengths = JsonConvert.SerializeObject(TagClosingLengths);
                annotation.CachedDocumentHTML = renderedHtml;
                context.SaveChanges();
            }

            string docToRender;
            if (string.IsNullOrEmpty(annotation.CachedDocumentHTML))
            {
                // Nothing cached yet (null is treated like an empty cache).
                // Start from clean state so positions left over from a previous call on this
                // instance cannot shift the IDs assigned during the rebuild.
                TagStartPositions = new();
                TagStartLengths = new();
                TagClosingPositions = new();
                TagClosingLengths = new();
                NodeDict.Clear();

                docToRender = FullPreprocessHTML(documentContent.Content, tags);
                SaveRenderCache(docToRender);
            }
            else
            {
                docToRender = annotation.CachedDocumentHTML;
                TagStartPositions = JsonConvert.DeserializeObject<List<int>>(annotation.CachedStartPositions);
                TagStartLengths = JsonConvert.DeserializeObject<List<int>>(annotation.CachedLengths);
                TagClosingPositions = JsonConvert.DeserializeObject<List<int>>(annotation.CachedClosingPositions);
                TagClosingLengths = JsonConvert.DeserializeObject<List<int>>(annotation.CachedClosingLengths);

                // The annotation has been modified and we need to either add the new tag or remove the tag
                if (annotation.ModifiedType != EModified.NONE)
                {
                    if (annotation.ModifiedType == EModified.ADDED)
                    {
                        var lastModifiedTag = context.AnnotationTags.First(at => at.Id == annotation.LastModifiedTagId);
                        docToRender = PartialPreprocessHTMLAddTag(docToRender, documentContent.Content, lastModifiedTag, tags);
                    }
                    else if (annotation.ModifiedType == EModified.REMOVED)
                    {
                        // Only the ID of the removed tag is known here, so a stub entity carries it
                        docToRender = PartialPreprocessHTMLRemoveTag(docToRender, documentContent.Content, new AnnotationTag() { Id = annotation.LastModifiedTagId.Value }, tags);
                    }

                    SaveRenderCache(docToRender);
                }
            }

            // We probably cannot use AutoMapper since we are dealing with too many different entities
            AnnotationInfo annotationInfo = new()
            {
                SourceDocumentContent = documentContent.Content,
                DocumentToRender = docToRender,
                TagStartPositions = TagStartPositions.ToArray(),
                TagLengths = TagStartLengths.ToArray(),
                Note = annotation.Note,
                State = annotation.State,
                Type = IsHtml(documentContent.Content) ? EDocumentType.HTML : EDocumentType.TEXT,
                TagInstances = tagInstanceInfos
            };

            NodeDict.Clear();

            return annotationInfo;
        }
196
197 0a9f9349 Vojtěch Bartička
        // Per-request working state of the HTML preprocessing. The index into the four lists
        // is the value written into the "aswi-tag-id" attribute of the corresponding element.

        // Start offset of each element (outer start for original elements, text start for generated spans)
        private List<int> TagStartPositions = new List<int>();
        // Length of each element's opening tag (0 for generated spans)
        private List<int> TagStartLengths = new List<int>();
        // Offset at which each element's inner content ends
        private List<int> TagClosingPositions = new List<int>();
        // Length of each element's closing tag (0 for generated spans)
        private List<int> TagClosingLengths = new List<int>();
        // Maps every node of the untouched document to its counterpart in the document being edited
        private Dictionary<HtmlNode, HtmlNode> NodeDict = new Dictionary<HtmlNode, HtmlNode>();
202
203 553486ad Vojtěch Bartička
        /*
204
         *      Full HTML Preprocessing -------------------------------------------------------------------------------
205
         */
206
207
208
        /// <summary>
        /// Builds the HTML to render from scratch: assigns an ID to every element, wraps loose
        /// text in spans, splits text nodes into spans for every tag that overlaps them,
        /// sanitizes the result and prepends the generated style element.
        /// Appends to the instance position lists and fills <c>NodeDict</c> as a side effect.
        /// </summary>
        /// <param name="htmlSource">Source content of the document.</param>
        /// <param name="tags">Tag instances to mark in the document.</param>
        /// <returns>Sanitized HTML with annotation spans and the generated CSS.</returns>
        private string FullPreprocessHTML(string htmlSource, List<AnnotationTag> tags)
        {
            var docOriginal = new HtmlDocument();
            docOriginal.LoadHtml(htmlSource);
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlSource);

            // docOriginal is never modified, so its node sequence can be materialized once
            var descendantsOriginal = docOriginal.DocumentNode.DescendantsAndSelf().ToList();
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();

            int currentId = 0;

            FillNodeDict(descendantsOriginal, descendantsToEdit);
            AssignIdsToOriginalDocument(descendantsOriginal, ref currentId);

            WrapTextInSpan(descendantsOriginal, docToEdit);

            // WrapTextInSpan replaces text nodes of docToEdit with spans, so the snapshot taken
            // above is stale; walk the current tree instead of detached nodes.
            descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
            int descCount = descendantsToEdit.Count;

            foreach (var tag in tags)
            {
                int selectionStart = tag.Position;
                int selectionEnd = tag.Position + tag.Length;
                List<HtmlNode> addedForSelection = new();

                for (int i = 0; i < descCount; i++)
                {
                    var node = descendantsToEdit[i];

                    // Only text nodes that live inside an ID-carrying element are candidates.
                    // Text directly under a "#..." node (e.g. the document root) has no ID and
                    // would otherwise index the position lists with -1.
                    if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
                        node.ParentNode.Name == "style" || node.ParentNode.Name.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    int nodeId = node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);

                    // Extent of the parent's inner content
                    var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                    var end = TagClosingPositions[nodeId];

                    if (selectionStart >= end || selectionEnd <= start)
                    {
                        // The tag does not overlap this text node
                        continue;
                    }

                    if (selectionStart <= start && selectionEnd >= end)
                    {
                        addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else if (selectionStart <= start)
                    {
                        addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else if (selectionEnd >= end)
                    {
                        addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else
                    {
                        addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }

                    // The tree changed: refresh the snapshot. Index i now holds the first span
                    // inserted in place of the text node; it is an element, so moving on to
                    // i + 1 skips nothing that could match.
                    descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
                    descCount = descendantsToEdit.Count;
                }
            }

            string docToRender = docToEdit.DocumentNode.OuterHtml;

            // NOTE(review): "aswi-tag-ef-id", which CreateSpan writes, is not on this allow-list,
            // so it is presumably stripped by the sanitizer - confirm this is intended.
            HtmlSanitizer sanitizer = new HtmlSanitizer();
            sanitizer.AllowedAttributes.Clear();
            sanitizer.AllowedAttributes.Add(TAG_ID_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add("end");
            sanitizer.AllowedAttributes.Add("start");
            sanitizer.AllowedAttributes.Add("class");
            if (sanitizer.AllowedTags.Contains("script"))
            {
                sanitizer.AllowedTags.Remove("script");
            }
            if (!sanitizer.AllowedTags.Contains("style"))
            {
                sanitizer.AllowedTags.Add("style");
            }
            docToRender = sanitizer.Sanitize(docToRender);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(docToRender);
            var cssNode = GenerateCSS(doc, tags, new());
            doc.DocumentNode.ChildNodes.Insert(0, cssNode);

            return doc.DocumentNode.OuterHtml;
        }
302
303
        /// <summary>
        /// Replaces a text node that is completely covered by the selection with a single
        /// annotated span. Both edge markers are set only when the selection matches the
        /// node's extent exactly; otherwise no marker is set.
        /// </summary>
        /// <returns>The span inserted in place of <paramref name="node"/>.</returns>
        private HtmlNode SolveFullFill(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit, AnnotationTag tag)
        {
            string coveredText = node.InnerText;

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            bool exactMatch = selectionStart == start && selectionEnd == end;
            EPosition marker = exactMatch ? EPosition.MARK_LEFT_RIGHT : EPosition.MARK_NONE;

            // The new span's ID is the next free index of the position lists
            HtmlNode replacement = CreateSpan(docToEdit, coveredText, TagStartPositions.Count, tag.Instance, tag.Id, start, marker);
            parent.ChildNodes.Insert(position, replacement);

            return replacement;
        }
323
324
        /// <summary>
        /// Splits a text node whose beginning is covered by the selection while its end is not:
        /// the covered prefix becomes an annotated span marked on the right, the remainder
        /// becomes a plain span.
        /// </summary>
        /// <returns>The two spans inserted in place of <paramref name="node"/>.</returns>
        private List<HtmlNode> SolveRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTag tag)
        {
            // partial fill, end gap
            string text = node.InnerText;
            string textAfter = text.Substring(Math.Min(selectionStart - start + tag.Length, text.Length));
            // Clamp like textAfter does, so a selection reaching past the text cannot throw here
            string textSelected = text.Substring(0, Math.Min(selectionEnd - start, text.Length));

            var parentNode = node.ParentNode;
            int nodeIndex = parentNode.ChildNodes.IndexOf(node);
            parentNode.ChildNodes.RemoveAt(nodeIndex);

            int spanSelectedStart = start;
            int spanAfterStart = start + textSelected.Length;

            // Each CreateSpan call appends to the position lists, so TagStartPositions.Count
            // yields a fresh ID for every span
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TagStartPositions.Count, tag.Instance, tag.Id, spanSelectedStart, EPosition.MARK_RIGHT);
            parentNode.ChildNodes.Insert(nodeIndex, spanSelected);

            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TagStartPositions.Count, null, null, spanAfterStart);
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanAfter);

            return new() { spanSelected, spanAfter };
        }
347
348
        /// <summary>
        /// Splits a text node whose end is covered by the selection while its beginning is not:
        /// the uncovered prefix becomes a plain span, the covered rest becomes an annotated
        /// span marked on the left.
        /// </summary>
        /// <returns>The annotated span followed by the plain span.</returns>
        private List<HtmlNode> SolveLeftGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTag tag)
        {
            // partial fill, start gap
            string content = node.InnerText;
            int gapLength = selectionStart - start;
            string leadingText = content.Substring(0, gapLength);
            int coveredLength = Math.Min(tag.Length, content.Length - leadingText.Length);
            string coveredText = content.Substring(gapLength, coveredLength);

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            // Plain span for the text in front of the selection
            HtmlNode leadingSpan = CreateSpan(docToEdit, leadingText, TagStartPositions.Count, null, null, start);
            parent.ChildNodes.Insert(position, leadingSpan);

            // Annotated span for the selected part, directly after the plain one
            HtmlNode coveredSpan = CreateSpan(docToEdit, coveredText, TagStartPositions.Count, tag.Instance, tag.Id, start + leadingText.Length, EPosition.MARK_LEFT);
            parent.ChildNodes.Insert(position + 1, coveredSpan);

            var created = new List<HtmlNode>();
            created.Add(coveredSpan);
            created.Add(leadingSpan);
            return created;
        }
371
372
        /// <summary>
        /// Splits a text node that contains the whole selection with uncovered text on both
        /// sides: plain span, annotated span marked on both edges, plain span.
        /// </summary>
        /// <returns>The annotated span followed by the leading and trailing plain spans.</returns>
        private List<HtmlNode> SolveLeftRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                                 AnnotationTag tag)
        {
            // partial fill, start gap end gap
            string content = node.InnerText;
            int gapLength = selectionStart - start;
            string leadingText = content.Substring(0, gapLength);
            string trailingText = content.Substring(gapLength + tag.Length);
            string coveredText = content.Substring(gapLength, tag.Length);

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            int coveredStart = start + leadingText.Length;
            int trailingStart = coveredStart + coveredText.Length;

            // Spans are created and inserted left to right; every CreateSpan call takes the
            // next free index of the position lists as its ID
            HtmlNode leadingSpan = CreateSpan(docToEdit, leadingText, TagStartPositions.Count, null, null, start);
            parent.ChildNodes.Insert(position, leadingSpan);

            HtmlNode coveredSpan = CreateSpan(docToEdit, coveredText, TagStartPositions.Count, tag.Instance, tag.Id, coveredStart, EPosition.MARK_LEFT_RIGHT);
            parent.ChildNodes.Insert(position + 1, coveredSpan);

            HtmlNode trailingSpan = CreateSpan(docToEdit, trailingText, TagStartPositions.Count, null, null, trailingStart);
            parent.ChildNodes.Insert(position + 2, trailingSpan);

            var created = new List<HtmlNode>();
            created.Add(coveredSpan);
            created.Add(leadingSpan);
            created.Add(trailingSpan);
            return created;
        }
400
401 553486ad Vojtěch Bartička
        /// <summary>
        /// Creates a span holding <paramref name="text"/>, registers its extent in the position
        /// lists and tags it with <paramref name="tagId"/>. When <paramref name="instanceId"/>
        /// is given, the span is additionally marked as an annotation with its instance ID,
        /// entity ID and the requested edge markers.
        /// </summary>
        /// <param name="doc">Document that owns the new element.</param>
        /// <param name="text">Content of the span.</param>
        /// <param name="tagId">Value of the "aswi-tag-id" attribute.</param>
        /// <param name="instanceId">Tag instance ID, or null for a plain (non-annotated) span.</param>
        /// <param name="entityId">ID of the annotation tag entity; only written for annotated spans.</param>
        /// <param name="startPosition">Start offset recorded for the span.</param>
        /// <param name="position">Which edges of the span get a "start"/"end" marker attribute.</param>
        /// <returns>The new, not yet inserted span.</returns>
        private HtmlNode CreateSpan(HtmlDocument doc, string text, int tagId, Guid? instanceId, Guid? entityId, int startPosition, EPosition position = EPosition.MARK_NONE)
        {
            HtmlNode element = doc.CreateElement("span");
            element.InnerHtml = text;

            // Generated spans have no opening/closing tag in the source, hence the zero lengths
            TagStartPositions.Add(startPosition);
            TagStartLengths.Add(0);
            TagClosingPositions.Add(startPosition + text.Length);
            TagClosingLengths.Add(0);
            element.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, tagId.ToString());

            if (instanceId == null)
            {
                return element;
            }

            element.AddClass("annotation");
            element.Attributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME, instanceId.Value.ToString());
            element.Attributes.Add(TAG_EF_ID_ATTRIBUTE_NAME, entityId.ToString());

            bool markStart = position is EPosition.MARK_LEFT or EPosition.MARK_LEFT_RIGHT;
            bool markEnd = position is EPosition.MARK_RIGHT or EPosition.MARK_LEFT_RIGHT;

            if (markStart)
            {
                element.Attributes.Add("start", "1");
            }
            if (markEnd)
            {
                element.Attributes.Add("end", "1");
            }

            return element;
        }
429
430
        /// <summary>
        /// Edges of an annotated span that receive a marker attribute in <c>CreateSpan</c>.
        /// </summary>
        private enum EPosition
        {
            /// <summary>No marker attribute.</summary>
            MARK_NONE = 0,

            /// <summary>Both the "start" and the "end" attribute.</summary>
            MARK_LEFT_RIGHT = 2,

            /// <summary>Only the "end" attribute.</summary>
            MARK_RIGHT = 3,

            /// <summary>Only the "start" attribute.</summary>
            MARK_LEFT = 5
        }
437
438
        /// <summary>
        /// Wraps text nodes of the document being edited in ID-carrying spans wherever a text
        /// node has no element of its own: the single text node of a plain-text document, and
        /// text nodes of elements that mix "#"-named children with element children.
        /// Extents are read from the original document; the editable counterparts are found
        /// through <c>NodeDict</c>.
        /// </summary>
        /// <param name="descendantsOriginal">All nodes of the untouched document.</param>
        /// <param name="docToEdit">Document in which the spans are created.</param>
        private void WrapTextInSpan(IEnumerable<HtmlNode> descendantsOriginal, HtmlDocument docToEdit)
        {
            // Creates the covering span for a text node and registers its extent;
            // the span's ID is the index of the entry just added to the position lists
            HtmlNode CoverWithSpan(HtmlNode textNode)
            {
                HtmlNode cover = docToEdit.CreateElement("span");
                cover.InnerHtml = textNode.InnerHtml;
                TagStartPositions.Add(textNode.InnerStartIndex);
                TagStartLengths.Add(0);
                TagClosingPositions.Add(textNode.InnerStartIndex + textNode.InnerLength);
                TagClosingLengths.Add(0);
                cover.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, (TagStartPositions.Count - 1).ToString());
                return cover;
            }

            // Special case for non-html documents
            if (descendantsOriginal.Count() == 2)
            {
                var root = descendantsOriginal.ElementAt(0);
                var onlyChild = descendantsOriginal.ElementAt(1);
                if (root.Name == "#document" && onlyChild.Name == "#text")
                {
                    HtmlNode cover = CoverWithSpan(onlyChild);

                    var editableRoot = NodeDict[root];
                    editableRoot.ChildNodes.RemoveAt(0);
                    editableRoot.ChildNodes.Add(cover);

                    return;
                }
            }

            foreach (var node in descendantsOriginal)
            {
                var editableNode = NodeDict[node];

                if (node.Name.Contains("#"))
                {
                    continue;
                }

                bool hasHashChild = node.ChildNodes.Any(c => c.Name.Contains("#"));
                bool hasElementChild = node.ChildNodes.Any(c => !c.Name.Contains("#"));

                // Only mixed content needs wrapping
                if (!hasHashChild || !hasElementChild)
                {
                    continue;
                }

                foreach (var textChild in node.ChildNodes.Where(c => c.Name.Contains("#text")))
                {
                    HtmlNode cover = CoverWithSpan(textChild);

                    // Swap the editable counterpart of the text node for the span
                    int position = editableNode.ChildNodes.IndexOf(NodeDict[textChild]);
                    editableNode.ChildNodes.RemoveAt(position);
                    editableNode.ChildNodes.Insert(position, cover);
                }
            }
        }
520
521
        /// <summary>
        /// Builds the style element for the rendered document: base rules for spans, one
        /// colour/padding rule per tag instance and the rules for the "start"/"end" markers.
        /// </summary>
        /// <param name="docToEdit">Document that owns the new style element.</param>
        /// <param name="tags">Tag instances that get their own rule.</param>
        /// <param name="paddings">Not used by this method.</param>
        /// <returns>The new, not yet inserted style element.</returns>
        private HtmlNode GenerateCSS(HtmlDocument docToEdit, List<AnnotationTag> tags, List<int> paddings)
        {
            var css = new StringBuilder();
            css.Append("span.annotation {border-bottom: 2px solid;}");
            css.Append("span {line-height: 30px}\n");

            // TODO temporary
            int paddingSeed = 0;
            foreach (var tag in tags)
            {
                string rule = $"span[{TAG_INSTANCE_ATTRIBUTE_NAME}=\"{tag.Instance}\"] {{ border-color:{tag.Tag.Color}; padding-bottom: {paddingSeed % 5}px }}";
                css.Append(rule);
                paddingSeed += 2;
            }

            css.Append("span[end=\"1\"] {border-end-end-radius: 0px; border-right: 1px solid darkgray}\n");
            css.Append("span[start=\"1\"] {border-end-start-radius: 0px; border-left: 1px solid darkgray}\n");

            HtmlNode styleElement = docToEdit.CreateElement("style");
            styleElement.InnerHtml = css.ToString();
            return styleElement;
        }
542
543
        /// <summary>
        /// Records the extent of every element of the original document in the position lists
        /// and writes the resulting list index as "aswi-tag-id" onto the element's counterpart
        /// in the document being edited. Nodes whose name contains "#" are skipped.
        /// </summary>
        /// <param name="descendantsOriginal">All nodes of the untouched document.</param>
        /// <param name="currentId">Receives the ID assigned to the last processed element.</param>
        private void AssignIdsToOriginalDocument(IEnumerable<HtmlNode> descendantsOriginal, ref int currentId)
        {
            foreach (var source in descendantsOriginal)
            {
                var editable = NodeDict[source];

                if (source.Name.Contains("#"))
                {
                    continue;
                }

                int outerStart = source.OuterStartIndex;
                int innerStart = source.InnerStartIndex;

                // Opening tag: from the outer start up to the inner start
                TagStartPositions.Add(outerStart);
                TagStartLengths.Add(innerStart - outerStart);
                currentId = TagStartPositions.Count - 1;
                editable.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, currentId.ToString());

                // Closing tag: from the inner end up to the outer end
                int innerEnd = source.InnerStartIndex + source.InnerLength;
                int outerEnd = source.OuterStartIndex + source.OuterLength;
                TagClosingPositions.Add(innerEnd);
                TagClosingLengths.Add(outerEnd - innerEnd);
            }
        }
566
567
        /// <summary>
        /// Pairs the nodes of the two sequences by position and stores each pair in
        /// <c>NodeDict</c> (original node as key, editable node as value).
        /// </summary>
        /// <param name="descendantsOriginal">Nodes of the untouched document.</param>
        /// <param name="descendantsToEdit">Nodes of the document being edited, in the same order.</param>
        private void FillNodeDict(IEnumerable<HtmlNode> descendantsOriginal, IEnumerable<HtmlNode> descendantsToEdit)
        {
            var pairs = descendantsOriginal.Zip(descendantsToEdit, (source, editable) => (source, editable));
            foreach (var (source, editable) in pairs)
            {
                NodeDict.Add(source, editable);
            }
        }
581
582 553486ad Vojtěch Bartička
583
        /*
584
         *      End of Full HTML Preprocessing ------------------------------------------------------------------------
585
         */
586
587
        /*
588
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
589
         */
590
591
        /// <summary>
        /// Updates already preprocessed HTML so that it also contains the markup for
        /// <paramref name="tagToAdd"/>, without rebuilding the whole document.
        /// </summary>
        /// <param name="htmlToEdit">
        /// HTML to modify. Its first top-level node is removed by <c>RemoveCSS</c> before processing,
        /// and the node produced by <c>GenerateCSS</c> is inserted at index 0 before serialisation.
        /// </param>
        /// <param name="htmlOriginal">Not used by this method; kept so the signature stays unchanged for callers.</param>
        /// <param name="tagToAdd">Tag whose <c>Position</c> and <c>Length</c> define the selection to mark.</param>
        /// <param name="tags">Tags handed to <c>GenerateCSS</c> when the leading CSS node is regenerated.</param>
        /// <returns>The outer HTML of the modified document.</returns>
        private string PartialPreprocessHTMLAddTag(string htmlToEdit, string htmlOriginal, AnnotationTag tagToAdd, List<AnnotationTag> tags)
        {
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            // The snapshot is taken before RemoveCSS, so it still lists the node removed there and
            // its descendants; presumably that is why text under a "style" parent is skipped below.
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
            RemoveCSS(docToEdit);

            // The selection does not change while scanning, so compute its bounds once.
            int selectionStart = tagToAdd.Position;
            int selectionEnd = tagToAdd.Position + tagToAdd.Length;

            // Nodes created for this selection; they (and their children) must not be processed again.
            var addedForSelection = new HashSet<HtmlNode>();

            int i = 0;
            while (i < descendantsToEdit.Count)
            {
                var node = descendantsToEdit[i];

                // Only text nodes with an element parent, not produced by this call, are candidates.
                if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
                    node.ParentNode.Name == "style" || node.ParentNode.Name.StartsWith("#", StringComparison.Ordinal))
                {
                    i++;
                    continue;
                }

                // NOTE(review): when the parent lacks the attribute, -1 is used as the index below - confirm
                // that every eligible parent carries the tag id attribute.
                int nodeId = node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);

                // Range occupied by the parent's content; presumably offsets into the original HTML - TODO confirm.
                var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                var end = TagClosingPositions[nodeId];

                // No overlap between the selection and this node: move on.
                if (selectionStart >= end || selectionEnd <= start)
                {
                    i++;
                    continue;
                }

                if (selectionStart <= start && selectionEnd >= end)
                {
                    // Selection covers the whole range.
                    addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }
                else if (selectionStart <= start)
                {
                    // Selection covers the beginning and leaves a gap on the right.
                    addedForSelection.UnionWith(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }
                else if (selectionEnd >= end)
                {
                    // Selection covers the end and leaves a gap on the left.
                    addedForSelection.UnionWith(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }
                else
                {
                    // Selection lies strictly inside the range, leaving gaps on both sides.
                    addedForSelection.UnionWith(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }

                // The tree changed: take a fresh snapshot and continue from the same index
                // (the index is intentionally not advanced here).
                descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
            }

            var cssNode = GenerateCSS(docToEdit, tags, new());
            docToEdit.DocumentNode.ChildNodes.Insert(0, cssNode);

            return docToEdit.DocumentNode.OuterHtml;
        }
654
655
656
        /// <summary>
        /// Updates already preprocessed HTML so that it no longer marks <paramref name="tagToRemove"/>:
        /// every node whose <c>aswi-tag-ef-id</c> attribute equals the tag's id loses its tag-related
        /// attributes. The nodes themselves stay in the document.
        /// </summary>
        /// <param name="htmlToEdit">
        /// HTML to modify. Its first top-level node is removed by <c>RemoveCSS</c> before processing,
        /// and the node produced by <c>GenerateCSS</c> is inserted at index 0 before serialisation.
        /// </param>
        /// <param name="htmlOriginal">Not used by this method; kept so the signature stays unchanged for callers.</param>
        /// <param name="tagToRemove">Tag whose <c>Id</c> identifies the nodes to clean up.</param>
        /// <param name="tags">Tags handed to <c>GenerateCSS</c> when the leading CSS node is regenerated.</param>
        /// <returns>The outer HTML of the modified document.</returns>
        private string PartialPreprocessHTMLRemoveTag(string htmlToEdit, string htmlOriginal, AnnotationTag tagToRemove, List<AnnotationTag> tags)
        {
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);
            RemoveCSS(docToEdit);

            string removedTagId = tagToRemove.Id.ToString();

            // Only attributes are modified, so the tree structure is stable and one pass visits every
            // node. (Re-snapshotting the tree while keeping the old index could skip the node that
            // follows the first match.) The list is materialised so the traversal is independent of
            // the edits made below.
            foreach (var node in docToEdit.DocumentNode.DescendantsAndSelf().ToList())
            {
                if (!node.Attributes.Contains(TAG_EF_ID_ATTRIBUTE_NAME) ||
                    node.Attributes[TAG_EF_ID_ATTRIBUTE_NAME].Value != removedTagId)
                {
                    continue;
                }

                // Strip everything that marks this node as belonging to the removed tag.
                node.Attributes.Remove(TAG_EF_ID_ATTRIBUTE_NAME);
                node.Attributes.Remove(TAG_INSTANCE_ATTRIBUTE_NAME);
                node.Attributes.Remove("class");
                node.Attributes.Remove("start");
                node.Attributes.Remove("end");
            }

            var cssNode = GenerateCSS(docToEdit, tags, new());
            docToEdit.DocumentNode.ChildNodes.Insert(0, cssNode);

            return docToEdit.DocumentNode.OuterHtml;
        }
724
725
        /// <summary>
        /// Removes the first top-level child node of <paramref name="doc"/>.
        /// </summary>
        /// <param name="doc">Document whose leading node is dropped.</param>
        /// <remarks>
        /// NOTE(review): presumably the first child is always the generated CSS node, since the
        /// callers in this class insert one at index 0 before serialising - confirm.
        /// </remarks>
        private void RemoveCSS(HtmlDocument doc)
        {
            var topLevelNodes = doc.DocumentNode.ChildNodes;
            topLevelNodes.RemoveAt(0);
        }
729
730
        /*
731
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
732
         */
733
734
735 a6675a6d Vojtěch Bartička
        // TODO temporary
736
        /// <summary>
        /// Tells whether <paramref name="text"/> contains the literal, case-sensitive marker <c>&lt;html&gt;</c>.
        /// </summary>
        /// <param name="text">Text to inspect.</param>
        /// <returns><c>true</c> when the marker occurs anywhere in the text; otherwise <c>false</c>.</returns>
        private bool IsHtml(string text)
        {
            const string htmlMarker = "<html>";
            return text.IndexOf(htmlMarker, StringComparison.Ordinal) >= 0;
        }
740 be4deff8 Vojtěch Bartička
741
        /// <summary>
        /// Adds a new tag (or sub-tag) occurrence to an annotation and saves the change.
        /// </summary>
        /// <param name="annotationId">Id of the annotation that receives the new occurrence.</param>
        /// <param name="userId">Id of the user performing the operation.</param>
        /// <param name="userRole">
        /// Role of that user; roles below <c>ERole.ADMINISTRATOR</c> may only modify annotations
        /// whose <c>User</c> is the same user.
        /// </param>
        /// <param name="request">
        /// Describes the occurrence: tag or sub-tag id, type, position, length and an optional
        /// instance id (a new one is generated when it is missing).
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// The annotation does not exist, the referenced tag or sub-tag cannot be resolved to exactly
        /// one entity, or an equal occurrence (same annotation, position, length, tag and - for
        /// sub-tags - sub-tag) already exists.
        /// </exception>
        /// <exception cref="UnauthorizedAccessException">A non-administrator tried to modify an annotation of another user.</exception>
        /// <exception cref="ArgumentException">The request type is neither <c>TAG</c> nor <c>SUBTAG</c>.</exception>
        public void AddAnnotationInstance(Guid annotationId, Guid userId, ERole userRole, AnnotationInstanceAddRequest request)
        {
            var annotation = context.Annotations
               .Where(a => a.Id == annotationId)
               .Include(a => a.User)
               .Include(a => a.Document).ThenInclude(d => d.Content)
               .FirstOrDefault();

            // Same exception type First() would raise, but with the explicit message that
            // DeleteAnnotationInstance uses for this situation.
            if (annotation == null)
            {
                throw new InvalidOperationException("Could not find annotation");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != userId)
            {
                throw new UnauthorizedAccessException($"User {userId} does not have assigned annotation {annotationId}");
            }

            AnnotationTag annotationTag = new()
            {
                Id = Guid.NewGuid(),
                Annotation = annotation,
                Instance = request.InstanceId ?? Guid.NewGuid(),
                Length = request.Length,
                Position = request.Position,
                Note = ""
            };

            if (request.Type == ETagType.TAG)
            {
                annotationTag.Tag = context.Tags.Single(t => t.Id == request.Id);
                annotationTag.SubTag = null;
            }
            else if (request.Type == ETagType.SUBTAG)
            {
                var subTag = context.SubTags.Where(st => st.Id == request.Id).Include(st => st.Tag).Single();
                annotationTag.SubTag = subTag;
                annotationTag.Tag = subTag.Tag;
            }
            else
            {
                throw new ArgumentException($"Unknown tag type {request.Type}");
            }

            // If the same annotation already has a tag with the same position, length and tag, reject it.
            IQueryable<AnnotationTag> duplicates = context.AnnotationTags.Where(at =>
                at.Position == annotationTag.Position &&
                at.Length == annotationTag.Length &&
                at.Annotation == annotation &&
                at.Tag == annotationTag.Tag);

            // Sub-tag occurrences are duplicates only when the sub-tag matches as well.
            if (annotationTag.SubTag != null)
            {
                duplicates = duplicates.Where(at => at.SubTag == annotationTag.SubTag);
            }

            if (duplicates.Any())
            {
                throw new InvalidOperationException("Duplicate tag");
            }

            // Record what changed last on the annotation.
            annotation.LastModifiedTagId = annotationTag.Id;
            annotation.ModifiedType = EModified.ADDED;

            context.AnnotationTags.Add(annotationTag);
            context.SaveChanges();
        }
810 0f8d6304 Vojtěch Bartička
811
        /// <summary>
        /// Removes one tag occurrence from an annotation and saves the change.
        /// </summary>
        /// <param name="annotationId">Id of the annotation that owns the occurrence.</param>
        /// <param name="tagInstanceId">Id of the annotation tag to delete (matched against its <c>Id</c>).</param>
        /// <param name="loggedUserId">Id of the user performing the operation.</param>
        /// <param name="userRole">
        /// Role of that user; roles below <c>ERole.ADMINISTRATOR</c> may only modify annotations
        /// whose <c>User</c> is the same user.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// The annotation does not exist ("Could not find annotation"), or no tag with the given id
        /// belongs to it ("Could not find tag instance").
        /// </exception>
        /// <exception cref="UnauthorizedAccessException">A non-administrator tried to modify an annotation of another user.</exception>
        /// <remarks>
        /// Only the "not found" case is reported as "Could not find annotation". Other failures of
        /// the lookup (for example database errors) now propagate unchanged instead of being relabelled.
        /// </remarks>
        public void DeleteAnnotationInstance(Guid annotationId, Guid tagInstanceId, Guid loggedUserId, ERole userRole)
        {
            var annotation = context.Annotations
               .Where(a => a.Id == annotationId)
               .Include(a => a.User)
               .Include(a => a.Document).ThenInclude(d => d.Content)
               .FirstOrDefault();

            if (annotation == null)
            {
                throw new InvalidOperationException("Could not find annotation");
            }

            if (userRole < ERole.ADMINISTRATOR && annotation.User.Id != loggedUserId)
            {
                throw new UnauthorizedAccessException($"User {loggedUserId} does not have assigned annotation {annotationId}");
            }

            // One query instead of Any + First. The tag must belong to the annotation the caller was
            // authorised for; otherwise a user could delete tags of somebody else's annotation.
            var annotationTag = context.AnnotationTags
                .FirstOrDefault(at => at.Id == tagInstanceId && at.Annotation == annotation);

            if (annotationTag == null)
            {
                throw new InvalidOperationException("Could not find tag instance");
            }

            // Record what changed last on the annotation.
            annotation.LastModifiedTagId = annotationTag.Id;
            annotation.ModifiedType = EModified.REMOVED;

            context.AnnotationTags.Remove(annotationTag);
            context.SaveChanges();
        }
851 eff8ec56 Vojtěch Bartička
    }
852
}