Projekt

Obecné

Profil

Stáhnout (31.6 KB) Statistiky
| Větev: | Tag: | Revize:
1
using Core.Entities;
2
using Core.GraphUtils;
3
using Ganss.XSS;
4
using HtmlAgilityPack;
5
using Models.Tags;
6
using System;
7
using System.Collections.Generic;
8
using System.Linq;
9
using System.Text;
10
using System.Threading.Tasks;
11

    
12
namespace Core.Services
13
{
14
    public class HTMLService : IHTMLService
15
    {
16
        /// <summary>
        /// Name of the attribute carrying the sequential ID assigned to every element of the
        /// rendered document. The ID is used as an index into the cached position lists.
        /// </summary>
        private const string TAG_ID_ATTRIBUTE_NAME = "aswi-tag-id";

        /// <summary>
        /// Name of the attribute carrying the tag instance ID of an annotated span,
        /// so that each marked-out tag instance can be styled.
        /// </summary>
        private const string TAG_INSTANCE_ATTRIBUTE_NAME = "aswi-tag-instance";

        /// <summary>
        /// Name of the attribute carrying the EF entity ID of a tag, so that the frontend
        /// can reference the tag to the backend (e.g. for deletion).
        /// </summary>
        private const string TAG_EF_ID_ATTRIBUTE_NAME = "aswi-tag-ef-id";
30

    
31
        /// <summary>
        /// Preprocessing state that is cached in the database between requests.
        /// All position lists are indexed by the value of the tag ID attribute.
        /// </summary>
        public class CachedInfo
        {
            /// <summary>
            /// Start offsets of the opening tags in the original HTML document.
            /// </summary>
            public List<int> TagStartPositions = new List<int>();

            /// <summary>
            /// Lengths of the opening tags in the original HTML document
            /// (0 for spans that were added by the preprocessing).
            /// </summary>
            public List<int> TagStartLengths = new List<int>();

            /// <summary>
            /// Start offsets of the closing tags in the original HTML document.
            /// </summary>
            public List<int> TagClosingPositions = new List<int>();

            /// <summary>
            /// Lengths of the closing tags in the original HTML document
            /// (0 for spans that were added by the preprocessing).
            /// </summary>
            public List<int> TagClosingLengths = new List<int>();

            /// <summary>
            /// Mapping of original-document nodes to editable-document nodes.
            /// Marked as unused by the original author.
            /// </summary>
            public Dictionary<HtmlNode, HtmlNode> NodeDict = new Dictionary<HtmlNode, HtmlNode>();

            /// <summary>
            /// Cached CSS information for each tag instance.
            /// </summary>
            public List<TagInstanceCSSInfo> TagInstanceCSS = new List<TagInstanceCSSInfo>();
        }
61

    
62
        // Working state of the current preprocessing run; mirrors the fields of CachedInfo.
        private List<int> TagStartPositions = new List<int>();
        private List<int> TagStartLengths = new List<int>();
        private List<int> TagClosingPositions = new List<int>();
        private List<int> TagClosingLengths = new List<int>();
        private Dictionary<HtmlNode, HtmlNode> NodeDict = new Dictionary<HtmlNode, HtmlNode>();
        private List<TagInstanceCSSInfo> TagInstanceCSS = new List<TagInstanceCSSInfo>();

        /// <summary>
        /// Loads previously cached info into the working fields of this service.
        /// The lists are taken over by reference, not copied.
        /// </summary>
        /// <param name="cachedInfo">cached info produced by an earlier preprocessing run</param>
        private void UnpackCachedInfo(CachedInfo cachedInfo)
        {
            (TagStartPositions, TagStartLengths, TagClosingPositions, TagClosingLengths, NodeDict, TagInstanceCSS) =
                (cachedInfo.TagStartPositions,
                 cachedInfo.TagStartLengths,
                 cachedInfo.TagClosingPositions,
                 cachedInfo.TagClosingLengths,
                 cachedInfo.NodeDict,
                 cachedInfo.TagInstanceCSS);
        }
82

    
83

    
84
        /*
85
         *      Full HTML Preprocessing -------------------------------------------------------------------------------
86
         */
87

    
88
        /// <summary>
        /// Builds the HTML to render from scratch. The source is parsed twice (an untouched original
        /// used for positions and an editable copy), every element gets a tag ID, loose text is wrapped
        /// in spans, the text covered by each tag is wrapped in annotated spans, links are redirected,
        /// the result is sanitized and the CSS info of all tag instances is generated.
        /// </summary>
        /// <param name="htmlSource">original HTML (or plain text) of the document</param>
        /// <param name="tags">all tags to mark out; positions refer to <paramref name="htmlSource"/></param>
        /// <returns>the sanitized HTML to render and the info to cache for later partial updates</returns>
        public (string, CachedInfo) FullPreprocessHTML(string htmlSource, List<AnnotationTagGeneric> tags)
        {
            var docOriginal = new HtmlDocument();
            docOriginal.LoadHtml(htmlSource);
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlSource);

            // The original document is never modified, so its descendants can be collected once
            var descendantsOriginal = docOriginal.DocumentNode.DescendantsAndSelf().ToList();
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();

            int currentId = 0;

            // Create mapping original-document-node -> to-edit-document-node
            FillNodeDict(descendantsOriginal, descendantsToEdit);

            // Assign IDs to all original tags
            AssignIdsToOriginalDocument(descendantsOriginal, ref currentId);

            // Wrap text that shares its parent with other tags (or has no tag at all) in spans
            WrapTextInSpan(descendantsOriginal, docToEdit);

            descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();

            foreach (var tag in tags)
            {
                int selectionStart = tag.Position;
                int selectionEnd = tag.Position + tag.Length;
                List<HtmlNode> addedForSelection = new();

                int i = 0;
                while (i < descendantsToEdit.Count)
                {
                    var node = descendantsToEdit[i];
                    var parentNode = node.ParentNode;

                    // Only text nodes that were not produced for this tag are candidates. Text whose parent
                    // is not an element (e.g. top-level text next to elements) carries no tag ID and is skipped,
                    // consistently with PartialPreprocessHTMLAddTag.
                    if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(parentNode) ||
                        parentNode.Name == "style" || parentNode.Name.StartsWith("#", StringComparison.Ordinal))
                    {
                        i++;
                        continue;
                    }

                    int nodeId = parentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);
                    if (nodeId < 0)
                    {
                        // Parent without a tag ID has no recorded positions; indexing would throw
                        i++;
                        continue;
                    }

                    var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                    var end = TagClosingPositions[nodeId];

                    if (selectionStart >= end || selectionEnd <= start)
                    {
                        // The tag does not overlap this text node
                        i++;
                        continue;
                    }

                    if (selectionStart <= start && selectionEnd >= end)
                    {
                        addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else if (selectionStart <= start)
                    {
                        addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else if (selectionEnd >= end)
                    {
                        addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }
                    else
                    {
                        addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
                    }

                    // The tree changed: re-collect the nodes and continue from the same index
                    // (the nodes just added are skipped through addedForSelection)
                    descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
                }
            }

            ModifyLinks(descendantsToEdit);
            string docToRender = docToEdit.DocumentNode.OuterHtml;

            // Only the attributes needed by the frontend survive sanitization
            HtmlSanitizer sanitizer = new HtmlSanitizer();
            sanitizer.AllowedAttributes.Clear();
            sanitizer.AllowedAttributes.Add(TAG_ID_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add(TAG_EF_ID_ATTRIBUTE_NAME);
            sanitizer.AllowedAttributes.Add("href");
            sanitizer.AllowedAttributes.Add("end");
            sanitizer.AllowedAttributes.Add("start");
            sanitizer.AllowedAttributes.Add("class");
            sanitizer.AllowedAttributes.Add("target");
            sanitizer.AllowedAttributes.Add("id");
            if (sanitizer.AllowedTags.Contains("script"))
            {
                sanitizer.AllowedTags.Remove("script");
            }
            if (!sanitizer.AllowedTags.Contains("style"))
            {
                sanitizer.AllowedTags.Add("style");
            }
            sanitizer.AllowedTags.Add("a");
            docToRender = sanitizer.Sanitize(docToRender);
            docToRender = docToRender.Replace("&nbsp;", " ");
            GenerateCSS(tags);

            return (docToRender, new CachedInfo()
            {
                TagStartPositions = TagStartPositions,
                TagClosingPositions = TagClosingPositions,
                TagStartLengths = TagStartLengths,
                TagClosingLengths = TagClosingLengths,
                TagInstanceCSS = TagInstanceCSS,
                NodeDict = NodeDict
            });
        }
197

    
198
        /// <summary>
        /// Replaces a text node by an annotated span when the selection covers the whole node.
        /// </summary>
        /// <param name="node">text node to wrap</param>
        /// <param name="selectionStart">start of the selection</param>
        /// <param name="selectionEnd">end of the selection</param>
        /// <param name="start">start of the node</param>
        /// <param name="end">end of the node</param>
        /// <param name="docToEdit">document to edit</param>
        /// <param name="tag">tag that is being added</param>
        /// <returns>the span that replaced the text node</returns>
        private HtmlNode SolveFullFill(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit, AnnotationTagGeneric tag)
        {
            string wholeText = node.InnerText;

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            // A side is marked only when the selection boundary coincides with the node boundary
            EPosition marker = (selectionStart == start, selectionEnd == end) switch
            {
                (true, true) => EPosition.MARK_LEFT_RIGHT,
                (false, true) => EPosition.MARK_RIGHT,
                (true, false) => EPosition.MARK_LEFT,
                _ => EPosition.MARK_NONE
            };

            HtmlNode annotated = CreateSpan(docToEdit, wholeText, TagStartPositions.Count, tag.Instance, tag.Id, start, marker);
            parent.ChildNodes.Insert(position, annotated);

            return annotated;
        }
237

    
238
        /// <summary>
        /// Splits a text node into an annotated span followed by a plain span when the selection
        /// covers the beginning of the node and leaves a gap on its right side.
        /// </summary>
        /// <param name="node">text node to wrap</param>
        /// <param name="selectionStart">start of the selection</param>
        /// <param name="selectionEnd">end of the selection</param>
        /// <param name="start">start of the node</param>
        /// <param name="end">end of the node</param>
        /// <param name="docToEdit">document to edit</param>
        /// <param name="tag">tag that is being added</param>
        /// <returns>the annotated span followed by the span holding the remaining text</returns>
        private List<HtmlNode> SolveRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTagGeneric tag)
        {
            string content = node.InnerText;
            int tailFrom = Math.Min(selectionStart - start + tag.Length, content.Length);
            string tail = content.Substring(tailFrom);
            string selected = content.Substring(0, selectionEnd - start);

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            // The right edge of the tag always lies in this node; the left edge only if the starts match
            EPosition marker = selectionStart == start ? EPosition.MARK_LEFT_RIGHT : EPosition.MARK_RIGHT;

            HtmlNode annotated = CreateSpan(docToEdit, selected, TagStartPositions.Count, tag.Instance, tag.Id, start, marker);
            parent.ChildNodes.Insert(position, annotated);

            HtmlNode rest = CreateSpan(docToEdit, tail, TagStartPositions.Count, null, null, start + selected.Length);
            parent.ChildNodes.Insert(position + 1, rest);

            return new List<HtmlNode> { annotated, rest };
        }
278

    
279
        /// <summary>
        /// Splits a text node into a plain span followed by an annotated span when the selection
        /// covers the end of the node and leaves a gap on its left side.
        /// </summary>
        /// <param name="node">text node to wrap</param>
        /// <param name="selectionStart">start of the selection</param>
        /// <param name="selectionEnd">end of the selection</param>
        /// <param name="start">start of the node</param>
        /// <param name="end">end of the node</param>
        /// <param name="docToEdit">document to edit</param>
        /// <param name="tag">tag that is being added</param>
        /// <returns>the annotated span followed by the span holding the preceding text</returns>
        private List<HtmlNode> SolveLeftGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTagGeneric tag)
        {
            string content = node.InnerText;
            int offset = selectionStart - start;
            string head = content.Substring(0, offset);
            string selected = content.Substring(offset, Math.Min(tag.Length, content.Length - head.Length));

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            // The left edge of the tag always lies in this node; the right edge only if the ends match
            EPosition marker = selectionEnd == end ? EPosition.MARK_LEFT_RIGHT : EPosition.MARK_LEFT;

            HtmlNode leading = CreateSpan(docToEdit, head, TagStartPositions.Count, null, null, start);
            parent.ChildNodes.Insert(position, leading);

            HtmlNode annotated = CreateSpan(docToEdit, selected, TagStartPositions.Count, tag.Instance, tag.Id, start + head.Length, marker);
            parent.ChildNodes.Insert(position + 1, annotated);

            return new List<HtmlNode> { annotated, leading };
        }
319

    
320
        /// <summary>
        /// Splits a text node into three spans (plain, annotated, plain) when the selection lies
        /// strictly inside the node and leaves a gap on both sides.
        /// </summary>
        /// <param name="node">text node to wrap</param>
        /// <param name="selectionStart">start of the selection</param>
        /// <param name="selectionEnd">end of the selection</param>
        /// <param name="start">start of the node</param>
        /// <param name="end">end of the node</param>
        /// <param name="docToEdit">document to edit</param>
        /// <param name="tag">tag that is being added</param>
        /// <returns>the annotated span followed by the spans holding the text before and after it</returns>
        private List<HtmlNode> SolveLeftRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                                 AnnotationTagGeneric tag)
        {
            string content = node.InnerText;
            int offset = selectionStart - start;
            string head = content.Substring(0, offset);
            string tail = content.Substring(offset + tag.Length);
            string selected = content.Substring(offset, tag.Length);

            HtmlNode parent = node.ParentNode;
            int position = parent.ChildNodes.IndexOf(node);
            parent.ChildNodes.RemoveAt(position);

            int selectedFrom = start + head.Length;
            int tailStart = selectedFrom + selected.Length;

            HtmlNode leading = CreateSpan(docToEdit, head, TagStartPositions.Count, null, null, start);
            parent.ChildNodes.Insert(position, leading);

            // The whole tag lies inside this node, so both of its edges are marked
            HtmlNode annotated = CreateSpan(docToEdit, selected, TagStartPositions.Count, tag.Instance, tag.Id, selectedFrom, EPosition.MARK_LEFT_RIGHT);
            parent.ChildNodes.Insert(position + 1, annotated);

            HtmlNode trailing = CreateSpan(docToEdit, tail, TagStartPositions.Count, null, null, tailStart);
            parent.ChildNodes.Insert(position + 2, trailing);

            return new List<HtmlNode> { annotated, leading, trailing };
        }
359

    
360
        /// <summary>
        /// Creates a span element holding the given text and registers its position in the
        /// position lists (opening and closing tag lengths are recorded as 0).
        /// When an instance ID is given, the span is additionally marked as an annotation.
        /// </summary>
        /// <param name="doc">document that creates the element</param>
        /// <param name="text">inner HTML of the span</param>
        /// <param name="tagId">ID of the tag (TAG_ID_ATTRIBUTE_NAME)</param>
        /// <param name="instanceId">ID of the instance (TAG_INSTANCE_ATTRIBUTE_NAME); null for a plain span</param>
        /// <param name="entityId">ID of the EF entity (TAG_EF_ID_ATTRIBUTE_NAME)</param>
        /// <param name="startPosition">start position recorded for the span</param>
        /// <param name="position">which edges of the tag this span represents</param>
        /// <returns>the created span (not yet inserted into the document)</returns>
        private HtmlNode CreateSpan(HtmlDocument doc, string text, int tagId, Guid? instanceId, Guid? entityId, int startPosition, EPosition position = EPosition.MARK_NONE)
        {
            HtmlNode result = doc.CreateElement("span");
            result.InnerHtml = text;

            TagStartPositions.Add(startPosition);
            TagStartLengths.Add(0);
            TagClosingPositions.Add(startPosition + text.Length);
            TagClosingLengths.Add(0);
            result.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, tagId.ToString());

            if (!instanceId.HasValue)
            {
                // Plain span: carries only the tag ID
                return result;
            }

            result.AddClass("annotation");
            result.Attributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME, instanceId.Value.ToString());
            result.Attributes.Add(TAG_EF_ID_ATTRIBUTE_NAME, entityId.ToString());
            result.Attributes.Add("id", entityId.ToString());

            bool marksStart = position is EPosition.MARK_LEFT or EPosition.MARK_LEFT_RIGHT;
            bool marksEnd = position is EPosition.MARK_RIGHT or EPosition.MARK_LEFT_RIGHT;

            if (marksStart)
            {
                result.Attributes.Add("start", "1");
            }
            if (marksEnd)
            {
                result.Attributes.Add("end", "1");
            }

            return result;
        }
400

    
401
        /// <summary>
        /// Which edges of a tag a created span represents; decides whether the
        /// "start" and "end" attributes are written to the span.
        /// </summary>
        private enum EPosition
        {
            /// <summary>Neither edge of the tag lies in the span</summary>
            MARK_NONE = 0,

            /// <summary>Both edges of the tag lie in the span</summary>
            MARK_LEFT_RIGHT = 2,

            /// <summary>Only the right (end) edge of the tag lies in the span</summary>
            MARK_RIGHT = 3,

            /// <summary>Only the left (start) edge of the tag lies in the span</summary>
            MARK_LEFT = 5
        }
408

    
409
        /// <summary>
        /// Redirects all links through the "/link" endpoint and makes them open in a new tab.
        /// The original target is passed URL-encoded in the "url" query parameter so that
        /// characters such as '&amp;', '#' or '?' in the target do not break the parameter.
        /// </summary>
        /// <param name="descendantsOriginal">nodes whose anchor elements are rewritten in place</param>
        private void ModifyLinks(IEnumerable<HtmlNode> descendantsOriginal)
        {
            foreach (var descendant in descendantsOriginal)
            {
                if (descendant.Name != "a")
                {
                    continue;
                }

                var href = descendant.Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                // The attribute value is raw markup; decode entities (e.g. "&amp;") before URL-encoding
                string target = HtmlEntity.DeEntitize(href.Value ?? string.Empty) ?? string.Empty;

                descendant.SetAttributeValue("href", "/link?url=" + Uri.EscapeDataString(target));
                descendant.SetAttributeValue("target", "_blank");
            }
        }
427

    
428
        /// <summary>
        /// Wraps text nodes of the editable document in spans so that every piece of text that can
        /// be annotated has a parent with its own tag ID. Text is wrapped when its parent contains
        /// both text and element children, or when the whole document is a single text node.
        /// </summary>
        /// <param name="descendantsOriginal">nodes of the original (unmodified) document</param>
        /// <param name="docToEdit">document to edit</param>
        private void WrapTextInSpan(IEnumerable<HtmlNode> descendantsOriginal, HtmlDocument docToEdit)
        {
            // The sequence may be lazy; walk the original tree only once
            var originalNodes = descendantsOriginal as IList<HtmlNode> ?? descendantsOriginal.ToList();

            // Special case for non-html documents: the document consists of a single text node
            if (originalNodes.Count == 2)
            {
                var documentNode = originalNodes[0];
                var childNode = originalNodes[1];
                if (documentNode.Name == "#document" && childNode.Name == "#text")
                {
                    HtmlNode coveringSpan = CreateCoveringSpan(docToEdit, childNode);

                    var parent = NodeDict[documentNode];
                    parent.ChildNodes.RemoveAt(0);
                    parent.ChildNodes.Add(coveringSpan);

                    return;
                }
            }

            foreach (var node in originalNodes)
            {
                if (node.Name.Contains("#"))
                {
                    continue;
                }

                bool hasSpecialChildren = node.ChildNodes.Any(child => child.Name.Contains("#"));
                bool hasElementChildren = node.ChildNodes.Any(child => !child.Name.Contains("#"));

                // Nodes containing only text or only sub-tags need no wrapping
                if (!hasSpecialChildren || !hasElementChildren)
                {
                    continue;
                }

                var parent = NodeDict[node];
                foreach (var child in node.ChildNodes)
                {
                    if (!child.Name.Contains("#text"))
                    {
                        continue;
                    }

                    HtmlNode coveringSpan = CreateCoveringSpan(docToEdit, child);

                    // Replace the counterpart of the text node in the editable document
                    var index = parent.ChildNodes.IndexOf(NodeDict[child]);
                    parent.ChildNodes.RemoveAt(index);
                    parent.ChildNodes.Insert(index, coveringSpan);
                }
            }
        }

        /// <summary>
        /// Creates a span covering the given original text node and registers its position.
        /// Tag lengths are recorded as 0 because the span is not part of the original document.
        /// </summary>
        /// <param name="docToEdit">document that creates the span</param>
        /// <param name="originalText">text node of the original document</param>
        /// <returns>the created span (not yet inserted into the document)</returns>
        private HtmlNode CreateCoveringSpan(HtmlDocument docToEdit, HtmlNode originalText)
        {
            HtmlNode coveringSpan = docToEdit.CreateElement("span");
            coveringSpan.InnerHtml = originalText.InnerHtml;

            TagStartPositions.Add(originalText.InnerStartIndex);
            TagStartLengths.Add(0);
            TagClosingPositions.Add(originalText.InnerStartIndex + originalText.InnerLength);
            TagClosingLengths.Add(0);
            coveringSpan.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, (TagStartPositions.Count - 1).ToString());

            return coveringSpan;
        }
517

    
518
        /// <summary>
        /// Generates the CSS info (color and padding) of all tag instances.
        /// The list is rebuilt on every call: appending to a list unpacked from the cache would
        /// duplicate every entry on each partial update and keep entries of removed instances.
        /// </summary>
        /// <param name="tags">all tags of the document</param>
        private void GenerateCSS(List<AnnotationTagGeneric> tags)
        {
            var tagPaddingDict = Intersections.ColorGraph(Intersections.FindIntersections(tags));

            var instanceCss = new List<TagInstanceCSSInfo>();
            foreach (var tag in tags.DistinctBy(t => t.Instance))
            {
                var padding = (tagPaddingDict[tag] + 2) * 2;
                instanceCss.Add(new TagInstanceCSSInfo
                {
                    InstanceId = tag.Instance,
                    Color = tag.Tag.Color,
                    Padding = padding
                });
            }

            // Assign a fresh list instead of mutating the one owned by the unpacked cached info
            TagInstanceCSS = instanceCss;
        }
539

    
540
        /// <summary>
        /// Gives every element of the original document a tag ID: the positions and lengths of its
        /// opening and closing tags are recorded and the ID (the index into the position lists) is
        /// written to the matching node of the editable document.
        /// </summary>
        /// <param name="descendantsOriginal">nodes of the original document</param>
        /// <param name="currentId">receives the last assigned ID</param>
        private void AssignIdsToOriginalDocument(IEnumerable<HtmlNode> descendantsOriginal, ref int currentId)
        {
            foreach (var original in descendantsOriginal)
            {
                var counterpart = NodeDict[original];

                // "#document", "#text" and "#comment" nodes are not tags
                if (original.Name.Contains("#"))
                {
                    continue;
                }

                int outerStart = original.OuterStartIndex;
                int innerStart = original.InnerStartIndex;
                int innerEnd = innerStart + original.InnerLength;
                int outerEnd = outerStart + original.OuterLength;

                TagStartPositions.Add(outerStart);
                TagStartLengths.Add(innerStart - outerStart);
                currentId = TagStartPositions.Count - 1;
                counterpart.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, currentId.ToString());

                // The closing tag spans from the end of the inner content to the end of the element
                TagClosingPositions.Add(innerEnd);
                TagClosingLengths.Add(outerEnd - innerEnd);
            }
        }
569

    
570
        /// <summary>
        /// Fills the mapping original-document-node -> to-edit-document-node by pairing
        /// the nodes of both documents in traversal order.
        /// </summary>
        /// <param name="descendantsOriginal">nodes of the original document</param>
        /// <param name="descendantsToEdit">nodes of the editable document, in the same order</param>
        private void FillNodeDict(IEnumerable<HtmlNode> descendantsOriginal, IEnumerable<HtmlNode> descendantsToEdit)
        {
            foreach (var (original, editable) in descendantsOriginal.Zip(descendantsToEdit))
            {
                NodeDict.Add(original, editable);
            }
        }
584

    
585

    
586
        /*
587
         *      Full HTML Preprocessing -------------------------------------------------------------------------------
588
         */
589

    
590
        /*
591
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
592
         */
593

    
594
        /// <summary>
        /// Incrementally modifies the HTML to render and the cached info by adding a new tag:
        /// every text node overlapped by the tag is split and wrapped in annotated spans.
        /// </summary>
        /// <param name="htmlToEdit">previously preprocessed HTML that is rendered</param>
        /// <param name="htmlOriginal">original HTML of the document (not needed by this method)</param>
        /// <param name="tagToAdd">tag to mark out</param>
        /// <param name="tags">all tags of the document, used to regenerate the CSS info</param>
        /// <param name="cachedInfo">info cached by the previous preprocessing</param>
        /// <returns>the updated HTML to render and the updated info to cache</returns>
        public (string, CachedInfo) PartialPreprocessHTMLAddTag(string htmlToEdit, string htmlOriginal, AnnotationTagGeneric tagToAdd, List<AnnotationTagGeneric> tags, CachedInfo cachedInfo)
        {
            UnpackCachedInfo(cachedInfo);

            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();

            int selectionStart = tagToAdd.Position;
            int selectionEnd = tagToAdd.Position + tagToAdd.Length;
            List<HtmlNode> addedForSelection = new();

            int i = 0;
            while (i < descendantsToEdit.Count)
            {
                var node = descendantsToEdit[i];
                var parentNode = node.ParentNode;

                // Only text nodes inside elements that were not produced for this tag are candidates
                if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(parentNode) ||
                    parentNode.Name == "style" || parentNode.Name.StartsWith("#"))
                {
                    i++;
                    continue;
                }

                int nodeId = parentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);
                if (nodeId < 0)
                {
                    // Parent without a tag ID has no recorded positions; indexing would throw
                    i++;
                    continue;
                }

                var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                var end = TagClosingPositions[nodeId];

                if (selectionStart >= end || selectionEnd <= start)
                {
                    // The tag does not overlap this text node
                    i++;
                    continue;
                }

                if (selectionStart <= start && selectionEnd >= end)
                {
                    addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }
                else if (selectionStart <= start)
                {
                    addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }
                else if (selectionEnd >= end)
                {
                    addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }
                else
                {
                    addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                }

                // The tree changed: re-collect the nodes and continue from the same index
                // (the nodes just added are skipped through addedForSelection)
                descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
            }

            GenerateCSS(tags);
            return (docToEdit.DocumentNode.OuterHtml, new()
            {
                TagStartPositions = TagStartPositions,
                TagStartLengths = TagStartLengths,
                TagClosingPositions = TagClosingPositions,
                TagClosingLengths = TagClosingLengths,
                TagInstanceCSS = TagInstanceCSS,
                NodeDict = NodeDict
            });
        }
673

    
674
        /// <summary>
        /// Incrementally removes a tag from the HTML to render. The spans created for the tag stay
        /// in the document; only the attributes that mark them as an annotation are removed,
        /// including the "id" attribute that was set to the entity ID when the span was created.
        /// </summary>
        /// <param name="htmlToEdit">previously preprocessed HTML that is rendered</param>
        /// <param name="htmlOriginal">original HTML of the document (not needed by this method)</param>
        /// <param name="tagToRemove">tag whose marking is removed</param>
        /// <param name="tags">all tags of the document, used to regenerate the CSS info</param>
        /// <param name="cachedInfo">info cached by the previous preprocessing</param>
        /// <returns>the updated HTML to render and the updated info to cache</returns>
        public (string, CachedInfo) PartialPreprocessHTMLRemoveTag(string htmlToEdit, string htmlOriginal, AnnotationTagGeneric tagToRemove, List<AnnotationTagGeneric> tags, CachedInfo cachedInfo)
        {
            UnpackCachedInfo(cachedInfo);

            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            string removedEntityId = tagToRemove.Id.ToString();

            // Removing attributes does not change the tree, so a single pass is sufficient
            foreach (var node in docToEdit.DocumentNode.DescendantsAndSelf().ToList())
            {
                var entityAttribute = node.Attributes[TAG_EF_ID_ATTRIBUTE_NAME];
                if (entityAttribute == null || entityAttribute.Value != removedEntityId)
                {
                    continue;
                }

                node.Attributes.Remove(TAG_EF_ID_ATTRIBUTE_NAME);
                node.Attributes.Remove(TAG_INSTANCE_ATTRIBUTE_NAME);
                node.Attributes.Remove("class");
                node.Attributes.Remove("start");
                node.Attributes.Remove("end");
                // The id was set to the entity ID by CreateSpan; it must not outlive the tag
                node.Attributes.Remove("id");
            }

            GenerateCSS(tags);
            return (docToEdit.DocumentNode.OuterHtml, new()
            {
                TagStartPositions = TagStartPositions,
                TagStartLengths = TagStartLengths,
                TagClosingPositions = TagClosingPositions,
                TagClosingLengths = TagClosingLengths,
                TagInstanceCSS = TagInstanceCSS,
                NodeDict = NodeDict
            });
        }
736

    
737
        /*
738
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
739
         */
740
    }
741
}
(1-1/2)