Projekt

Obecné

Profil

Stáhnout (7.71 KB) Statistiky
| Větev: | Tag: | Revize:
1 830652de Vojtěch Bartička
using Core.Entities;
2
using Models.Enums;
3
using Newtonsoft.Json;
4
using System;
5
using System.Collections.Generic;
6
using System.IO.Compression;
7
using System.Linq;
8
using System.Text;
9
using System.Threading.Tasks;
10
11
namespace Core.ZipUtils
12
{
13 a35cb648 Vojtěch Bartička
    /// <summary>
    /// Static methods for exporting documents and their annotations into an in-memory ZIP archive
    /// </summary>
    public class Export
    {
        /// <summary>
        /// Exports the specified documents together with their annotations and final annotations
        /// </summary>
        /// <remarks>
        /// Every document gets its own directory named after its zero-padded index in
        /// <paramref name="documents"/> (e.g. <c>00000</c>). The directory contains <c>document.json</c>,
        /// one <c>annotation_NNNNN.json</c> per annotation and, when the document has a final annotation,
        /// <c>final.json</c>.
        /// </remarks>
        /// <param name="documents">list of documents to export</param>
        /// <param name="documentAnnotations">document-annotations to export mapping</param>
        /// <param name="documentFinalAnnotations">document-final annotation mapping; a null value means the document has no final annotation</param>
        /// <param name="annotationTags">annotation-annotation tags mapping</param>
        /// <param name="finalAnnotationTags">final annotation-final annotation tags mapping</param>
        /// <returns>stream holding the finished archive, positioned at its beginning</returns>
        /// <exception cref="KeyNotFoundException">a document or annotation is missing from one of the mappings</exception>
        public static MemoryStream FullExport(List<Document> documents, Dictionary<Document, List<Annotation>> documentAnnotations, Dictionary<Document, FinalAnnotation> documentFinalAnnotations,
            Dictionary<Annotation, List<AnnotationTag>> annotationTags, Dictionary<FinalAnnotation, List<FinalAnnotationTag>> finalAnnotationTags)
        {
            MemoryStream ms = new MemoryStream();

            // leaveOpen is true so that disposing the archive does not close the MemoryStream.
            // The using block guarantees the archive is disposed even when the export throws.
            using (var archive = new ZipArchive(ms, ZipArchiveMode.Create, true))
            {
                for (int docIndex = 0; docIndex < documents.Count; docIndex++)
                {
                    var document = documents[docIndex];
                    string documentDir = $"{docIndex:00000}";

                    // Write document content and name into a JSON file
                    CreateFile(documentDir + "/document.json", DumpDocument(document), archive);

                    // Deal with annotations
                    var annotations = documentAnnotations[document];
                    for (int annotationIndex = 0; annotationIndex < annotations.Count; annotationIndex++)
                    {
                        var annotation = annotations[annotationIndex];
                        // Cast<> fails loudly on an incompatible element instead of producing a silent null
                        var tags = annotationTags[annotation].Cast<AnnotationTagGeneric>().ToList();
                        string annotationInfoJson = DumpAnnotation(annotation, tags, document.Id);
                        CreateFile(documentDir + $"/annotation_{annotationIndex:00000}.json", annotationInfoJson, archive);
                    }

                    // Deal with final annotation if it exists
                    var finalAnnotation = documentFinalAnnotations[document];
                    if (finalAnnotation == null)
                    {
                        continue;
                    }

                    var finalTags = finalAnnotationTags[finalAnnotation].Cast<AnnotationTagGeneric>().ToList();
                    string finalAnnotationInfoJson = DumpAnnotation(finalAnnotation, finalTags, document.Id);
                    CreateFile(documentDir + "/final.json", finalAnnotationInfoJson, archive);
                }
            }

            // The archive is disposed at this point, which finishes writing it to the stream.
            // Rewind only now - disposing the archive moves the stream position.
            ms.Position = 0;
            return ms;
        }

        /// <summary>
        /// Creates a file inside the archive and writes the given text to it as UTF-8
        /// </summary>
        /// <param name="path">path of the file inside the archive</param>
        /// <param name="fileContent">string to write</param>
        /// <param name="archive">archive to write it to</param>
        private static void CreateFile(string path, string fileContent, ZipArchive archive)
        {
            var entry = archive.CreateEntry(path);

            // Dispose the entry stream even when the write fails so that it is never left open
            using (var entryStream = entry.Open())
            {
                byte[] contentBytes = Encoding.UTF8.GetBytes(fileContent);
                entryStream.Write(contentBytes, 0, contentBytes.Length);
            }
        }

        /// <summary>
        /// Dumps a Document into JSON-serialized ExportDocumentInfo
        /// </summary>
        /// <param name="document">Document to serialize</param>
        /// <returns>JSON-serialized ExportDocumentInfo</returns>
        private static string DumpDocument(Document document)
        {
            ExportDocumentInfo documentInfo = new()
            {
                Name = document.Name,
                Content = document.Content.Content,
                DocumentId = document.Id,
            };

            return JsonConvert.SerializeObject(documentInfo);
        }

        /// <summary>
        /// Dumps annotation into JSON-serialized ExportAnnotationInfo
        /// </summary>
        /// <param name="annotation">Annotation to dump</param>
        /// <param name="tags">Tags related to the annotation</param>
        /// <param name="documentId">Id of the document the annotation is related to</param>
        /// <returns>JSON-serialized ExportAnnotationInfo</returns>
        private static string DumpAnnotation(Annotation annotation, List<AnnotationTagGeneric> tags, Guid documentId)
        {
            ExportAnnotationInfo annotationInfo = new()
            {
                AnnotatorId = annotation.User.Id,
                DocumentId = documentId,
            };

            foreach (var tag in tags)
            {
                ExportTagInstanceInfo tagInfo = new()
                {
                    Category = tag.Tag.Category.Name,
                    TagName = tag.Tag.Name,
                    InstanceId = tag.Instance,
                    FirstCharPosition = tag.Position,
                    Length = tag.Length,
                };

                // Check the sentiment of the source tag. The freshly created tagInfo never has
                // a sentiment yet, so testing tagInfo.Sentiment would skip the export every time.
                if (tag.Tag.SentimentEnabled && tag.Sentiment != null)
                {
                    // Invariant casing keeps the exported value independent of the server culture
                    tagInfo.Sentiment = tag.Sentiment.ToString().ToLowerInvariant();
                }

                if (tag.SubTag != null)
                {
                    tagInfo.SubTagName = tag.SubTag.Name;
                }

                annotationInfo.Tags.Add(tagInfo);
            }

            return JsonConvert.SerializeObject(annotationInfo);
        }
    }
141
142
    /// <summary>
    /// Data written for one exported document
    /// </summary>
    public class ExportDocumentInfo
    {
        /// <summary>
        /// Name of the document
        /// </summary>
        public string Name { get; set; }

        /// <summary>
        /// Content of the original document
        /// </summary>
        public string Content { get; set; }

        /// <summary>
        /// Identifier of the document
        /// </summary>
        public Guid DocumentId { get; set; }
    }
157
158
    /// <summary>
    /// Data written for one exported annotation
    /// </summary>
    public class ExportAnnotationInfo
    {
        /// <summary>
        /// Id of the annotator
        /// </summary>
        public Guid AnnotatorId { get; set; }

        /// <summary>
        /// Id of the annotated document
        /// </summary>
        public Guid DocumentId { get; set; }

        /// <summary>
        /// Tag instances of the annotation; starts as an empty list
        /// </summary>
        public List<ExportTagInstanceInfo> Tags { get; set; } = new List<ExportTagInstanceInfo>();
    }
173
174
    /// <summary>
    /// Data written for one tag instance of an exported annotation
    /// </summary>
    public class ExportTagInstanceInfo
    {
        /// <summary>
        /// Name of the category the tag belongs to
        /// </summary>
        public string Category { get; set; }

        /// <summary>
        /// Tag name (the primary identifier of the tag)
        /// </summary>
        public string TagName { get; set; }

        /// <summary>
        /// Subtag name (the primary identifier of a subtag); null when there is no subtag
        /// </summary>
        public string? SubTagName { get; set; }

        /// <summary>
        /// Instance Id (the parts of a multipart tag share the same Id)
        /// </summary>
        public Guid InstanceId { get; set; }

        /// <summary>
        /// Position of the first tagged character in the original document
        /// </summary>
        public int FirstCharPosition { get; set; }

        /// <summary>
        /// Length of the tagged section in chars
        /// </summary>
        public int Length { get; set; }

        /// <summary>
        /// Sentiment of the tag; optional, null when not set
        /// </summary>
        public string? Sentiment { get; set; }
    }
205
}