Projekt

Obecné

Profil

Stáhnout (7.71 KB) Statistiky
| Větev: | Tag: | Revize:
1
using Core.Entities;
2
using Models.Enums;
3
using Newtonsoft.Json;
4
using System;
5
using System.Collections.Generic;
6
using System.IO.Compression;
7
using System.Linq;
8
using System.Text;
9
using System.Threading.Tasks;
10

    
11
namespace Core.ZipUtils
12
{
13
    /// <summary>
    /// Static methods for database exporting.
    /// Builds an in-memory ZIP archive with one directory per document; each directory
    /// contains JSON files describing the document, its annotations and (optionally)
    /// its final annotation.
    /// </summary>
    public class Export
    {
        /// <summary>
        /// Exports the specified documents and their annotations into a ZIP archive.
        /// </summary>
        /// <remarks>
        /// Archive layout, for the document at index <c>i</c> (zero-padded to five digits):
        /// <c>i/document.json</c>, <c>i/annotation_NNNNN.json</c> for every annotation of the
        /// document, and <c>i/final.json</c> when the document has a non-null final annotation.
        /// </remarks>
        /// <param name="documents">list of documents to export</param>
        /// <param name="documentAnnotations">document-annotations to export mapping; must contain every exported document</param>
        /// <param name="documentFinalAnnotations">document-final annotations to export mapping; a null value means "no final annotation"</param>
        /// <param name="annotationTags">annotation-annotation tags mapping</param>
        /// <param name="finalAnnotationTags">final annotation-final annotation tags mapping</param>
        /// <returns>Stream holding the finished ZIP archive, positioned at its beginning</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null</exception>
        public static MemoryStream FullExport(List<Document> documents, Dictionary<Document, List<Annotation>> documentAnnotations, Dictionary<Document, FinalAnnotation> documentFinalAnnotations,
            Dictionary<Annotation, List<AnnotationTag>> annotationTags, Dictionary<FinalAnnotation, List<FinalAnnotationTag>> finalAnnotationTags)
        {
            // Only `documents` is validated eagerly: the dictionaries are touched lazily
            // (e.g. finalAnnotationTags is never read when no final annotation exists).
            if (documents == null)
            {
                throw new ArgumentNullException(nameof(documents));
            }

            MemoryStream ms = new MemoryStream();

            // leaveOpen is true so that disposing the archive does not close the MemoryStream.
            // The using block guarantees the archive is disposed (which writes the ZIP central
            // directory) even when serialization of some entry throws.
            using (var archive = new ZipArchive(ms, ZipArchiveMode.Create, true))
            {
                for (int docIndex = 0; docIndex < documents.Count; docIndex++)
                {
                    var document = documents[docIndex];
                    string documentDir = $"{docIndex:00000}";

                    // Write document content and name into a JSON file
                    string documentInfoJson = DumpDocument(document);
                    CreateFile(documentDir + "/document.json", documentInfoJson, archive);

                    // Deal with annotations
                    var annotations = documentAnnotations[document];
                    for (int annotationIndex = 0; annotationIndex < annotations.Count; annotationIndex++)
                    {
                        var annotation = annotations[annotationIndex];
                        var tags = annotationTags[annotation].Cast<AnnotationTagGeneric>().ToList();
                        string annotationInfoJson = DumpAnnotation(annotation, tags, document.Id);
                        CreateFile(documentDir + $"/annotation_{annotationIndex:00000}.json", annotationInfoJson, archive);
                    }

                    // Deal with final annotation if it exists
                    var finalAnnotation = documentFinalAnnotations[document];
                    if (finalAnnotation == null)
                    {
                        continue;
                    }

                    var finalTags = finalAnnotationTags[finalAnnotation].Cast<AnnotationTagGeneric>().ToList();
                    string finalAnnotationInfoJson = DumpAnnotation(finalAnnotation, finalTags, document.Id);
                    CreateFile(documentDir + "/final.json", finalAnnotationInfoJson, archive);
                }
            }

            // Rewind so that the caller can read the archive from its first byte
            ms.Position = 0;
            return ms;
        }

        /// <summary>
        /// Creates a file (archive entry) and writes the given text to it, UTF-8 encoded
        /// </summary>
        /// <param name="path">path to file inside the archive</param>
        /// <param name="fileContent">string to write</param>
        /// <param name="archive">archive to write it to</param>
        private static void CreateFile(string path, string fileContent, ZipArchive archive)
        {
            var entry = archive.CreateEntry(path);
            byte[] contentBytes = Encoding.UTF8.GetBytes(fileContent);

            // The entry stream is closed even if the write fails
            using (var entryStream = entry.Open())
            {
                entryStream.Write(contentBytes, 0, contentBytes.Length);
            }
        }

        /// <summary>
        /// Dumps a Document into JSON-serialized ExportDocumentInfo
        /// </summary>
        /// <param name="document">Document to serialize</param>
        /// <returns>JSON-serialized ExportDocumentInfo</returns>
        private static string DumpDocument(Document document)
        {
            ExportDocumentInfo documentInfo = new()
            {
                Name = document.Name,
                Content = document.Content.Content,
                DocumentId = document.Id,
            };

            return JsonConvert.SerializeObject(documentInfo);
        }

        /// <summary>
        /// Dumps annotation into JSON-serialized ExportAnnotationInfo
        /// </summary>
        /// <param name="annotation">Annotation to dump</param>
        /// <param name="tags">Tags related to the annotation</param>
        /// <param name="documentId">Id of the document the annotation is related to</param>
        /// <returns>JSON-serialized ExportAnnotationInfo</returns>
        private static string DumpAnnotation(Annotation annotation, List<AnnotationTagGeneric> tags, Guid documentId)
        {
            ExportAnnotationInfo annotationInfo = new()
            {
                AnnotatorId = annotation.User.Id,
                DocumentId = documentId,
            };

            foreach (var tag in tags)
            {
                ExportTagInstanceInfo tagInfo = new()
                {
                    Category = tag.Tag.Category.Name,
                    TagName = tag.Tag.Name,
                    InstanceId = tag.Instance,
                    FirstCharPosition = tag.Position,
                    Length = tag.Length,
                };

                // Check the sentiment of the source tag: tagInfo.Sentiment is still unset here,
                // so testing it (as the code used to) meant the sentiment was never exported.
                if (tag.Tag.SentimentEnabled && tag.Sentiment != null)
                {
                    // Invariant lower-casing keeps the exported value independent of the server culture
                    tagInfo.Sentiment = tag.Sentiment.ToString().ToLowerInvariant();
                }

                if (tag.SubTag != null)
                {
                    tagInfo.SubTagName = tag.SubTag.Name;
                }

                annotationInfo.Tags.Add(tagInfo);
            }

            return JsonConvert.SerializeObject(annotationInfo);
        }
    }
141

    
142
    /// <summary>
    /// Plain data holder describing one exported document.
    /// Property order is kept stable because it determines the member order of the serialized JSON.
    /// </summary>
    public class ExportDocumentInfo
    {
        /// <summary>
        /// Name of the document
        /// </summary>
        public string Name { get; set; }

        /// <summary>
        /// Content of the document in its original form
        /// </summary>
        public string Content { get; set; }

        /// <summary>
        /// Identifier of the document
        /// </summary>
        public Guid DocumentId { get; set; }
    }
157

    
158
    /// <summary>
    /// Plain data holder describing one exported annotation together with its tags.
    /// Property order is kept stable because it determines the member order of the serialized JSON.
    /// </summary>
    public class ExportAnnotationInfo
    {
        /// <summary>
        /// Identifier of the annotator
        /// </summary>
        public Guid AnnotatorId { get; set; }

        /// <summary>
        /// Identifier of the annotated document
        /// </summary>
        public Guid DocumentId { get; set; }

        /// <summary>
        /// Tag instances of the annotation; starts as an empty list
        /// </summary>
        public List<ExportTagInstanceInfo> Tags { get; set; } = new List<ExportTagInstanceInfo>();
    }
173

    
174
    /// <summary>
    /// Plain data holder describing a single tag occurrence inside an exported annotation.
    /// Property order is kept stable because it determines the member order of the serialized JSON.
    /// </summary>
    public class ExportTagInstanceInfo
    {
        /// <summary>
        /// Category the tag belongs to
        /// </summary>
        public string Category { get; set; }

        /// <summary>
        /// Tag name (the primary identifier of the tag)
        /// </summary>
        public string TagName { get; set; }

        /// <summary>
        /// Subtag name (the primary identifier of a subtag); null when the tag has no subtag
        /// </summary>
        public string? SubTagName { get; set; }

        /// <summary>
        /// Instance Id (parts of a multipart tag share the same Id)
        /// </summary>
        public Guid InstanceId { get; set; }

        /// <summary>
        /// Position of the tagged section in the original document
        /// </summary>
        public int FirstCharPosition { get; set; }

        /// <summary>
        /// Length of the tagged section in chars
        /// </summary>
        public int Length { get; set; }

        /// <summary>
        /// Optional sentiment; null when not set
        /// </summary>
        public string? Sentiment { get; set; }
    }
205
}
    (1-1/1)