1
|
package cz.zcu.kiv.aswi.fulltextsearch;
|
2
|
|
3
|
import cz.zcu.kiv.aswi.fulltextsearch.document.*;
|
4
|
import cz.zcu.kiv.aswi.fulltextsearch.model.SearchResponse;
|
5
|
import javaxt.io.Image;
|
6
|
import org.apache.solr.client.solrj.SolrQuery;
|
7
|
import org.apache.solr.client.solrj.SolrServerException;
|
8
|
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
9
|
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
|
10
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
11
|
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
|
12
|
import org.apache.solr.common.SolrDocument;
|
13
|
import org.apache.solr.common.SolrDocumentList;
|
14
|
import org.apache.solr.common.SolrInputDocument;
|
15
|
import org.slf4j.Logger;
|
16
|
import org.slf4j.LoggerFactory;
|
17
|
import org.springframework.http.MediaType;
|
18
|
import org.springframework.web.multipart.MultipartFile;
|
19
|
|
20
|
import javax.imageio.ImageIO;
|
21
|
import javax.xml.bind.JAXBException;
|
22
|
import java.awt.image.BufferedImage;
|
23
|
import java.io.ByteArrayInputStream;
|
24
|
import java.io.IOException;
|
25
|
import java.util.*;
|
26
|
import java.util.stream.Collectors;
|
27
|
|
28
|
public class SolrService {
|
29
|
|
30
|
// for docker
|
31
|
private static final String URL = "http://fulltextsearch-solr";
|
32
|
|
33
|
// for localhost
|
34
|
//private static final String URL = "http://localhost";
|
35
|
|
36
|
private static final String CORE_NAME_DOCUMENTS = "documents";
|
37
|
private static final String CORE_NAME_FILES = "files";
|
38
|
|
39
|
private static final String PREFIX_TEXT_LINE = "text_line_";
|
40
|
private static final String PREFIX_TEXT_WORD = "text_word_";
|
41
|
private static final String SUFFIX_COORDS = "_coords";
|
42
|
|
43
|
private static final String FIELD_DOC_FILENAME = "doc_filename";
|
44
|
private static final String FIELD_IMG_FILENAME = "img_filename";
|
45
|
private static final String FIELD_TEXT_REGION = "text_region";
|
46
|
private static final String FIELD_TEXT_REGION_COORDS = "text_region" + SUFFIX_COORDS;
|
47
|
private static final String FIELD_DOC_CONTENT = "doc_content";
|
48
|
private static final String FIELD_IMG_CONTENT = "img_content";
|
49
|
private static final float IMG_OUTPUT_QUALITY = 0.5f;
|
50
|
private static final Logger LOG = LoggerFactory.getLogger(IndexController.class);
|
51
|
|
52
|
private static final int SEARCH_QUERY_ROWS = 100;
|
53
|
|
54
|
private HttpSolrClient solrClientDocuments;
|
55
|
private HttpSolrClient solrClientFiles;
|
56
|
private List<String> fieldNamesDocumentCore = new ArrayList<>();
|
57
|
|
58
|
/**
 * Creates the service and immediately builds the Solr clients via {@link #connect()}.
 */
public SolrService() {
    this.connect();
}
|
61
|
|
62
|
private void connect() {
|
63
|
LOG.info("Connecting to Solr");
|
64
|
String urlString = URL + ":8983/solr/" + CORE_NAME_DOCUMENTS;
|
65
|
solrClientDocuments = new HttpSolrClient.Builder(urlString).build();
|
66
|
|
67
|
urlString = URL + ":8983/solr/" + CORE_NAME_FILES;
|
68
|
solrClientFiles = new HttpSolrClient.Builder(urlString).build();
|
69
|
}
|
70
|
|
71
|
public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException {
|
72
|
LOG.trace("SolrService method uploadFiles called");
|
73
|
Map<String, List<MultipartFile>> pairs = new HashMap<>();
|
74
|
|
75
|
LOG.debug("Preprocessing files for upload to Solr");
|
76
|
for (MultipartFile file: files) {
|
77
|
String filename = file.getOriginalFilename();
|
78
|
if (filename == null) {
|
79
|
LOG.error("Skipping file with empty filename");
|
80
|
continue;
|
81
|
}
|
82
|
|
83
|
String type = file.getContentType();
|
84
|
if (type == null) {
|
85
|
LOG.error("Skipping file " + filename + " (unknown type)");
|
86
|
continue;
|
87
|
}
|
88
|
|
89
|
int index = filename.lastIndexOf('.');
|
90
|
String filenameWithoutExtension = filename;
|
91
|
if (index >= 0) {
|
92
|
filenameWithoutExtension = filename.substring(0, index);
|
93
|
}
|
94
|
|
95
|
if (type.equals(MediaType.IMAGE_JPEG_VALUE) || type.equals(MediaType.IMAGE_PNG_VALUE)
|
96
|
|| type.equals(MediaType.APPLICATION_XML_VALUE) || type.equals(MediaType.TEXT_XML_VALUE)) {
|
97
|
|
98
|
List<MultipartFile> list = pairs.get(filenameWithoutExtension);
|
99
|
if (list == null) {
|
100
|
list = new LinkedList<>();
|
101
|
list.add(file);
|
102
|
pairs.put(filenameWithoutExtension, list);
|
103
|
} else {
|
104
|
list.add(file);
|
105
|
}
|
106
|
LOG.debug("File " + filename + " added");
|
107
|
} else {
|
108
|
LOG.error("Not supported type of file! File " + filename + " is type " + type);
|
109
|
}
|
110
|
}
|
111
|
return addFiles(pairs);
|
112
|
}
|
113
|
|
114
|
private int addFiles(Map<String, List<MultipartFile>> pairs) throws IOException, SolrServerException, JAXBException {
|
115
|
LOG.trace("SolrService method addFiles called");
|
116
|
LOG.debug("Uploading files to Solr");
|
117
|
getFieldNamesFromDocumentCore();
|
118
|
addBasicFieldsToDocumentsCore();
|
119
|
|
120
|
addAllFieldsToFilesCore();
|
121
|
|
122
|
int count = 0;
|
123
|
Set<String> keys = pairs.keySet();
|
124
|
for (String key: keys) {
|
125
|
List<MultipartFile> pair = pairs.get(key);
|
126
|
if (pair.size() != 2) continue;
|
127
|
|
128
|
MultipartFile first = pair.get(0);
|
129
|
if (first.getContentType().equals(MediaType.APPLICATION_XML_VALUE) || first.getContentType().equals(MediaType.TEXT_XML_VALUE)) {
|
130
|
addToFilesCore(first, pair.get(1));
|
131
|
} else {
|
132
|
addToFilesCore(pair.get(1), first);
|
133
|
}
|
134
|
count += 2;
|
135
|
}
|
136
|
|
137
|
LOG.debug("Commiting Solr Documents and Files core changes");
|
138
|
solrClientFiles.commit();
|
139
|
solrClientDocuments.commit();
|
140
|
|
141
|
LOG.info("Files uploaded");
|
142
|
return count;
|
143
|
}
|
144
|
|
145
|
private void addToFilesCore(MultipartFile xmlFile, MultipartFile imageFile) throws IOException, SolrServerException, JAXBException {
|
146
|
LOG.trace("SolrService method addToFilesCore called");
|
147
|
SolrInputDocument solrInputDocument = new SolrInputDocument();
|
148
|
|
149
|
LOG.debug("Adding a file pair to File core");
|
150
|
solrInputDocument.addField(FIELD_DOC_FILENAME, xmlFile.getOriginalFilename());
|
151
|
solrInputDocument.addField(FIELD_IMG_FILENAME, imageFile.getOriginalFilename());
|
152
|
solrInputDocument.addField(FIELD_DOC_CONTENT, xmlFile.getBytes());
|
153
|
solrInputDocument.addField(FIELD_IMG_CONTENT, Base64.getEncoder().encode(imageFile.getBytes()));
|
154
|
|
155
|
solrClientFiles.add(solrInputDocument);
|
156
|
|
157
|
LOG.debug("Loading XML file");
|
158
|
XMLLoader xmlLoader = new XMLLoader();
|
159
|
PcGts doc = xmlLoader.loadFile(xmlFile.getInputStream(), xmlFile.getOriginalFilename());
|
160
|
addToDocumentsCore(doc, imageFile.getOriginalFilename());
|
161
|
}
|
162
|
|
163
|
private void addAllFieldsToFilesCore() throws IOException, SolrServerException {
|
164
|
LOG.trace("SolrService method addAllFieldsToFilesCore called");
|
165
|
List<String> fieldNamesFilesCore = new ArrayList<>();
|
166
|
|
167
|
SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields();
|
168
|
SchemaResponse.FieldsResponse fieldsResponse = fieldRequest.process(solrClientFiles);
|
169
|
List<Map<String, Object>> fields = fieldsResponse.getFields();
|
170
|
|
171
|
for (Map<String, Object> field: fields) {
|
172
|
fieldNamesFilesCore.add(field.get("name").toString());
|
173
|
}
|
174
|
|
175
|
String[] filenames = new String[]{ FIELD_DOC_FILENAME, FIELD_IMG_FILENAME };
|
176
|
String[] contents = new String[]{ FIELD_DOC_CONTENT, FIELD_IMG_CONTENT };
|
177
|
|
178
|
LOG.debug("Adding all fields to File core");
|
179
|
for (String name: filenames) {
|
180
|
if (fieldNamesFilesCore.contains(name)) continue;
|
181
|
|
182
|
try {
|
183
|
SchemaRequest.Update request = getAddTextFieldRequest(name);
|
184
|
request.process(solrClientFiles);
|
185
|
} catch (Exception e) {
|
186
|
LOG.error("Couldn't add field = " + name);
|
187
|
LOG.error(e.toString());
|
188
|
}
|
189
|
}
|
190
|
|
191
|
for (String name: contents) {
|
192
|
if (fieldNamesFilesCore.contains(name)) continue;
|
193
|
|
194
|
try {
|
195
|
Map<String, Object> fieldAttributes = new LinkedHashMap<>();
|
196
|
fieldAttributes.put("name", name);
|
197
|
fieldAttributes.put("type", "binary");
|
198
|
fieldAttributes.put("indexed", false);
|
199
|
fieldAttributes.put("stored", true);
|
200
|
fieldAttributes.put("multiValued", false);
|
201
|
SchemaRequest.Update request = new SchemaRequest.AddField(fieldAttributes);
|
202
|
request.process(solrClientFiles);
|
203
|
} catch (Exception e) {
|
204
|
LOG.error("Couldn't add field = " + name);
|
205
|
LOG.error(e.toString());
|
206
|
}
|
207
|
}
|
208
|
}
|
209
|
|
210
|
private void addBasicFieldsToDocumentsCore() {
|
211
|
LOG.trace("SolrService method addBasicFieldsToDocumentsCore called");
|
212
|
LOG.debug("Adding basic fields to Documents core");
|
213
|
addFieldToDocumentCore(FIELD_DOC_FILENAME);
|
214
|
addFieldToDocumentCore(FIELD_IMG_FILENAME);
|
215
|
addFieldToDocumentCore(FIELD_TEXT_REGION);
|
216
|
addFieldToDocumentCore(FIELD_TEXT_REGION_COORDS);
|
217
|
}
|
218
|
|
219
|
private void getFieldNamesFromDocumentCore() throws IOException, SolrServerException {
|
220
|
LOG.trace("SolrService method getFieldNamesFromDocumentCore called");
|
221
|
LOG.debug("Getting field names from Documents core");
|
222
|
SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields();
|
223
|
SchemaResponse.FieldsResponse fieldsResponse = fieldRequest.process(solrClientDocuments);
|
224
|
List<Map<String, Object>> fields = fieldsResponse.getFields();
|
225
|
|
226
|
for (Map<String, Object> field: fields) {
|
227
|
fieldNamesDocumentCore.add(field.get("name").toString());
|
228
|
}
|
229
|
}
|
230
|
|
231
|
|
232
|
private void addFieldToDocumentCore(String name) {
|
233
|
LOG.trace("SolrService method addFieldToDocumentCore called");
|
234
|
|
235
|
if (fieldNamesDocumentCore.contains(name)) return;
|
236
|
LOG.debug("Adding field to Documents core");
|
237
|
try {
|
238
|
SchemaRequest.Update request = getAddTextFieldRequest(name);
|
239
|
request.process(solrClientDocuments);
|
240
|
fieldNamesDocumentCore.add(name);
|
241
|
} catch (Exception e) {
|
242
|
LOG.error("Couldn't add field = " + name);
|
243
|
LOG.error(e.toString());
|
244
|
}
|
245
|
}
|
246
|
|
247
|
private SchemaRequest.Update getAddTextFieldRequest(String name) {
|
248
|
LOG.trace("SolrService method getAddTextFieldRequest called");
|
249
|
Map<String, Object> fieldAttributes = new LinkedHashMap<>();
|
250
|
fieldAttributes.put("name", name);
|
251
|
fieldAttributes.put("type", "text_general");
|
252
|
fieldAttributes.put("stored", true);
|
253
|
fieldAttributes.put("multiValued", false);
|
254
|
return new SchemaRequest.AddField(fieldAttributes);
|
255
|
}
|
256
|
|
257
|
public void deleteAll() throws IOException, SolrServerException {
|
258
|
LOG.trace("SolrService method getAddTextFieldRequest called");
|
259
|
LOG.info("Deleting ALL from Documents core");
|
260
|
solrClientDocuments.deleteByQuery("*:*");
|
261
|
solrClientDocuments.commit();
|
262
|
|
263
|
LOG.info("Deleting ALL from Files core");
|
264
|
solrClientFiles.deleteByQuery("*:*");
|
265
|
solrClientFiles.commit();
|
266
|
}
|
267
|
|
268
|
public byte[] getImageFromFilesCore(String filename) throws IOException, SolrServerException {
|
269
|
LOG.trace("SolrService method getImageFromFilesCore called");
|
270
|
LOG.debug("Getting an image from Files core");
|
271
|
SolrQuery solrQuery = new SolrQuery();
|
272
|
solrQuery.set("q", FIELD_IMG_FILENAME + ":" + "\"" + filename + "\"");
|
273
|
SolrDocumentList results = solrClientFiles.query(solrQuery).getResults();
|
274
|
if (results.getNumFound() < 1) {
|
275
|
LOG.error("Image " + filename + " not found!");
|
276
|
} else if (results.getNumFound() > 1) {
|
277
|
LOG.error("Image " + filename + " has multiple files. Using first one...");
|
278
|
}
|
279
|
|
280
|
SolrDocument result = results.get(0);
|
281
|
return (byte[]) result.getFieldValue(FIELD_IMG_CONTENT);
|
282
|
}
|
283
|
|
284
|
public SearchResponse query(String query) throws IOException, SolrServerException {
|
285
|
LOG.trace("SolrService method query called");
|
286
|
LOG.debug("Processing query - creating Solr query");
|
287
|
SolrQuery solrQuery = new SolrQuery();
|
288
|
solrQuery.set("q", FIELD_TEXT_REGION + ":" + query);
|
289
|
solrQuery.setHighlight(true);
|
290
|
solrQuery.addHighlightField(PREFIX_TEXT_LINE + "*");
|
291
|
solrQuery.addHighlightField(PREFIX_TEXT_WORD + "*");
|
292
|
solrQuery.setRows(SEARCH_QUERY_ROWS);
|
293
|
LOG.debug("Processing query - sending Solr query");
|
294
|
QueryResponse response = solrClientDocuments.query(solrQuery);
|
295
|
|
296
|
SolrDocumentList docList = response.getResults();
|
297
|
Map<String, Map<String, List<String>>> highlight = response.getHighlighting();
|
298
|
|
299
|
SearchResponse searchResponse = new SearchResponse();
|
300
|
searchResponse.setExpression(query);
|
301
|
|
302
|
LOG.debug("Processing query - creating Search response");
|
303
|
List<TextRegion> textRegions = new LinkedList<>();
|
304
|
for (SolrDocument solrDocument: docList) {
|
305
|
TextRegion textRegion = new TextRegion();
|
306
|
|
307
|
// text, coords
|
308
|
textRegion.setRegionText((String) solrDocument.getFieldValue(FIELD_TEXT_REGION));
|
309
|
String pointsString = (String) solrDocument.getFieldValue(FIELD_TEXT_REGION_COORDS);
|
310
|
Point[] points = Coords.parsePointString(pointsString);
|
311
|
textRegion.setRegionCoords(new Coords(points));
|
312
|
|
313
|
// filename
|
314
|
String documentName = ((String) solrDocument.getFieldValue(FIELD_DOC_FILENAME)).replaceAll(".xml", "");
|
315
|
textRegion.setDocumentName(documentName);
|
316
|
|
317
|
// text lines and text words
|
318
|
Map<String, List<String>> map = highlight.get(solrDocument.getFieldValue("id").toString());
|
319
|
Set<String> linesAndWordsNames = map.keySet();
|
320
|
|
321
|
Set<String> linesNames = linesAndWordsNames.stream().filter(name -> name.startsWith(PREFIX_TEXT_LINE)).collect(Collectors.toSet());
|
322
|
Set<String> wordNames = linesAndWordsNames.stream().filter(name -> name.startsWith(PREFIX_TEXT_WORD)).collect(Collectors.toSet());
|
323
|
|
324
|
Map<String, TextLine> textLines = new HashMap<>();
|
325
|
for (String lineName: linesNames) {
|
326
|
TextLine textLine = new TextLine();
|
327
|
|
328
|
textLine.setLineText((String) solrDocument.getFieldValue(lineName));
|
329
|
String coordsValue = (String) solrDocument.getFieldValue(lineName + SUFFIX_COORDS);
|
330
|
Point[] linePoints = Coords.parsePointString(coordsValue);
|
331
|
|
332
|
//-----
|
333
|
for (Point p: linePoints){
|
334
|
p.setX(p.getX() - textRegion.getRegionCoords().getPoints()[0].getX());
|
335
|
p.setY(p.getY() - textRegion.getRegionCoords().getPoints()[0].getY());
|
336
|
}
|
337
|
//-----
|
338
|
|
339
|
textLine.setLineCoords(new Coords(linePoints));
|
340
|
|
341
|
textLines.put(lineName, textLine);
|
342
|
}
|
343
|
|
344
|
Map<String, List<TextWord>> textWords = new HashMap<>();
|
345
|
for (String wordName: wordNames) {
|
346
|
TextWord textWord = new TextWord();
|
347
|
|
348
|
textWord.setWordText((String) solrDocument.getFieldValue(wordName));
|
349
|
String coordsValue = (String) solrDocument.getFieldValue(wordName + SUFFIX_COORDS);
|
350
|
Point[] wordPoints = Coords.parsePointString(coordsValue);
|
351
|
|
352
|
//-----
|
353
|
for (Point p: wordPoints){
|
354
|
p.setX(p.getX() - textRegion.getRegionCoords().getPoints()[0].getX());
|
355
|
p.setY(p.getY() - textRegion.getRegionCoords().getPoints()[0].getY());
|
356
|
}
|
357
|
//-----
|
358
|
|
359
|
textWord.setWordCoords(new Coords(wordPoints));
|
360
|
|
361
|
String lineName = wordName.substring(wordName.indexOf(PREFIX_TEXT_LINE));
|
362
|
|
363
|
List<TextWord> lineWords = textWords.get(lineName);
|
364
|
if (lineWords == null) {
|
365
|
lineWords = new LinkedList<>();
|
366
|
}
|
367
|
|
368
|
lineWords.add(textWord);
|
369
|
textWords.put(lineName, lineWords);
|
370
|
}
|
371
|
|
372
|
for (String lineName: textWords.keySet()) {
|
373
|
TextLine line = textLines.get(lineName);
|
374
|
line.setTextWords(textWords.get(lineName).toArray(new TextWord[0]));
|
375
|
}
|
376
|
|
377
|
textRegion.setTextLines(textLines.values().toArray(new TextLine[0]));
|
378
|
|
379
|
// cropping image
|
380
|
String imgFilename = (String) solrDocument.getFieldValue(FIELD_IMG_FILENAME);
|
381
|
byte[] imageBytes = getImageFromFilesCore(imgFilename);
|
382
|
|
383
|
BufferedImage img = ImageIO.read(new ByteArrayInputStream(Base64.getDecoder().decode(imageBytes)));
|
384
|
Image cropImg = new Image(img);
|
385
|
cropImg.crop(textRegion.getRegionCoords().getPoints()[0].getX(),textRegion.getRegionCoords().getPoints()[0].getY(),
|
386
|
textRegion.getRegionCoords().getPoints()[1].getX()-textRegion.getRegionCoords().getPoints()[0].getX(),
|
387
|
textRegion.getRegionCoords().getPoints()[2].getY()-textRegion.getRegionCoords().getPoints()[0].getY());
|
388
|
|
389
|
cropImg.setOutputQuality(IMG_OUTPUT_QUALITY);
|
390
|
|
391
|
imageBytes = cropImg.getByteArray();
|
392
|
imageBytes = Base64.getEncoder().encode(imageBytes);
|
393
|
|
394
|
// recalculating Coords - during init for words and lines
|
395
|
for (int k = 1; k<textRegion.getRegionCoords().getPoints().length; k++){
|
396
|
textRegion.getRegionCoords().getPoints()[k].setX(textRegion.getRegionCoords().getPoints()[k].getX() - textRegion.getRegionCoords().getPoints()[0].getX());
|
397
|
textRegion.getRegionCoords().getPoints()[k].setY(textRegion.getRegionCoords().getPoints()[k].getY() - textRegion.getRegionCoords().getPoints()[0].getY());
|
398
|
}
|
399
|
textRegion.getRegionCoords().getPoints()[0].setX(0);
|
400
|
textRegion.getRegionCoords().getPoints()[0].setY(0);
|
401
|
|
402
|
textRegion.setImageCut(new String(imageBytes));
|
403
|
|
404
|
// image size
|
405
|
BufferedImage image = ImageIO.read(new ByteArrayInputStream(Base64.getDecoder().decode(imageBytes)));
|
406
|
DocumentSize documentSize = new DocumentSize(image.getHeight(), image.getWidth());
|
407
|
textRegion.setDocumentSize(documentSize);
|
408
|
|
409
|
// generate random string id for response of given length
|
410
|
textRegion.setRandomId(Utils.generateRandomStringId(10));
|
411
|
|
412
|
textRegions.add(textRegion);
|
413
|
}
|
414
|
|
415
|
searchResponse.setTextRegions(textRegions.toArray(new TextRegion[0]));
|
416
|
LOG.debug("Processing query - sending Search response");
|
417
|
LOG.info("Query received and processed");
|
418
|
return searchResponse;
|
419
|
}
|
420
|
|
421
|
private void addToDocumentsCore(PcGts document, String imageFilename) throws IOException, SolrServerException {
|
422
|
LOG.trace("SolrService method addToDocumentsCore called");
|
423
|
LOG.debug("Adding to Documents core");
|
424
|
List<TextRegion> regions = document.getPage().getTextRegions();
|
425
|
if (regions == null || regions.size() == 0) return;
|
426
|
|
427
|
for (int i = 0; i < regions.size(); i++) {
|
428
|
TextRegion textRegion = regions.get(i);
|
429
|
SolrInputDocument solrInputDocument = new SolrInputDocument();
|
430
|
|
431
|
solrInputDocument.addField(FIELD_DOC_FILENAME, document.getFilename());
|
432
|
solrInputDocument.addField(FIELD_IMG_FILENAME, imageFilename);
|
433
|
solrInputDocument.addField(FIELD_TEXT_REGION, textRegion.getTextEquiv().getUnicode());
|
434
|
solrInputDocument.addField(FIELD_TEXT_REGION_COORDS, textRegion.getRegionCoords().getPointsString());
|
435
|
|
436
|
TextLine[] textLines = textRegion.getTextLines();
|
437
|
if (textLines == null || textLines.length == 0) continue;
|
438
|
for (int j = 0; j < textLines.length; j++) {
|
439
|
TextLine textLine = textLines[j];
|
440
|
String prefix = PREFIX_TEXT_LINE + j;
|
441
|
|
442
|
addFieldToDocumentCore(prefix);
|
443
|
solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode());
|
444
|
|
445
|
addFieldToDocumentCore(prefix + SUFFIX_COORDS);
|
446
|
solrInputDocument.addField(prefix + SUFFIX_COORDS, textLine.getLineCoords().getPointsString());
|
447
|
|
448
|
TextWord[] textWords = textLine.getTextWords();
|
449
|
if (textWords == null || textWords.length == 0) continue;
|
450
|
for (int k = 0; k < textWords.length; k++) {
|
451
|
TextWord textWord = textWords[k];
|
452
|
String wordPrefix = PREFIX_TEXT_WORD + k + "_" + prefix;
|
453
|
|
454
|
addFieldToDocumentCore(wordPrefix);
|
455
|
solrInputDocument.addField(wordPrefix, textWord.getTextEquiv().getUnicode());
|
456
|
|
457
|
addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS);
|
458
|
solrInputDocument.addField(wordPrefix + SUFFIX_COORDS, textWord.getWordCoords().getPointsString());
|
459
|
}
|
460
|
}
|
461
|
solrClientDocuments.add(solrInputDocument);
|
462
|
}
|
463
|
}
|
464
|
|
465
|
public String info() throws IOException, SolrServerException {
|
466
|
LOG.trace("SolrService method info called");
|
467
|
LOG.debug("Displaying information about files in Solr");
|
468
|
SolrQuery q = new SolrQuery("*:*");
|
469
|
q.setRows(0); // don't actually request any data
|
470
|
return "Number of documents in " + CORE_NAME_DOCUMENTS + " core: " + solrClientDocuments.query(q).getResults().getNumFound() +
|
471
|
"\nNumber of documents in " + CORE_NAME_FILES + " core: " + solrClientFiles.query(q).getResults().getNumFound();
|
472
|
}
|
473
|
|
474
|
public List<String> listAllFiles() throws IOException, SolrServerException {
|
475
|
LOG.trace("SolrService method listAllFiles called");
|
476
|
LOG.debug("Displaying ALL files in Solr");
|
477
|
SolrQuery solrQuery = new SolrQuery();
|
478
|
solrQuery.set("q", FIELD_DOC_FILENAME + ":*");
|
479
|
solrQuery.setRows(500); // todo not a magic number!
|
480
|
SolrDocumentList docList = solrClientFiles.query(solrQuery).getResults();
|
481
|
return docList.stream().map(doc -> doc.getFieldValue(FIELD_DOC_FILENAME).toString().replaceAll(".xml", "")).collect(Collectors.toList());
|
482
|
}
|
483
|
|
484
|
public String listSingleFile(String documentName) throws IOException, SolrServerException {
|
485
|
LOG.trace("SolrService method listSingleFile called");
|
486
|
LOG.debug("Displaying single file in Solr");
|
487
|
SolrQuery solrQuery = new SolrQuery();
|
488
|
solrQuery.set("q", FIELD_DOC_FILENAME + ":*");
|
489
|
//solrQuery.set("q", FIELD_DOC_FILENAME + ":" + documentName + ".xml");
|
490
|
solrQuery.setRows(500); // todo not a magic number!
|
491
|
SolrDocumentList docList = solrClientFiles.query(solrQuery).getResults();
|
492
|
String ret = "";
|
493
|
|
494
|
for (SolrDocument solrDocument : docList){
|
495
|
if (solrDocument.getFieldValue(FIELD_DOC_FILENAME).toString().equals(documentName + ".xml")){
|
496
|
String imgFilename = (String) solrDocument.getFieldValue(FIELD_IMG_FILENAME);
|
497
|
byte[] imageBytes = getImageFromFilesCore(imgFilename);
|
498
|
|
499
|
BufferedImage img = ImageIO.read(new ByteArrayInputStream(Base64.getDecoder().decode(imageBytes)));
|
500
|
Image cropImg = new Image(img);
|
501
|
cropImg.setOutputQuality(IMG_OUTPUT_QUALITY);
|
502
|
|
503
|
imageBytes = cropImg.getByteArray();
|
504
|
imageBytes = Base64.getEncoder().encode(imageBytes);
|
505
|
ret = new String(imageBytes);
|
506
|
break;
|
507
|
}
|
508
|
}
|
509
|
if (ret.equals("")){
|
510
|
LOG.error("File was NOT found in Solr");
|
511
|
} else{
|
512
|
LOG.debug("File was found in Solr");
|
513
|
}
|
514
|
return ret;
|
515
|
}
|
516
|
|
517
|
}
|