12 |
12 |
import org.apache.solr.common.SolrDocument;
|
13 |
13 |
import org.apache.solr.common.SolrDocumentList;
|
14 |
14 |
import org.apache.solr.common.SolrInputDocument;
|
|
15 |
import org.slf4j.Logger;
|
|
16 |
import org.slf4j.LoggerFactory;
|
15 |
17 |
import org.springframework.http.MediaType;
|
16 |
18 |
import org.springframework.web.multipart.MultipartFile;
|
17 |
19 |
|
... | ... | |
45 |
47 |
private static final String FIELD_DOC_CONTENT = "doc_content";
|
46 |
48 |
private static final String FIELD_IMG_CONTENT = "img_content";
|
47 |
49 |
private static final float IMG_OUTPUT_QUALITY = 0.1f;
|
|
50 |
private static final Logger LOG = LoggerFactory.getLogger(IndexController.class);
|
48 |
51 |
|
49 |
52 |
private static final int SEARCH_QUERY_ROWS = 100;
|
50 |
53 |
|
... | ... | |
57 |
60 |
}
|
58 |
61 |
|
59 |
62 |
private void connect() {
|
|
63 |
LOG.info("Connecting to Solr");
|
60 |
64 |
String urlString = URL + ":8983/solr/" + CORE_NAME_DOCUMENTS;
|
61 |
65 |
solrClientDocuments = new HttpSolrClient.Builder(urlString).build();
|
62 |
66 |
|
... | ... | |
65 |
69 |
}
|
66 |
70 |
|
67 |
71 |
public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException {
|
|
72 |
LOG.trace("SolrService method uploadFiles called");
|
68 |
73 |
Map<String, List<MultipartFile>> pairs = new HashMap<>();
|
69 |
74 |
|
|
75 |
LOG.debug("Preprocessing files for upload to Solr");
|
70 |
76 |
for (MultipartFile file: files) {
|
71 |
77 |
String filename = file.getOriginalFilename();
|
72 |
78 |
if (filename == null) {
|
73 |
|
System.out.println("Skipping file with empty filename.");
|
|
79 |
LOG.error("Skipping file with empty filename");
|
74 |
80 |
continue;
|
75 |
81 |
}
|
76 |
82 |
|
77 |
83 |
String type = file.getContentType();
|
78 |
84 |
if (type == null) {
|
79 |
|
System.out.println("Skipping file " + filename + " (unknown type).");
|
|
85 |
LOG.error("Skipping file " + filename + " (unknown type)");
|
80 |
86 |
continue;
|
81 |
87 |
}
|
82 |
88 |
|
... | ... | |
97 |
103 |
} else {
|
98 |
104 |
list.add(file);
|
99 |
105 |
}
|
100 |
|
|
|
106 |
LOG.debug("File " + filename + " added");
|
101 |
107 |
} else {
|
102 |
|
System.out.println("Not supported type of file! File " + filename + " is type " + type + ".");
|
|
108 |
LOG.error("Not supported type of file! File " + filename + " is type " + type);
|
103 |
109 |
}
|
104 |
110 |
}
|
105 |
|
|
106 |
111 |
return addFiles(pairs);
|
107 |
112 |
}
|
108 |
113 |
|
109 |
114 |
private int addFiles(Map<String, List<MultipartFile>> pairs) throws IOException, SolrServerException, JAXBException {
|
|
115 |
LOG.trace("SolrService method addFiles called");
|
|
116 |
LOG.debug("Uploading files to Solr");
|
110 |
117 |
getFieldNamesFromDocumentCore();
|
111 |
118 |
addBasicFieldsToDocumentsCore();
|
112 |
119 |
|
... | ... | |
124 |
131 |
} else {
|
125 |
132 |
addToFilesCore(pair.get(1), first);
|
126 |
133 |
}
|
127 |
|
|
128 |
134 |
count += 2;
|
129 |
135 |
}
|
130 |
136 |
|
|
137 |
LOG.debug("Commiting Solr Documents and Files core changes");
|
131 |
138 |
solrClientFiles.commit();
|
132 |
139 |
solrClientDocuments.commit();
|
133 |
140 |
|
|
141 |
LOG.info("Files uploaded");
|
134 |
142 |
return count;
|
135 |
143 |
}
|
136 |
144 |
|
137 |
145 |
private void addToFilesCore(MultipartFile xmlFile, MultipartFile imageFile) throws IOException, SolrServerException, JAXBException {
|
|
146 |
LOG.trace("SolrService method addToFilesCore called");
|
138 |
147 |
SolrInputDocument solrInputDocument = new SolrInputDocument();
|
139 |
148 |
|
|
149 |
LOG.debug("Adding a file pair to File core");
|
140 |
150 |
solrInputDocument.addField(FIELD_DOC_FILENAME, xmlFile.getOriginalFilename());
|
141 |
151 |
solrInputDocument.addField(FIELD_IMG_FILENAME, imageFile.getOriginalFilename());
|
142 |
152 |
solrInputDocument.addField(FIELD_DOC_CONTENT, xmlFile.getBytes());
|
... | ... | |
144 |
154 |
|
145 |
155 |
solrClientFiles.add(solrInputDocument);
|
146 |
156 |
|
|
157 |
LOG.debug("Loading XML file");
|
147 |
158 |
XMLLoader xmlLoader = new XMLLoader();
|
148 |
159 |
PcGts doc = xmlLoader.loadFile(xmlFile.getInputStream(), xmlFile.getOriginalFilename());
|
149 |
160 |
addToDocumentsCore(doc, imageFile.getOriginalFilename());
|
150 |
161 |
}
|
151 |
162 |
|
152 |
163 |
private void addAllFieldsToFilesCore() throws IOException, SolrServerException {
|
|
164 |
LOG.trace("SolrService method addAllFieldsToFilesCore called");
|
153 |
165 |
List<String> fieldNamesFilesCore = new ArrayList<>();
|
154 |
166 |
|
155 |
167 |
SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields();
|
... | ... | |
163 |
175 |
String[] filenames = new String[]{ FIELD_DOC_FILENAME, FIELD_IMG_FILENAME };
|
164 |
176 |
String[] contents = new String[]{ FIELD_DOC_CONTENT, FIELD_IMG_CONTENT };
|
165 |
177 |
|
|
178 |
LOG.debug("Adding all fields to File core");
|
166 |
179 |
for (String name: filenames) {
|
167 |
180 |
if (fieldNamesFilesCore.contains(name)) continue;
|
168 |
181 |
|
... | ... | |
170 |
183 |
SchemaRequest.Update request = getAddTextFieldRequest(name);
|
171 |
184 |
request.process(solrClientFiles);
|
172 |
185 |
} catch (Exception e) {
|
173 |
|
System.out.println("Couldn't add field = " + name);
|
174 |
|
e.printStackTrace();
|
|
186 |
LOG.error("Couldn't add field = " + name);
|
|
187 |
LOG.error(e.toString());
|
175 |
188 |
}
|
176 |
189 |
}
|
177 |
190 |
|
... | ... | |
188 |
201 |
SchemaRequest.Update request = new SchemaRequest.AddField(fieldAttributes);
|
189 |
202 |
request.process(solrClientFiles);
|
190 |
203 |
} catch (Exception e) {
|
191 |
|
System.out.println("Couldn't add field = " + name);
|
192 |
|
e.printStackTrace();
|
|
204 |
LOG.error("Couldn't add field = " + name);
|
|
205 |
LOG.error(e.toString());
|
193 |
206 |
}
|
194 |
207 |
}
|
195 |
208 |
}
|
196 |
209 |
|
197 |
210 |
private void addBasicFieldsToDocumentsCore() {
|
|
211 |
LOG.trace("SolrService method addBasicFieldsToDocumentsCore called");
|
|
212 |
LOG.debug("Adding basic fields to Documents core");
|
198 |
213 |
addFieldToDocumentCore(FIELD_DOC_FILENAME);
|
199 |
214 |
addFieldToDocumentCore(FIELD_IMG_FILENAME);
|
200 |
215 |
addFieldToDocumentCore(FIELD_TEXT_REGION);
|
... | ... | |
202 |
217 |
}
|
203 |
218 |
|
204 |
219 |
private void getFieldNamesFromDocumentCore() throws IOException, SolrServerException {
|
|
220 |
LOG.trace("SolrService method getFieldNamesFromDocumentCore called");
|
|
221 |
LOG.debug("Getting field names from Documents core");
|
205 |
222 |
SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields();
|
206 |
223 |
SchemaResponse.FieldsResponse fieldsResponse = fieldRequest.process(solrClientDocuments);
|
207 |
224 |
List<Map<String, Object>> fields = fieldsResponse.getFields();
|
... | ... | |
213 |
230 |
|
214 |
231 |
|
215 |
232 |
private void addFieldToDocumentCore(String name) {
|
216 |
|
if (fieldNamesDocumentCore.contains(name)) return;
|
|
233 |
LOG.trace("SolrService method addFieldToDocumentCore called");
|
217 |
234 |
|
|
235 |
if (fieldNamesDocumentCore.contains(name)) return;
|
|
236 |
LOG.debug("Adding field to Documents core");
|
218 |
237 |
try {
|
219 |
238 |
SchemaRequest.Update request = getAddTextFieldRequest(name);
|
220 |
239 |
request.process(solrClientDocuments);
|
221 |
240 |
fieldNamesDocumentCore.add(name);
|
222 |
241 |
} catch (Exception e) {
|
223 |
|
System.out.println("Couldn't add field = " + name);
|
224 |
|
e.printStackTrace();
|
|
242 |
LOG.error("Couldn't add field = " + name);
|
|
243 |
LOG.error(e.toString());
|
225 |
244 |
}
|
226 |
245 |
}
|
227 |
246 |
|
228 |
247 |
private SchemaRequest.Update getAddTextFieldRequest(String name) {
|
|
248 |
LOG.trace("SolrService method getAddTextFieldRequest called");
|
229 |
249 |
Map<String, Object> fieldAttributes = new LinkedHashMap<>();
|
230 |
250 |
fieldAttributes.put("name", name);
|
231 |
251 |
fieldAttributes.put("type", "text_general");
|
... | ... | |
235 |
255 |
}
|
236 |
256 |
|
237 |
257 |
public void deleteAll() throws IOException, SolrServerException {
|
|
258 |
LOG.trace("SolrService method getAddTextFieldRequest called");
|
|
259 |
LOG.info("Deleting ALL from Documents core");
|
238 |
260 |
solrClientDocuments.deleteByQuery("*:*");
|
239 |
261 |
solrClientDocuments.commit();
|
240 |
262 |
|
|
263 |
LOG.info("Deleting ALL from Files core");
|
241 |
264 |
solrClientFiles.deleteByQuery("*:*");
|
242 |
265 |
solrClientFiles.commit();
|
243 |
266 |
}
|
244 |
267 |
|
245 |
268 |
public byte[] getImageFromFilesCore(String filename) throws IOException, SolrServerException {
|
|
269 |
LOG.trace("SolrService method getImageFromFilesCore called");
|
|
270 |
LOG.debug("Getting an image from Files core");
|
246 |
271 |
SolrQuery solrQuery = new SolrQuery();
|
247 |
272 |
solrQuery.set("q", FIELD_IMG_FILENAME + ":" + "\"" + filename + "\"");
|
248 |
273 |
SolrDocumentList results = solrClientFiles.query(solrQuery).getResults();
|
249 |
274 |
if (results.getNumFound() < 1) {
|
250 |
|
System.out.println("Image " + filename + " not found!");
|
|
275 |
LOG.error("Image " + filename + " not found!");
|
251 |
276 |
} else if (results.getNumFound() > 1) {
|
252 |
|
System.out.println("Image " + filename + " has multiple files. Using first one...");
|
|
277 |
LOG.error("Image " + filename + " has multiple files. Using first one...");
|
253 |
278 |
}
|
254 |
279 |
|
255 |
280 |
SolrDocument result = results.get(0);
|
... | ... | |
257 |
282 |
}
|
258 |
283 |
|
259 |
284 |
public SearchResponse query(String query) throws IOException, SolrServerException {
|
|
285 |
LOG.trace("SolrService method query called");
|
|
286 |
LOG.debug("Processing query - creating Solr query");
|
260 |
287 |
SolrQuery solrQuery = new SolrQuery();
|
261 |
288 |
solrQuery.set("q", FIELD_TEXT_REGION + ":" + query);
|
262 |
289 |
solrQuery.setHighlight(true);
|
263 |
290 |
solrQuery.addHighlightField(PREFIX_TEXT_LINE + "*");
|
264 |
291 |
solrQuery.addHighlightField(PREFIX_TEXT_WORD + "*");
|
265 |
292 |
solrQuery.setRows(SEARCH_QUERY_ROWS);
|
|
293 |
LOG.debug("Processing query - sending Solr query");
|
266 |
294 |
QueryResponse response = solrClientDocuments.query(solrQuery);
|
267 |
295 |
|
268 |
296 |
SolrDocumentList docList = response.getResults();
|
... | ... | |
271 |
299 |
SearchResponse searchResponse = new SearchResponse();
|
272 |
300 |
searchResponse.setExpression(query);
|
273 |
301 |
|
|
302 |
LOG.debug("Processing query - creating Search response");
|
274 |
303 |
List<TextRegion> textRegions = new LinkedList<>();
|
275 |
304 |
for (SolrDocument solrDocument: docList) {
|
276 |
305 |
TextRegion textRegion = new TextRegion();
|
... | ... | |
384 |
413 |
}
|
385 |
414 |
|
386 |
415 |
searchResponse.setTextRegions(textRegions.toArray(new TextRegion[0]));
|
387 |
|
|
|
416 |
LOG.debug("Processing query - sending Search response");
|
|
417 |
LOG.info("Query received and processed");
|
388 |
418 |
return searchResponse;
|
389 |
419 |
}
|
390 |
420 |
|
391 |
421 |
private void addToDocumentsCore(PcGts document, String imageFilename) throws IOException, SolrServerException {
|
|
422 |
LOG.trace("SolrService method addToDocumentsCore called");
|
|
423 |
LOG.debug("Adding to Documents core");
|
392 |
424 |
List<TextRegion> regions = document.getPage().getTextRegions();
|
393 |
425 |
if (regions == null || regions.size() == 0) return;
|
394 |
426 |
|
... | ... | |
426 |
458 |
solrInputDocument.addField(wordPrefix + SUFFIX_COORDS, textWord.getWordCoords().getPointsString());
|
427 |
459 |
}
|
428 |
460 |
}
|
429 |
|
|
430 |
461 |
solrClientDocuments.add(solrInputDocument);
|
431 |
462 |
}
|
432 |
463 |
}
|
433 |
464 |
|
434 |
465 |
public String info() throws IOException, SolrServerException {
|
|
466 |
LOG.trace("SolrService method info called");
|
|
467 |
LOG.debug("Displaying information about files in Solr");
|
435 |
468 |
SolrQuery q = new SolrQuery("*:*");
|
436 |
469 |
q.setRows(0); // don't actually request any data
|
437 |
470 |
return "Number of documents in " + CORE_NAME_DOCUMENTS + " core: " + solrClientDocuments.query(q).getResults().getNumFound() +
|
... | ... | |
439 |
472 |
}
|
440 |
473 |
|
441 |
474 |
public List<String> listAllFiles() throws IOException, SolrServerException {
|
|
475 |
LOG.trace("SolrService method listAllFiles called");
|
|
476 |
LOG.debug("Displaying ALL files in Solr");
|
442 |
477 |
SolrQuery solrQuery = new SolrQuery();
|
443 |
478 |
solrQuery.set("q", FIELD_DOC_FILENAME + ":*");
|
444 |
479 |
solrQuery.setRows(500); // todo not a magic number!
|
Re #7813: Logování BE
- vytvořeno logování BE