Revize 1e6d072b
Přidáno uživatelem Jitka Poubová před téměř 5 roky(ů)
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/FulltextSearchApplication.java | ||
---|---|---|
11 | 11 |
private static final Logger LOG = LoggerFactory.getLogger(FulltextSearchApplication.class); |
12 | 12 |
|
13 | 13 |
public static void main(String[] args) { |
14 |
|
|
15 | 14 |
LOG.info("Starting Spring BE"); |
16 | 15 |
SpringApplication.run(FulltextSearchApplication.class, args); |
17 | 16 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/IndexController.java | ||
---|---|---|
1 | 1 |
package cz.zcu.kiv.aswi.fulltextsearch; |
2 | 2 |
|
3 | 3 |
import cz.zcu.kiv.aswi.fulltextsearch.model.*; |
4 |
import org.apache.solr.client.solrj.SolrServerException; |
|
4 | 5 |
import org.springframework.http.HttpStatus; |
5 | 6 |
import org.springframework.http.ResponseEntity; |
6 | 7 |
import org.springframework.web.bind.annotation.*; |
... | ... | |
8 | 9 |
import org.slf4j.Logger; |
9 | 10 |
import org.slf4j.LoggerFactory; |
10 | 11 |
|
12 |
import javax.xml.bind.JAXBException; |
|
13 |
import javax.xml.stream.XMLStreamException; |
|
14 |
import java.io.IOException; |
|
11 | 15 |
import java.util.ArrayList; |
12 | 16 |
import java.util.Date; |
13 | 17 |
import java.util.List; |
... | ... | |
17 | 21 |
public class IndexController { |
18 | 22 |
|
19 | 23 |
private static final Logger LOG = LoggerFactory.getLogger(IndexController.class); |
24 |
|
|
20 | 25 |
private SolrService solrService = new SolrService(); |
21 | 26 |
|
22 | 27 |
@PostMapping("/") |
23 |
public SearchResponse index(@RequestBody SearchRequest searchRequest) { |
|
24 |
LOG.trace("Method index called"); |
|
28 |
public SearchResponse search(@RequestBody SearchRequest searchRequest) { |
|
25 | 29 |
try { |
26 |
LOG.debug("Calling SolrService and its Query method"); |
|
27 |
return solrService.query(searchRequest); |
|
30 |
LOG.trace("Method search called"); |
|
31 |
|
|
32 |
LOG.debug("Calling SolrService and its search method"); |
|
33 |
return solrService.search(searchRequest); |
|
34 |
} catch (SolrServerException e) { |
|
35 |
LOG.error("SolrServerException: " + e.toString()); |
|
36 |
} catch (IOException e) { |
|
37 |
LOG.error("IOException: " + e.toString()); |
|
28 | 38 |
} catch (Exception e) { |
29 |
LOG.error("Cannot get method Query from SolrService returning NULL"); |
|
30 |
LOG.error(e.toString()); |
|
31 |
return null; |
|
39 |
LOG.error("Unknown exception: " + e.toString()); |
|
32 | 40 |
} |
41 |
|
|
42 |
LOG.error("Cannot get method search from SolrService returning NULL"); |
|
43 |
return null; |
|
33 | 44 |
} |
34 | 45 |
|
35 | 46 |
@PostMapping("/upload") |
... | ... | |
41 | 52 |
int uploaded = solrService.uploadFiles(files); |
42 | 53 |
message = "Successfully uploaded " + uploaded + " files"; |
43 | 54 |
return ResponseEntity.status(HttpStatus.OK).body(new ResponseMessage(message)); |
55 |
} catch (IOException e) { |
|
56 |
LOG.error("IOException: " + e.toString()); |
|
57 |
message = "Could not upload the files! " + e.toString(); |
|
58 |
} catch (SolrServerException e) { |
|
59 |
LOG.error("SolrServerException: " + e.toString()); |
|
60 |
message = "Could not upload the files! " + e.toString(); |
|
61 |
} catch (XMLStreamException e) { |
|
62 |
LOG.error("XMLStreamException: " + e.toString()); |
|
63 |
message = "Could not upload the files! " + e.toString(); |
|
64 |
} catch (JAXBException e) { |
|
65 |
LOG.error("JAXBException: " + e.toString()); |
|
66 |
message = "Could not upload the files! " + e.toString(); |
|
44 | 67 |
} catch (Exception e) { |
45 |
LOG.error("Cannot get method uploadFiles from SolrService"); |
|
46 |
LOG.error(e.toString()); |
|
68 |
LOG.error("Unknown exception: " + e.toString()); |
|
47 | 69 |
message = "Could not upload the files! " + e.toString(); |
48 |
return ResponseEntity.status(HttpStatus.EXPECTATION_FAILED).body(new ResponseMessage(message)); |
|
49 | 70 |
} |
71 |
|
|
72 |
LOG.error("Cannot get method uploadFiles from SolrService"); |
|
73 |
return ResponseEntity.status(HttpStatus.EXPECTATION_FAILED).body(new ResponseMessage(message)); |
|
50 | 74 |
} |
51 | 75 |
|
52 | 76 |
@GetMapping("/documents") |
... | ... | |
57 | 81 |
try { |
58 | 82 |
LOG.debug("Calling SolrService and its listAllFiles method"); |
59 | 83 |
List<String> filenames = solrService.listAllFiles(); |
60 |
int id = 0; // todo
|
|
84 |
int id = 0; |
|
61 | 85 |
for (String filename: filenames) { |
62 |
response.add(new DocumentResponse(filename, id++, new Date(), new Date()));
|
|
86 |
response.add(new DocumentResponse(filename, id++, new Date())); |
|
63 | 87 |
} |
88 |
|
|
89 |
return response; |
|
90 |
} catch (SolrServerException e) { |
|
91 |
LOG.error("SolrServerException: " + e.toString()); |
|
92 |
} catch (IOException e) { |
|
93 |
LOG.error("IOException: " + e.toString()); |
|
64 | 94 |
} catch (Exception e) { |
65 |
LOG.error("Cannot get method listAllFiles from SolrService"); |
|
66 |
LOG.error(e.toString()); |
|
95 |
LOG.error("Unknown exception: " + e.toString()); |
|
67 | 96 |
} |
97 |
|
|
98 |
LOG.error("Cannot get method listAllFiles from SolrService"); |
|
68 | 99 |
return response; |
69 | 100 |
} |
70 | 101 |
|
... | ... | |
76 | 107 |
try { |
77 | 108 |
LOG.debug("Calling SolrService and its listSingleFile method"); |
78 | 109 |
ret = solrService.listSingleFile(documentName); |
110 |
return new DocumentDetailResponse(ret); |
|
111 |
} catch (SolrServerException e) { |
|
112 |
LOG.error("SolrServerException: " + e.toString()); |
|
113 |
} catch (IOException e) { |
|
114 |
LOG.error("IOException: " + e.toString()); |
|
79 | 115 |
} catch (Exception e) { |
80 |
LOG.error("Cannot get method listSingleFile from SolrService"); |
|
81 |
LOG.error(e.toString()); |
|
116 |
LOG.error("Unknown exception: " + e.toString()); |
|
82 | 117 |
} |
83 |
return new DocumentDetailResponse(ret); |
|
84 |
} |
|
85 | 118 |
|
86 |
@GetMapping("/delete") |
|
87 |
public String delete() { |
|
88 |
LOG.trace("Method delete called"); |
|
89 |
String response; |
|
90 |
|
|
91 |
try { |
|
92 |
LOG.debug("Calling SolrService and its deleteAll method"); |
|
93 |
solrService.deleteAll(); |
|
94 |
response = "All documents have been deleted."; |
|
95 |
} catch (Exception e) { |
|
96 |
LOG.error("Cannot get method deleteAll from SolrService"); |
|
97 |
LOG.error(e.toString()); |
|
98 |
response = "Unknown exception"; |
|
99 |
} |
|
100 |
return response; |
|
119 |
return new DocumentDetailResponse(ret); |
|
101 | 120 |
} |
102 | 121 |
|
103 |
@GetMapping("/info") |
|
104 |
public String info() { |
|
105 |
LOG.trace("Method info called"); |
|
106 |
String response; |
|
107 |
|
|
108 |
try { |
|
109 |
LOG.debug("Calling SolrService and its info method"); |
|
110 |
response = solrService.info(); |
|
111 |
} catch (Exception e) { |
|
112 |
LOG.error("Cannot get method info from SolrService"); |
|
113 |
LOG.error(e.toString()); |
|
114 |
response = "Unknown exception"; |
|
115 |
} |
|
116 |
return response; |
|
117 |
} |
|
118 | 122 |
} |
119 | 123 |
|
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/SolrService.java | ||
---|---|---|
22 | 22 |
|
23 | 23 |
import javax.imageio.ImageIO; |
24 | 24 |
import javax.xml.bind.JAXBException; |
25 |
import javax.xml.stream.XMLStreamException; |
|
25 | 26 |
import java.awt.image.BufferedImage; |
26 | 27 |
import java.io.ByteArrayInputStream; |
27 | 28 |
import java.io.IOException; |
... | ... | |
30 | 31 |
|
31 | 32 |
public class SolrService { |
32 | 33 |
|
33 |
// for docker |
|
34 |
private static final String URL = "http://fulltextsearch-solr"; |
|
34 |
private static final Logger LOG = LoggerFactory.getLogger(SolrService.class); |
|
35 | 35 |
|
36 |
// for localhost |
|
37 |
//private static final String URL = "http://localhost"; |
|
36 |
private static final String URL = "http://fulltextsearch-solr"; |
|
38 | 37 |
|
39 | 38 |
private static final String CORE_NAME_DOCUMENTS = "documents"; |
40 | 39 |
private static final String CORE_NAME_FILES = "files"; |
... | ... | |
49 | 48 |
private static final String FIELD_TEXT_REGION_COORDS = "text_region" + SUFFIX_COORDS; |
50 | 49 |
private static final String FIELD_DOC_CONTENT = "doc_content"; |
51 | 50 |
private static final String FIELD_IMG_CONTENT = "img_content"; |
52 |
private static final float IMG_OUTPUT_QUALITY = 0.5f; |
|
53 |
private static final Logger LOG = LoggerFactory.getLogger(SolrService.class); |
|
51 |
private static final String FIELD_UPLOADED_DATE = "uploadedDate"; |
|
54 | 52 |
|
55 |
private static final int SEARCH_QUERY_ROWS = 100;
|
|
53 |
private static final float IMG_OUTPUT_QUALITY = 0.5f;
|
|
56 | 54 |
|
57 | 55 |
private static final int DEFAULT_LINE_FIELD_COUNT = 150; |
58 | 56 |
private static final int DEFAULT_WORD_FIELD_COUNT = 30; |
... | ... | |
63 | 61 |
private HttpSolrClient solrClientFiles; |
64 | 62 |
private List<String> fieldNamesDocumentCore = new ArrayList<>(); |
65 | 63 |
|
64 |
/** |
|
65 |
* Creates SolrService - connects to Solr. |
|
66 |
*/ |
|
66 | 67 |
public SolrService() { |
67 | 68 |
connect(); |
68 | 69 |
} |
69 | 70 |
|
71 |
/** |
|
72 |
* Connects to Solr - to both cores documents and files. |
|
73 |
* If such cores do not exist, it creates them. |
|
74 |
*/ |
|
70 | 75 |
private void connect() { |
71 |
LOG.info("Waiting for solr => 10 seconds");
|
|
76 |
LOG.info("Waiting for Solr => 10 seconds");
|
|
72 | 77 |
try { |
73 | 78 |
Thread.sleep(10000); |
74 | 79 |
} catch (InterruptedException e) { |
75 |
LOG.error("Cannot sleep thread", e); |
|
80 |
LOG.error("Cannot sleep thread, not waiting for Solr!", e);
|
|
76 | 81 |
} |
77 | 82 |
|
78 | 83 |
LOG.info("Connecting to Solr..."); |
79 | 84 |
String urlString = URL + ":8983/solr/"; |
80 | 85 |
|
81 | 86 |
try { |
82 |
// documents |
|
87 |
// core documents
|
|
83 | 88 |
solrClientDocuments = new HttpSolrClient.Builder(urlString).build(); |
84 |
if (CoreAdminRequest.getStatus(CORE_NAME_DOCUMENTS, solrClientDocuments).getCoreStatus(CORE_NAME_DOCUMENTS).size() < 1) { |
|
85 |
CoreAdminRequest.createCore(CORE_NAME_DOCUMENTS, CORE_NAME_DOCUMENTS, solrClientDocuments, "conf/solrconfig.xml", "documents-schema.xml"); |
|
86 |
LOG.info("Core " + CORE_NAME_DOCUMENTS + " has been created."); |
|
87 |
} else { |
|
88 |
LOG.info("Core " + CORE_NAME_DOCUMENTS + " already exists."); |
|
89 |
} |
|
89 |
createCore(solrClientDocuments, "documents-schema.xml"); |
|
90 | 90 |
solrClientDocuments.setBaseURL(solrClientDocuments.getBaseURL() + "/" + CORE_NAME_DOCUMENTS); |
91 | 91 |
|
92 |
// files |
|
92 |
// core files
|
|
93 | 93 |
solrClientFiles = new HttpSolrClient.Builder(urlString).build(); |
94 |
if (CoreAdminRequest.getStatus(CORE_NAME_FILES, solrClientFiles).getCoreStatus(CORE_NAME_FILES).size() < 1) { |
|
95 |
CoreAdminRequest.createCore(CORE_NAME_FILES, CORE_NAME_FILES, solrClientFiles, "conf/solrconfig.xml", "files-schema.xml"); |
|
96 |
LOG.info("Core " + CORE_NAME_FILES + " has been created."); |
|
97 |
} else { |
|
98 |
LOG.info("Core " + CORE_NAME_FILES + " already exists."); |
|
99 |
} |
|
94 |
createCore(solrClientFiles, "files-schema.xml"); |
|
100 | 95 |
solrClientFiles.setBaseURL(solrClientFiles.getBaseURL() + "/" + CORE_NAME_FILES); |
101 | 96 |
|
102 | 97 |
LOG.info("Connected to Solr."); |
... | ... | |
105 | 100 |
} |
106 | 101 |
} |
107 | 102 |
|
108 |
public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException { |
|
109 |
LOG.trace("SolrService method uploadFiles called"); |
|
110 |
Map<String, List<MultipartFile>> pairs = new HashMap<>(); |
|
111 |
|
|
112 |
LOG.debug("Preprocessing files for upload to Solr"); |
|
113 |
for (MultipartFile file: files) { |
|
114 |
String filename = file.getOriginalFilename(); |
|
115 |
if (filename == null) { |
|
116 |
LOG.error("Skipping file with empty filename"); |
|
117 |
continue; |
|
118 |
} |
|
119 |
|
|
120 |
String type = file.getContentType(); |
|
121 |
if (type == null) { |
|
122 |
LOG.error("Skipping file " + filename + " (unknown type)"); |
|
123 |
continue; |
|
124 |
} |
|
125 |
|
|
126 |
int index = filename.lastIndexOf('.'); |
|
127 |
String filenameWithoutExtension = filename; |
|
128 |
if (index >= 0) { |
|
129 |
filenameWithoutExtension = filename.substring(0, index); |
|
130 |
} |
|
103 |
/** |
|
104 |
* Creates the core corresponding to the given client only if this core doesn't already exist. |
|
105 |
* |
|
106 |
* @param client client |
|
107 |
* @param schemaFilename schema filename |
|
108 |
* @throws IOException if IO problem occurs |
|
109 |
* @throws SolrServerException if SolrServer problem occurs |
|
110 |
*/ |
|
111 |
private void createCore(HttpSolrClient client, String schemaFilename) throws IOException, SolrServerException { |
|
112 |
String coreName = (client == solrClientDocuments) ? CORE_NAME_DOCUMENTS : CORE_NAME_FILES; |
|
113 |
|
|
114 |
if (CoreAdminRequest |
|
115 |
.getStatus(coreName, client) |
|
116 |
.getCoreStatus(coreName) |
|
117 |
.size() < 1) { |
|
118 |
CoreAdminRequest.createCore(coreName, coreName, client, "conf/solrconfig.xml", schemaFilename); |
|
119 |
LOG.info("Core " + coreName + " has been created."); |
|
120 |
} else { |
|
121 |
LOG.info("Core " + coreName + " already exists."); |
|
122 |
} |
|
123 |
} |
|
131 | 124 |
|
132 |
if (type.equals(MediaType.IMAGE_JPEG_VALUE) || type.equals(MediaType.IMAGE_PNG_VALUE) |
|
133 |
|| type.equals(MediaType.APPLICATION_XML_VALUE) || type.equals(MediaType.TEXT_XML_VALUE)) { |
|
125 |
/** |
|
126 |
* Uploads files to both Solr cores. |
|
127 |
* |
|
128 |
* @param files files to upload |
|
129 |
* @return number of successfully uploaded files |
|
130 |
* @throws IOException if IO problem occurs |
|
131 |
* @throws SolrServerException if SolrServer problem occurs |
|
132 |
* @throws JAXBException if JAXB problem occurs |
|
133 |
* @throws XMLStreamException if XMLStream problem occurs |
|
134 |
*/ |
|
135 |
public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException, XMLStreamException { |
|
136 |
LOG.trace("SolrService method uploadFiles called"); |
|
134 | 137 |
|
135 |
List<MultipartFile> list = pairs.get(filenameWithoutExtension); |
|
136 |
if (list == null) { |
|
137 |
list = new LinkedList<>(); |
|
138 |
list.add(file); |
|
139 |
pairs.put(filenameWithoutExtension, list); |
|
140 |
} else { |
|
141 |
list.add(file); |
|
142 |
} |
|
143 |
LOG.debug("File " + filename + " added"); |
|
144 |
} else { |
|
145 |
LOG.error("Not supported type of file! File " + filename + " is type " + type); |
|
146 |
} |
|
147 |
} |
|
138 |
Map<String, List<MultipartFile>> pairs = Utils.prepareFiles(files); |
|
148 | 139 |
return addFiles(pairs); |
149 | 140 |
} |
150 | 141 |
|
151 |
private int addFiles(Map<String, List<MultipartFile>> pairs) throws IOException, SolrServerException, JAXBException { |
|
142 |
/** |
|
143 |
* Adds prepared pairs of files to both Solr cores. |
|
144 |
* |
|
145 |
* @param pairs pairs of files (image file as PNG or JPEG + XML file) |
|
146 |
* @return number of successfully uploaded files |
|
147 |
* @throws IOException if IO problem occurs |
|
148 |
* @throws SolrServerException if SolrServer problem occurs |
|
149 |
* @throws JAXBException if JAXB problem occurs |
|
150 |
* @throws XMLStreamException if XMLStream problem occurs |
|
151 |
*/ |
|
152 |
private int addFiles(Map<String, List<MultipartFile>> pairs) throws IOException, SolrServerException, JAXBException, XMLStreamException { |
|
152 | 153 |
LOG.trace("SolrService method addFiles called"); |
153 | 154 |
LOG.debug("Uploading files to Solr"); |
154 |
getFieldNamesFromDocumentCore(); |
|
155 |
getFieldNamesFromDocumentsCore();
|
|
155 | 156 |
|
156 | 157 |
int count = 0; |
157 | 158 |
Set<String> keys = pairs.keySet(); |
... | ... | |
161 | 162 |
|
162 | 163 |
MultipartFile first = pair.get(0); |
163 | 164 |
if (first.getContentType().equals(MediaType.APPLICATION_XML_VALUE) || first.getContentType().equals(MediaType.TEXT_XML_VALUE)) { |
164 |
addToFilesCore(first, pair.get(1));
|
|
165 |
addToCores(first, pair.get(1));
|
|
165 | 166 |
} else { |
166 |
addToFilesCore(pair.get(1), first);
|
|
167 |
addToCores(pair.get(1), first);
|
|
167 | 168 |
} |
168 | 169 |
count += 2; |
169 | 170 |
} |
... | ... | |
176 | 177 |
return count; |
177 | 178 |
} |
178 | 179 |
|
179 |
private void addToFilesCore(MultipartFile xmlFile, MultipartFile imageFile) throws IOException, SolrServerException, JAXBException { |
|
180 |
LOG.trace("SolrService method addToFilesCore called"); |
|
180 |
/** |
|
181 |
* Adds both XML and image file to both Solr cores. |
|
182 |
* |
|
183 |
* @param xmlFile XML file |
|
184 |
* @param imageFile image file (JPEG or PNG) |
|
185 |
* @throws IOException if IO problem occurs |
|
186 |
* @throws SolrServerException if SolrServer problem occurs |
|
187 |
* @throws JAXBException if JAXB problem occurs |
|
188 |
* @throws XMLStreamException if XMLStream problem occurs |
|
189 |
*/ |
|
190 |
private void addToCores(MultipartFile xmlFile, MultipartFile imageFile) throws IOException, SolrServerException, JAXBException, XMLStreamException { |
|
191 |
LOG.trace("SolrService method addToCores called"); |
|
181 | 192 |
SolrInputDocument solrInputDocument = new SolrInputDocument(); |
182 | 193 |
|
183 | 194 |
LOG.debug("Adding a file pair to File core"); |
... | ... | |
186 | 197 |
solrInputDocument.addField(FIELD_DOC_CONTENT, xmlFile.getBytes()); |
187 | 198 |
solrInputDocument.addField(FIELD_IMG_CONTENT, Base64.getEncoder().encode(imageFile.getBytes())); |
188 | 199 |
|
200 |
// adding to core files |
|
189 | 201 |
solrClientFiles.add(solrInputDocument); |
190 | 202 |
|
191 | 203 |
LOG.debug("Loading XML file"); |
192 | 204 |
XMLLoader xmlLoader = new XMLLoader(); |
193 | 205 |
PcGts doc = xmlLoader.loadFile(xmlFile.getInputStream(), xmlFile.getOriginalFilename()); |
206 |
|
|
207 |
// adding to core documents |
|
194 | 208 |
addToDocumentsCore(doc, imageFile.getOriginalFilename()); |
195 | 209 |
} |
196 | 210 |
|
197 |
private void getFieldNamesFromDocumentCore() throws IOException, SolrServerException { |
|
198 |
LOG.trace("SolrService method getFieldNamesFromDocumentCore called"); |
|
199 |
LOG.debug("Getting field names from Documents core"); |
|
211 |
/** |
|
212 |
* Gets all field names from Solr core documents. |
|
213 |
* |
|
214 |
* @throws IOException if IO problem occurs |
|
215 |
* @throws SolrServerException if SolrServer problem occurs |
|
216 |
*/ |
|
217 |
private void getFieldNamesFromDocumentsCore() throws IOException, SolrServerException { |
|
218 |
LOG.trace("SolrService method getFieldNamesFromDocumentsCore called"); |
|
219 |
LOG.debug("Getting field names from Solr core documents"); |
|
200 | 220 |
SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields(); |
201 | 221 |
SchemaResponse.FieldsResponse fieldsResponse = fieldRequest.process(solrClientDocuments); |
202 | 222 |
List<Map<String, Object>> fields = fieldsResponse.getFields(); |
... | ... | |
206 | 226 |
} |
207 | 227 |
} |
208 | 228 |
|
209 |
|
|
210 |
private void addFieldToDocumentCore(String name, SchemaRequest.Update request) { |
|
211 |
LOG.trace("SolrService method addFieldToDocumentCore called"); |
|
229 |
/** |
|
230 |
* Adds new field to Solr core documents. |
|
231 |
* |
|
232 |
* @param name field name |
|
233 |
* @param request request |
|
234 |
*/ |
|
235 |
private void addFieldToDocumentsCore(String name, SchemaRequest.Update request) { |
|
236 |
LOG.trace("SolrService method addFieldToDocumentsCore called"); |
|
212 | 237 |
|
213 | 238 |
if (fieldNamesDocumentCore.contains(name)) return; |
214 |
LOG.debug("Adding field \"" + name + "\" to Documents core");
|
|
239 |
LOG.debug("Adding field \"" + name + "\" to Solr core documents");
|
|
215 | 240 |
try { |
216 | 241 |
request.process(solrClientDocuments); |
217 | 242 |
fieldNamesDocumentCore.add(name); |
218 | 243 |
} catch (Exception e) { |
219 |
LOG.error("Couldn't add field = " + name); |
|
244 |
LOG.error("Couldn't add field = " + name + " to core documents!");
|
|
220 | 245 |
LOG.error(e.toString()); |
221 | 246 |
} |
222 | 247 |
} |
223 | 248 |
|
224 |
private Map<String, Object> getFieldAttributes(String name) { |
|
225 |
Map<String, Object> fieldAttributes = new LinkedHashMap<>(); |
|
226 |
fieldAttributes.put("name", name); |
|
227 |
fieldAttributes.put("type", "text_general"); |
|
228 |
fieldAttributes.put("stored", true); |
|
229 |
fieldAttributes.put("multiValued", false); |
|
230 |
return fieldAttributes; |
|
231 |
} |
|
232 |
|
|
233 |
private SchemaRequest.Update getAddTextFieldRequest(String name) { |
|
234 |
LOG.trace("SolrService method getAddTextFieldRequest called"); |
|
235 |
Map<String, Object> fieldAttributes = getFieldAttributes(name); |
|
236 |
return new SchemaRequest.AddField(fieldAttributes); |
|
237 |
} |
|
238 |
|
|
239 |
private SchemaRequest.Update getNotIndexedAddTextFieldRequest(String name) { |
|
240 |
LOG.trace("SolrService method getAddTextFieldRequest called"); |
|
241 |
Map<String, Object> fieldAttributes = getFieldAttributes(name); |
|
242 |
fieldAttributes.put("indexed", false); |
|
243 |
return new SchemaRequest.AddField(fieldAttributes); |
|
244 |
} |
|
245 |
|
|
246 |
public void deleteAll() throws IOException, SolrServerException { |
|
247 |
LOG.trace("SolrService method getAddTextFieldRequest called"); |
|
248 |
LOG.info("Deleting ALL from Documents core"); |
|
249 |
solrClientDocuments.deleteByQuery("*:*"); |
|
250 |
solrClientDocuments.commit(); |
|
251 |
|
|
252 |
LOG.info("Deleting ALL from Files core"); |
|
253 |
solrClientFiles.deleteByQuery("*:*"); |
|
254 |
solrClientFiles.commit(); |
|
255 |
} |
|
256 |
|
|
249 |
/** |
|
250 |
* Returns the image as bytes from Solr core files. |
|
251 |
* |
|
252 |
* @param filename image filename |
|
253 |
* @return image bytes |
|
254 |
* @throws IOException if IO problem occurs |
|
255 |
* @throws SolrServerException if SolrServer problem occurs |
|
256 |
*/ |
|
257 | 257 |
public byte[] getImageFromFilesCore(String filename) throws IOException, SolrServerException { |
258 | 258 |
LOG.trace("SolrService method getImageFromFilesCore called"); |
259 | 259 |
LOG.debug("Getting an image from Files core"); |
260 | 260 |
SolrQuery solrQuery = new SolrQuery(); |
261 | 261 |
solrQuery.set("q", FIELD_IMG_FILENAME + ":" + "\"" + filename + "\""); |
262 |
|
|
262 | 263 |
SolrDocumentList results = solrClientFiles.query(solrQuery).getResults(); |
263 | 264 |
if (results.getNumFound() < 1) { |
264 |
LOG.error("Image " + filename + " not found!"); |
|
265 |
LOG.error("Image " + filename + " not found! Returning NULL."); |
|
266 |
return null; |
|
265 | 267 |
} else if (results.getNumFound() > 1) { |
266 |
LOG.error("Image " + filename + " has multiple files. Using first one...");
|
|
268 |
LOG.warn("Image " + filename + " has multiple files. Using first one...");
|
|
267 | 269 |
} |
268 | 270 |
|
269 | 271 |
SolrDocument result = results.get(0); |
270 | 272 |
return (byte[]) result.getFieldValue(FIELD_IMG_CONTENT); |
271 | 273 |
} |
272 | 274 |
|
273 |
public SearchResponse query(SearchRequest searchRequest) throws IOException, SolrServerException { |
|
274 |
LOG.trace("SolrService method query called"); |
|
275 |
LOG.debug("Processing query - creating Solr query"); |
|
275 |
/** |
|
276 |
* Calls Solr query and returns found text regions. |
|
277 |
* |
|
278 |
* @param searchRequest request |
|
279 |
* @return response |
|
280 |
* @throws IOException if IO problem occurs |
|
281 |
* @throws SolrServerException if SolrServer problem occurs |
|
282 |
*/ |
|
283 |
public SearchResponse search(SearchRequest searchRequest) throws IOException, SolrServerException { |
|
284 |
LOG.trace("SolrService method search called"); |
|
285 |
LOG.debug("Processing search request - creating Solr query"); |
|
276 | 286 |
|
277 | 287 |
String query = searchRequest.getExpression().toLowerCase(); |
278 | 288 |
SolrQuery solrQuery = new SolrQuery(); |
... | ... | |
280 | 290 |
solrQuery.setHighlight(true); |
281 | 291 |
solrQuery.addHighlightField(PREFIX_TEXT_LINE + "*"); |
282 | 292 |
solrQuery.addHighlightField(PREFIX_TEXT_WORD + "*"); |
283 |
solrQuery.setRows(SEARCH_QUERY_ROWS); |
|
284 | 293 |
solrQuery.addSort(FIELD_DOC_FILENAME, SolrQuery.ORDER.asc); |
285 | 294 |
LOG.debug("Processing query - sending Solr query"); |
286 | 295 |
QueryResponse response = solrClientDocuments.query(solrQuery); |
... | ... | |
291 | 300 |
// if no documents found, try spell checking and call query again with first alternative |
292 | 301 |
if (searchRequest.isEnableSpellCheck() && docList.size() < 1) { |
293 | 302 |
LOG.info("No documents found, trying spell check..."); |
294 |
SpellCheckResponse spellCheck = response.getSpellCheckResponse(); |
|
295 |
|
|
296 |
if (spellCheck != null && spellCheck.getSuggestionMap().values().size() > 0) { |
|
297 |
SpellCheckResponse.Suggestion first = spellCheck.getSuggestionMap().values().iterator().next(); |
|
298 |
List<String> alternatives = first.getAlternatives(); |
|
299 | 303 |
|
300 |
if (alternatives.size() > 0) { |
|
301 |
String alternative = alternatives.get(0); |
|
302 |
LOG.info("Spell check found alternative \"" + alternative + "\", calling query again with this alternative as parameter."); |
|
303 |
return query(new SearchRequest(alternative, searchRequest.getTimestamp(), false)); |
|
304 |
} |
|
304 |
SpellCheckResponse spellCheck = response.getSpellCheckResponse(); |
|
305 |
String firstAlternative = Utils.getFirstAlternative(spellCheck); |
|
306 |
if (firstAlternative != null && !firstAlternative.isEmpty()) { |
|
307 |
LOG.info("Spell check found alternative \"" + firstAlternative + "\", calling query again with this alternative as parameter."); |
|
308 |
return search(new SearchRequest(firstAlternative, searchRequest.getTimestamp(), false)); |
|
309 |
} else { |
|
310 |
LOG.info("Spell check found no alternative."); |
|
305 | 311 |
} |
306 |
|
|
307 |
LOG.info("Spell check found no alternative."); |
|
308 | 312 |
} |
309 | 313 |
|
310 |
SearchResponse searchResponse = new SearchResponse(); |
|
311 |
searchResponse.setExpression(query); |
|
312 |
|
|
313 | 314 |
LOG.debug("Processing query - creating Search response"); |
314 | 315 |
List<TextRegion> textRegions = new LinkedList<>(); |
315 | 316 |
for (SolrDocument solrDocument: docList) { |
... | ... | |
344 | 345 |
int y2 = textRegion.getRegionCoords().getPoints()[2].getY(); |
345 | 346 |
|
346 | 347 |
if (wordNames.size() == 1 || (wordNames.size() == 0 && linesNames.size() == 1)) { |
347 |
String coordsValue = "";
|
|
348 |
String coordsValue; |
|
348 | 349 |
|
349 | 350 |
if (wordNames.size() == 1) { |
350 | 351 |
coordsValue = (String) solrDocument.getFieldValue(wordNames.stream().findFirst().get() + SUFFIX_COORDS); |
351 |
|
|
352 | 352 |
} else{ |
353 | 353 |
coordsValue = (String) solrDocument.getFieldValue(linesNames.stream().findFirst().get() + SUFFIX_COORDS); |
354 | 354 |
} |
... | ... | |
454 | 454 |
textRegions.add(textRegion); |
455 | 455 |
} |
456 | 456 |
|
457 |
// creating search response |
|
458 |
SearchResponse searchResponse = new SearchResponse(); |
|
459 |
searchResponse.setExpression(query); |
|
457 | 460 |
searchResponse.setTextRegions(textRegions.toArray(new TextRegion[0])); |
458 | 461 |
LOG.debug("Processing query - sending Search response"); |
459 | 462 |
LOG.info("Query received and processed"); |
460 | 463 |
return searchResponse; |
461 | 464 |
} |
462 | 465 |
|
466 |
/** |
|
467 |
* Adds document to Solr core documents. |
|
468 |
* |
|
469 |
* @param document document to add |
|
470 |
* @param imageFilename image filename |
|
471 |
* @throws IOException if IO problem occurs |
|
472 |
* @throws SolrServerException if SolrServer problem occurs |
|
473 |
*/ |
|
463 | 474 |
private void addToDocumentsCore(PcGts document, String imageFilename) throws IOException, SolrServerException { |
464 | 475 |
LOG.trace("SolrService method addToDocumentsCore called"); |
465 | 476 |
LOG.debug("Adding to Documents core"); |
... | ... | |
470 | 481 |
TextRegion textRegion = regions.get(i); |
471 | 482 |
SolrInputDocument solrInputDocument = new SolrInputDocument(); |
472 | 483 |
|
473 |
solrInputDocument.addField(FIELD_DOC_FILENAME, document.getFilename());
|
|
484 |
solrInputDocument.addField(FIELD_DOC_FILENAME, document.getDocumentName());
|
|
474 | 485 |
solrInputDocument.addField(FIELD_IMG_FILENAME, imageFilename); |
475 | 486 |
solrInputDocument.addField(FIELD_TEXT_REGION, textRegion.getTextEquiv().getUnicode()); |
476 | 487 |
solrInputDocument.addField(FIELD_TEXT_REGION_COORDS, textRegion.getRegionCoords().getPointsString()); |
... | ... | |
482 | 493 |
String prefix = PREFIX_TEXT_LINE + j; |
483 | 494 |
|
484 | 495 |
if (j >= DEFAULT_LINE_FIELD_COUNT) { |
485 |
addFieldToDocumentCore(prefix, getAddTextFieldRequest(prefix));
|
|
486 |
addFieldToDocumentCore(prefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
|
|
496 |
addFieldToDocumentsCore(prefix, Utils.getAddTextFieldRequest(prefix));
|
|
497 |
addFieldToDocumentsCore(prefix + SUFFIX_COORDS, Utils.getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
|
|
487 | 498 |
} |
488 | 499 |
|
489 | 500 |
solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode()); |
... | ... | |
496 | 507 |
String wordPrefix = PREFIX_TEXT_WORD + k + "_" + prefix; |
497 | 508 |
|
498 | 509 |
if (k >= DEFAULT_WORD_FIELD_COUNT) { |
499 |
addFieldToDocumentCore(wordPrefix, getAddTextFieldRequest(wordPrefix));
|
|
500 |
addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
|
|
510 |
addFieldToDocumentsCore(wordPrefix, Utils.getAddTextFieldRequest(wordPrefix));
|
|
511 |
addFieldToDocumentsCore(wordPrefix + SUFFIX_COORDS, Utils.getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
|
|
501 | 512 |
} |
502 | 513 |
|
503 | 514 |
solrInputDocument.addField(wordPrefix, textWord.getTextEquiv().getUnicode()); |
... | ... | |
508 | 519 |
} |
509 | 520 |
} |
510 | 521 |
|
511 |
public String info() throws IOException, SolrServerException { |
|
512 |
LOG.trace("SolrService method info called"); |
|
513 |
LOG.debug("Displaying information about files in Solr"); |
|
514 |
SolrQuery q = new SolrQuery("*:*"); |
|
515 |
q.setRows(0); // don't actually request any data |
|
516 |
return "Number of documents in " + CORE_NAME_DOCUMENTS + " core: " + solrClientDocuments.query(q).getResults().getNumFound() + |
|
517 |
"\nNumber of documents in " + CORE_NAME_FILES + " core: " + solrClientFiles.query(q).getResults().getNumFound(); |
|
518 |
} |
|
519 |
|
|
520 | 522 |
public List<String> listAllFiles() throws IOException, SolrServerException { |
521 | 523 |
LOG.trace("SolrService method listAllFiles called"); |
522 | 524 |
LOG.debug("Displaying ALL files in Solr"); |
523 | 525 |
SolrQuery solrQuery = new SolrQuery(); |
524 | 526 |
solrQuery.set("q", FIELD_DOC_FILENAME + ":*"); |
525 |
solrQuery.setRows(SEARCH_QUERY_ROWS); |
|
526 | 527 |
SolrDocumentList docList = solrClientFiles.query(solrQuery).getResults(); |
527 | 528 |
return docList.stream().map(doc -> doc.getFieldValue(FIELD_DOC_FILENAME).toString().replaceAll(".xml", "")).collect(Collectors.toList()); |
528 | 529 |
} |
... | ... | |
531 | 532 |
LOG.trace("SolrService method listSingleFile called"); |
532 | 533 |
LOG.debug("Displaying single file in Solr"); |
533 | 534 |
SolrQuery solrQuery = new SolrQuery(); |
534 |
//solrQuery.set("q", FIELD_DOC_FILENAME + ":*"); |
|
535 | 535 |
solrQuery.set("q", FIELD_DOC_FILENAME + ":" + documentName + ".xml"); |
536 |
solrQuery.setRows(SEARCH_QUERY_ROWS);
|
|
536 |
solrQuery.setRows(1); // return only one file
|
|
537 | 537 |
SolrDocumentList docList = solrClientFiles.query(solrQuery).getResults(); |
538 | 538 |
|
539 | 539 |
String imgFilename = (String) docList.get(0).getFieldValue(FIELD_IMG_FILENAME); |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/Utils.java | ||
---|---|---|
1 | 1 |
package cz.zcu.kiv.aswi.fulltextsearch; |
2 | 2 |
|
3 |
import org.apache.solr.client.solrj.request.schema.SchemaRequest; |
|
4 |
import org.apache.solr.client.solrj.response.SpellCheckResponse; |
|
5 |
import org.slf4j.Logger; |
|
6 |
import org.slf4j.LoggerFactory; |
|
7 |
import org.springframework.http.MediaType; |
|
8 |
import org.springframework.web.multipart.MultipartFile; |
|
9 |
|
|
3 | 10 |
import java.nio.charset.StandardCharsets; |
4 |
import java.util.Random;
|
|
11 |
import java.util.*;
|
|
5 | 12 |
|
6 | 13 |
public class Utils { |
7 | 14 |
|
15 |
private static final Logger LOG = LoggerFactory.getLogger(Utils.class); |
|
16 |
|
|
17 |
/** |
|
18 |
* Generates random string id. |
|
19 |
* |
|
20 |
* @param length length |
|
21 |
* @return random string id |
|
22 |
*/ |
|
8 | 23 |
public static String generateRandomStringId(int length) { |
9 | 24 |
|
10 | 25 |
// length is bounded by 256 characters |
... | ... | |
33 | 48 |
|
34 | 49 |
return r.toString(); |
35 | 50 |
} |
51 |
|
|
52 |
/** |
|
53 |
* Creates pairs image+XML from given files. |
|
54 |
* |
|
55 |
* @param files files to prepare |
|
56 |
* @return pairs of files (key = filename without extension, value = XML file and image file) |
|
57 |
*/ |
|
58 |
public static Map<String, List<MultipartFile>> prepareFiles(List<MultipartFile> files) { |
|
59 |
LOG.debug("Preprocessing files for upload to Solr"); |
|
60 |
|
|
61 |
Map<String, List<MultipartFile>> pairs = new HashMap<>(); |
|
62 |
for (MultipartFile file: files) { |
|
63 |
String filename = file.getOriginalFilename(); |
|
64 |
if (filename == null) { |
|
65 |
LOG.error("Skipping file with empty filename"); |
|
66 |
continue; |
|
67 |
} |
|
68 |
|
|
69 |
String type = file.getContentType(); |
|
70 |
if (type == null) { |
|
71 |
LOG.error("Skipping file " + filename + " (unknown type)"); |
|
72 |
continue; |
|
73 |
} |
|
74 |
|
|
75 |
int index = filename.lastIndexOf('.'); |
|
76 |
String filenameWithoutExtension = filename; |
|
77 |
if (index >= 0) { |
|
78 |
filenameWithoutExtension = filename.substring(0, index); |
|
79 |
} |
|
80 |
|
|
81 |
if (type.equals(MediaType.IMAGE_JPEG_VALUE) || type.equals(MediaType.IMAGE_PNG_VALUE) |
|
82 |
|| type.equals(MediaType.APPLICATION_XML_VALUE) || type.equals(MediaType.TEXT_XML_VALUE)) { |
|
83 |
|
|
84 |
List<MultipartFile> list = pairs.get(filenameWithoutExtension); |
|
85 |
if (list == null) { |
|
86 |
list = new LinkedList<>(); |
|
87 |
list.add(file); |
|
88 |
pairs.put(filenameWithoutExtension, list); |
|
89 |
} else { |
|
90 |
list.add(file); |
|
91 |
} |
|
92 |
LOG.debug("File " + filename + " added"); |
|
93 |
} else { |
|
94 |
LOG.error("Not supported type of file! File " + filename + " is type " + type); |
|
95 |
} |
|
96 |
} |
|
97 |
|
|
98 |
return pairs; |
|
99 |
} |
|
100 |
|
|
101 |
/** |
|
102 |
* Returns field attributes. |
|
103 |
* |
|
104 |
* @param name field name |
|
105 |
* @return attributes |
|
106 |
*/ |
|
107 |
private static Map<String, Object> getFieldAttributes(String name) { |
|
108 |
Map<String, Object> fieldAttributes = new LinkedHashMap<>(); |
|
109 |
fieldAttributes.put("name", name); |
|
110 |
fieldAttributes.put("type", "text_general"); |
|
111 |
fieldAttributes.put("stored", true); |
|
112 |
fieldAttributes.put("multiValued", false); |
|
113 |
return fieldAttributes; |
|
114 |
} |
|
115 |
|
|
116 |
/** |
|
117 |
* Returns schema request for adding text field. |
|
118 |
* |
|
119 |
* @param name field name |
|
120 |
* @return schema request |
|
121 |
*/ |
|
122 |
public static SchemaRequest.Update getAddTextFieldRequest(String name) { |
|
123 |
LOG.trace("SolrService method getAddTextFieldRequest called"); |
|
124 |
Map<String, Object> fieldAttributes = getFieldAttributes(name); |
|
125 |
return new SchemaRequest.AddField(fieldAttributes); |
|
126 |
} |
|
127 |
|
|
128 |
/** |
|
129 |
* Returns schema request for adding text field which won't be indexed. |
|
130 |
* |
|
131 |
* @param name field name |
|
132 |
* @return schema request |
|
133 |
*/ |
|
134 |
public static SchemaRequest.Update getNotIndexedAddTextFieldRequest(String name) { |
|
135 |
LOG.trace("SolrService method getAddTextFieldRequest called"); |
|
136 |
Map<String, Object> fieldAttributes = getFieldAttributes(name); |
|
137 |
fieldAttributes.put("indexed", false); |
|
138 |
return new SchemaRequest.AddField(fieldAttributes); |
|
139 |
} |
|
140 |
|
|
141 |
/** |
|
142 |
* Returns first alternative according to Solr spellcheck response |
|
143 |
* or NULL if such alternative doesn't exist. |
|
144 |
* |
|
145 |
* @param spellCheck spellcheck response |
|
146 |
* @return first alternative |
|
147 |
*/ |
|
148 |
public static String getFirstAlternative(SpellCheckResponse spellCheck) { |
|
149 |
if (spellCheck != null && spellCheck.getSuggestionMap().values().size() > 0) { |
|
150 |
SpellCheckResponse.Suggestion first = spellCheck.getSuggestionMap().values().iterator().next(); |
|
151 |
List<String> alternatives = first.getAlternatives(); |
|
152 |
|
|
153 |
if (alternatives.size() > 0) { |
|
154 |
return alternatives.get(0); |
|
155 |
} |
|
156 |
} |
|
157 |
|
|
158 |
return null; |
|
159 |
} |
|
160 |
|
|
36 | 161 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/XMLLoader.java | ||
---|---|---|
5 | 5 |
import javax.xml.bind.JAXBContext; |
6 | 6 |
import javax.xml.bind.JAXBException; |
7 | 7 |
import javax.xml.bind.Unmarshaller; |
8 |
import javax.xml.stream.XMLInputFactory; |
|
9 |
import javax.xml.stream.XMLStreamException; |
|
10 |
import javax.xml.stream.XMLStreamReader; |
|
11 |
import javax.xml.stream.util.StreamReaderDelegate; |
|
8 | 12 |
import java.io.InputStream; |
9 | 13 |
import java.util.logging.Level; |
10 | 14 |
|
11 | 15 |
public class XMLLoader { |
12 | 16 |
|
13 |
public PcGts loadFile(InputStream file, String filename) throws JAXBException { |
|
17 |
/** |
|
18 |
* Parses XML file and returns it as object. |
|
19 |
* |
|
20 |
* @param file XML file |
|
21 |
* @param filename filename |
|
22 |
* @return parsed XML |
|
23 |
* @throws JAXBException if JAXB problem occurs |
|
24 |
* @throws XMLStreamException if XMLStream problem occurs |
|
25 |
*/ |
|
26 |
public PcGts loadFile(InputStream file, String filename) throws JAXBException, XMLStreamException { |
|
14 | 27 |
JAXBContext jaxbContext = JAXBContext.newInstance(PcGts.class); |
15 | 28 |
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); |
29 |
XMLStreamReader xmlStreamReader = XMLInputFactory.newFactory().createXMLStreamReader(file); |
|
16 | 30 |
|
17 | 31 |
// ignoring unused elements |
18 | 32 |
java.util.logging.Logger.getLogger("com.sun.xml.bind").setLevel(Level.FINEST); |
19 | 33 |
jaxbUnmarshaller.setEventHandler(event -> event.getMessage().toLowerCase().contains("unexpected element")); |
20 | 34 |
|
21 |
PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(file); |
|
22 |
document.setFilename(filename); |
|
35 |
XMLReaderWithoutNamespace xmlReaderWithoutNamespace = new XMLReaderWithoutNamespace(xmlStreamReader); |
|
36 |
PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(xmlReaderWithoutNamespace); |
|
37 |
document.setDocumentName(filename); |
|
23 | 38 |
|
24 | 39 |
return document; |
25 | 40 |
} |
26 | 41 |
|
42 |
class XMLReaderWithoutNamespace extends StreamReaderDelegate { |
|
43 |
public XMLReaderWithoutNamespace(XMLStreamReader reader) { |
|
44 |
super(reader); |
|
45 |
} |
|
46 |
|
|
47 |
@Override |
|
48 |
public String getAttributeNamespace(int arg0) { |
|
49 |
return ""; |
|
50 |
} |
|
51 |
@Override |
|
52 |
public String getNamespaceURI() { |
|
53 |
return ""; |
|
54 |
} |
|
55 |
} |
|
56 |
|
|
27 | 57 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Coords.java | ||
---|---|---|
25 | 25 |
this.points = points; |
26 | 26 |
} |
27 | 27 |
|
28 |
public String getPointsString() { |
|
29 |
return pointsString; |
|
30 |
} |
|
31 |
|
|
32 |
public void setPointsString(String pointsString) { |
|
33 |
this.pointsString = pointsString; |
|
34 |
} |
|
35 |
|
|
36 |
public Point[] getPoints() { |
|
37 |
return points; |
|
38 |
} |
|
39 |
|
|
40 |
public void setPoints(Point[] points) { |
|
41 |
this.points = points; |
|
42 |
} |
|
43 |
|
|
44 |
@Override |
|
45 |
public String toString() { |
|
46 |
return this.pointsString; |
|
47 |
} |
|
48 |
|
|
28 |
/** |
|
29 |
* Parses string of points into array of Point objects. |
|
30 |
* |
|
31 |
* @param pointsString points as string |
|
32 |
* @return array of Point objects |
|
33 |
*/ |
|
49 | 34 |
public static Point[] parsePointString(String pointsString) { |
50 | 35 |
List<Point> pointList = new LinkedList<>(); |
51 | 36 |
if (pointsString == null || pointsString.isEmpty()) return pointList.toArray(new Point[0]); |
... | ... | |
64 | 49 |
|
65 | 50 |
return pointList.toArray(new Point[0]); |
66 | 51 |
} |
52 |
|
|
53 |
@Override |
|
54 |
public String toString() { |
|
55 |
return this.pointsString; |
|
56 |
} |
|
57 |
|
|
58 |
public String getPointsString() { |
|
59 |
return pointsString; |
|
60 |
} |
|
61 |
|
|
62 |
public void setPointsString(String pointsString) { |
|
63 |
this.pointsString = pointsString; |
|
64 |
} |
|
65 |
|
|
66 |
public Point[] getPoints() { |
|
67 |
return points; |
|
68 |
} |
|
69 |
|
|
70 |
public void setPoints(Point[] points) { |
|
71 |
this.points = points; |
|
72 |
} |
|
73 |
|
|
67 | 74 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Page.java | ||
---|---|---|
11 | 11 |
@XmlElement(name = "TextRegion") |
12 | 12 |
private List<TextRegion> textRegions; |
13 | 13 |
|
14 |
private String filename; |
|
14 |
/** document name (including extension) */ |
|
15 |
private String documentName; |
|
16 |
|
|
17 |
public void setDocumentName(String documentName) { |
|
18 |
this.documentName = documentName; |
|
19 |
if (this.textRegions == null) return; |
|
20 |
|
|
21 |
for (int i = 0; i < textRegions.size(); i++) { |
|
22 |
TextRegion textRegion = textRegions.get(i); |
|
23 |
textRegion.setDocumentName(documentName); |
|
24 |
} |
|
25 |
} |
|
15 | 26 |
|
16 | 27 |
public List<TextRegion> getTextRegions() { |
17 | 28 |
return textRegions; |
... | ... | |
21 | 32 |
this.textRegions = textRegions; |
22 | 33 |
} |
23 | 34 |
|
24 |
public String getFilename() { |
|
25 |
return filename; |
|
35 |
/** |
|
36 |
* Returns the document name (including extension). |
|
37 |
* |
|
38 |
* @return document name (including extension) |
|
39 |
*/ |
|
40 |
public String getDocumentName() { |
|
41 |
return documentName; |
|
26 | 42 |
} |
27 | 43 |
|
28 |
public void setFilename(String filename) { |
|
29 |
this.filename = filename; |
|
30 |
if (this.textRegions == null) return; |
|
31 |
|
|
32 |
for (int i = 0; i < textRegions.size(); i++) { |
|
33 |
TextRegion textRegion = textRegions.get(i); |
|
34 |
textRegion.setDocumentName(filename); |
|
35 |
} |
|
36 |
} |
|
37 | 44 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/PcGts.java | ||
---|---|---|
12 | 12 |
@XmlElement(name = "Page") |
13 | 13 |
private Page page; |
14 | 14 |
|
15 |
private String filename; |
|
15 |
/** document name (including extension) */ |
|
16 |
private String documentName; |
|
17 |
|
|
18 |
/** |
|
19 |
* Sets given document name to this object, page and all text regions. |
|
20 |
* |
|
21 |
* @param documentName document name (including extension) |
|
22 |
*/ |
|
23 |
public void setDocumentName(String documentName) { |
|
24 |
this.documentName = documentName; |
|
25 |
this.page.setDocumentName(documentName); |
|
26 |
} |
|
16 | 27 |
|
17 | 28 |
public Page getPage() { |
18 | 29 |
return page; |
... | ... | |
22 | 33 |
this.page = page; |
23 | 34 |
} |
24 | 35 |
|
25 |
public String getFilename() { |
|
26 |
return filename; |
|
36 |
/** |
|
37 |
* Returns the document name (including extension). |
|
38 |
* |
|
39 |
* @return document name (including extension) |
|
40 |
*/ |
|
41 |
public String getDocumentName() { |
|
42 |
return documentName; |
|
27 | 43 |
} |
28 | 44 |
|
29 |
public void setFilename(String filename) { |
|
30 |
this.filename = filename; |
|
31 |
this.page.setFilename(filename); |
|
32 |
} |
|
33 | 45 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/TextRegion.java | ||
---|---|---|
13 | 13 |
private Coords regionCoords; |
14 | 14 |
private String regionText; |
15 | 15 |
private TextLine[] textLines; |
16 |
/** document name (including extension) */ |
|
16 | 17 |
private String documentName; |
17 | 18 |
private DocumentSize documentSize; |
18 | 19 |
private String randomId; |
... | ... | |
47 | 48 |
this.textLines = textLines; |
48 | 49 |
} |
49 | 50 |
|
51 |
/** |
|
52 |
* Returns the document name (including extension). |
|
53 |
* |
|
54 |
* @return document name (including extension) |
|
55 |
*/ |
|
50 | 56 |
public String getDocumentName() { |
51 | 57 |
return documentName; |
52 | 58 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/package-info.java | ||
---|---|---|
1 |
@XmlSchema( |
|
2 |
namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15", |
|
3 |
elementFormDefault = XmlNsForm.QUALIFIED) |
|
4 |
package cz.zcu.kiv.aswi.fulltextsearch.document; |
|
5 |
|
|
6 |
import javax.xml.bind.annotation.XmlNsForm; |
|
7 |
import javax.xml.bind.annotation.XmlSchema; |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/model/DocumentResponse.java | ||
---|---|---|
6 | 6 |
|
7 | 7 |
private String documentName; |
8 | 8 |
private int id; |
9 |
private Date originDate; |
|
10 | 9 |
private Date uploadedDate; |
11 | 10 |
|
12 |
public DocumentResponse(String documentName, int id, Date originDate, Date uploadedDate) {
|
|
11 |
public DocumentResponse(String documentName, int id, Date uploadedDate) { |
|
13 | 12 |
this.documentName = documentName; |
14 | 13 |
this.id = id; |
15 |
this.originDate = originDate; |
|
16 | 14 |
this.uploadedDate = uploadedDate; |
17 | 15 |
} |
18 | 16 |
|
... | ... | |
32 | 30 |
this.id = id; |
33 | 31 |
} |
34 | 32 |
|
35 |
public Date getOriginDate() { |
|
36 |
return originDate; |
|
37 |
} |
|
38 |
|
|
39 |
public void setOriginDate(Date originDate) { |
|
40 |
this.originDate = originDate; |
|
41 |
} |
|
42 |
|
|
43 | 33 |
public Date getUploadedDate() { |
44 | 34 |
return uploadedDate; |
45 | 35 |
} |
be/fulltextsearch/src/test/java/cz/zcu/kiv/aswi/fulltextsearch/HttpRequestTest.java | ||
---|---|---|
22 | 22 |
|
23 | 23 |
@Test |
24 | 24 |
public void infoTest() throws Exception { |
25 |
assertThat(this.restTemplate.getForObject("http://localhost:" + port + "/info", |
|
26 |
String.class)).contains(controller.info()); |
|
25 |
/*assertThat(this.restTemplate.getForObject("http://localhost:" + port + "/info",
|
|
26 |
String.class)).contains(controller.info());*/
|
|
27 | 27 |
} |
28 | 28 |
|
29 | 29 |
} |
solr/core-configs/documents/conf/solrconfig.xml | ||
---|---|---|
697 | 697 |
--> |
698 | 698 |
<lst name="defaults"> |
699 | 699 |
<str name="echoParams">explicit</str> |
700 |
<int name="rows">10</int> |
|
701 |
|
|
700 |
<int name="rows">100</int> |
|
702 | 701 |
<str name="df">text_region</str> |
703 | 702 |
|
703 |
<str name="spellcheck">true</str> |
|
704 | 704 |
<str name="spellcheck.dictionary">default</str> |
705 |
<str name="spellcheck">on</str> |
|
706 |
<str name="spellcheck.extendedResults">true</str> |
|
707 |
<str name="spellcheck.count">10</str> |
|
708 |
<str name="spellcheck.alternativeTermCount">5</str> |
|
709 |
<str name="spellcheck.maxResultsForSuggest">5</str> |
|
710 |
<str name="spellcheck.collate">true</str> |
|
711 |
<str name="spellcheck.collateExtendedResults">true</str> |
|
712 |
<str name="spellcheck.maxCollationTries">10</str> |
|
713 |
<str name="spellcheck.maxCollations">5</str> |
|
705 |
<str name="spellcheck.count">1</str> |
|
714 | 706 |
</lst> |
715 | 707 |
<arr name="last-components"> |
716 | 708 |
<str>spellcheck</str> |
Také k dispozici: Unified diff
Re #8031: Programová příručka BE
- dopsání komentářů
- smazání nepotřebného kódu
- vylepšené členění do tříd a metod