3 |
3 |
import cz.zcu.kiv.aswi.fulltextsearch.document.PcGts;
|
4 |
4 |
import cz.zcu.kiv.aswi.fulltextsearch.document.TextLine;
|
5 |
5 |
import cz.zcu.kiv.aswi.fulltextsearch.document.TextRegion;
|
|
6 |
import cz.zcu.kiv.aswi.fulltextsearch.model.QueryDocumentResponse;
|
6 |
7 |
import org.apache.solr.client.solrj.SolrQuery;
|
7 |
8 |
import org.apache.solr.client.solrj.SolrServerException;
|
8 |
9 |
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
... | ... | |
15 |
16 |
|
16 |
17 |
import javax.xml.bind.JAXBException;
|
17 |
18 |
import java.io.IOException;
|
|
19 |
import java.io.InputStream;
|
18 |
20 |
import java.util.*;
|
19 |
21 |
|
20 |
22 |
public class SolrService {
|
... | ... | |
77 |
79 |
|
78 |
80 |
private void addBasicFieldsToSolr() {
|
79 |
81 |
addFieldToSolr(FIELD_DOC_FILENAME);
|
80 |
|
//addFieldToSolr(FIELD_IMG_FILENAME); // todo
|
|
82 |
addFieldToSolr(FIELD_IMG_FILENAME);
|
81 |
83 |
addFieldToSolr(FIELD_TEXT_REGION);
|
82 |
84 |
addFieldToSolr(FIELD_TEXT_REGION_COORDS);
|
83 |
85 |
}
|
... | ... | |
101 |
103 |
fieldAttributes.put("name", name);
|
102 |
104 |
fieldAttributes.put("type", "text_general");
|
103 |
105 |
fieldAttributes.put("stored", true);
|
|
106 |
fieldAttributes.put("multiValued", false);
|
104 |
107 |
SchemaRequest.Update addFieldRequest = new SchemaRequest.AddField(fieldAttributes);
|
105 |
108 |
addFieldRequest.process(solrClient);
|
106 |
109 |
|
... | ... | |
116 |
119 |
solrClient.commit();
|
117 |
120 |
}
|
118 |
121 |
|
119 |
|
public String query(String query) throws IOException, SolrServerException {
|
|
122 |
public List<QueryDocumentResponse> query(String query) throws IOException, SolrServerException {
|
120 |
123 |
SolrQuery solrQuery = new SolrQuery();
|
121 |
124 |
solrQuery.set("q", FIELD_TEXT_REGION + ":" + query);
|
|
125 |
solrQuery.setHighlight(true);
|
|
126 |
solrQuery.addHighlightField(PREFIX_TEXT_LINE + "*");
|
122 |
127 |
QueryResponse response = solrClient.query(solrQuery);
|
123 |
128 |
|
124 |
129 |
SolrDocumentList docList = response.getResults();
|
125 |
|
Set<String> ids = getUniqueIds(docList);
|
|
130 |
Map<String, Map<String, List<String>>> highlight = response.getHighlighting();
|
|
131 |
|
|
132 |
List<QueryDocumentResponse> results = new ArrayList<>();
|
|
133 |
for (SolrDocument solrDocument: docList) {
|
|
134 |
QueryDocumentResponse documentResponse = new QueryDocumentResponse();
|
|
135 |
|
|
136 |
// filename, text region
|
|
137 |
documentResponse.setDoc_filename((String) solrDocument.getFieldValue(FIELD_DOC_FILENAME));
|
|
138 |
documentResponse.setText_region((String) solrDocument.getFieldValue(FIELD_TEXT_REGION));
|
|
139 |
documentResponse.setText_region_coords((String) solrDocument.getFieldValue(FIELD_TEXT_REGION_COORDS));
|
|
140 |
|
|
141 |
// text lines
|
|
142 |
Map<String, List<String>> map = highlight.get(solrDocument.getFieldValue("id").toString());
|
|
143 |
Set<String> linesNames = map.keySet();
|
|
144 |
Map<String, String> linesMap = new HashMap<>();
|
|
145 |
for (String lineName: linesNames) {
|
|
146 |
String value = (String) solrDocument.getFieldValue(lineName);
|
|
147 |
String coordsValue = (String) solrDocument.getFieldValue(lineName + SUFFIX_COORDS);
|
|
148 |
linesMap.put(lineName, value);
|
|
149 |
linesMap.put(lineName + SUFFIX_COORDS, coordsValue);
|
|
150 |
}
|
|
151 |
documentResponse.setText_lines(linesMap);
|
|
152 |
|
|
153 |
// image
|
|
154 |
String imgFilename = (String) solrDocument.getFieldValue(FIELD_IMG_FILENAME);
|
|
155 |
InputStream is = getClass().getClassLoader().getResourceAsStream(SolrService.DATA_DIR + imgFilename);
|
|
156 |
String encodedImage = Base64.getEncoder().encodeToString(is.readAllBytes());
|
|
157 |
documentResponse.setImage(encodedImage);
|
126 |
158 |
|
127 |
|
return "Found in " + ids.size() + " documents with ids=" + String.join(", ", ids);
|
|
159 |
results.add(documentResponse);
|
|
160 |
}
|
|
161 |
|
|
162 |
return results;
|
128 |
163 |
}
|
129 |
164 |
|
130 |
165 |
private void addDocument(PcGts document) throws IOException, SolrServerException {
|
... | ... | |
136 |
171 |
SolrInputDocument solrInputDocument = new SolrInputDocument();
|
137 |
172 |
|
138 |
173 |
solrInputDocument.addField(FIELD_DOC_FILENAME, document.getFilename());
|
|
174 |
solrInputDocument.addField(FIELD_IMG_FILENAME, document.getFilename().replaceAll(".xml", ".png")); // todo
|
139 |
175 |
solrInputDocument.addField(FIELD_TEXT_REGION, textRegion.getTextEquiv().getUnicode());
|
140 |
176 |
solrInputDocument.addField(FIELD_TEXT_REGION_COORDS, textRegion.getCoords().getPoints());
|
141 |
177 |
|
... | ... | |
147 |
183 |
|
148 |
184 |
addFieldToSolr(prefix);
|
149 |
185 |
solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode());
|
|
186 |
|
|
187 |
addFieldToSolr(prefix + SUFFIX_COORDS);
|
150 |
188 |
solrInputDocument.addField(prefix + SUFFIX_COORDS, textLine.getCoords().getPoints());
|
151 |
189 |
}
|
152 |
190 |
|
Re #7805: Vracení nalezených dokumentů na query
- na / to vrací všechny dokumenty, které dané slovo obsahují
- součástí JSONu je také PNG obrázek zakódovaný v base64