Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 1e6d072b

Přidáno uživatelem Jitka Poubová před téměř 5 roky(ů)

Re #8031: Programová příručka BE
- dopsání komentářů
- smazání nepotřebného kódu
- vylepšené členění do tříd a metod

Zobrazit rozdíly:

be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/FulltextSearchApplication.java
11 11
	private static final Logger LOG = LoggerFactory.getLogger(FulltextSearchApplication.class);
12 12

  
13 13
	public static void main(String[] args) {
14

  
15 14
		LOG.info("Starting Spring BE");
16 15
		SpringApplication.run(FulltextSearchApplication.class, args);
17 16
	}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/IndexController.java
1 1
package cz.zcu.kiv.aswi.fulltextsearch;
2 2

  
3 3
import cz.zcu.kiv.aswi.fulltextsearch.model.*;
4
import org.apache.solr.client.solrj.SolrServerException;
4 5
import org.springframework.http.HttpStatus;
5 6
import org.springframework.http.ResponseEntity;
6 7
import org.springframework.web.bind.annotation.*;
......
8 9
import org.slf4j.Logger;
9 10
import org.slf4j.LoggerFactory;
10 11

  
12
import javax.xml.bind.JAXBException;
13
import javax.xml.stream.XMLStreamException;
14
import java.io.IOException;
11 15
import java.util.ArrayList;
12 16
import java.util.Date;
13 17
import java.util.List;
......
17 21
public class IndexController {
18 22

  
19 23
    private static final Logger LOG = LoggerFactory.getLogger(IndexController.class);
24

  
20 25
    private SolrService solrService = new SolrService();
21 26

  
22 27
    @PostMapping("/")
23
    public SearchResponse index(@RequestBody SearchRequest searchRequest) {
24
        LOG.trace("Method index called");
28
    public SearchResponse search(@RequestBody SearchRequest searchRequest) {
25 29
        try {
26
            LOG.debug("Calling SolrService and its Query method");
27
            return solrService.query(searchRequest);
30
            LOG.trace("Method search called");
31

  
32
            LOG.debug("Calling SolrService and its search method");
33
            return solrService.search(searchRequest);
34
        } catch (SolrServerException e) {
35
            LOG.error("SolrServerException: " + e.toString());
36
        } catch (IOException e) {
37
            LOG.error("IOException: " + e.toString());
28 38
        } catch (Exception e) {
29
            LOG.error("Cannot get method Query from SolrService returning NULL");
30
            LOG.error(e.toString());
31
            return null;
39
            LOG.error("Unknown exception: " + e.toString());
32 40
        }
41

  
42
        LOG.error("Cannot get method search from SolrService returning NULL");
43
        return null;
33 44
    }
34 45

  
35 46
    @PostMapping("/upload")
......
41 52
            int uploaded = solrService.uploadFiles(files);
42 53
            message = "Successfully uploaded " + uploaded + " files";
43 54
            return ResponseEntity.status(HttpStatus.OK).body(new ResponseMessage(message));
55
        } catch (IOException e) {
56
            LOG.error("IOException: " + e.toString());
57
            message = "Could not upload the files! " + e.toString();
58
        } catch (SolrServerException e) {
59
            LOG.error("SolrServerException: " + e.toString());
60
            message = "Could not upload the files! " + e.toString();
61
        } catch (XMLStreamException e) {
62
            LOG.error("XMLStreamException: " + e.toString());
63
            message = "Could not upload the files! " + e.toString();
64
        } catch (JAXBException e) {
65
            LOG.error("JAXBException: " + e.toString());
66
            message = "Could not upload the files! " + e.toString();
44 67
        } catch (Exception e) {
45
            LOG.error("Cannot get method uploadFiles from SolrService");
46
            LOG.error(e.toString());
68
            LOG.error("Unknown exception: " + e.toString());
47 69
            message = "Could not upload the files! " + e.toString();
48
            return ResponseEntity.status(HttpStatus.EXPECTATION_FAILED).body(new ResponseMessage(message));
49 70
        }
71

  
72
        LOG.error("Cannot get method uploadFiles from SolrService");
73
        return ResponseEntity.status(HttpStatus.EXPECTATION_FAILED).body(new ResponseMessage(message));
50 74
    }
51 75

  
52 76
    @GetMapping("/documents")
......
57 81
        try {
58 82
            LOG.debug("Calling SolrService and its listAllFiles method");
59 83
            List<String> filenames = solrService.listAllFiles();
60
            int id = 0; // todo
84
            int id = 0;
61 85
            for (String filename: filenames) {
62
                response.add(new DocumentResponse(filename, id++, new Date(), new Date()));
86
                response.add(new DocumentResponse(filename, id++, new Date()));
63 87
            }
88

  
89
            return response;
90
        } catch (SolrServerException e) {
91
            LOG.error("SolrServerException: " + e.toString());
92
        } catch (IOException e) {
93
            LOG.error("IOException: " + e.toString());
64 94
        } catch (Exception e) {
65
            LOG.error("Cannot get method listAllFiles from SolrService");
66
            LOG.error(e.toString());
95
            LOG.error("Unknown exception: " + e.toString());
67 96
        }
97

  
98
        LOG.error("Cannot get method listAllFiles from SolrService");
68 99
        return response;
69 100
    }
70 101

  
......
76 107
        try {
77 108
            LOG.debug("Calling SolrService and its listSingleFile method");
78 109
            ret = solrService.listSingleFile(documentName);
110
            return new DocumentDetailResponse(ret);
111
        } catch (SolrServerException e) {
112
            LOG.error("SolrServerException: " + e.toString());
113
        } catch (IOException e) {
114
            LOG.error("IOException: " + e.toString());
79 115
        } catch (Exception e) {
80
            LOG.error("Cannot get method listSingleFile from SolrService");
81
            LOG.error(e.toString());
116
            LOG.error("Unknown exception: " + e.toString());
82 117
        }
83
        return new DocumentDetailResponse(ret);
84
    }
85 118

  
86
    @GetMapping("/delete")
87
    public String delete() {
88
        LOG.trace("Method delete called");
89
        String response;
90

  
91
        try {
92
            LOG.debug("Calling SolrService and its deleteAll method");
93
            solrService.deleteAll();
94
            response = "All documents have been deleted.";
95
        } catch (Exception e) {
96
            LOG.error("Cannot get method deleteAll from SolrService");
97
            LOG.error(e.toString());
98
            response = "Unknown exception";
99
        }
100
        return response;
119
        return new DocumentDetailResponse(ret);
101 120
    }
102 121

  
103
    @GetMapping("/info")
104
    public String info() {
105
        LOG.trace("Method info called");
106
        String response;
107

  
108
        try {
109
            LOG.debug("Calling SolrService and its info method");
110
            response = solrService.info();
111
        } catch (Exception e) {
112
            LOG.error("Cannot get method info from SolrService");
113
            LOG.error(e.toString());
114
            response = "Unknown exception";
115
        }
116
        return response;
117
    }
118 122
}
119 123

  
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/SolrService.java
22 22

  
23 23
import javax.imageio.ImageIO;
24 24
import javax.xml.bind.JAXBException;
25
import javax.xml.stream.XMLStreamException;
25 26
import java.awt.image.BufferedImage;
26 27
import java.io.ByteArrayInputStream;
27 28
import java.io.IOException;
......
30 31

  
31 32
public class SolrService {
32 33

  
33
    // for docker
34
    private static final String URL = "http://fulltextsearch-solr";
34
    private static final Logger LOG = LoggerFactory.getLogger(SolrService.class);
35 35

  
36
    // for localhost
37
    //private static final String URL = "http://localhost";
36
    private static final String URL = "http://fulltextsearch-solr";
38 37

  
39 38
    private static final String CORE_NAME_DOCUMENTS = "documents";
40 39
    private static final String CORE_NAME_FILES = "files";
......
49 48
    private static final String FIELD_TEXT_REGION_COORDS = "text_region" + SUFFIX_COORDS;
50 49
    private static final String FIELD_DOC_CONTENT = "doc_content";
51 50
    private static final String FIELD_IMG_CONTENT = "img_content";
52
    private static final float IMG_OUTPUT_QUALITY = 0.5f;
53
    private static final Logger LOG = LoggerFactory.getLogger(SolrService.class);
51
    private static final String FIELD_UPLOADED_DATE = "uploadedDate";
54 52

  
55
    private static final int SEARCH_QUERY_ROWS = 100;
53
    private static final float IMG_OUTPUT_QUALITY = 0.5f;
56 54

  
57 55
    private static final int DEFAULT_LINE_FIELD_COUNT = 150;
58 56
    private static final int DEFAULT_WORD_FIELD_COUNT = 30;
......
63 61
    private HttpSolrClient solrClientFiles;
64 62
    private List<String> fieldNamesDocumentCore = new ArrayList<>();
65 63

  
64
    /**
65
     * Creates SolrService - connects to Solr.
66
     */
66 67
    public SolrService() {
67 68
        connect();
68 69
    }
69 70

  
71
    /**
72
     * Connects to Solr - to both cores documents and files.
73
     * If such cores do not exist, it creates them.
74
     */
70 75
    private void connect() {
71
        LOG.info("Waiting for solr => 10 seconds");
76
        LOG.info("Waiting for Solr => 10 seconds");
72 77
        try {
73 78
            Thread.sleep(10000);
74 79
        } catch (InterruptedException e) {
75
            LOG.error("Cannot sleep thread", e);
80
            LOG.error("Cannot sleep thread, not waiting for Solr!", e);
76 81
        }
77 82

  
78 83
        LOG.info("Connecting to Solr...");
79 84
        String urlString = URL + ":8983/solr/";
80 85

  
81 86
        try {
82
            // documents
87
            // core documents
83 88
            solrClientDocuments = new HttpSolrClient.Builder(urlString).build();
84
            if (CoreAdminRequest.getStatus(CORE_NAME_DOCUMENTS, solrClientDocuments).getCoreStatus(CORE_NAME_DOCUMENTS).size() < 1) {
85
                CoreAdminRequest.createCore(CORE_NAME_DOCUMENTS, CORE_NAME_DOCUMENTS, solrClientDocuments, "conf/solrconfig.xml", "documents-schema.xml");
86
                LOG.info("Core " + CORE_NAME_DOCUMENTS + " has been created.");
87
            } else {
88
                LOG.info("Core " + CORE_NAME_DOCUMENTS + " already exists.");
89
            }
89
            createCore(solrClientDocuments, "documents-schema.xml");
90 90
            solrClientDocuments.setBaseURL(solrClientDocuments.getBaseURL() + "/" +  CORE_NAME_DOCUMENTS);
91 91

  
92
            // files
92
            // core files
93 93
            solrClientFiles = new HttpSolrClient.Builder(urlString).build();
94
            if (CoreAdminRequest.getStatus(CORE_NAME_FILES, solrClientFiles).getCoreStatus(CORE_NAME_FILES).size() < 1) {
95
                CoreAdminRequest.createCore(CORE_NAME_FILES, CORE_NAME_FILES, solrClientFiles, "conf/solrconfig.xml", "files-schema.xml");
96
                LOG.info("Core " + CORE_NAME_FILES + " has been created.");
97
            } else {
98
                LOG.info("Core " + CORE_NAME_FILES + " already exists.");
99
            }
94
            createCore(solrClientFiles, "files-schema.xml");
100 95
            solrClientFiles.setBaseURL(solrClientFiles.getBaseURL() + "/"  + CORE_NAME_FILES);
101 96

  
102 97
            LOG.info("Connected to Solr.");
......
105 100
        }
106 101
    }
107 102

  
108
    public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException {
109
        LOG.trace("SolrService method uploadFiles called");
110
        Map<String, List<MultipartFile>> pairs = new HashMap<>();
111

  
112
        LOG.debug("Preprocessing files for upload to Solr");
113
        for (MultipartFile file: files) {
114
            String filename = file.getOriginalFilename();
115
            if (filename == null) {
116
                LOG.error("Skipping file with empty filename");
117
                continue;
118
            }
119

  
120
            String type = file.getContentType();
121
            if (type == null) {
122
                LOG.error("Skipping file " + filename + " (unknown type)");
123
                continue;
124
            }
125

  
126
            int index = filename.lastIndexOf('.');
127
            String filenameWithoutExtension = filename;
128
            if (index >= 0) {
129
                filenameWithoutExtension = filename.substring(0, index);
130
            }
103
    /**
104
     * Creates the core corresponding to the given client only if this core doesn't already exist.
105
     *
106
     * @param client client
107
     * @param schemaFilename schema filename
108
     * @throws IOException if IO problem occurs
109
     * @throws SolrServerException if SolrServer problem occurs
110
     */
111
    private void createCore(HttpSolrClient client, String schemaFilename) throws IOException, SolrServerException {
112
        String coreName = (client == solrClientDocuments) ? CORE_NAME_DOCUMENTS : CORE_NAME_FILES;
113

  
114
        if (CoreAdminRequest
115
                .getStatus(coreName, client)
116
                .getCoreStatus(coreName)
117
                .size() < 1) {
118
            CoreAdminRequest.createCore(coreName, coreName, client, "conf/solrconfig.xml", schemaFilename);
119
            LOG.info("Core " + coreName + " has been created.");
120
        } else {
121
            LOG.info("Core " + coreName + " already exists.");
122
        }
123
    }
131 124

  
132
            if (type.equals(MediaType.IMAGE_JPEG_VALUE) || type.equals(MediaType.IMAGE_PNG_VALUE)
133
                    || type.equals(MediaType.APPLICATION_XML_VALUE) || type.equals(MediaType.TEXT_XML_VALUE)) {
125
    /**
126
     * Uploads files to both Solr cores.
127
     *
128
     * @param files files to upload
129
     * @return number of successfully uploaded files
130
     * @throws IOException if IO problem occurs
131
     * @throws SolrServerException if SolrServer problem occurs
132
     * @throws JAXBException if JAXB problem occurs
133
     * @throws XMLStreamException if XMLStream problem occurs
134
     */
135
    public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException, XMLStreamException {
136
        LOG.trace("SolrService method uploadFiles called");
134 137

  
135
                List<MultipartFile> list = pairs.get(filenameWithoutExtension);
136
                if (list == null) {
137
                    list = new LinkedList<>();
138
                    list.add(file);
139
                    pairs.put(filenameWithoutExtension, list);
140
                } else {
141
                    list.add(file);
142
                }
143
                LOG.debug("File " + filename + " added");
144
            } else {
145
                LOG.error("Not supported type of file! File " + filename + " is type " + type);
146
            }
147
        }
138
        Map<String, List<MultipartFile>> pairs = Utils.prepareFiles(files);
148 139
        return addFiles(pairs);
149 140
    }
150 141

  
151
    private int addFiles(Map<String, List<MultipartFile>> pairs) throws IOException, SolrServerException, JAXBException {
142
    /**
143
     * Adds prepared pairs of files to both Solr cores.
144
     *
145
     * @param pairs pairs of files (image file as PNG or JPEG + XML file)
146
     * @return number of successfully uploaded files
147
     * @throws IOException if IO problem occurs
148
     * @throws SolrServerException if SolrServer problem occurs
149
     * @throws JAXBException if JAXB problem occurs
150
     * @throws XMLStreamException if XMLStream problem occurs
151
     */
152
    private int addFiles(Map<String, List<MultipartFile>> pairs) throws IOException, SolrServerException, JAXBException, XMLStreamException {
152 153
        LOG.trace("SolrService method addFiles called");
153 154
        LOG.debug("Uploading files to Solr");
154
        getFieldNamesFromDocumentCore();
155
        getFieldNamesFromDocumentsCore();
155 156

  
156 157
        int count = 0;
157 158
        Set<String> keys = pairs.keySet();
......
161 162

  
162 163
            MultipartFile first = pair.get(0);
163 164
            if (first.getContentType().equals(MediaType.APPLICATION_XML_VALUE) || first.getContentType().equals(MediaType.TEXT_XML_VALUE)) {
164
                addToFilesCore(first, pair.get(1));
165
                addToCores(first, pair.get(1));
165 166
            } else {
166
                addToFilesCore(pair.get(1), first);
167
                addToCores(pair.get(1), first);
167 168
            }
168 169
            count += 2;
169 170
        }
......
176 177
        return count;
177 178
    }
178 179

  
179
    private void addToFilesCore(MultipartFile xmlFile, MultipartFile imageFile) throws IOException, SolrServerException, JAXBException {
180
        LOG.trace("SolrService method addToFilesCore called");
180
    /**
181
     * Adds both XML and image file to both Solr cores.
182
     *
183
     * @param xmlFile XML file
184
     * @param imageFile image file (JPEG or PNG)
185
     * @throws IOException if IO problem occurs
186
     * @throws SolrServerException if SolrServer problem occurs
187
     * @throws JAXBException if JAXB problem occurs
188
     * @throws XMLStreamException if XMLStream problem occurs
189
     */
190
    private void addToCores(MultipartFile xmlFile, MultipartFile imageFile) throws IOException, SolrServerException, JAXBException, XMLStreamException {
191
        LOG.trace("SolrService method addToCores called");
181 192
        SolrInputDocument solrInputDocument = new SolrInputDocument();
182 193

  
183 194
        LOG.debug("Adding a file pair to File core");
......
186 197
        solrInputDocument.addField(FIELD_DOC_CONTENT, xmlFile.getBytes());
187 198
        solrInputDocument.addField(FIELD_IMG_CONTENT, Base64.getEncoder().encode(imageFile.getBytes()));
188 199

  
200
        // adding to core files
189 201
        solrClientFiles.add(solrInputDocument);
190 202

  
191 203
        LOG.debug("Loading XML file");
192 204
        XMLLoader xmlLoader = new XMLLoader();
193 205
        PcGts doc = xmlLoader.loadFile(xmlFile.getInputStream(), xmlFile.getOriginalFilename());
206

  
207
        // adding to core documents
194 208
        addToDocumentsCore(doc, imageFile.getOriginalFilename());
195 209
    }
196 210

  
197
    private void getFieldNamesFromDocumentCore() throws IOException, SolrServerException {
198
        LOG.trace("SolrService method getFieldNamesFromDocumentCore called");
199
        LOG.debug("Getting field names from Documents core");
211
    /**
212
     * Gets all field names from Solr core documents.
213
     *
214
     * @throws IOException if IO problem occurs
215
     * @throws SolrServerException if SolrServer problem occurs
216
     */
217
    private void getFieldNamesFromDocumentsCore() throws IOException, SolrServerException {
218
        LOG.trace("SolrService method getFieldNamesFromDocumentsCore called");
219
        LOG.debug("Getting field names from Solr core documents");
200 220
        SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields();
201 221
        SchemaResponse.FieldsResponse fieldsResponse = fieldRequest.process(solrClientDocuments);
202 222
        List<Map<String, Object>> fields = fieldsResponse.getFields();
......
206 226
        }
207 227
    }
208 228

  
209

  
210
    private void addFieldToDocumentCore(String name, SchemaRequest.Update request) {
211
        LOG.trace("SolrService method addFieldToDocumentCore called");
229
    /**
230
     * Adds new field to Solr core documents.
231
     *
232
     * @param name field name
233
     * @param request request
234
     */
235
    private void addFieldToDocumentsCore(String name, SchemaRequest.Update request) {
236
        LOG.trace("SolrService method addFieldToDocumentsCore called");
212 237

  
213 238
        if (fieldNamesDocumentCore.contains(name)) return;
214
        LOG.debug("Adding field \"" + name + "\" to Documents core");
239
        LOG.debug("Adding field \"" + name + "\" to Solr core documents");
215 240
        try {
216 241
            request.process(solrClientDocuments);
217 242
            fieldNamesDocumentCore.add(name);
218 243
        } catch (Exception e) {
219
            LOG.error("Couldn't add field = " + name);
244
            LOG.error("Couldn't add field = " + name + " to core documents!");
220 245
            LOG.error(e.toString());
221 246
        }
222 247
    }
223 248

  
224
    private Map<String, Object> getFieldAttributes(String name) {
225
        Map<String, Object> fieldAttributes = new LinkedHashMap<>();
226
        fieldAttributes.put("name", name);
227
        fieldAttributes.put("type", "text_general");
228
        fieldAttributes.put("stored", true);
229
        fieldAttributes.put("multiValued", false);
230
        return fieldAttributes;
231
    }
232

  
233
    private SchemaRequest.Update getAddTextFieldRequest(String name) {
234
        LOG.trace("SolrService method getAddTextFieldRequest called");
235
        Map<String, Object> fieldAttributes = getFieldAttributes(name);
236
        return new SchemaRequest.AddField(fieldAttributes);
237
    }
238

  
239
    private SchemaRequest.Update getNotIndexedAddTextFieldRequest(String name) {
240
        LOG.trace("SolrService method getAddTextFieldRequest called");
241
        Map<String, Object> fieldAttributes = getFieldAttributes(name);
242
        fieldAttributes.put("indexed", false);
243
        return new SchemaRequest.AddField(fieldAttributes);
244
    }
245

  
246
    public void deleteAll() throws IOException, SolrServerException {
247
        LOG.trace("SolrService method getAddTextFieldRequest called");
248
        LOG.info("Deleting ALL from Documents core");
249
        solrClientDocuments.deleteByQuery("*:*");
250
        solrClientDocuments.commit();
251

  
252
        LOG.info("Deleting ALL from Files core");
253
        solrClientFiles.deleteByQuery("*:*");
254
        solrClientFiles.commit();
255
    }
256

  
249
    /**
250
     * Returns image as bytes from Solr core files.
251
     *
252
     * @param filename image filename
253
     * @return image bytes
254
     * @throws IOException if IO problem occurs
255
     * @throws SolrServerException if SolrServer problem occurs
256
     */
257 257
    public byte[] getImageFromFilesCore(String filename) throws IOException, SolrServerException {
258 258
        LOG.trace("SolrService method getImageFromFilesCore called");
259 259
        LOG.debug("Getting an image from Files core");
260 260
        SolrQuery solrQuery = new SolrQuery();
261 261
        solrQuery.set("q", FIELD_IMG_FILENAME + ":" + "\"" + filename + "\"");
262

  
262 263
        SolrDocumentList results = solrClientFiles.query(solrQuery).getResults();
263 264
        if (results.getNumFound() < 1) {
264
            LOG.error("Image " + filename + " not found!");
265
            LOG.error("Image " + filename + " not found! Returning NULL.");
266
            return null;
265 267
        } else if (results.getNumFound() > 1) {
266
            LOG.error("Image " + filename + " has multiple files. Using first one...");
268
            LOG.warn("Image " + filename + " has multiple files. Using first one...");
267 269
        }
268 270

  
269 271
        SolrDocument result = results.get(0);
270 272
        return (byte[]) result.getFieldValue(FIELD_IMG_CONTENT);
271 273
    }
272 274

  
273
    public SearchResponse query(SearchRequest searchRequest) throws IOException, SolrServerException  {
274
        LOG.trace("SolrService method query called");
275
        LOG.debug("Processing query - creating Solr query");
275
    /**
276
     * Calls Solr query and returns found text regions.
277
     *
278
     * @param searchRequest request
279
     * @return response
280
     * @throws IOException if IO problem occurs
281
     * @throws SolrServerException if SolrServer problem occurs
282
     */
283
    public SearchResponse search(SearchRequest searchRequest) throws IOException, SolrServerException  {
284
        LOG.trace("SolrService method search called");
285
        LOG.debug("Processing search request - creating Solr query");
276 286

  
277 287
        String query = searchRequest.getExpression().toLowerCase();
278 288
        SolrQuery solrQuery = new SolrQuery();
......
280 290
        solrQuery.setHighlight(true);
281 291
        solrQuery.addHighlightField(PREFIX_TEXT_LINE +  "*");
282 292
        solrQuery.addHighlightField(PREFIX_TEXT_WORD +  "*");
283
        solrQuery.setRows(SEARCH_QUERY_ROWS);
284 293
        solrQuery.addSort(FIELD_DOC_FILENAME, SolrQuery.ORDER.asc);
285 294
        LOG.debug("Processing query - sending Solr query");
286 295
        QueryResponse response = solrClientDocuments.query(solrQuery);
......
291 300
        // if no documents found, try spell checking and call query again with first alternative
292 301
        if (searchRequest.isEnableSpellCheck() && docList.size() < 1) {
293 302
            LOG.info("No documents found, trying spell check...");
294
            SpellCheckResponse spellCheck = response.getSpellCheckResponse();
295

  
296
            if (spellCheck != null && spellCheck.getSuggestionMap().values().size() > 0) {
297
                SpellCheckResponse.Suggestion first = spellCheck.getSuggestionMap().values().iterator().next();
298
                List<String> alternatives = first.getAlternatives();
299 303

  
300
                if (alternatives.size() > 0) {
301
                    String alternative = alternatives.get(0);
302
                    LOG.info("Spell check found alternative \"" + alternative + "\", calling query again with this alternative as parameter.");
303
                    return query(new SearchRequest(alternative, searchRequest.getTimestamp(), false));
304
                }
304
            SpellCheckResponse spellCheck = response.getSpellCheckResponse();
305
            String firstAlternative = Utils.getFirstAlternative(spellCheck);
306
            if (firstAlternative != null && !firstAlternative.isEmpty()) {
307
                LOG.info("Spell check found alternative \"" + firstAlternative + "\", calling query again with this alternative as parameter.");
308
                return search(new SearchRequest(firstAlternative, searchRequest.getTimestamp(), false));
309
            } else {
310
                LOG.info("Spell check found no alternative.");
305 311
            }
306

  
307
            LOG.info("Spell check found no alternative.");
308 312
        }
309 313

  
310
        SearchResponse searchResponse = new SearchResponse();
311
        searchResponse.setExpression(query);
312

  
313 314
        LOG.debug("Processing query - creating Search response");
314 315
        List<TextRegion> textRegions = new LinkedList<>();
315 316
        for (SolrDocument solrDocument: docList) {
......
344 345
            int y2 = textRegion.getRegionCoords().getPoints()[2].getY();
345 346

  
346 347
            if (wordNames.size() == 1 || (wordNames.size() == 0 && linesNames.size() == 1)) {
347
                String coordsValue = "";
348
                String coordsValue;
348 349

  
349 350
                if (wordNames.size() == 1) {
350 351
                    coordsValue = (String) solrDocument.getFieldValue(wordNames.stream().findFirst().get() + SUFFIX_COORDS);
351

  
352 352
                } else{
353 353
                    coordsValue = (String) solrDocument.getFieldValue(linesNames.stream().findFirst().get() + SUFFIX_COORDS);
354 354
                }
......
454 454
            textRegions.add(textRegion);
455 455
        }
456 456

  
457
        // creating search response
458
        SearchResponse searchResponse = new SearchResponse();
459
        searchResponse.setExpression(query);
457 460
        searchResponse.setTextRegions(textRegions.toArray(new TextRegion[0]));
458 461
        LOG.debug("Processing query - sending Search response");
459 462
        LOG.info("Query received and processed");
460 463
        return searchResponse;
461 464
    }
462 465

  
466
    /**
467
     * Adds document to Solr core documents.
468
     *
469
     * @param document  document to add
470
     * @param imageFilename image filename
471
     * @throws IOException if IO problem occurs
472
     * @throws SolrServerException if SolrServer problem occurs
473
     */
463 474
    private void addToDocumentsCore(PcGts document, String imageFilename) throws IOException, SolrServerException {
464 475
        LOG.trace("SolrService method addToDocumentsCore called");
465 476
        LOG.debug("Adding to Documents core");
......
470 481
            TextRegion textRegion = regions.get(i);
471 482
            SolrInputDocument solrInputDocument = new SolrInputDocument();
472 483

  
473
            solrInputDocument.addField(FIELD_DOC_FILENAME, document.getFilename());
484
            solrInputDocument.addField(FIELD_DOC_FILENAME, document.getDocumentName());
474 485
            solrInputDocument.addField(FIELD_IMG_FILENAME, imageFilename);
475 486
            solrInputDocument.addField(FIELD_TEXT_REGION, textRegion.getTextEquiv().getUnicode());
476 487
            solrInputDocument.addField(FIELD_TEXT_REGION_COORDS, textRegion.getRegionCoords().getPointsString());
......
482 493
                String prefix = PREFIX_TEXT_LINE + j;
483 494

  
484 495
                if (j >= DEFAULT_LINE_FIELD_COUNT) {
485
                    addFieldToDocumentCore(prefix, getAddTextFieldRequest(prefix));
486
                    addFieldToDocumentCore(prefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
496
                    addFieldToDocumentsCore(prefix, Utils.getAddTextFieldRequest(prefix));
497
                    addFieldToDocumentsCore(prefix + SUFFIX_COORDS, Utils.getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
487 498
                }
488 499

  
489 500
                solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode());
......
496 507
                    String wordPrefix = PREFIX_TEXT_WORD + k + "_" + prefix;
497 508

  
498 509
                    if (k >= DEFAULT_WORD_FIELD_COUNT) {
499
                        addFieldToDocumentCore(wordPrefix, getAddTextFieldRequest(wordPrefix));
500
                        addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
510
                        addFieldToDocumentsCore(wordPrefix, Utils.getAddTextFieldRequest(wordPrefix));
511
                        addFieldToDocumentsCore(wordPrefix + SUFFIX_COORDS, Utils.getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
501 512
                    }
502 513

  
503 514
                    solrInputDocument.addField(wordPrefix, textWord.getTextEquiv().getUnicode());
......
508 519
        }
509 520
    }
510 521

  
511
    public String info() throws IOException, SolrServerException {
512
        LOG.trace("SolrService method info called");
513
        LOG.debug("Displaying information about files in Solr");
514
        SolrQuery q = new SolrQuery("*:*");
515
        q.setRows(0);  // don't actually request any data
516
        return "Number of documents in " + CORE_NAME_DOCUMENTS + " core: " + solrClientDocuments.query(q).getResults().getNumFound() +
517
                "\nNumber of documents in " + CORE_NAME_FILES + " core: " + solrClientFiles.query(q).getResults().getNumFound();
518
    }
519

  
520 522
    public List<String> listAllFiles() throws IOException, SolrServerException {
521 523
        LOG.trace("SolrService method listAllFiles called");
522 524
        LOG.debug("Displaying ALL files in Solr");
523 525
        SolrQuery solrQuery = new SolrQuery();
524 526
        solrQuery.set("q", FIELD_DOC_FILENAME + ":*");
525
        solrQuery.setRows(SEARCH_QUERY_ROWS);
526 527
        SolrDocumentList docList = solrClientFiles.query(solrQuery).getResults();
527 528
        return docList.stream().map(doc -> doc.getFieldValue(FIELD_DOC_FILENAME).toString().replaceAll(".xml", "")).collect(Collectors.toList());
528 529
    }
......
531 532
        LOG.trace("SolrService method listSingleFile called");
532 533
        LOG.debug("Displaying single file in Solr");
533 534
        SolrQuery solrQuery = new SolrQuery();
534
        //solrQuery.set("q", FIELD_DOC_FILENAME + ":*");
535 535
        solrQuery.set("q", FIELD_DOC_FILENAME + ":" + documentName + ".xml");
536
        solrQuery.setRows(SEARCH_QUERY_ROWS);
536
        solrQuery.setRows(1); // return only one file
537 537
        SolrDocumentList docList = solrClientFiles.query(solrQuery).getResults();
538 538

  
539 539
        String imgFilename = (String) docList.get(0).getFieldValue(FIELD_IMG_FILENAME);
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/Utils.java
1 1
package cz.zcu.kiv.aswi.fulltextsearch;
2 2

  
3
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
4
import org.apache.solr.client.solrj.response.SpellCheckResponse;
5
import org.slf4j.Logger;
6
import org.slf4j.LoggerFactory;
7
import org.springframework.http.MediaType;
8
import org.springframework.web.multipart.MultipartFile;
9

  
3 10
import java.nio.charset.StandardCharsets;
4
import java.util.Random;
11
import java.util.*;
5 12

  
6 13
public class Utils {
7 14

  
15
    private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
16

  
17
    /**
18
     * Generates random string id.
19
     *
20
     * @param length    length
21
     * @return random string id
22
     */
8 23
    public static String generateRandomStringId(int length) {
9 24

  
10 25
        // length is bounded by 256 characters
......
33 48

  
34 49
        return r.toString();
35 50
    }
51

  
52
    /**
53
     * Creates pairs image+XML from given files.
54
     *
55
     * @param files files to prepare
56
     * @return pairs of files (key = filename without extension, value = XML file and image file)
57
     */
58
    public static Map<String, List<MultipartFile>> prepareFiles(List<MultipartFile> files) {
59
        LOG.debug("Preprocessing files for upload to Solr");
60

  
61
        Map<String, List<MultipartFile>> pairs = new HashMap<>();
62
        for (MultipartFile file: files) {
63
            String filename = file.getOriginalFilename();
64
            if (filename == null) {
65
                LOG.error("Skipping file with empty filename");
66
                continue;
67
            }
68

  
69
            String type = file.getContentType();
70
            if (type == null) {
71
                LOG.error("Skipping file " + filename + " (unknown type)");
72
                continue;
73
            }
74

  
75
            int index = filename.lastIndexOf('.');
76
            String filenameWithoutExtension = filename;
77
            if (index >= 0) {
78
                filenameWithoutExtension = filename.substring(0, index);
79
            }
80

  
81
            if (type.equals(MediaType.IMAGE_JPEG_VALUE) || type.equals(MediaType.IMAGE_PNG_VALUE)
82
                    || type.equals(MediaType.APPLICATION_XML_VALUE) || type.equals(MediaType.TEXT_XML_VALUE)) {
83

  
84
                List<MultipartFile> list = pairs.get(filenameWithoutExtension);
85
                if (list == null) {
86
                    list = new LinkedList<>();
87
                    list.add(file);
88
                    pairs.put(filenameWithoutExtension, list);
89
                } else {
90
                    list.add(file);
91
                }
92
                LOG.debug("File " + filename + " added");
93
            } else {
94
                LOG.error("Not supported type of file! File " + filename + " is type " + type);
95
            }
96
        }
97

  
98
        return pairs;
99
    }
100

  
101
    /**
102
     * Returns field attributes.
103
     *
104
     * @param name field name
105
     * @return attributes
106
     */
107
    private static Map<String, Object> getFieldAttributes(String name) {
108
        Map<String, Object> fieldAttributes = new LinkedHashMap<>();
109
        fieldAttributes.put("name", name);
110
        fieldAttributes.put("type", "text_general");
111
        fieldAttributes.put("stored", true);
112
        fieldAttributes.put("multiValued", false);
113
        return fieldAttributes;
114
    }
115

  
116
    /**
117
     * Returns schema request for adding text field.
118
     *
119
     * @param name field name
120
     * @return schema request
121
     */
122
    public static SchemaRequest.Update getAddTextFieldRequest(String name) {
123
        LOG.trace("SolrService method getAddTextFieldRequest called");
124
        Map<String, Object> fieldAttributes = getFieldAttributes(name);
125
        return new SchemaRequest.AddField(fieldAttributes);
126
    }
127

  
128
    /**
129
     * Returns schema request for adding text field which won't be indexed.
130
     *
131
     * @param name field name
132
     * @return schema request
133
     */
134
    public static SchemaRequest.Update getNotIndexedAddTextFieldRequest(String name) {
135
        LOG.trace("SolrService method getAddTextFieldRequest called");
136
        Map<String, Object> fieldAttributes = getFieldAttributes(name);
137
        fieldAttributes.put("indexed", false);
138
        return new SchemaRequest.AddField(fieldAttributes);
139
    }
140

  
141
    /**
142
     * Returns first alternative according to Solr spellcheck response
143
     * or NULL if such alternative doesn't exist.
144
     *
145
     * @param spellCheck spellcheck response
146
     * @return first alternative
147
     */
148
    public static String getFirstAlternative(SpellCheckResponse spellCheck) {
149
        if (spellCheck != null && spellCheck.getSuggestionMap().values().size() > 0) {
150
            SpellCheckResponse.Suggestion first = spellCheck.getSuggestionMap().values().iterator().next();
151
            List<String> alternatives = first.getAlternatives();
152

  
153
            if (alternatives.size() > 0) {
154
                return alternatives.get(0);
155
            }
156
        }
157

  
158
        return null;
159
    }
160

  
36 161
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/XMLLoader.java
5 5
import javax.xml.bind.JAXBContext;
6 6
import javax.xml.bind.JAXBException;
7 7
import javax.xml.bind.Unmarshaller;
8
import javax.xml.stream.XMLInputFactory;
9
import javax.xml.stream.XMLStreamException;
10
import javax.xml.stream.XMLStreamReader;
11
import javax.xml.stream.util.StreamReaderDelegate;
8 12
import java.io.InputStream;
9 13
import java.util.logging.Level;
10 14

  
11 15
public class XMLLoader {
12 16

  
13
    public PcGts loadFile(InputStream file, String filename) throws JAXBException {
17
    /**
18
     * Parses XML file and returns it as object.
19
     *
20
     * @param file      XML file
21
     * @param filename  filename
22
     * @return parsed XML
23
     * @throws JAXBException if JAXB problem occurs
24
     * @throws XMLStreamException if XMLStream problem occurs
25
     */
26
    public PcGts loadFile(InputStream file, String filename) throws JAXBException, XMLStreamException {
14 27
        JAXBContext jaxbContext = JAXBContext.newInstance(PcGts.class);
15 28
        Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
29
        XMLStreamReader xmlStreamReader = XMLInputFactory.newFactory().createXMLStreamReader(file);
16 30

  
17 31
        // ignoring unused elements
18 32
        java.util.logging.Logger.getLogger("com.sun.xml.bind").setLevel(Level.FINEST);
19 33
        jaxbUnmarshaller.setEventHandler(event -> event.getMessage().toLowerCase().contains("unexpected element"));
20 34

  
21
        PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(file);
22
        document.setFilename(filename);
35
        XMLReaderWithoutNamespace xmlReaderWithoutNamespace = new XMLReaderWithoutNamespace(xmlStreamReader);
36
        PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(xmlReaderWithoutNamespace);
37
        document.setDocumentName(filename);
23 38

  
24 39
        return document;
25 40
    }
26 41

  
42
    class XMLReaderWithoutNamespace extends StreamReaderDelegate {
43
        public XMLReaderWithoutNamespace(XMLStreamReader reader) {
44
            super(reader);
45
        }
46

  
47
        @Override
48
        public String getAttributeNamespace(int arg0) {
49
            return "";
50
        }
51
        @Override
52
        public String getNamespaceURI() {
53
            return "";
54
        }
55
    }
56

  
27 57
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Coords.java
25 25
        this.points = points;
26 26
    }
27 27

  
28
    public String getPointsString() {
29
        return pointsString;
30
    }
31

  
32
    public void setPointsString(String pointsString) {
33
        this.pointsString = pointsString;
34
    }
35

  
36
    public Point[] getPoints() {
37
        return points;
38
    }
39

  
40
    public void setPoints(Point[] points) {
41
        this.points = points;
42
    }
43

  
44
    @Override
45
    public String toString() {
46
        return this.pointsString;
47
    }
48

  
28
    /**
29
     * Parses string of points into array of Point objects.
30
     *
31
     * @param pointsString points as string
32
     * @return array of Point objects
33
     */
49 34
    public static Point[] parsePointString(String pointsString) {
50 35
        List<Point> pointList = new LinkedList<>();
51 36
        if (pointsString == null || pointsString.isEmpty()) return pointList.toArray(new Point[0]);
......
64 49

  
65 50
        return pointList.toArray(new Point[0]);
66 51
    }
52

  
53
    @Override
54
    public String toString() {
55
        return this.pointsString;
56
    }
57

  
58
    public String getPointsString() {
59
        return pointsString;
60
    }
61

  
62
    public void setPointsString(String pointsString) {
63
        this.pointsString = pointsString;
64
    }
65

  
66
    public Point[] getPoints() {
67
        return points;
68
    }
69

  
70
    public void setPoints(Point[] points) {
71
        this.points = points;
72
    }
73

  
67 74
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Page.java
11 11
    @XmlElement(name = "TextRegion")
12 12
    private List<TextRegion> textRegions;
13 13

  
14
    private String filename;
14
    /** document name (including extension) */
15
    private String documentName;
16

  
17
    public void setDocumentName(String documentName) {
18
        this.documentName = documentName;
19
        if (this.textRegions == null) return;
20

  
21
        for (int i = 0; i < textRegions.size(); i++) {
22
            TextRegion textRegion = textRegions.get(i);
23
            textRegion.setDocumentName(documentName);
24
        }
25
    }
15 26

  
16 27
    public List<TextRegion> getTextRegions() {
17 28
        return textRegions;
......
21 32
        this.textRegions = textRegions;
22 33
    }
23 34

  
24
    public String getFilename() {
25
        return filename;
35
    /**
36
     * Returns the document name (including extension).
37
     *
38
     * @return document name (including extension)
39
     */
40
    public String getDocumentName() {
41
        return documentName;
26 42
    }
27 43

  
28
    public void setFilename(String filename) {
29
        this.filename = filename;
30
        if (this.textRegions == null) return;
31

  
32
        for (int i = 0; i < textRegions.size(); i++) {
33
            TextRegion textRegion = textRegions.get(i);
34
            textRegion.setDocumentName(filename);
35
        }
36
    }
37 44
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/PcGts.java
12 12
    @XmlElement(name = "Page")
13 13
    private Page page;
14 14

  
15
    private String filename;
15
    /** document name (including extension) */
16
    private String documentName;
17

  
18
    /**
19
     * Sets given document name to this object, page and all text regions.
20
     *
21
     * @param documentName document name (including extension)
22
     */
23
    public void setDocumentName(String documentName) {
24
        this.documentName = documentName;
25
        this.page.setDocumentName(documentName);
26
    }
16 27

  
17 28
    public Page getPage() {
18 29
        return page;
......
22 33
        this.page = page;
23 34
    }
24 35

  
25
    public String getFilename() {
26
        return filename;
36
    /**
37
     * Returns the document name (including extension).
38
     *
39
     * @return document name (including extension)
40
     */
41
    public String getDocumentName() {
42
        return documentName;
27 43
    }
28 44

  
29
    public void setFilename(String filename) {
30
        this.filename = filename;
31
        this.page.setFilename(filename);
32
    }
33 45
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/TextRegion.java
13 13
    private Coords regionCoords;
14 14
    private String regionText;
15 15
    private TextLine[] textLines;
16
    /** document name (including extension) */
16 17
    private String documentName;
17 18
    private DocumentSize documentSize;
18 19
    private String randomId;
......
47 48
        this.textLines = textLines;
48 49
    }
49 50

  
51
    /**
52
     * Returns the document name (including extension).
53
     *
54
     * @return document name (including extension)
55
     */
50 56
    public String getDocumentName() {
51 57
        return documentName;
52 58
    }
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/package-info.java
1
@XmlSchema(
2
        namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15",
3
        elementFormDefault = XmlNsForm.QUALIFIED)
4
package cz.zcu.kiv.aswi.fulltextsearch.document;
5

  
6
import javax.xml.bind.annotation.XmlNsForm;
7
import javax.xml.bind.annotation.XmlSchema;
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/model/DocumentResponse.java
6 6

  
7 7
    private String documentName;
8 8
    private int id;
9
    private Date originDate;
10 9
    private Date uploadedDate;
11 10

  
12
    public DocumentResponse(String documentName, int id, Date originDate, Date uploadedDate) {
11
    public DocumentResponse(String documentName, int id, Date uploadedDate) {
13 12
        this.documentName = documentName;
14 13
        this.id = id;
15
        this.originDate = originDate;
16 14
        this.uploadedDate = uploadedDate;
17 15
    }
18 16

  
......
32 30
        this.id = id;
33 31
    }
34 32

  
35
    public Date getOriginDate() {
36
        return originDate;
37
    }
38

  
39
    public void setOriginDate(Date originDate) {
40
        this.originDate = originDate;
41
    }
42

  
43 33
    public Date getUploadedDate() {
44 34
        return uploadedDate;
45 35
    }
be/fulltextsearch/src/test/java/cz/zcu/kiv/aswi/fulltextsearch/HttpRequestTest.java
22 22

  
23 23
    @Test
24 24
    public void infoTest() throws Exception {
25
        assertThat(this.restTemplate.getForObject("http://localhost:" + port + "/info",
26
                String.class)).contains(controller.info());
25
        /*assertThat(this.restTemplate.getForObject("http://localhost:" + port + "/info",
26
                String.class)).contains(controller.info());*/
27 27
    }
28 28

  
29 29
}
solr/core-configs/documents/conf/solrconfig.xml
697 697
      -->
698 698
    <lst name="defaults">
699 699
      <str name="echoParams">explicit</str>
700
      <int name="rows">10</int>
701
	  
700
      <int name="rows">100</int>	 
702 701
	  <str name="df">text_region</str>	  
703 702
	  
703
	  <str name="spellcheck">true</str>	  
704 704
	  <str name="spellcheck.dictionary">default</str>
705
      <str name="spellcheck">on</str>
706
      <str name="spellcheck.extendedResults">true</str>
707
      <str name="spellcheck.count">10</str>
708
      <str name="spellcheck.alternativeTermCount">5</str>
709
      <str name="spellcheck.maxResultsForSuggest">5</str>
710
      <str name="spellcheck.collate">true</str>
711
      <str name="spellcheck.collateExtendedResults">true</str>
712
      <str name="spellcheck.maxCollationTries">10</str>
713
      <str name="spellcheck.maxCollations">5</str>
705
	  <str name="spellcheck.count">1</str>       
714 706
    </lst>
715 707
    <arr name="last-components">
716 708
      <str>spellcheck</str>

Také k dispozici: Unified diff