Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 6b3bc31e

Přidáno uživatelem Jitka Poubová před téměř 5 lety

Re #7918: Zrychlení nahrávání dokumentů
- vytvoření vlastních schémat (schema.xml) pro Solr
- neindexování polí se souřadnicemi

Zobrazit rozdíly:

be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/SolrService.java
6 6
import org.apache.solr.client.solrj.SolrQuery;
7 7
import org.apache.solr.client.solrj.SolrServerException;
8 8
import org.apache.solr.client.solrj.impl.HttpSolrClient;
9
import org.apache.solr.client.solrj.request.CoreAdminRequest;
9 10
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
10 11
import org.apache.solr.client.solrj.response.QueryResponse;
11 12
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
......
47 48
    private static final String FIELD_DOC_CONTENT = "doc_content";
48 49
    private static final String FIELD_IMG_CONTENT = "img_content";
49 50
    private static final float IMG_OUTPUT_QUALITY = 0.5f;
50
    private static final Logger LOG = LoggerFactory.getLogger(IndexController.class);
51
    private static final Logger LOG = LoggerFactory.getLogger(SolrService.class);
51 52

  
52 53
    private static final int SEARCH_QUERY_ROWS = 100;
53 54

  
......
61 62

  
62 63
    private void connect() {
63 64
        LOG.info("Connecting to Solr");
64
        String urlString = URL + ":8983/solr/" + CORE_NAME_DOCUMENTS;
65
        solrClientDocuments = new HttpSolrClient.Builder(urlString).build();
65
        String urlString = URL + ":8983/solr/";
66 66

  
67
        urlString = URL + ":8983/solr/" + CORE_NAME_FILES;
68
        solrClientFiles = new HttpSolrClient.Builder(urlString).build();
67
        try {
68
            solrClientDocuments = new HttpSolrClient.Builder(urlString).build();
69
            CoreAdminRequest.createCore(CORE_NAME_DOCUMENTS, CORE_NAME_DOCUMENTS, solrClientDocuments, "conf/solrconfig.xml", "documents-schema.xml");
70
            solrClientDocuments.setBaseURL(solrClientDocuments.getBaseURL() + "/" +  CORE_NAME_DOCUMENTS);
71

  
72
            solrClientFiles = new HttpSolrClient.Builder(urlString).build();
73
            CoreAdminRequest.createCore(CORE_NAME_FILES, CORE_NAME_FILES, solrClientFiles, "conf/solrconfig.xml", "files-schema.xml");
74
            solrClientFiles.setBaseURL(solrClientFiles.getBaseURL() + "/"  + CORE_NAME_FILES);
75
        } catch (SolrServerException e) {
76
            e.printStackTrace();
77
        } catch (IOException e) {
78
            e.printStackTrace();
79
        }
69 80
    }
70 81

  
71 82
    public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException {
......
210 221
    private void addBasicFieldsToDocumentsCore() {
211 222
        LOG.trace("SolrService method addBasicFieldsToDocumentsCore called");
212 223
        LOG.debug("Adding basic fields to Documents core");
213
        addFieldToDocumentCore(FIELD_DOC_FILENAME);
214
        addFieldToDocumentCore(FIELD_IMG_FILENAME);
215
        addFieldToDocumentCore(FIELD_TEXT_REGION);
216
        addFieldToDocumentCore(FIELD_TEXT_REGION_COORDS);
224
        addFieldToDocumentCore(FIELD_DOC_FILENAME, getAddTextFieldRequest(FIELD_DOC_FILENAME));
225
        addFieldToDocumentCore(FIELD_IMG_FILENAME, getAddTextFieldRequest(FIELD_IMG_FILENAME));
226
        addFieldToDocumentCore(FIELD_TEXT_REGION, getAddTextFieldRequest(FIELD_TEXT_REGION));
227
        addFieldToDocumentCore(FIELD_TEXT_REGION_COORDS, getNotIndexedAddTextFieldRequest(FIELD_TEXT_REGION_COORDS));
217 228
    }
218 229

  
219 230
    private void getFieldNamesFromDocumentCore() throws IOException, SolrServerException {
......
229 240
    }
230 241

  
231 242

  
232
    private void addFieldToDocumentCore(String name) {
243
    private void addFieldToDocumentCore(String name, SchemaRequest.Update request) {
233 244
        LOG.trace("SolrService method addFieldToDocumentCore called");
234 245

  
235 246
        if (fieldNamesDocumentCore.contains(name)) return;
236
        LOG.debug("Adding field to Documents core");
247
        LOG.debug("Adding field \"" + name + "\" to Documents core");
237 248
        try {
238
            SchemaRequest.Update request = getAddTextFieldRequest(name);
239 249
            request.process(solrClientDocuments);
240 250
            fieldNamesDocumentCore.add(name);
241 251
        } catch (Exception e) {
......
244 254
        }
245 255
    }
246 256

  
247
    private SchemaRequest.Update getAddTextFieldRequest(String name) {
248
        LOG.trace("SolrService method getAddTextFieldRequest called");
257
    private Map<String, Object> getFieldAttributes(String name) {
249 258
        Map<String, Object> fieldAttributes = new LinkedHashMap<>();
250 259
        fieldAttributes.put("name", name);
251 260
        fieldAttributes.put("type", "text_general");
252 261
        fieldAttributes.put("stored", true);
253 262
        fieldAttributes.put("multiValued", false);
263
        return fieldAttributes;
264
    }
265

  
266
    private SchemaRequest.Update getAddTextFieldRequest(String name) {
267
        LOG.trace("SolrService method getAddTextFieldRequest called");
268
        Map<String, Object> fieldAttributes = getFieldAttributes(name);
269
        return new SchemaRequest.AddField(fieldAttributes);
270
    }
271

  
272
    private SchemaRequest.Update getNotIndexedAddTextFieldRequest(String name) {
273
        LOG.trace("SolrService method getAddTextFieldRequest called");
274
        Map<String, Object> fieldAttributes = getFieldAttributes(name);
275
        fieldAttributes.put("indexed", false);
254 276
        return new SchemaRequest.AddField(fieldAttributes);
255 277
    }
256 278

  
......
439 461
                TextLine textLine = textLines[j];
440 462
                String prefix = PREFIX_TEXT_LINE + j;
441 463

  
442
                addFieldToDocumentCore(prefix);
464
                addFieldToDocumentCore(prefix, getAddTextFieldRequest(prefix));
443 465
                solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode());
444 466

  
445
                addFieldToDocumentCore(prefix + SUFFIX_COORDS);
467
                addFieldToDocumentCore(prefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
446 468
                solrInputDocument.addField(prefix + SUFFIX_COORDS, textLine.getLineCoords().getPointsString());
447 469

  
448 470
                TextWord[] textWords = textLine.getTextWords();
......
451 473
                    TextWord textWord = textWords[k];
452 474
                    String wordPrefix = PREFIX_TEXT_WORD + k + "_" + prefix;
453 475

  
454
                    addFieldToDocumentCore(wordPrefix);
476
                    addFieldToDocumentCore(wordPrefix, getAddTextFieldRequest(wordPrefix));
455 477
                    solrInputDocument.addField(wordPrefix, textWord.getTextEquiv().getUnicode());
456 478

  
457
                    addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS);
479
                    addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
458 480
                    solrInputDocument.addField(wordPrefix + SUFFIX_COORDS, textWord.getWordCoords().getPointsString());
459 481
                }
460 482
            }

Také k dispozici: Unified diff