Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 7eddfeab

Přidáno uživatelem Jitka Poubová před téměř 5 lety

Re #7918: Zrychlení nahrávání dokumentů
- ověření před vytvářením cores v Solru, jestli už neexistují
- úprava readme

Zobrazit rozdíly:

README.md
51 51
# Database - Solr
52 52
## Solr - spuštění lokálně
53 53
- stáhnout si Solr [zde](https://lucene.apache.org/solr/downloads.html)
54
- přesunout se do složky ...\solr-8.4.1\bin 
54
- do složky ...\solr-8.4.1\server\solr přesunout celý obsah projektové složky aswi2020anonymous\solr\core-configs
55
- přesunout se do složky ...\solr-8.4.1\bin
55 56
- nastartovat Solr příkazem `solr start`
56
- vytvořit core "documents" příkazem `solr create -c documents`
57 57
## Solr - spuštění v dockeru
58 58
- nainstalovat docker viz Návody
59 59
- nastartovat docker 
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/SolrService.java
52 52

  
53 53
    private static final int SEARCH_QUERY_ROWS = 100;
54 54

  
55
    private static final int DEFAULT_LINE_FIELD_COUNT = 150;
56
    private static final int DEFAULT_WORD_FIELD_COUNT = 30;
57

  
55 58
    private HttpSolrClient solrClientDocuments;
56 59
    private HttpSolrClient solrClientFiles;
57 60
    private List<String> fieldNamesDocumentCore = new ArrayList<>();
......
61 64
    }
62 65

  
63 66
    private void connect() {
64
        LOG.info("Connecting to Solr");
67
        LOG.info("Connecting to Solr...");
65 68
        String urlString = URL + ":8983/solr/";
66 69

  
67 70
        try {
68
            LOG.info("Before HttpSolrClient.Builder");
71
            // documents
69 72
            solrClientDocuments = new HttpSolrClient.Builder(urlString).build();
70
            LOG.info("After HttpSolrClient.Builder");
71

  
72
            LOG.info("Before Create Core");
73
            CoreAdminRequest.createCore(CORE_NAME_DOCUMENTS, CORE_NAME_DOCUMENTS, solrClientDocuments, "conf/solrconfig.xml", "documents-schema.xml");
74
            LOG.info("After Create Core");
75

  
76
            LOG.info("Before setBase URL");
73
            if (CoreAdminRequest.getStatus(CORE_NAME_DOCUMENTS, solrClientDocuments).getCoreStatus(CORE_NAME_DOCUMENTS).size() < 1) {
74
                CoreAdminRequest.createCore(CORE_NAME_DOCUMENTS, CORE_NAME_DOCUMENTS, solrClientDocuments, "conf/solrconfig.xml", "documents-schema.xml");
75
                LOG.info("Core " + CORE_NAME_DOCUMENTS + " has been created.");
76
            } else {
77
                LOG.info("Core " + CORE_NAME_DOCUMENTS + " already exists.");
78
            }
77 79
            solrClientDocuments.setBaseURL(solrClientDocuments.getBaseURL() + "/" +  CORE_NAME_DOCUMENTS);
78
            LOG.info("After setBase URL");
79

  
80 80

  
81
            // files
81 82
            solrClientFiles = new HttpSolrClient.Builder(urlString).build();
82
            CoreAdminRequest.createCore(CORE_NAME_FILES, CORE_NAME_FILES, solrClientFiles, "conf/solrconfig.xml", "files-schema.xml");
83
            if (CoreAdminRequest.getStatus(CORE_NAME_FILES, solrClientFiles).getCoreStatus(CORE_NAME_FILES).size() < 1) {
84
                CoreAdminRequest.createCore(CORE_NAME_FILES, CORE_NAME_FILES, solrClientFiles, "conf/solrconfig.xml", "files-schema.xml");
85
                LOG.info("Core " + CORE_NAME_FILES + " has been created.");
86
            } else {
87
                LOG.info("Core " + CORE_NAME_FILES + " already exists.");
88
            }
83 89
            solrClientFiles.setBaseURL(solrClientFiles.getBaseURL() + "/"  + CORE_NAME_FILES);
84
            LOG.info("After try");
85
        } catch (SolrServerException e) {
86
            LOG.error("SolrServerException: " + e);
87
            e.printStackTrace();
88
        } catch (IOException e) {
89
            LOG.error("IOException: " + e);
90
            e.printStackTrace();
90

  
91
            LOG.info("Connected to Solr.");
92
        } catch (SolrServerException | IOException e ) {
93
            LOG.error("Couldn't connect to Solr! " + e);
91 94
        }
92
        LOG.error("Connect finished");
93 95
    }
94 96

  
95 97
    public int uploadFiles(List<MultipartFile> files) throws IOException, SolrServerException, JAXBException {
......
139 141
        LOG.trace("SolrService method addFiles called");
140 142
        LOG.debug("Uploading files to Solr");
141 143
        getFieldNamesFromDocumentCore();
142
        addBasicFieldsToDocumentsCore();
143

  
144
        addAllFieldsToFilesCore();
145 144

  
146 145
        int count = 0;
147 146
        Set<String> keys = pairs.keySet();
......
184 183
        addToDocumentsCore(doc, imageFile.getOriginalFilename());
185 184
    }
186 185

  
187
    private void addAllFieldsToFilesCore() throws IOException, SolrServerException {
188
        LOG.trace("SolrService method addAllFieldsToFilesCore called");
189
        List<String> fieldNamesFilesCore = new ArrayList<>();
190

  
191
        SchemaRequest.Fields fieldRequest = new SchemaRequest.Fields();
192
        SchemaResponse.FieldsResponse fieldsResponse = fieldRequest.process(solrClientFiles);
193
        List<Map<String, Object>> fields = fieldsResponse.getFields();
194

  
195
        for (Map<String, Object> field: fields) {
196
            fieldNamesFilesCore.add(field.get("name").toString());
197
        }
198

  
199
        String[] filenames = new String[]{ FIELD_DOC_FILENAME, FIELD_IMG_FILENAME };
200
        String[] contents = new String[]{ FIELD_DOC_CONTENT, FIELD_IMG_CONTENT };
201

  
202
        LOG.debug("Adding all fields to File core");
203
        for (String name: filenames) {
204
            if (fieldNamesFilesCore.contains(name)) continue;
205

  
206
            try {
207
                SchemaRequest.Update request = getAddTextFieldRequest(name);
208
                request.process(solrClientFiles);
209
            } catch (Exception e) {
210
                LOG.error("Couldn't add field = " + name);
211
                LOG.error(e.toString());
212
            }
213
        }
214

  
215
        for (String name: contents) {
216
            if (fieldNamesFilesCore.contains(name)) continue;
217

  
218
            try {
219
                Map<String, Object> fieldAttributes = new LinkedHashMap<>();
220
                fieldAttributes.put("name", name);
221
                fieldAttributes.put("type", "binary");
222
                fieldAttributes.put("indexed", false);
223
                fieldAttributes.put("stored", true);
224
                fieldAttributes.put("multiValued", false);
225
                SchemaRequest.Update request = new SchemaRequest.AddField(fieldAttributes);
226
                request.process(solrClientFiles);
227
            } catch (Exception e) {
228
                LOG.error("Couldn't add field = " + name);
229
                LOG.error(e.toString());
230
            }
231
        }
232
    }
233

  
234
    private void addBasicFieldsToDocumentsCore() {
235
        LOG.trace("SolrService method addBasicFieldsToDocumentsCore called");
236
        LOG.debug("Adding basic fields to Documents core");
237
        addFieldToDocumentCore(FIELD_DOC_FILENAME, getAddTextFieldRequest(FIELD_DOC_FILENAME));
238
        addFieldToDocumentCore(FIELD_IMG_FILENAME, getAddTextFieldRequest(FIELD_IMG_FILENAME));
239
        addFieldToDocumentCore(FIELD_TEXT_REGION, getAddTextFieldRequest(FIELD_TEXT_REGION));
240
        addFieldToDocumentCore(FIELD_TEXT_REGION_COORDS, getNotIndexedAddTextFieldRequest(FIELD_TEXT_REGION_COORDS));
241
    }
242

  
243 186
    private void getFieldNamesFromDocumentCore() throws IOException, SolrServerException {
244 187
        LOG.trace("SolrService method getFieldNamesFromDocumentCore called");
245 188
        LOG.debug("Getting field names from Documents core");
......
474 417
                TextLine textLine = textLines[j];
475 418
                String prefix = PREFIX_TEXT_LINE + j;
476 419

  
477
                addFieldToDocumentCore(prefix, getAddTextFieldRequest(prefix));
478
                solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode());
420
                if (j >= DEFAULT_LINE_FIELD_COUNT) {
421
                    addFieldToDocumentCore(prefix, getAddTextFieldRequest(prefix));
422
                    addFieldToDocumentCore(prefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
423
                }
479 424

  
480
                addFieldToDocumentCore(prefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(prefix + SUFFIX_COORDS));
425
                solrInputDocument.addField(prefix, textLine.getTextEquiv().getUnicode());
481 426
                solrInputDocument.addField(prefix + SUFFIX_COORDS, textLine.getLineCoords().getPointsString());
482 427

  
483 428
                TextWord[] textWords = textLine.getTextWords();
......
486 431
                    TextWord textWord = textWords[k];
487 432
                    String wordPrefix = PREFIX_TEXT_WORD + k + "_" + prefix;
488 433

  
489
                    addFieldToDocumentCore(wordPrefix, getAddTextFieldRequest(wordPrefix));
490
                    solrInputDocument.addField(wordPrefix, textWord.getTextEquiv().getUnicode());
434
                    if (k >= DEFAULT_WORD_FIELD_COUNT) {
435
                        addFieldToDocumentCore(wordPrefix, getAddTextFieldRequest(wordPrefix));
436
                        addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
437
                    }
491 438

  
492
                    addFieldToDocumentCore(wordPrefix + SUFFIX_COORDS, getNotIndexedAddTextFieldRequest(wordPrefix + SUFFIX_COORDS));
439
                    solrInputDocument.addField(wordPrefix, textWord.getTextEquiv().getUnicode());
493 440
                    solrInputDocument.addField(wordPrefix + SUFFIX_COORDS, textWord.getWordCoords().getPointsString());
494 441
                }
495 442
            }
be/fulltextsearch/src/main/resources/application.properties
1 1
spring.servlet.multipart.max-file-size = -1
2
spring.servlet.multipart.max-request-size = -1
2
spring.servlet.multipart.max-request-size = -1
3

  
4
logging.level.cz.zcu.kiv.aswi.fulltextsearch=INFO

Také k dispozici: Unified diff