Revize c965c938
Přidáno uživatelem Jitka Poubová před téměř 5 lety
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/IndexController.java | ||
---|---|---|
5 | 5 |
import org.apache.solr.client.solrj.SolrServerException; |
6 | 6 |
import org.springframework.web.bind.annotation.*; |
7 | 7 |
|
8 |
import javax.xml.bind.JAXBException; |
|
8 | 9 |
import java.io.IOException; |
9 | 10 |
|
10 | 11 |
@CrossOrigin(origins = "*", allowedHeaders = "*") |
... | ... | |
19 | 20 |
|
20 | 21 |
try { |
21 | 22 |
response = "Found: " + solrService.query(query.getQuery()); |
22 |
} catch (IOException | SolrServerException e) { |
|
23 |
response = "Error with Solr."; |
|
23 |
} catch (IOException e) { |
|
24 |
response = "IOException"; |
|
25 |
} catch (SolrServerException e) { |
|
26 |
response = "SolrServerException"; |
|
27 |
} catch (Exception e) { |
|
28 |
response = "Unknown exception"; |
|
24 | 29 |
} |
25 | 30 |
|
26 | 31 |
return new QueryResponse(response); |
... | ... | |
28 | 33 |
|
29 | 34 |
@GetMapping("/add") |
30 | 35 |
public String add() { |
31 |
solrService.addAllDocuments(); |
|
32 |
return "Document created"; |
|
36 |
String response; |
|
37 |
|
|
38 |
try { |
|
39 |
solrService.addAllDocuments(); |
|
40 |
response = "All documents have been added."; |
|
41 |
} catch (IOException e) { |
|
42 |
response = "IOException"; |
|
43 |
} catch (SolrServerException e) { |
|
44 |
response = "SolrServerException"; |
|
45 |
} catch (JAXBException e) { |
|
46 |
response = "JAXBException"; |
|
47 |
} catch (Exception e) { |
|
48 |
response = "Unknown exception"; |
|
49 |
} |
|
50 |
|
|
51 |
return response; |
|
33 | 52 |
} |
34 | 53 |
|
35 | 54 |
@GetMapping("/delete") |
36 | 55 |
public String delete() { |
37 |
solrService.deleteAll(); |
|
38 |
return "Documents deleted"; |
|
56 |
String response; |
|
57 |
|
|
58 |
try { |
|
59 |
solrService.deleteAll(); |
|
60 |
response = "All documents have been deleted."; |
|
61 |
} catch (IOException e) { |
|
62 |
response = "IOException"; |
|
63 |
} catch (SolrServerException e) { |
|
64 |
response = "SolrServerException"; |
|
65 |
} catch (Exception e) { |
|
66 |
response = "Unknown exception"; |
|
67 |
} |
|
68 |
|
|
69 |
return response; |
|
39 | 70 |
} |
40 | 71 |
|
72 |
@GetMapping("/info") |
|
73 |
public String info() { |
|
74 |
String response; |
|
75 |
|
|
76 |
try { |
|
77 |
response = solrService.info(); |
|
78 |
} catch (IOException e) { |
|
79 |
response = "IOException"; |
|
80 |
} catch (SolrServerException e) { |
|
81 |
response = "SolrServerException"; |
|
82 |
} catch (Exception e) { |
|
83 |
response = "Unknown exception"; |
|
84 |
} |
|
85 |
|
|
86 |
return response; |
|
87 |
} |
|
41 | 88 |
} |
42 | 89 |
|
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/SolrService.java | ||
---|---|---|
9 | 9 |
import org.apache.solr.client.solrj.response.QueryResponse; |
10 | 10 |
import org.apache.solr.common.SolrDocumentList; |
11 | 11 |
|
12 |
import javax.xml.bind.JAXBException; |
|
12 | 13 |
import java.io.IOException; |
13 |
import java.io.InputStream; |
|
14 | 14 |
|
15 | 15 |
public class SolrService { |
16 | 16 |
|
... | ... | |
27 | 27 |
solrClient.setParser(new XMLResponseParser()); |
28 | 28 |
} |
29 | 29 |
|
30 |
public void addAllDocuments() { // todo all |
|
30 |
public void addAllDocuments() throws IOException, SolrServerException, JAXBException { // todo all
|
|
31 | 31 |
XMLLoader xmlLoader = new XMLLoader(); |
32 |
InputStream is = getClass().getClassLoader().getResourceAsStream("soap-ch_knihovna_ascher-zeitung-1866-06-09-n23_0815.xml");
|
|
33 |
PcGts doc = xmlLoader.loadFile(is);
|
|
32 |
String filename = "soap-ch_knihovna_ascher-zeitung-1866-06-09-n23_0815.xml";
|
|
33 |
PcGts doc = xmlLoader.loadFile(filename);
|
|
34 | 34 |
addDocument(doc); |
35 | 35 |
} |
36 | 36 |
|
37 |
public void deleteAll() { |
|
38 |
try { |
|
39 |
solrClient.deleteByQuery("*:*"); |
|
40 |
solrClient.commit(); |
|
41 |
System.out.println("all docs deleted"); |
|
42 |
} catch (SolrServerException | IOException e) { |
|
43 |
e.printStackTrace(); |
|
44 |
} |
|
37 |
public void deleteAll() throws IOException, SolrServerException { |
|
38 |
solrClient.deleteByQuery("*:*"); |
|
39 |
solrClient.commit(); |
|
45 | 40 |
} |
46 | 41 |
|
47 | 42 |
public String query(String query) throws IOException, SolrServerException { |
48 | 43 |
SolrQuery solrQuery = new SolrQuery(); |
49 |
solrQuery.set("q", "TextEquiv:" + query); |
|
44 |
solrQuery.set("q", "TextEquiv:" + query + " AND id:*_region");
|
|
50 | 45 |
QueryResponse response = solrClient.query(solrQuery); |
51 | 46 |
|
52 | 47 |
SolrDocumentList docList = response.getResults(); |
53 | 48 |
return Long.toString(docList.getNumFound()); |
54 | 49 |
} |
55 | 50 |
|
56 |
private void addDocument(PcGts document) { |
|
51 |
private void addDocument(PcGts document) throws IOException, SolrServerException {
|
|
57 | 52 |
for (TextRegion textRegion: document.getPage().getTextRegions()) { |
58 | 53 |
addTextRegion(textRegion); |
59 | 54 |
} |
60 | 55 |
} |
61 | 56 |
|
62 |
private void addTextRegion(TextRegion textRegion) { |
|
63 |
try { |
|
64 |
solrClient.addBean(textRegion); |
|
65 |
solrClient.commit(); |
|
66 |
} catch (SolrServerException | IOException e) { |
|
67 |
e.printStackTrace(); |
|
68 |
} |
|
57 |
private void addTextRegion(TextRegion textRegion) throws IOException, SolrServerException { |
|
58 |
solrClient.addBean(textRegion); |
|
59 |
solrClient.commit(); |
|
69 | 60 |
} |
70 | 61 |
|
71 |
|
|
62 |
public String info() throws IOException, SolrServerException { |
|
63 |
SolrQuery q = new SolrQuery("*:*"); |
|
64 |
q.setRows(0); // don't actually request any data |
|
65 |
return "Number of documents in core: " + solrClient.query(q).getResults().getNumFound(); |
|
66 |
} |
|
72 | 67 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/XMLLoader.java | ||
---|---|---|
10 | 10 |
|
11 | 11 |
public class XMLLoader { |
12 | 12 |
|
13 |
public PcGts loadFile(InputStream file) { |
|
14 |
try { |
|
15 |
JAXBContext jaxbContext = JAXBContext.newInstance(PcGts.class); |
|
16 |
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); |
|
17 |
|
|
18 |
java.util.logging.Logger.getLogger("com.sun.xml.bind").setLevel(Level.FINEST); |
|
19 |
jaxbUnmarshaller.setEventHandler( |
|
20 |
event -> { |
|
21 |
System.out.println("Event Info: "+event); |
|
22 |
return event.getMessage().toLowerCase().contains("unexpected element"); |
|
23 |
}); |
|
24 |
|
|
25 |
PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(file); |
|
26 |
|
|
27 |
System.out.println("page=" + document.getPage()); |
|
28 |
return document; |
|
29 |
} catch (JAXBException e) { |
|
30 |
e.printStackTrace(); |
|
31 |
} |
|
32 |
|
|
33 |
return null; |
|
13 |
public PcGts loadFile(String filename) throws JAXBException { |
|
14 |
InputStream file = getClass().getClassLoader().getResourceAsStream(filename); |
|
15 |
|
|
16 |
JAXBContext jaxbContext = JAXBContext.newInstance(PcGts.class); |
|
17 |
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); |
|
18 |
|
|
19 |
// ignoring unused elements |
|
20 |
java.util.logging.Logger.getLogger("com.sun.xml.bind").setLevel(Level.FINEST); |
|
21 |
jaxbUnmarshaller.setEventHandler(event -> event.getMessage().toLowerCase().contains("unexpected element")); |
|
22 |
|
|
23 |
PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(file); |
|
24 |
document.setId(filename.replaceAll(".xml", "")); |
|
25 |
|
|
26 |
return document; |
|
34 | 27 |
} |
35 | 28 |
|
36 | 29 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Page.java | ||
---|---|---|
11 | 11 |
@XmlElement(name = "TextRegion") |
12 | 12 |
private List<TextRegion> textRegions; |
13 | 13 |
|
14 |
private String id; |
|
15 |
|
|
14 | 16 |
public List<TextRegion> getTextRegions() { |
15 | 17 |
return textRegions; |
16 | 18 |
} |
... | ... | |
18 | 20 |
public void setTextRegions(List<TextRegion> textRegions) { |
19 | 21 |
this.textRegions = textRegions; |
20 | 22 |
} |
23 |
|
|
24 |
public String getId() { |
|
25 |
return id; |
|
26 |
} |
|
27 |
|
|
28 |
public void setId(String id) { |
|
29 |
this.id = id; |
|
30 |
for (int i = 0; i < textRegions.size(); i++) { |
|
31 |
TextRegion textRegion = textRegions.get(i); |
|
32 |
textRegion.setId(id + "_" + i +"_region"); |
|
33 |
textRegion.setFile(id); |
|
34 |
} |
|
35 |
} |
|
21 | 36 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/PcGts.java | ||
---|---|---|
12 | 12 |
@XmlElement(name = "Page") |
13 | 13 |
private Page page; |
14 | 14 |
|
15 |
private String id; |
|
16 |
|
|
15 | 17 |
public Page getPage() { |
16 | 18 |
return page; |
17 | 19 |
} |
... | ... | |
19 | 21 |
public void setPage(Page page) { |
20 | 22 |
this.page = page; |
21 | 23 |
} |
24 |
|
|
25 |
public String getId() { |
|
26 |
return id; |
|
27 |
} |
|
28 |
|
|
29 |
public void setId(String id) { |
|
30 |
this.id = id; |
|
31 |
this.page.setId(id); |
|
32 |
} |
|
22 | 33 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/TextLine.java | ||
---|---|---|
1 |
package cz.zcu.kiv.aswi.fulltextsearch.document; |
|
2 |
|
|
3 |
import org.apache.solr.client.solrj.beans.Field; |
|
4 |
|
|
5 |
import javax.xml.bind.annotation.XmlAccessType; |
|
6 |
import javax.xml.bind.annotation.XmlAccessorType; |
|
7 |
import javax.xml.bind.annotation.XmlElement; |
|
8 |
import java.util.List; |
|
9 |
|
|
10 |
@XmlAccessorType(XmlAccessType.PROPERTY) |
|
11 |
public class TextLine { |
|
12 |
|
|
13 |
private Coords coords; |
|
14 |
private TextEquiv textEquiv; |
|
15 |
|
|
16 |
@Field(value = "Word", child = true) |
|
17 |
private List<Word> words; |
|
18 |
|
|
19 |
@Field("id") |
|
20 |
private String id; |
|
21 |
|
|
22 |
@Field("Coords") |
|
23 |
private String coordsString; |
|
24 |
|
|
25 |
@Field("TextEquiv") |
|
26 |
private String textEquivString; |
|
27 |
|
|
28 |
public Coords getCoords() { |
|
29 |
return coords; |
|
30 |
} |
|
31 |
|
|
32 |
@XmlElement(name = "Coords") |
|
33 |
public void setCoords(Coords coords) { |
|
34 |
this.coords = coords; |
|
35 |
this.coordsString = coords.getPoints(); |
|
36 |
} |
|
37 |
|
|
38 |
public TextEquiv getTextEquiv() { |
|
39 |
return textEquiv; |
|
40 |
} |
|
41 |
|
|
42 |
@XmlElement(name = "TextEquiv") |
|
43 |
public void setTextEquiv(TextEquiv textEquiv) { |
|
44 |
this.textEquiv = textEquiv; |
|
45 |
this.textEquivString = textEquiv.getUnicode(); |
|
46 |
} |
|
47 |
|
|
48 |
public String getCoordsString() { |
|
49 |
return coordsString; |
|
50 |
} |
|
51 |
|
|
52 |
public String getTextEquivString() { |
|
53 |
return textEquivString; |
|
54 |
} |
|
55 |
|
|
56 |
public void setCoordsString(String coordsString) { |
|
57 |
this.coordsString = coordsString; |
|
58 |
} |
|
59 |
|
|
60 |
public void setTextEquivString(String textEquivString) { |
|
61 |
this.textEquivString = textEquivString; |
|
62 |
} |
|
63 |
|
|
64 |
public List<Word> getWords() { |
|
65 |
return words; |
|
66 |
} |
|
67 |
|
|
68 |
@XmlElement(name = "Word") |
|
69 |
public void setWords(List<Word> words) { |
|
70 |
this.words = words; |
|
71 |
} |
|
72 |
|
|
73 |
public String getId() { |
|
74 |
return id; |
|
75 |
} |
|
76 |
|
|
77 |
public void setId(String id) { |
|
78 |
this.id = id; |
|
79 |
for (int i = 0; i < words.size(); i++) { |
|
80 |
Word word = words.get(i); |
|
81 |
word.setId(id + "_" + i + "_word"); |
|
82 |
} |
|
83 |
} |
|
84 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/TextRegion.java | ||
---|---|---|
5 | 5 |
import javax.xml.bind.annotation.XmlAccessType; |
6 | 6 |
import javax.xml.bind.annotation.XmlAccessorType; |
7 | 7 |
import javax.xml.bind.annotation.XmlElement; |
8 |
import java.util.List; |
|
8 | 9 |
|
9 |
@XmlAccessorType(XmlAccessType.FIELD)
|
|
10 |
@XmlAccessorType(XmlAccessType.PROPERTY)
|
|
10 | 11 |
public class TextRegion { |
11 | 12 |
|
12 |
@Field("Coords") |
|
13 |
@XmlElement(name = "Coords") |
|
14 | 13 |
private Coords coords; |
14 |
private TextEquiv textEquiv; |
|
15 |
|
|
16 |
@Field("id") |
|
17 |
private String id; |
|
18 |
|
|
19 |
@Field("file") |
|
20 |
private String file; |
|
21 |
|
|
22 |
@Field(value = "TextLine", child = true) |
|
23 |
private List<TextLine> textLines; |
|
24 |
|
|
25 |
@Field("Coords") |
|
26 |
private String coordsString; |
|
15 | 27 |
|
16 | 28 |
@Field("TextEquiv") |
17 |
@XmlElement(name = "TextEquiv") |
|
18 |
private TextEquiv textEquiv; |
|
29 |
private String textEquivString; |
|
19 | 30 |
|
20 | 31 |
public Coords getCoords() { |
21 | 32 |
return coords; |
22 | 33 |
} |
23 | 34 |
|
35 |
@XmlElement(name = "Coords") |
|
24 | 36 |
public void setCoords(Coords coords) { |
25 | 37 |
this.coords = coords; |
38 |
this.coordsString = coords.getPoints(); |
|
26 | 39 |
} |
27 | 40 |
|
28 | 41 |
public TextEquiv getTextEquiv() { |
29 | 42 |
return textEquiv; |
30 | 43 |
} |
31 | 44 |
|
45 |
@XmlElement(name = "TextEquiv") |
|
32 | 46 |
public void setTextEquiv(TextEquiv textEquiv) { |
33 | 47 |
this.textEquiv = textEquiv; |
48 |
this.textEquivString = textEquiv.getUnicode(); |
|
49 |
} |
|
50 |
|
|
51 |
public List<TextLine> getTextLines() { |
|
52 |
return textLines; |
|
53 |
} |
|
54 |
|
|
55 |
@XmlElement(name = "TextLine") |
|
56 |
public void setTextLines(List<TextLine> textLines) { |
|
57 |
this.textLines = textLines; |
|
58 |
} |
|
59 |
|
|
60 |
public String getCoordsString() { |
|
61 |
return coordsString; |
|
62 |
} |
|
63 |
|
|
64 |
public String getTextEquivString() { |
|
65 |
return textEquivString; |
|
66 |
} |
|
67 |
|
|
68 |
public void setCoordsString(String coordsString) { |
|
69 |
this.coordsString = coordsString; |
|
70 |
} |
|
71 |
|
|
72 |
public void setTextEquivString(String textEquivString) { |
|
73 |
this.textEquivString = textEquivString; |
|
74 |
} |
|
75 |
|
|
76 |
public String getId() { |
|
77 |
return id; |
|
78 |
} |
|
79 |
|
|
80 |
public void setId(String id) { |
|
81 |
this.id = id; |
|
82 |
for (int i = 0; i < textLines.size(); i++) { |
|
83 |
TextLine textLine = textLines.get(i); |
|
84 |
textLine.setId(id + "_" + i + "_line"); |
|
85 |
} |
|
86 |
} |
|
87 |
|
|
88 |
public String getFile() { |
|
89 |
return file; |
|
90 |
} |
|
91 |
|
|
92 |
public void setFile(String file) { |
|
93 |
this.file = file; |
|
34 | 94 |
} |
35 | 95 |
} |
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Word.java | ||
---|---|---|
1 |
package cz.zcu.kiv.aswi.fulltextsearch.document; |
|
2 |
|
|
3 |
import org.apache.solr.client.solrj.beans.Field; |
|
4 |
|
|
5 |
import javax.xml.bind.annotation.XmlAccessType; |
|
6 |
import javax.xml.bind.annotation.XmlAccessorType; |
|
7 |
import javax.xml.bind.annotation.XmlElement; |
|
8 |
|
|
9 |
@XmlAccessorType(XmlAccessType.PROPERTY) |
|
10 |
public class Word { |
|
11 |
|
|
12 |
private Coords coords; |
|
13 |
private TextEquiv textEquiv; |
|
14 |
|
|
15 |
@Field("id") |
|
16 |
private String id; |
|
17 |
|
|
18 |
@Field("Coords") |
|
19 |
private String coordsString; |
|
20 |
|
|
21 |
@Field("TextEquiv") |
|
22 |
private String textEquivString; |
|
23 |
|
|
24 |
public Coords getCoords() { |
|
25 |
return coords; |
|
26 |
} |
|
27 |
|
|
28 |
@XmlElement(name = "Coords") |
|
29 |
public void setCoords(Coords coords) { |
|
30 |
this.coords = coords; |
|
31 |
this.coordsString = coords.getPoints().toString(); |
|
32 |
} |
|
33 |
|
|
34 |
public TextEquiv getTextEquiv() { |
|
35 |
return textEquiv; |
|
36 |
} |
|
37 |
|
|
38 |
@XmlElement(name = "TextEquiv") |
|
39 |
public void setTextEquiv(TextEquiv textEquiv) { |
|
40 |
this.textEquiv = textEquiv; |
|
41 |
this.textEquivString = textEquiv.getUnicode(); |
|
42 |
} |
|
43 |
|
|
44 |
public String getCoordsString() { |
|
45 |
return coordsString; |
|
46 |
} |
|
47 |
|
|
48 |
public String getTextEquivString() { |
|
49 |
return textEquivString; |
|
50 |
} |
|
51 |
|
|
52 |
public void setCoordsString(String coordsString) { |
|
53 |
this.coordsString = coordsString; |
|
54 |
} |
|
55 |
|
|
56 |
public void setTextEquivString(String textEquivString) { |
|
57 |
this.textEquivString = textEquivString; |
|
58 |
} |
|
59 |
|
|
60 |
public String getId() { |
|
61 |
return id; |
|
62 |
} |
|
63 |
|
|
64 |
public void setId(String id) { |
|
65 |
this.id = id; |
|
66 |
} |
|
67 |
} |
Také k dispozici: Unified diff
Re #7734: Implementace ukládání dokumentů
- do Solru se nyní ukládají i jednotlivé řádky a slova
- ukládání id dokumentu (= název souboru)