Projekt

Obecné

Profil

« Předchozí | Další » 

Revize c965c938

Přidáno uživatelem Jitka Poubová před více než 4 lety

Re #7734: Implementace ukládání dokumentů
- do Solru se nyní ukládají i jednotlivé řádky a slova
- ukládání id dokumentu (= název souboru)

Zobrazit rozdíly:

be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/IndexController.java
5 5
import org.apache.solr.client.solrj.SolrServerException;
6 6
import org.springframework.web.bind.annotation.*;
7 7

  
8
import javax.xml.bind.JAXBException;
8 9
import java.io.IOException;
9 10

  
10 11
@CrossOrigin(origins = "*", allowedHeaders = "*")
......
19 20

  
20 21
        try {
21 22
            response = "Found: " + solrService.query(query.getQuery());
22
        } catch (IOException | SolrServerException e) {
23
            response = "Error with Solr.";
23
        } catch (IOException e) {
24
            response = "IOException";
25
        } catch (SolrServerException e) {
26
            response = "SolrServerException";
27
        } catch (Exception e) {
28
            response = "Unknown exception";
24 29
        }
25 30

  
26 31
        return new QueryResponse(response);
......
28 33

  
29 34
    @GetMapping("/add")
30 35
    public String add() {
31
        solrService.addAllDocuments();
32
        return "Document created";
36
        String response;
37

  
38
        try {
39
            solrService.addAllDocuments();
40
            response = "All documents have been added.";
41
        } catch (IOException e) {
42
            response = "IOException";
43
        } catch (SolrServerException e) {
44
            response = "SolrServerException";
45
        } catch (JAXBException e) {
46
            response = "JAXBException";
47
        } catch (Exception e) {
48
            response = "Unknown exception";
49
        }
50

  
51
        return response;
33 52
    }
34 53

  
35 54
    @GetMapping("/delete")
36 55
    public String delete() {
37
        solrService.deleteAll();
38
        return "Documents deleted";
56
        String response;
57

  
58
        try {
59
            solrService.deleteAll();
60
            response = "All documents have been deleted.";
61
        } catch (IOException e) {
62
            response = "IOException";
63
        } catch (SolrServerException e) {
64
            response = "SolrServerException";
65
        } catch (Exception e) {
66
            response = "Unknown exception";
67
        }
68

  
69
        return response;
39 70
    }
40 71

  
72
    @GetMapping("/info")
73
    public String info() {
74
        String response;
75

  
76
        try {
77
            response = solrService.info();
78
        } catch (IOException e) {
79
            response = "IOException";
80
        } catch (SolrServerException e) {
81
            response = "SolrServerException";
82
        } catch (Exception e) {
83
            response = "Unknown exception";
84
        }
85

  
86
        return response;
87
    }
41 88
}
42 89

  
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/SolrService.java
9 9
import org.apache.solr.client.solrj.response.QueryResponse;
10 10
import org.apache.solr.common.SolrDocumentList;
11 11

  
12
import javax.xml.bind.JAXBException;
12 13
import java.io.IOException;
13
import java.io.InputStream;
14 14

  
15 15
public class SolrService {
16 16

  
......
27 27
        solrClient.setParser(new XMLResponseParser());
28 28
    }
29 29

  
30
    public void addAllDocuments() { // todo all
30
    public void addAllDocuments() throws IOException, SolrServerException, JAXBException { // todo all
31 31
        XMLLoader xmlLoader = new XMLLoader();
32
        InputStream is = getClass().getClassLoader().getResourceAsStream("soap-ch_knihovna_ascher-zeitung-1866-06-09-n23_0815.xml");
33
        PcGts doc = xmlLoader.loadFile(is);
32
        String filename = "soap-ch_knihovna_ascher-zeitung-1866-06-09-n23_0815.xml";
33
        PcGts doc = xmlLoader.loadFile(filename);
34 34
        addDocument(doc);
35 35
    }
36 36

  
37
    public void deleteAll() {
38
        try {
39
            solrClient.deleteByQuery("*:*");
40
            solrClient.commit();
41
            System.out.println("all docs deleted");
42
        } catch (SolrServerException | IOException e) {
43
            e.printStackTrace();
44
        }
37
    public void deleteAll() throws IOException, SolrServerException {
38
        solrClient.deleteByQuery("*:*");
39
        solrClient.commit();
45 40
    }
46 41

  
47 42
    public String query(String query) throws IOException, SolrServerException  {
48 43
        SolrQuery solrQuery = new SolrQuery();
49
        solrQuery.set("q", "TextEquiv:" + query);
44
        solrQuery.set("q", "TextEquiv:" + query + " AND id:*_region");
50 45
        QueryResponse response = solrClient.query(solrQuery);
51 46

  
52 47
        SolrDocumentList docList = response.getResults();
53 48
        return Long.toString(docList.getNumFound());
54 49
    }
55 50

  
56
    private void addDocument(PcGts document) {
51
    private void addDocument(PcGts document) throws IOException, SolrServerException {
57 52
        for (TextRegion textRegion: document.getPage().getTextRegions()) {
58 53
            addTextRegion(textRegion);
59 54
        }
60 55
    }
61 56

  
62
    private void addTextRegion(TextRegion textRegion) {
63
        try {
64
            solrClient.addBean(textRegion);
65
            solrClient.commit();
66
        } catch (SolrServerException | IOException e) {
67
            e.printStackTrace();
68
        }
57
    private void addTextRegion(TextRegion textRegion) throws IOException, SolrServerException {
58
        solrClient.addBean(textRegion);
59
        solrClient.commit();
69 60
    }
70 61

  
71

  
62
    public String info() throws IOException, SolrServerException {
63
        SolrQuery q = new SolrQuery("*:*");
64
        q.setRows(0);  // don't actually request any data
65
        return "Number of documents in core: " + solrClient.query(q).getResults().getNumFound();
66
    }
72 67
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/XMLLoader.java
10 10

  
11 11
public class XMLLoader {
12 12

  
13
    public PcGts loadFile(InputStream file) {
14
        try {
15
            JAXBContext jaxbContext = JAXBContext.newInstance(PcGts.class);
16
            Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
17

  
18
            java.util.logging.Logger.getLogger("com.sun.xml.bind").setLevel(Level.FINEST);
19
            jaxbUnmarshaller.setEventHandler(
20
                    event -> {
21
                        System.out.println("Event Info: "+event);
22
                        return event.getMessage().toLowerCase().contains("unexpected element");
23
                    });
24

  
25
            PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(file);
26

  
27
            System.out.println("page=" + document.getPage());
28
            return document;
29
        } catch (JAXBException e) {
30
            e.printStackTrace();
31
        }
32

  
33
        return null;
13
    public PcGts loadFile(String filename) throws JAXBException {
14
        InputStream file = getClass().getClassLoader().getResourceAsStream(filename);
15

  
16
        JAXBContext jaxbContext = JAXBContext.newInstance(PcGts.class);
17
        Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
18

  
19
        // ignoring unused elements
20
        java.util.logging.Logger.getLogger("com.sun.xml.bind").setLevel(Level.FINEST);
21
        jaxbUnmarshaller.setEventHandler(event -> event.getMessage().toLowerCase().contains("unexpected element"));
22

  
23
        PcGts document = (PcGts) jaxbUnmarshaller.unmarshal(file);
24
        document.setId(filename.replaceAll(".xml", ""));
25

  
26
        return document;
34 27
    }
35 28

  
36 29
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Page.java
11 11
    @XmlElement(name = "TextRegion")
12 12
    private List<TextRegion> textRegions;
13 13

  
14
    private String id;
15

  
14 16
    public List<TextRegion> getTextRegions() {
15 17
        return textRegions;
16 18
    }
......
18 20
    public void setTextRegions(List<TextRegion> textRegions) {
19 21
        this.textRegions = textRegions;
20 22
    }
23

  
24
    public String getId() {
25
        return id;
26
    }
27

  
28
    public void setId(String id) {
29
        this.id = id;
30
        for (int i = 0; i < textRegions.size(); i++) {
31
            TextRegion textRegion = textRegions.get(i);
32
            textRegion.setId(id + "_" + i +"_region");
33
            textRegion.setFile(id);
34
        }
35
    }
21 36
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/PcGts.java
12 12
    @XmlElement(name = "Page")
13 13
    private Page page;
14 14

  
15
    private String id;
16

  
15 17
    public Page getPage() {
16 18
        return page;
17 19
    }
......
19 21
    public void setPage(Page page) {
20 22
        this.page = page;
21 23
    }
24

  
25
    public String getId() {
26
        return id;
27
    }
28

  
29
    public void setId(String id) {
30
        this.id = id;
31
        this.page.setId(id);
32
    }
22 33
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/TextLine.java
1
package cz.zcu.kiv.aswi.fulltextsearch.document;
2

  
3
import org.apache.solr.client.solrj.beans.Field;
4

  
5
import javax.xml.bind.annotation.XmlAccessType;
6
import javax.xml.bind.annotation.XmlAccessorType;
7
import javax.xml.bind.annotation.XmlElement;
8
import java.util.List;
9

  
10
@XmlAccessorType(XmlAccessType.PROPERTY)
11
public class TextLine {
12

  
13
    private Coords coords;
14
    private TextEquiv textEquiv;
15

  
16
    @Field(value = "Word", child = true)
17
    private List<Word> words;
18

  
19
    @Field("id")
20
    private String id;
21

  
22
    @Field("Coords")
23
    private String coordsString;
24

  
25
    @Field("TextEquiv")
26
    private String textEquivString;
27

  
28
    public Coords getCoords() {
29
        return coords;
30
    }
31

  
32
    @XmlElement(name = "Coords")
33
    public void setCoords(Coords coords) {
34
        this.coords = coords;
35
        this.coordsString = coords.getPoints();
36
    }
37

  
38
    public TextEquiv getTextEquiv() {
39
        return textEquiv;
40
    }
41

  
42
    @XmlElement(name = "TextEquiv")
43
    public void setTextEquiv(TextEquiv textEquiv) {
44
        this.textEquiv = textEquiv;
45
        this.textEquivString = textEquiv.getUnicode();
46
    }
47

  
48
    public String getCoordsString() {
49
        return coordsString;
50
    }
51

  
52
    public String getTextEquivString() {
53
        return textEquivString;
54
    }
55

  
56
    public void setCoordsString(String coordsString) {
57
        this.coordsString = coordsString;
58
    }
59

  
60
    public void setTextEquivString(String textEquivString) {
61
        this.textEquivString = textEquivString;
62
    }
63

  
64
    public List<Word> getWords() {
65
        return words;
66
    }
67

  
68
    @XmlElement(name = "Word")
69
    public void setWords(List<Word> words) {
70
        this.words = words;
71
    }
72

  
73
    public String getId() {
74
        return id;
75
    }
76

  
77
    public void setId(String id) {
78
        this.id = id;
79
        for (int i = 0; i < words.size(); i++) {
80
            Word word = words.get(i);
81
            word.setId(id + "_" + i + "_word");
82
        }
83
    }
84
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/TextRegion.java
5 5
import javax.xml.bind.annotation.XmlAccessType;
6 6
import javax.xml.bind.annotation.XmlAccessorType;
7 7
import javax.xml.bind.annotation.XmlElement;
8
import java.util.List;
8 9

  
9
@XmlAccessorType(XmlAccessType.FIELD)
10
@XmlAccessorType(XmlAccessType.PROPERTY)
10 11
public class TextRegion {
11 12

  
12
    @Field("Coords")
13
    @XmlElement(name = "Coords")
14 13
    private Coords coords;
14
    private TextEquiv textEquiv;
15

  
16
    @Field("id")
17
    private String id;
18

  
19
    @Field("file")
20
    private String file;
21

  
22
    @Field(value = "TextLine", child = true)
23
    private List<TextLine> textLines;
24

  
25
    @Field("Coords")
26
    private String coordsString;
15 27

  
16 28
    @Field("TextEquiv")
17
    @XmlElement(name = "TextEquiv")
18
    private TextEquiv textEquiv;
29
    private String textEquivString;
19 30

  
20 31
    public Coords getCoords() {
21 32
        return coords;
22 33
    }
23 34

  
35
    @XmlElement(name = "Coords")
24 36
    public void setCoords(Coords coords) {
25 37
        this.coords = coords;
38
        this.coordsString = coords.getPoints();
26 39
    }
27 40

  
28 41
    public TextEquiv getTextEquiv() {
29 42
        return textEquiv;
30 43
    }
31 44

  
45
    @XmlElement(name = "TextEquiv")
32 46
    public void setTextEquiv(TextEquiv textEquiv) {
33 47
        this.textEquiv = textEquiv;
48
        this.textEquivString = textEquiv.getUnicode();
49
    }
50

  
51
    public List<TextLine> getTextLines() {
52
        return textLines;
53
    }
54

  
55
    @XmlElement(name = "TextLine")
56
    public void setTextLines(List<TextLine> textLines) {
57
        this.textLines = textLines;
58
    }
59

  
60
    public String getCoordsString() {
61
        return coordsString;
62
    }
63

  
64
    public String getTextEquivString() {
65
        return textEquivString;
66
    }
67

  
68
    public void setCoordsString(String coordsString) {
69
        this.coordsString = coordsString;
70
    }
71

  
72
    public void setTextEquivString(String textEquivString) {
73
        this.textEquivString = textEquivString;
74
    }
75

  
76
    public String getId() {
77
        return id;
78
    }
79

  
80
    public void setId(String id) {
81
        this.id = id;
82
        for (int i = 0; i < textLines.size(); i++) {
83
            TextLine textLine = textLines.get(i);
84
            textLine.setId(id + "_" + i + "_line");
85
        }
86
    }
87

  
88
    public String getFile() {
89
        return file;
90
    }
91

  
92
    public void setFile(String file) {
93
        this.file = file;
34 94
    }
35 95
}
be/fulltextsearch/src/main/java/cz/zcu/kiv/aswi/fulltextsearch/document/Word.java
1
package cz.zcu.kiv.aswi.fulltextsearch.document;
2

  
3
import org.apache.solr.client.solrj.beans.Field;
4

  
5
import javax.xml.bind.annotation.XmlAccessType;
6
import javax.xml.bind.annotation.XmlAccessorType;
7
import javax.xml.bind.annotation.XmlElement;
8

  
9
@XmlAccessorType(XmlAccessType.PROPERTY)
10
public class Word {
11

  
12
    private Coords coords;
13
    private TextEquiv textEquiv;
14

  
15
    @Field("id")
16
    private String id;
17

  
18
    @Field("Coords")
19
    private String coordsString;
20

  
21
    @Field("TextEquiv")
22
    private String textEquivString;
23

  
24
    public Coords getCoords() {
25
        return coords;
26
    }
27

  
28
    @XmlElement(name = "Coords")
29
    public void setCoords(Coords coords) {
30
        this.coords = coords;
31
        this.coordsString = coords.getPoints().toString();
32
    }
33

  
34
    public TextEquiv getTextEquiv() {
35
        return textEquiv;
36
    }
37

  
38
    @XmlElement(name = "TextEquiv")
39
    public void setTextEquiv(TextEquiv textEquiv) {
40
        this.textEquiv = textEquiv;
41
        this.textEquivString = textEquiv.getUnicode();
42
    }
43

  
44
    public String getCoordsString() {
45
        return coordsString;
46
    }
47

  
48
    public String getTextEquivString() {
49
        return textEquivString;
50
    }
51

  
52
    public void setCoordsString(String coordsString) {
53
        this.coordsString = coordsString;
54
    }
55

  
56
    public void setTextEquivString(String textEquivString) {
57
        this.textEquivString = textEquivString;
58
    }
59

  
60
    public String getId() {
61
        return id;
62
    }
63

  
64
    public void setId(String id) {
65
        this.id = id;
66
    }
67
}

Také k dispozici: Unified diff