1 |
a4c65df6
|
Ondrej Drtina
|
import java.io.File;
|
2 |
|
|
import java.sql.*;
|
3 |
|
|
import java.util.ArrayList;
|
4 |
|
|
import java.util.Scanner;
|
5 |
|
|
|
6 |
|
|
public class Starter {
|
7 |
efca2344
|
Ondrej Drtina
|
public static ArrayList<String> deletedRedundantItIds; //contains ids of duplicated items which were deleted (duplicated items existed mefore merge...)
|
8 |
|
|
|
9 |
a4c65df6
|
Ondrej Drtina
|
public static void main(String[] args){
|
10 |
efca2344
|
Ondrej Drtina
|
deletedRedundantItIds = new ArrayList<>();
|
11 |
|
|
|
12 |
|
|
//establish connection to DBs with unmodified content (dalimil1, dalimil2)
|
13 |
a4c65df6
|
Ondrej Drtina
|
Connection dalimil1Con = establishDalimilX("dalimil1");
|
14 |
|
|
Connection dalimil2Con = establishDalimilX("dalimil2");
|
15 |
efca2344
|
Ondrej Drtina
|
//retrieve content of "dd_wordform"s of dalimil1
|
16 |
a4c65df6
|
Ondrej Drtina
|
ResultSet dalimil1Wordform = retrieveWordformDalimilX(dalimil1Con, "dalimil1");
|
17 |
|
|
//start process of finding whats missing in dalimil2 (AND is present in dalimil1)
|
18 |
efca2344
|
Ondrej Drtina
|
checkMissingWordformDalimil2(dalimil1Con, dalimil2Con, dalimil1Wordform); //UNCOMMENT THIS TO MERGE
|
19 |
|
|
|
20 |
|
|
//UNCOMMENT THIS TO PERFORM COMPLETE CHECK - START
|
21 |
|
|
/*System.out.println("MERGED DB CHECK mergedDBCheck - START");
|
22 |
|
|
mergedDBCheck(dalimil2Con);
|
23 |
|
|
System.out.println("MERGED DB CHECK mergedDBCheck - END");
|
24 |
|
|
System.out.println("REVIEW DELETED ITEMS MERGE reviewDeletedItemsMerge - START");
|
25 |
|
|
reviewDeletedItemsMerge(dalimil2Con);
|
26 |
|
|
System.out.println("REVIEW DELETED ITEMS MERGE reviewDeletedItemsMerge - END");*/
|
27 |
|
|
//UNCOMMENT THIS TO PERFORM COMPLETE CHECK - END
|
28 |
|
|
|
29 |
a4c65df6
|
Ondrej Drtina
|
//close connection to DBs
|
30 |
|
|
closeDalimilX(dalimil1Con, "dalimil1");
|
31 |
|
|
closeDalimilX(dalimil2Con, "dalimil2");
|
32 |
|
|
}
|
33 |
|
|
|
34 |
|
|
/**
|
35 |
|
|
* Go through entries present in dalimil1.dd_wordform and check which items are missing in dalimil2. Then perform respective steps (add wordform and possibly entries to other tables - dd_lemma etc).
|
36 |
|
|
* @param dalimil1Con Connection obj, represents connection to dalimil1 DB
|
37 |
|
|
* @param dalimil2Con Connection obj, represents connection to dalimil2 DB
|
38 |
|
|
* @param dalimil1Wordform ResultSet which represents entries in dalimil1.wordform
|
39 |
|
|
*/
|
40 |
|
|
private static void checkMissingWordformDalimil2(Connection dalimil1Con, Connection dalimil2Con, ResultSet dalimil1Wordform){
|
41 |
|
|
ArrayList<Integer>[] doNotDeleteID = parseConflictsFile("shody - okomentované_rem_VYHODIT.txt"); //load ids, which should NOT be deleted (according to db admins)
|
42 |
|
|
|
43 |
|
|
ArrayList<String> dalimil1Var = new ArrayList<>();
|
44 |
|
|
ArrayList<String> dalimil2Var = new ArrayList<>();
|
45 |
|
|
|
46 |
|
|
try{
|
47 |
|
|
while(dalimil1Wordform.next()){ //go through dalimil1/dd_wordform
|
48 |
|
|
//get all fields dalimil1/dd_wordform - START
|
49 |
|
|
String id = dalimil1Wordform.getString("id");
|
50 |
|
|
String context = dalimil1Wordform.getString("context");
|
51 |
|
|
String date = dalimil1Wordform.getString("date");
|
52 |
|
|
String description = dalimil1Wordform.getString("description");
|
53 |
|
|
String description2 = dalimil1Wordform.getString("description2");
|
54 |
|
|
String description3 = dalimil1Wordform.getString("description3");
|
55 |
|
|
String ending = dalimil1Wordform.getString("ending");
|
56 |
|
|
String finished = dalimil1Wordform.getString("finished");
|
57 |
|
|
String namedentity = dalimil1Wordform.getString("namedentity");
|
58 |
|
|
String position1 = dalimil1Wordform.getString("position1");
|
59 |
|
|
String position2 = dalimil1Wordform.getString("position2");
|
60 |
|
|
String positiondetail = dalimil1Wordform.getString("positiondetail");
|
61 |
|
|
String prefix = dalimil1Wordform.getString("prefix");
|
62 |
|
|
String suffix = dalimil1Wordform.getString("suffix");
|
63 |
|
|
String word = dalimil1Wordform.getString("word");
|
64 |
|
|
String lemma_id = dalimil1Wordform.getString("lemma_id");
|
65 |
|
|
String tag_id = dalimil1Wordform.getString("tag_id");
|
66 |
|
|
//get all fields dalimil1/dd_wordform - END
|
67 |
|
|
|
68 |
|
|
//look where points lemma_id and tag_id
|
69 |
|
|
String[] lemma_id_dd_lemma = retrieveLemmaContForId(dalimil1Con, "dalimil1", lemma_id);
|
70 |
|
|
String tag_id_dd_tag = retrieveTagContForId(dalimil1Con, "dalimil1", tag_id);
|
71 |
|
|
ArrayList<String> manuscript_cont = retrieveManuscriptNumsForId(dalimil1Con, "dalimil1", id);
|
72 |
|
|
|
73 |
|
|
String dal1Print = "";
|
74 |
|
|
dal1Print += "Dal1: id: " + id + " context: " + context + " date: " + date + " description: " + description + " description2: " + description2 + " description3: " + description3 + " ending: " + ending + " finished: " + finished + " namedentity: " + namedentity + " position1: " + position1 + " position2: " + position2 + " positiondetail: " + positiondetail + " prefix: " + prefix + " suffix: " + suffix + " word: " + word + " lemma_id: " + lemma_id + " tag_id: " + tag_id + " dd_lemma lemma: " + lemma_id_dd_lemma[0] + " dd_lemma pos: " + lemma_id_dd_lemma[1] + " dd_tag tag: " + tag_id_dd_tag;
|
75 |
|
|
dal1Print += " in manuscripts: ";
|
76 |
|
|
for(String manuscriptId : manuscript_cont){
|
77 |
|
|
dal1Print += manuscriptId + ", ";
|
78 |
|
|
}
|
79 |
|
|
|
80 |
|
|
//check if item already in dalimil2/dd_wordform
|
81 |
|
|
Statement statement = dalimil2Con.createStatement();
|
82 |
|
|
String sqlQuery = "SELECT * FROM public.dd_wordform WHERE position1='" + position1 + "' AND position2='" + position2 + "' AND positiondetail='" + positiondetail + "' AND word='" + word + "'" + ";";
|
83 |
|
|
ResultSet results = statement.executeQuery(sqlQuery);
|
84 |
|
|
|
85 |
|
|
int matchCountSec = 0; //number of matches found in second DB
|
86 |
|
|
while(results.next()){
|
87 |
|
|
matchCountSec++;
|
88 |
|
|
String idSec = results.getString("id");
|
89 |
|
|
String contextSec = results.getString("context");
|
90 |
|
|
String dateSec = results.getString("date");
|
91 |
|
|
String descriptionSec = results.getString("description");
|
92 |
|
|
String description2Sec = results.getString("description2");
|
93 |
|
|
String description3Sec = results.getString("description3");
|
94 |
|
|
String endingSec = results.getString("ending");
|
95 |
|
|
String finishedSec = results.getString("finished");
|
96 |
|
|
String namedentitySec = results.getString("namedentity");
|
97 |
|
|
String position1Sec = results.getString("position1");
|
98 |
|
|
String position2Sec = results.getString("position2");
|
99 |
|
|
String positiondetailSec = results.getString("positiondetail");
|
100 |
|
|
String prefixSec = results.getString("prefix");
|
101 |
|
|
String suffixSec = results.getString("suffix");
|
102 |
|
|
String wordSec = results.getString("word");
|
103 |
|
|
String lemma_idSec = results.getString("lemma_id");
|
104 |
|
|
String tag_idSec = results.getString("tag_id");
|
105 |
|
|
|
106 |
|
|
ArrayList<String> manuscript_cont2 = retrieveManuscriptNumsForId(dalimil2Con, "dalimil2", idSec);
|
107 |
|
|
String[] lemma_id_dd_lemma2 = retrieveLemmaContForId(dalimil2Con, "dalimil2", lemma_idSec);
|
108 |
|
|
String tag_id_dd_tag2 = retrieveTagContForId(dalimil2Con, "dalimil2", tag_idSec);
|
109 |
|
|
|
110 |
|
|
String dal2Print = "";
|
111 |
|
|
dal2Print += "Dal2: id: " + idSec + " context: " + contextSec + " date: " + dateSec + " description: " + descriptionSec + " description2: " + description2Sec + " description3: " + description3Sec + " ending: " + endingSec + " finished: " + finishedSec + " namedentity: " + namedentitySec + " position1: " + position1Sec + " position2: " + position2Sec + " positiondetail: " + positiondetailSec + " prefix: " + prefixSec + " suffix: " + suffixSec + " word: " + wordSec + " lemma_id: " + lemma_idSec + " tag_id: " + tag_idSec + " dd_lemma lemma: " + lemma_id_dd_lemma2[0] + " dd_lemma pos: " + lemma_id_dd_lemma2[1] + " dd_tag tag: " + tag_id_dd_tag2;
|
112 |
|
|
dal2Print += " in manuscripts: ";
|
113 |
|
|
for(String manuscriptId : manuscript_cont2){
|
114 |
|
|
dal2Print += manuscriptId + ", ";
|
115 |
|
|
}
|
116 |
|
|
|
117 |
|
|
if(finished.equals("f") && finishedSec.equals("t")){
|
118 |
efca2344
|
Ondrej Drtina
|
//System.out.println("OK, found finished equivalent to unfinished item.. From first drop item: " + dal1Print);
|
119 |
a4c65df6
|
Ondrej Drtina
|
//if unfinished item from first db has finished equivalent in second db, then drop item from first db and keep entry in second
|
120 |
|
|
if(!doNotDeleteID[0].contains(Integer.valueOf(id))){ //drop from first only if supervisor doesnt want to keep
|
121 |
|
|
deleteConflict(dalimil1Con, "dalimil1", id, dal1Print); //drop from first db
|
122 |
|
|
}else{
|
123 |
efca2344
|
Ondrej Drtina
|
//System.out.println("Conflicting ID: " + id + " kept..");
|
124 |
a4c65df6
|
Ondrej Drtina
|
}
|
125 |
|
|
}else{
|
126 |
|
|
dalimil1Var.add(dal1Print);
|
127 |
|
|
dalimil2Var.add(dal2Print);
|
128 |
|
|
//drop items which are same across two given DBs
|
129 |
|
|
if(!doNotDeleteID[0].contains(Integer.valueOf(id))){ //drop from first only if supervisor doesnt want to keep
|
130 |
|
|
deleteConflict(dalimil1Con, "dalimil1", id, dal1Print); //delete conflicting item from first db
|
131 |
|
|
}else{
|
132 |
efca2344
|
Ondrej Drtina
|
//System.out.println("Conflicting ID: " + id + " kept..");
|
133 |
a4c65df6
|
Ondrej Drtina
|
}
|
134 |
|
|
if(!doNotDeleteID[1].contains(Integer.valueOf(idSec))){ //drop from second only if supervisor doesnt want to keep
|
135 |
|
|
deleteConflict(dalimil2Con, "dalimil2", idSec, dal2Print); //delete conflicting item from second db
|
136 |
|
|
}else{
|
137 |
efca2344
|
Ondrej Drtina
|
//System.out.println("Conflicting ID: " + idSec + " kept..");
|
138 |
a4c65df6
|
Ondrej Drtina
|
}
|
139 |
|
|
}
|
140 |
|
|
}
|
141 |
|
|
if(matchCountSec > 1){
|
142 |
|
|
//System.out.println("Found more equivalents, might be OK " + matchCountSec);
|
143 |
|
|
}
|
144 |
|
|
}
|
145 |
|
|
addRemainingFromFirstToSecDB(dalimil1Con, dalimil2Con); //add data remaining in first DB to second DB
|
146 |
|
|
//printConflicts(dalimil1Var, dalimil2Var);
|
147 |
|
|
} catch (SQLException throwables) {
|
148 |
|
|
System.out.println("err, working with dbs");
|
149 |
|
|
throwables.printStackTrace();
|
150 |
|
|
}
|
151 |
|
|
}
|
152 |
|
|
|
153 |
efca2344
|
Ondrej Drtina
|
/**
|
154 |
|
|
* Checks whether all finished entries present in dalimil1original and dalimil2original (original, not modified DBs) are present in dalimil2 (merged DB)
|
155 |
|
|
* @param dalimil2Con connection to dalimil2 DB (merged DB with all finished rows from dalimil1original and dalimil2original - except the ones, which supervisor wanted to delete)
|
156 |
|
|
*/
|
157 |
|
|
private static void mergedDBCheck(Connection dalimil2Con){
|
158 |
|
|
Connection dalimil1origCon = establishDalimilX("dalimil1original"); //connection to db which contains unmodified dalimil1
|
159 |
|
|
Connection dalimil2origCon = establishDalimilX("dalimil2original"); //connection to db which contains unmodified dalimil2
|
160 |
|
|
|
161 |
|
|
ResultSet dalimil1origWordform = retrieveWordformDalimilX(dalimil1origCon, "dalimil1original"); //content of dalimil1 original dd_wordform
|
162 |
|
|
ResultSet dalimil2origWordform = retrieveWordformDalimilX(dalimil2origCon, "dalimil2original"); //content of dalimil2 original dd_wordform
|
163 |
|
|
|
164 |
|
|
//go through dalimil1original + dalimil2original and check presence of each finished item in dalimil2 (merged DB)
|
165 |
|
|
try{
|
166 |
|
|
while(dalimil1origWordform.next()){
|
167 |
|
|
//get all fields dalimil1original/dd_wordform - START
|
168 |
|
|
String id = dalimil1origWordform.getString("id");
|
169 |
|
|
String context = dalimil1origWordform.getString("context");
|
170 |
|
|
String date = dalimil1origWordform.getString("date");
|
171 |
|
|
String description = dalimil1origWordform.getString("description");
|
172 |
|
|
String description2 = dalimil1origWordform.getString("description2");
|
173 |
|
|
String description3 = dalimil1origWordform.getString("description3");
|
174 |
|
|
String ending = dalimil1origWordform.getString("ending");
|
175 |
|
|
String finished = dalimil1origWordform.getString("finished");
|
176 |
|
|
String namedentity = dalimil1origWordform.getString("namedentity");
|
177 |
|
|
String position1 = dalimil1origWordform.getString("position1");
|
178 |
|
|
String position2 = dalimil1origWordform.getString("position2");
|
179 |
|
|
String positiondetail = dalimil1origWordform.getString("positiondetail");
|
180 |
|
|
String prefix = dalimil1origWordform.getString("prefix");
|
181 |
|
|
String suffix = dalimil1origWordform.getString("suffix");
|
182 |
|
|
String word = dalimil1origWordform.getString("word");
|
183 |
|
|
String lemma_id = dalimil1origWordform.getString("lemma_id");
|
184 |
|
|
String tag_id = dalimil1origWordform.getString("tag_id");
|
185 |
|
|
//get all fields dalimil1original/dd_wordform - END
|
186 |
|
|
|
187 |
|
|
//look where points lemma_id and tag_id
|
188 |
|
|
String[] lemma_id_dd_lemma = retrieveLemmaContForId(dalimil1origCon, "dalimil1original", lemma_id);
|
189 |
|
|
String tag_id_dd_tag = retrieveTagContForId(dalimil1origCon, "dalimil1original", tag_id);
|
190 |
|
|
ArrayList<String> manuscript_cont = retrieveManuscriptNumsForId(dalimil1origCon, "dalimil1original", id);
|
191 |
|
|
|
192 |
|
|
if(finished.equals("t")){ //entry is considered as finished, continue with check of presence in merged DB
|
193 |
|
|
boolean isPresentMerge = checkPresenceEntryMergedDB(id, "dalimil1", dalimil2Con, context, date, description, description2, description3,
|
194 |
|
|
ending, finished, namedentity, position1, position2, positiondetail, prefix,
|
195 |
|
|
suffix, word, tag_id_dd_tag, lemma_id_dd_lemma[0], lemma_id_dd_lemma[1], manuscript_cont);
|
196 |
|
|
if(!isPresentMerge){ //finished entry is not present in merged DB, that should not ever occur :(!!!
|
197 |
|
|
System.out.println("ERR, finished item from FIRST DB is not present in merged DB!!!" + id);
|
198 |
|
|
}else{
|
199 |
|
|
//System.out.println("Ok, item " + word + " (FIRST DB) found in merged DB " + id);
|
200 |
|
|
}
|
201 |
|
|
}
|
202 |
|
|
}
|
203 |
|
|
|
204 |
|
|
while(dalimil2origWordform.next()){
|
205 |
|
|
//get all fields dalimil2original/dd_wordform - START
|
206 |
|
|
String id = dalimil2origWordform.getString("id");
|
207 |
|
|
String context = dalimil2origWordform.getString("context");
|
208 |
|
|
String date = dalimil2origWordform.getString("date");
|
209 |
|
|
String description = dalimil2origWordform.getString("description");
|
210 |
|
|
String description2 = dalimil2origWordform.getString("description2");
|
211 |
|
|
String description3 = dalimil2origWordform.getString("description3");
|
212 |
|
|
String ending = dalimil2origWordform.getString("ending");
|
213 |
|
|
String finished = dalimil2origWordform.getString("finished");
|
214 |
|
|
String namedentity = dalimil2origWordform.getString("namedentity");
|
215 |
|
|
String position1 = dalimil2origWordform.getString("position1");
|
216 |
|
|
String position2 = dalimil2origWordform.getString("position2");
|
217 |
|
|
String positiondetail = dalimil2origWordform.getString("positiondetail");
|
218 |
|
|
String prefix = dalimil2origWordform.getString("prefix");
|
219 |
|
|
String suffix = dalimil2origWordform.getString("suffix");
|
220 |
|
|
String word = dalimil2origWordform.getString("word");
|
221 |
|
|
if(word.equals("viz'u")){ //DB contains just one word, which throws exception -> edit the word
|
222 |
|
|
word = "viz''u";
|
223 |
|
|
}
|
224 |
|
|
|
225 |
|
|
String lemma_id = dalimil2origWordform.getString("lemma_id");
|
226 |
|
|
String tag_id = dalimil2origWordform.getString("tag_id");
|
227 |
|
|
//get all fields dalimil2original/dd_wordform - END
|
228 |
|
|
|
229 |
|
|
//look where points lemma_id and tag_id
|
230 |
|
|
String[] lemma_id_dd_lemma = retrieveLemmaContForId(dalimil2origCon, "dalimil2original", lemma_id);
|
231 |
|
|
String tag_id_dd_tag = retrieveTagContForId(dalimil2origCon, "dalimil2original", tag_id);
|
232 |
|
|
ArrayList<String> manuscript_cont = retrieveManuscriptNumsForId(dalimil2origCon, "dalimil2original", id);
|
233 |
|
|
|
234 |
|
|
if(finished.equals("t")){ //entry is considered as finished, continue with check of presence in merged DB
|
235 |
|
|
boolean isPresentMerge = checkPresenceEntryMergedDB(id, "dalimil2",dalimil2Con, context, date, description, description2, description3,
|
236 |
|
|
ending, finished, namedentity, position1, position2, positiondetail, prefix,
|
237 |
|
|
suffix, word, tag_id_dd_tag, lemma_id_dd_lemma[0], lemma_id_dd_lemma[1], manuscript_cont);
|
238 |
|
|
if(!isPresentMerge){ //finished entry is not present in merged DB, that should not ever occur :(!!!
|
239 |
|
|
System.out.println("ERR, finished item from SECOND DB is not present in merged DB!!!" + id);
|
240 |
|
|
}else{
|
241 |
|
|
//System.out.println("Ok, item " + word + " (FIRST DB) found in merged DB " + id);
|
242 |
|
|
}
|
243 |
|
|
}
|
244 |
|
|
}
|
245 |
|
|
}catch (SQLException throwables) {
|
246 |
|
|
System.out.println("err, working with dbs");
|
247 |
|
|
throwables.printStackTrace();
|
248 |
|
|
}
|
249 |
|
|
|
250 |
|
|
closeDalimilX(dalimil1origCon, "dalimil1original");
|
251 |
|
|
closeDalimilX(dalimil2origCon, "dalimil2original");
|
252 |
|
|
}
|
253 |
|
|
|
254 |
|
|
/**
|
255 |
|
|
* Checks whether entry specified by given parameters is present in merged DB (dalimil2).
|
256 |
|
|
* @param id id the entry in given DB
|
257 |
|
|
* @param nameDB name of the checked DB (dalimil1 or dalimil2 expected)
|
258 |
|
|
* @param dalimil2mergedCon connection to merged database (dalimil2)
|
259 |
|
|
* @param context original DB - table dd_wordform - context
|
260 |
|
|
* @param date original DB - table dd_wordform - date
|
261 |
|
|
* @param description original DB - table dd_wordform - description
|
262 |
|
|
* @param description2 original DB - table dd_wordform - description2
|
263 |
|
|
* @param description3 original DB - table dd_wordform - description3
|
264 |
|
|
* @param ending original DB - table dd_wordform - ending
|
265 |
|
|
* @param finished original DB - table dd_wordform - finished
|
266 |
|
|
* @param namedentity original DB - table dd_wordform - namedentity
|
267 |
|
|
* @param position1 original DB - table dd_wordform - position1
|
268 |
|
|
* @param position2 original DB - table dd_wordform - position2
|
269 |
|
|
* @param positiondetail original DB - table dd_wordform - positiondetail
|
270 |
|
|
* @param prefix original DB - table dd_wordform - prefix
|
271 |
|
|
* @param suffix original DB - table dd_wordform - suffix
|
272 |
|
|
* @param word original DB - table dd_wordform - word
|
273 |
|
|
* @param tag original DB - table dd_tag - tag
|
274 |
|
|
* @param lemma original DB - table dd_lemma - lemma
|
275 |
|
|
* @param pos original DB - table dd_lemma - pos
|
276 |
|
|
* @param manuscriptNums original DB - table dd_manuscript - numbers of all manuscript assigned to doc
|
277 |
|
|
* @return true if specified entry is present in second DB, else false
|
278 |
|
|
*/
|
279 |
|
|
private static boolean checkPresenceEntryMergedDB(String id, String nameDB, Connection dalimil2mergedCon, String context, String date, String description, String description2, String description3,
|
280 |
|
|
String ending, String finished, String namedentity, String position1, String position2, String positiondetail, String prefix,
|
281 |
|
|
String suffix, String word, String tag, String lemma, String pos, ArrayList<String> manuscriptNums){
|
282 |
|
|
ResultSet retrievedResMerge = null; //result retrieved from merged DB (dalimil2)
|
283 |
|
|
Statement statementMerge = null; //statement to be executed on merged DB (dalimil2)
|
284 |
|
|
|
285 |
|
|
//tags in first DB are shorter than the ones in second DB (sec is 12char length) -> make it match
|
286 |
|
|
while(tag.length() < 12){
|
287 |
|
|
tag += "-";
|
288 |
|
|
}
|
289 |
|
|
|
290 |
|
|
try{
|
291 |
|
|
//retrieve all rows from dd_lemma which match desired lemma + pos (then one of the retrieved ids has to be assigned to the wordform) - START
|
292 |
|
|
Statement statementLemma = dalimil2mergedCon.createStatement();
|
293 |
|
|
ArrayList<String> possibleLemmaIdsList = new ArrayList<>();
|
294 |
|
|
|
295 |
|
|
ResultSet statementLemmaRes = statementLemma.executeQuery("SELECT * FROM public.dd_lemma WHERE lemma='" + lemma + "' AND pos='" + pos + "';");
|
296 |
|
|
|
297 |
|
|
while(statementLemmaRes.next()){
|
298 |
|
|
possibleLemmaIdsList.add(statementLemmaRes.getString("id"));
|
299 |
|
|
}
|
300 |
|
|
//retrieve all rows from dd_lemma which match desired lemma + pos (then one of the retrieved ids has to be assigned to the wordform) - END
|
301 |
|
|
|
302 |
|
|
//retrieve all rows from dd_tag which match desired tag (then one of the retrieved ids has to be assigned to the wordform) - START
|
303 |
|
|
Statement statementTag = dalimil2mergedCon.createStatement();
|
304 |
|
|
ArrayList<String> possibleTagIdsList = new ArrayList<>();
|
305 |
|
|
|
306 |
|
|
ResultSet statementTagRes = statementTag.executeQuery("SELECT * FROM public.dd_tag WHERE tag='" + tag + "';");
|
307 |
|
|
while(statementTagRes.next()){
|
308 |
|
|
possibleTagIdsList.add(statementTagRes.getString("id"));
|
309 |
|
|
}
|
310 |
|
|
//retrieve all rows from dd_tag which match desired tag (then one of the retrieved ids has to be assigned to the wordform) - END
|
311 |
|
|
|
312 |
|
|
boolean dd_wordformPresent = false;
|
313 |
|
|
|
314 |
|
|
//retrieve all items from dd_wordform which match all attributes except for tag_id and lemma_id (then one one of retrieved ids must have assigned required manuscripts) - START
|
315 |
|
|
statementMerge = dalimil2mergedCon.createStatement();
|
316 |
|
|
ArrayList<String> wordformIds = new ArrayList<>();
|
317 |
|
|
|
318 |
|
|
retrievedResMerge = statementMerge.executeQuery("SELECT * FROM public.dd_wordform WHERE context='" + context + "' AND date='" + date + "' AND description='" + description + "' AND description2='" + description2 + "' AND description3='" + description3 + "' AND ending='" + ending + "' AND finished='" + finished + "' AND namedentity='" + namedentity + "' AND position1='" + position1 + "' AND position2='" + position2 + "' AND positiondetail='" + positiondetail + "' AND prefix='" + prefix + "' AND suffix='" + suffix + "' AND word='" + word + "';");
|
319 |
|
|
while(retrievedResMerge.next()){
|
320 |
|
|
if(possibleTagIdsList.contains(retrievedResMerge.getString("tag_id")) && possibleLemmaIdsList.contains(retrievedResMerge.getString("lemma_id"))){ //found item with suitable tag id and lemma id
|
321 |
|
|
wordformIds.add(retrievedResMerge.getString("id"));
|
322 |
|
|
}
|
323 |
|
|
}
|
324 |
|
|
//retrieve all items from dd_wordform which match all attributes except for tag_id and lemma_id (then one one of retrieved ids must have assigned required manuscripts) - END
|
325 |
|
|
|
326 |
|
|
int matchCountWordform = 0; //number of matches found in merged DB, table dd_wordform
|
327 |
|
|
ArrayList<String> matchingWordformLemmaTagManucriptIds = new ArrayList<>(); //wordform ids of items WHICH HAVE SAME: wordform, lemma, tag, assigned manuscript = totally equiv entry
|
328 |
|
|
|
329 |
|
|
//got suitable dd_lemma + dd_tag -> from that deducted possible dd_wordform ids. now check if any of possible dd_wordform ids have assigned required manuscripts
|
330 |
|
|
for(String suitableId : wordformIds){ //go through possible wordform ids and check if they have required entries in dd_manuscript
|
331 |
|
|
Statement statementManuscript = dalimil2mergedCon.createStatement();
|
332 |
|
|
ArrayList<String> assignedManuscriptNumsList = new ArrayList<>(); //manuscript numbers assigned to traversed id
|
333 |
|
|
ResultSet statementManuscriptRes = statementManuscript.executeQuery("SELECT * FROM public.dd_manuscript WHERE wordform_id=" + suitableId + ";");
|
334 |
|
|
|
335 |
|
|
while(statementManuscriptRes.next()){ //add all manuscripts assigned to specified id to list
|
336 |
|
|
String manuscript = statementManuscriptRes.getString("manuscript");
|
337 |
|
|
assignedManuscriptNumsList.add(manuscript);
|
338 |
|
|
}
|
339 |
|
|
|
340 |
|
|
int traversedManuscriptCount = 0; //number of already checked assigned manuscripts
|
341 |
|
|
for(String manuscriptNum : assignedManuscriptNumsList){
|
342 |
|
|
if(manuscriptNums.contains(manuscriptNum)){
|
343 |
|
|
traversedManuscriptCount++;
|
344 |
|
|
}
|
345 |
|
|
}
|
346 |
|
|
|
347 |
|
|
if(traversedManuscriptCount == assignedManuscriptNumsList.size()){ //number of checked items equals to total number of assigned manuscripts, items MATCH (same wordform, lemma, tag and assigned manuscripts...)
|
348 |
|
|
matchingWordformLemmaTagManucriptIds.add(suitableId);
|
349 |
|
|
matchCountWordform++;
|
350 |
|
|
}
|
351 |
|
|
}
|
352 |
|
|
|
353 |
|
|
if(matchCountWordform == 1){ //found exactly one matching entry in merged DB (dalimil2), ok (if > 1 is not nice -> db contains redundant information, but doesnt matter that much...)
|
354 |
|
|
dd_wordformPresent = true;
|
355 |
|
|
}else if(matchCountWordform == 0){
|
356 |
|
|
System.out.println("Not FOUND: db" + nameDB + ", id: " + id + "context='" + context + "' AND date='" + date + "' AND description='" + description + "' AND description2='" + description2 + "' AND description3='" + description3 + "' AND ending='" + ending + "' AND finished='" + finished + "' AND namedentity='" + namedentity + "' AND position1='" + position1 + "' AND position2='" + position2 + "' AND positiondetail='" + positiondetail + "' AND prefix='" + prefix + "' AND suffix='" + suffix + "' AND word='" + word + "';");
|
357 |
|
|
}else if(matchCountWordform > 0){
|
358 |
|
|
System.out.println("Found more than once! - START");
|
359 |
|
|
|
360 |
|
|
for(int i = 0; i < wordformIds.size(); i++){
|
361 |
|
|
String wfid = wordformIds.get(i);
|
362 |
|
|
System.out.println("In merged db " + nameDB + " id is: " + wfid + ", id from original: " + id);
|
363 |
|
|
|
364 |
|
|
if(i != 0){ //do not delete the first one, must keep one equiv
|
365 |
|
|
deleteConflict(dalimil2mergedCon, "dalimil2merged", wfid, "DELETE AFTER MERGE"); //drop from first db
|
366 |
|
|
deletedRedundantItIds.add(wfid);
|
367 |
|
|
}
|
368 |
|
|
}
|
369 |
|
|
System.out.println("Found more than once! - END");
|
370 |
|
|
dd_wordformPresent = true;
|
371 |
|
|
}
|
372 |
|
|
|
373 |
|
|
if(dd_wordformPresent){ //found entry with matching content - dd_wordform + dd_lemma + dd_tag + dd_manuscript (except for ids, thats obvious)
|
374 |
|
|
return true;
|
375 |
|
|
}else{
|
376 |
|
|
return false;
|
377 |
|
|
}
|
378 |
|
|
} catch (SQLException throwables) {
|
379 |
|
|
System.out.println("ERR, checking data in merged DB. id in original: " + id);
|
380 |
|
|
throwables.printStackTrace();
|
381 |
|
|
return false;
|
382 |
|
|
}
|
383 |
|
|
}
|
384 |
|
|
|
385 |
|
|
/**
|
386 |
|
|
* Checks whether all deleted items really had equivavelents.
|
387 |
|
|
* @param dalimil2mergedCon connection to DB which does not contains redundant items (final DB to be used)
|
388 |
|
|
*/
|
389 |
|
|
private static void reviewDeletedItemsMerge(Connection dalimil2mergedCon){
|
390 |
|
|
Connection dalimil2mergedWOdeletedredundantCon = establishDalimilX("dalimil2mergedWOdeletedredundant"); //connection to DB which still contains redundant items
|
391 |
|
|
|
392 |
|
|
for(String removedId : deletedRedundantItIds){ //go through removed items in dalimil2mergedWOdeletedredundant and check if equiv is really present in dalimil2
|
393 |
|
|
ResultSet wordformDeletedMerge = retrieveWordformIdDalimilX(dalimil2mergedWOdeletedredundantCon, "dalimil2mergedWOdeletedredundant", removedId); //retrieve content of dd_wordform for deleted id
|
394 |
|
|
|
395 |
|
|
int matchCount = 0; //number of items found in dalimil2mergedWOdeletedredundant with the specified id (SHOULD NOT EVER BE > 1!)
|
396 |
|
|
|
397 |
|
|
try{
|
398 |
|
|
while(wordformDeletedMerge.next()){ //go through items found with specified id
|
399 |
|
|
matchCount++;
|
400 |
|
|
|
401 |
|
|
//get all fields dalimil2mergedWOdeletedredundant/dd_wordform - START
|
402 |
|
|
String id = wordformDeletedMerge.getString("id");
|
403 |
|
|
String context = wordformDeletedMerge.getString("context");
|
404 |
|
|
String date = wordformDeletedMerge.getString("date");
|
405 |
|
|
String description = wordformDeletedMerge.getString("description");
|
406 |
|
|
String description2 = wordformDeletedMerge.getString("description2");
|
407 |
|
|
String description3 = wordformDeletedMerge.getString("description3");
|
408 |
|
|
String ending = wordformDeletedMerge.getString("ending");
|
409 |
|
|
String finished = wordformDeletedMerge.getString("finished");
|
410 |
|
|
String namedentity = wordformDeletedMerge.getString("namedentity");
|
411 |
|
|
String position1 = wordformDeletedMerge.getString("position1");
|
412 |
|
|
String position2 = wordformDeletedMerge.getString("position2");
|
413 |
|
|
String positiondetail = wordformDeletedMerge.getString("positiondetail");
|
414 |
|
|
String prefix = wordformDeletedMerge.getString("prefix");
|
415 |
|
|
String suffix = wordformDeletedMerge.getString("suffix");
|
416 |
|
|
String word = wordformDeletedMerge.getString("word");
|
417 |
|
|
String lemma_id = wordformDeletedMerge.getString("lemma_id");
|
418 |
|
|
String tag_id = wordformDeletedMerge.getString("tag_id");
|
419 |
|
|
//get all fields dalimil2mergedWOdeletedredundant/dd_wordform - END
|
420 |
|
|
|
421 |
|
|
String[] lemma_id_dd_lemma = retrieveLemmaContForId(dalimil2mergedWOdeletedredundantCon, "dalimil2mergedWOdeletedredundant", lemma_id); //retrieve lemma from dd_lemma for deleted id
|
422 |
|
|
String tag_id_dd_tag = retrieveTagContForId(dalimil2mergedWOdeletedredundantCon, "dalimil2mergedWOdeletedredundant", tag_id); //retrieve tag from dd_tag for deleted id
|
423 |
|
|
ArrayList<String> manuscript_cont = retrieveManuscriptNumsForId(dalimil2mergedWOdeletedredundantCon, "dalimil2mergedWOdeletedredundant", id); //retrieve manuscripts from dd_manuscript for deleted id
|
424 |
|
|
|
425 |
|
|
boolean isEquivPresent = checkPresenceEntryMergedDB(id, "dalimil2", dalimil2mergedCon, context, date, description, description2, description3,
|
426 |
|
|
ending, finished, namedentity, position1, position2, positiondetail, prefix,
|
427 |
|
|
suffix, word, tag_id_dd_tag, lemma_id_dd_lemma[0], lemma_id_dd_lemma[1], manuscript_cont);
|
428 |
|
|
|
429 |
|
|
if(!isEquivPresent){ //if equivalent is not present in DB with removed duplicates, something is wrong...
|
430 |
|
|
System.out.println("ERR, duplicated item with id " + id + " has no equivalents in final DB!!!");
|
431 |
|
|
}else{
|
432 |
|
|
System.out.println("Ok, found equivalent to removed id: " + removedId);
|
433 |
|
|
}
|
434 |
|
|
}
|
435 |
|
|
}catch (SQLException throwables) {
|
436 |
|
|
System.out.println("err, working with dbs");
|
437 |
|
|
throwables.printStackTrace();
|
438 |
|
|
}
|
439 |
|
|
|
440 |
|
|
if(matchCount != 1){
|
441 |
|
|
System.out.println("err, match count for deleted id is not 1!");
|
442 |
|
|
}
|
443 |
|
|
}
|
444 |
|
|
|
445 |
|
|
closeDalimilX(dalimil2mergedWOdeletedredundantCon, "dalimil2mergedWOdeletedredundant");
|
446 |
|
|
}
|
447 |
|
|
|
448 |
a4c65df6
|
Ondrej Drtina
|
/**
|
449 |
|
|
* Parses content of the file which contains only valid entries and returns ArrayLists which contains ids of entries which should stay in DB. Each entry begins with "Dal1:" or "Dal2:" and ends with ", OK"
|
450 |
|
|
* @param pathToFile path to the file which contains valid entries (checked by contracting authority)
|
451 |
|
|
* @return ArrayList<Integer>[] - on first index is list of ids, which should stay in first DB (Dal1) and on index 1 is list of ids, which should stay in the second DB
|
452 |
|
|
*/
|
453 |
|
|
private static ArrayList<Integer>[] parseConflictsFile(String pathToFile){
|
454 |
|
|
ArrayList<String> fileCont = new ArrayList<>(); //content of the file
|
455 |
|
|
|
456 |
|
|
ArrayList<Integer>[] toStayInDB = new ArrayList[2]; //init outer array (0 = first DB, 1 = second DB)
|
457 |
|
|
toStayInDB[0] = new ArrayList<>(); //represents indexes which should stay in first DB (Dal1)
|
458 |
|
|
toStayInDB[1] = new ArrayList<>(); //represents indexes which should stay in second DB (Dal2)
|
459 |
|
|
|
460 |
|
|
try{
|
461 |
|
|
File conflictsFile = new File(pathToFile);
|
462 |
|
|
Scanner conflictsSc = new Scanner(conflictsFile);
|
463 |
|
|
while(conflictsSc.hasNextLine()){
|
464 |
|
|
String line = conflictsSc.nextLine();
|
465 |
|
|
fileCont.add(line);
|
466 |
|
|
}
|
467 |
|
|
conflictsSc.close();
|
468 |
|
|
}catch(Exception e){
|
469 |
|
|
System.out.println("Err, reading conflicts file");
|
470 |
|
|
e.printStackTrace();
|
471 |
|
|
}
|
472 |
|
|
|
473 |
|
|
//content should be loaded by now, parse it
|
474 |
|
|
for(String line : fileCont){
|
475 |
|
|
if(line.startsWith("Dal1: id: ")){ //line contains id of item in first DB, Dal1
|
476 |
|
|
String[] splitted = line.split("Dal1: id: ");
|
477 |
|
|
splitted = splitted[1].split(" ");
|
478 |
|
|
toStayInDB[0].add(Integer.valueOf(splitted[0]));
|
479 |
|
|
}else if(line.startsWith("Dal2: id: ")){ //line contains id of item in second DB, Dal2
|
480 |
|
|
String[] splitted = line.split("Dal2: id: ");
|
481 |
|
|
splitted = splitted[1].split(" ");
|
482 |
|
|
toStayInDB[1].add(Integer.valueOf(splitted[0]));
|
483 |
|
|
}else{
|
484 |
|
|
System.out.println("Err, file is in invalid format...");
|
485 |
|
|
}
|
486 |
|
|
}
|
487 |
|
|
|
488 |
efca2344
|
Ondrej Drtina
|
//System.out.println("size is: " + toStayInDB[0].size() + " and " + toStayInDB[1].size());
|
489 |
a4c65df6
|
Ondrej Drtina
|
|
490 |
|
|
return toStayInDB;
|
491 |
|
|
}
|
492 |
|
|
|
493 |
|
|
/**
|
494 |
|
|
* Used for deleting conflicting entries from database.
|
495 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
496 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
497 |
|
|
* @param id id of entry which should be deleted (in dd_wordform)
|
498 |
|
|
* @param textRepres text representation of deleted item, used for debug print
|
499 |
|
|
*/
|
500 |
|
|
private static void deleteConflict(Connection dalimilXCon, String whichDB, String id, String textRepres){
|
501 |
|
|
try {
|
502 |
|
|
Statement statement = dalimilXCon.createStatement();
|
503 |
|
|
int affectedManuscript = statement.executeUpdate("DELETE FROM public.dd_manuscript WHERE wordform_id=" + id); //delete entry which is associated with the word from dd_manuscript
|
504 |
|
|
int affectedWordform = statement.executeUpdate("DELETE FROM public.dd_wordform WHERE id=" + id); //delete entry from dd_wordform
|
505 |
|
|
//System.out.println("Removing affected wordform num: " + affectedWordform + ", affected manuscript num: " + affectedManuscript + " info line: " + whichDB + " " + textRepres);
|
506 |
efca2344
|
Ondrej Drtina
|
System.out.println("Z " + whichDB + " odstraneno: " + textRepres);
|
507 |
a4c65df6
|
Ondrej Drtina
|
} catch (SQLException throwables) {
|
508 |
|
|
System.out.println("ERR, " + whichDB + " - deleting conflicts from database.");
|
509 |
|
|
throwables.printStackTrace();
|
510 |
|
|
}
|
511 |
|
|
}
|
512 |
|
|
|
513 |
|
|
/**
|
514 |
|
|
* Adds remaining data presesent in Dalimil1 to Dalimil2
|
515 |
|
|
* @param dalimil1Con Connection obj, represents connection to dalimil1 DB
|
516 |
|
|
* @param dalimil2Con Connection obj, represents connection to dalimil2 DB
|
517 |
|
|
*/
|
518 |
|
|
private static void addRemainingFromFirstToSecDB(Connection dalimil1Con, Connection dalimil2Con){
|
519 |
|
|
ResultSet dalimil1Wordform = retrieveWordformDalimilX(dalimil1Con, "dalimil1"); //retrieve all data from dd_wordform, first DB (Dalimil 1)
|
520 |
|
|
|
521 |
|
|
try {
|
522 |
|
|
while(dalimil1Wordform.next()){ //go through remaining items present in first DB and put them to second DB
|
523 |
|
|
//get all fields dalimil1/dd_wordform - START
|
524 |
|
|
String id = dalimil1Wordform.getString("id");
|
525 |
|
|
String lemma_id = dalimil1Wordform.getString("lemma_id");
|
526 |
|
|
String tag_id = dalimil1Wordform.getString("tag_id");
|
527 |
|
|
//System.out.println("Got remaining: " + id + " " + lemma_id + " " + tag_id);
|
528 |
|
|
//get all fields dalimil1/dd_wordform - END
|
529 |
|
|
|
530 |
|
|
//ok, got lemma + tag + manuscript data - now add them to respective tables before adding dd_wordform entry
|
531 |
|
|
int lemma_idSecondDB = addLemmaToSecDB(dalimil1Con, dalimil2Con, lemma_id);
|
532 |
|
|
int tag_idSecondDB = addTagToSecDB(dalimil1Con, dalimil2Con, tag_id);
|
533 |
|
|
int wordform_idSecondDB = addWordformToSecDB(dalimil1Con, dalimil2Con, id, lemma_idSecondDB, tag_idSecondDB);
|
534 |
|
|
//System.out.println("added as: " + wordform_idSecondDB + " " + lemma_idSecondDB + " " + tag_idSecondDB);
|
535 |
|
|
addManuscriptToSecDB(dalimil1Con, dalimil2Con, String.valueOf(id), String.valueOf(wordform_idSecondDB));
|
536 |
|
|
}
|
537 |
|
|
} catch (SQLException throwables) {
|
538 |
|
|
System.out.println("Error while adding remaining from first to sec DB.");
|
539 |
|
|
throwables.printStackTrace();
|
540 |
|
|
}
|
541 |
|
|
}
|
542 |
|
|
|
543 |
|
|
/**
|
544 |
|
|
* Used for adding wordform to table dd_wordform (from first DB to second DB ; Dalimil1 -> Dalimil2).
|
545 |
|
|
* @param dalimil1Con Connection obj, represents connection to dalimil1 DB
|
546 |
|
|
* @param dalimil2Con Connection obj, represents connection to dalimil2 DB
|
547 |
|
|
* @param idWordformFirstDB id of the entry in first DB, table dd_wordform, which should be inserted to second DB, same table
|
548 |
|
|
* @param lemma_idSecondDB lemma_id attribute which should be used in SECOND DB for given entry
|
549 |
|
|
* @param tag_idSecondDB tag_id attribute which should be used in SECOND DB for given entry
|
550 |
|
|
* @return id of the newly inserted wordform
|
551 |
|
|
*/
|
552 |
|
|
private static int addWordformToSecDB(Connection dalimil1Con, Connection dalimil2Con, String idWordformFirstDB, int lemma_idSecondDB, int tag_idSecondDB){
|
553 |
|
|
ResultSet wordformDataFirstDB = retrieveWordformIdDalimilX(dalimil1Con, "dalimil1", idWordformFirstDB);
|
554 |
|
|
|
555 |
|
|
int newWordformId = -1;
|
556 |
|
|
try{
|
557 |
|
|
Statement secondDBWordformMaxState = dalimil2Con.createStatement();
|
558 |
|
|
ResultSet secondDBWordformMaxRes = secondDBWordformMaxState.executeQuery("SELECT MAX(id) FROM public.dd_wordform;"); //get max id, currently in dd_wordform (sec DB, Dalimil2) - newly added item has to be value+1
|
559 |
|
|
while(secondDBWordformMaxRes.next()){
|
560 |
|
|
int maxId = secondDBWordformMaxRes.getInt("max"); //get max id currently in wordform (second DB, Dalimil2) - newly added item has to have retrieved value+1
|
561 |
|
|
newWordformId = maxId + 1; //newly inserted item : current_id + 1 for new id
|
562 |
|
|
}
|
563 |
|
|
|
564 |
|
|
//now retrieve data from first DB
|
565 |
|
|
while(wordformDataFirstDB.next()){
|
566 |
|
|
//get relevant fields dalimil1/dd_wordform - START
|
567 |
|
|
String context = wordformDataFirstDB.getString("context");
|
568 |
|
|
String date = wordformDataFirstDB.getString("date");
|
569 |
|
|
String description = wordformDataFirstDB.getString("description");
|
570 |
|
|
String description2 = wordformDataFirstDB.getString("description2");
|
571 |
|
|
String description3 = wordformDataFirstDB.getString("description3");
|
572 |
|
|
String ending = wordformDataFirstDB.getString("ending");
|
573 |
|
|
String finished = wordformDataFirstDB.getString("finished");
|
574 |
|
|
String namedentity = wordformDataFirstDB.getString("namedentity");
|
575 |
|
|
String position1 = wordformDataFirstDB.getString("position1");
|
576 |
|
|
String position2 = wordformDataFirstDB.getString("position2");
|
577 |
|
|
String positiondetail = wordformDataFirstDB.getString("positiondetail");
|
578 |
|
|
String prefix = wordformDataFirstDB.getString("prefix");
|
579 |
|
|
String suffix = wordformDataFirstDB.getString("suffix");
|
580 |
|
|
String word = wordformDataFirstDB.getString("word");
|
581 |
|
|
//get relevant fields dalimil1/dd_wordform - END
|
582 |
|
|
|
583 |
|
|
if(newWordformId == -1){
|
584 |
|
|
System.out.println("err, cannot retrieve wordform id!");
|
585 |
|
|
return newWordformId;
|
586 |
|
|
}
|
587 |
|
|
|
588 |
|
|
//now try to insert data to second DB
|
589 |
|
|
Statement secondDBWordformInsert = dalimil2Con.createStatement();
|
590 |
|
|
|
591 |
|
|
String insertIntoSql = "INSERT INTO public.dd_wordform (id, context, date, description, description2, description3, ending, finished, namedentity, position1, position2, positiondetail, prefix, suffix, word, lemma_id, tag_id) VALUES (";
|
592 |
|
|
insertIntoSql += newWordformId + ",'" + context + "',TO_DATE('"+date+"', 'YYYY-MM-DD HH24:MI:ss.fff' ),'"+description+"','"+description2+"','"+description3+"','"+ending+"','"+finished+"','"+namedentity+"','"+position1+"','"+position2+"','"+positiondetail+"','"+prefix+"','"+suffix+"','"+word+"',"+lemma_idSecondDB+","+tag_idSecondDB+");";
|
593 |
|
|
int secondDBWordformInsertRes = secondDBWordformInsert.executeUpdate(insertIntoSql);
|
594 |
|
|
//System.out.println("Inserted wordform with id: " + newWordformId + " RES: " + secondDBWordformInsertRes);
|
595 |
|
|
}
|
596 |
|
|
return newWordformId;
|
597 |
|
|
}catch (SQLException throwables){
|
598 |
|
|
System.out.println("err, inserting wordform");
|
599 |
|
|
throwables.printStackTrace();
|
600 |
|
|
return newWordformId;
|
601 |
|
|
}
|
602 |
|
|
}
|
603 |
|
|
|
604 |
|
|
/**
|
605 |
|
|
* Used for adding manuscripts associated with the word to table dd_manuscript (from first to second DB).
|
606 |
|
|
* @param dalimil1Con Connection obj, represents connection to dalimil1 DB
|
607 |
|
|
* @param dalimil2Con Connection obj, represents connection to dalimil2 DB
|
608 |
|
|
* @param wordformIdFirstDB id of the word (in first DB, table dd_wordform), used for retrieving data
|
609 |
|
|
* @param wordformIdSecondDB id of the wordform in second DB
|
610 |
|
|
*/
|
611 |
|
|
private static void addManuscriptToSecDB(Connection dalimil1Con, Connection dalimil2Con, String wordformIdFirstDB, String wordformIdSecondDB){
|
612 |
|
|
ArrayList<String> manuscriptNumsFirstDB = retrieveManuscriptNumsForId(dalimil1Con, "dalimil1", wordformIdFirstDB);
|
613 |
|
|
//got numbers of manuscripts in first DB, now edit them to match with the second one -> wordform_id = newly assigned id in second DB, manuscript = stays the same
|
614 |
|
|
|
615 |
|
|
try{
|
616 |
|
|
for(int i = 0; i < manuscriptNumsFirstDB.size(); i++){ //go through all manuscripts for each item and add them to second database
|
617 |
|
|
String oneManuscriptNum = manuscriptNumsFirstDB.get(i); //get one manuscript num from retrieved manuscript nums (first DB)
|
618 |
|
|
//now try to insert the data to second DB
|
619 |
|
|
Statement secondDBManuscriptInsert = dalimil2Con.createStatement();
|
620 |
|
|
int secondDBManuscriptInsertRes = secondDBManuscriptInsert.executeUpdate("INSERT INTO public.dd_manuscript (wordform_id, manuscript) VALUES (" + wordformIdSecondDB + "," + oneManuscriptNum + ");");
|
621 |
|
|
//System.out.println("Inserted manuscript with wordform_id: " + wordformIdSecondDB + ", manuscript: " + oneManuscriptNum + " RES: " + secondDBManuscriptInsertRes);
|
622 |
|
|
}
|
623 |
|
|
|
624 |
|
|
}catch (SQLException throwables) {
|
625 |
|
|
System.out.println("err, inserting manuscript");
|
626 |
|
|
throwables.printStackTrace();
|
627 |
|
|
}
|
628 |
|
|
}
|
629 |
|
|
|
630 |
|
|
/**
|
631 |
|
|
* Used for adding lemma associated with the word to table dd_lemma (from first to second DB).
|
632 |
|
|
* @param dalimil1Con Connection obj, represents connection to dalimil1 DB
|
633 |
|
|
* @param dalimil2Con Connection obj, represents connection to dalimil2 DB
|
634 |
|
|
* @param lemma_idFirstDB lemma_id of respective item (from first DB, dd_wordform! ie. Dalimil1)
|
635 |
|
|
* @return id of the newly inserted lemma
|
636 |
|
|
*/
|
637 |
|
|
private static int addLemmaToSecDB(Connection dalimil1Con, Connection dalimil2Con, String lemma_idFirstDB){
|
638 |
|
|
String[] lemmaFirstDB = retrieveLemmaContForId(dalimil1Con, "dalimil1", lemma_idFirstDB); //retrieve lemma data from first DB (from table dd_lemma)
|
639 |
|
|
String lemma = lemmaFirstDB[0];
|
640 |
|
|
String pos = lemmaFirstDB[1];
|
641 |
|
|
|
642 |
|
|
int newLemmaId = -1;
|
643 |
|
|
try {
|
644 |
|
|
Statement secondDBLemmaMaxState = dalimil2Con.createStatement();
|
645 |
|
|
ResultSet secondDBLemmaMaxRes = secondDBLemmaMaxState.executeQuery("SELECT MAX(id) FROM public.dd_lemma;"); //get max id currently in lemma (second DB, Dalimil2) - newly added item has to have retrieved value+1
|
646 |
|
|
while(secondDBLemmaMaxRes.next()){
|
647 |
|
|
int maxId = secondDBLemmaMaxRes.getInt("max"); //get max id currently in lemma (second DB, Dalimil2) - newly added item has to have retrieved value+1
|
648 |
|
|
newLemmaId = maxId + 1; //newly inserted item : current_id + 1 for new id
|
649 |
|
|
}
|
650 |
|
|
|
651 |
|
|
if(newLemmaId == -1){
|
652 |
|
|
System.out.println("err, cannot retrieve lemma id!");
|
653 |
|
|
return newLemmaId;
|
654 |
|
|
}
|
655 |
|
|
//now try to insert data to second DB
|
656 |
|
|
Statement secondDBLemmaInsert = dalimil2Con.createStatement();
|
657 |
|
|
int secondDBLemmaInsertRes = secondDBLemmaInsert.executeUpdate("INSERT INTO public.dd_lemma (id, lemma, pos) VALUES (" + newLemmaId + ",'" + lemma + "'," + pos + ");"); //insert lemma data retrieved from first DB to second DB (table dd_lemma)
|
658 |
|
|
//System.out.print("Inserted lemma with id: " + newLemmaId + ", lemma: " + lemma + ", pos: " + pos + " RES: " + secondDBLemmaInsertRes);
|
659 |
|
|
return newLemmaId;
|
660 |
|
|
} catch (SQLException throwables) {
|
661 |
|
|
System.out.println("err, inserting lemma");
|
662 |
|
|
throwables.printStackTrace();
|
663 |
|
|
return newLemmaId;
|
664 |
|
|
}
|
665 |
|
|
}
|
666 |
|
|
|
667 |
|
|
/**
|
668 |
|
|
* Used for adding tag associated with the word to table dd_tag (from first to second DB).
|
669 |
|
|
* @param dalimil1Con Connection obj, represents connection to dalimil1 DB
|
670 |
|
|
* @param dalimil2Con Connection obj, represents connection to dalimil2 DB
|
671 |
|
|
* @param tag_idFirstDB tag_id of respective item (from first DB, dd_wordform! ie. Dalimil1)
|
672 |
|
|
* @return id of the newly inserted tag
|
673 |
|
|
*/
|
674 |
|
|
private static int addTagToSecDB(Connection dalimil1Con, Connection dalimil2Con, String tag_idFirstDB){
|
675 |
|
|
String tagFirstDB = retrieveTagContForId(dalimil1Con, "dalimil1", tag_idFirstDB); //retrieve tag data from first DB (from table dd_tag)
|
676 |
|
|
|
677 |
|
|
//tags in first DB are shorter than the ones in second DB (sec is 12char length) -> make it match
|
678 |
|
|
while(tagFirstDB.length() < 12){
|
679 |
|
|
tagFirstDB += "-";
|
680 |
|
|
}
|
681 |
|
|
|
682 |
|
|
//try to insert tag data to dd_tag table located in second DB
|
683 |
|
|
int newTagId = -1;
|
684 |
|
|
try{
|
685 |
|
|
Statement secondDBTagMaxState = dalimil2Con.createStatement();
|
686 |
|
|
ResultSet secondDBTagMaxRes = secondDBTagMaxState.executeQuery("SELECT MAX(id) FROM public.dd_tag;"); //get max id currently in tag (second DB, Dalimil2) - newly added item has to have retrieved value+1
|
687 |
|
|
while(secondDBTagMaxRes.next()){
|
688 |
|
|
int maxId = secondDBTagMaxRes.getInt("max"); //get max id currently in tag (second DB, Dalimil2) - newly added item has to have retrieved value+1
|
689 |
|
|
newTagId = maxId + 1; //newly inserted item : current_id + 1 for new id
|
690 |
|
|
}
|
691 |
|
|
|
692 |
|
|
if(newTagId == -1){
|
693 |
|
|
System.out.println("err, cannot retrieve tag id!");
|
694 |
|
|
return newTagId;
|
695 |
|
|
}
|
696 |
|
|
//now try to insert tag data retrieved from first DB to second DB
|
697 |
|
|
Statement secondDBTagInsert = dalimil2Con.createStatement();
|
698 |
|
|
int secondDBTagInsertRes = secondDBTagInsert.executeUpdate("INSERT INTO public.dd_tag (id, tag) VALUES (" + newTagId + ",'" + tagFirstDB + "');");
|
699 |
|
|
//System.out.println("Inserted tag with id: " + newTagId + ", tag: " + tagFirstDB + " RES: " + secondDBTagInsertRes);
|
700 |
|
|
return newTagId;
|
701 |
|
|
}catch (SQLException throwables){
|
702 |
|
|
System.out.println("err, inserting tag");
|
703 |
|
|
throwables.printStackTrace();
|
704 |
|
|
return newTagId;
|
705 |
|
|
}
|
706 |
|
|
}
|
707 |
|
|
|
708 |
|
|
/**
|
709 |
|
|
* Prints conflicting rows.
|
710 |
|
|
* @param dal1 list with dalimil1 content
|
711 |
|
|
* @param dal2 list with dalimil2 content
|
712 |
|
|
*/
|
713 |
|
|
private static void printConflicts(ArrayList<String> dal1, ArrayList<String> dal2){
|
714 |
|
|
if(dal1.size() != dal2.size()){
|
715 |
|
|
System.out.println("Err, size must be same...");
|
716 |
|
|
return;
|
717 |
|
|
}else{
|
718 |
|
|
for(int i = 0; i < dal1.size(); i++){
|
719 |
|
|
System.out.println(dal1.get(i));
|
720 |
|
|
System.out.println(dal2.get(i));
|
721 |
|
|
}
|
722 |
|
|
}
|
723 |
|
|
}
|
724 |
|
|
|
725 |
|
|
/**
|
726 |
|
|
* Retrieve manuscript numbers from table dd_manuscript for specific wordform_id (typically id from table dd_wordform).
|
727 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
728 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
729 |
|
|
* @param wordform_id id from table dd_wordform
|
730 |
|
|
* @return string arr which contains data from dd_manuscript (each index = manuscript assigned to wordform_id)
|
731 |
|
|
*/
|
732 |
|
|
private static ArrayList<String> retrieveManuscriptNumsForId(Connection dalimilXCon, String whichDB, String wordform_id){
|
733 |
|
|
ArrayList<String> manuscriptNumsList = new ArrayList<>();
|
734 |
|
|
|
735 |
|
|
try {
|
736 |
|
|
Statement statement = dalimilXCon.createStatement();
|
737 |
|
|
ResultSet results = statement.executeQuery("SELECT * FROM public.dd_manuscript WHERE wordform_id=" + wordform_id + ";");
|
738 |
|
|
while(results.next()){
|
739 |
|
|
String manuscript = results.getString("manuscript");
|
740 |
|
|
manuscriptNumsList.add(manuscript);
|
741 |
|
|
}
|
742 |
|
|
|
743 |
|
|
//System.out.println("OK, " + whichDB + " - retrieving data from dd_manuscript.");
|
744 |
|
|
} catch (SQLException throwables) {
|
745 |
|
|
System.out.println("ERR, " + whichDB + " - retrieving data from dd_manuscript.");
|
746 |
|
|
throwables.printStackTrace();
|
747 |
|
|
}
|
748 |
|
|
|
749 |
|
|
return manuscriptNumsList;
|
750 |
|
|
}
|
751 |
|
|
|
752 |
|
|
/**
|
753 |
|
|
* Retrieve content from table dd_lemma for specific id (typically lemma_id from table dd_wordform).
|
754 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
755 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
756 |
|
|
* @param id lemma_id from table dd_wordform
|
757 |
|
|
* @return String arr which contains data from dd_lemma (index 0 = lemma, index 1 = pos)
|
758 |
|
|
*/
|
759 |
|
|
private static String[] retrieveLemmaContForId(Connection dalimilXCon, String whichDB, String id){
|
760 |
|
|
String[] lemmaCont = new String[2];
|
761 |
|
|
|
762 |
|
|
try {
|
763 |
|
|
Statement statement = dalimilXCon.createStatement();
|
764 |
|
|
ResultSet results = statement.executeQuery("SELECT * FROM public.dd_lemma WHERE id=" + id + ";");
|
765 |
|
|
while(results.next()){
|
766 |
|
|
String lemma = results.getString("lemma");
|
767 |
|
|
String pos = results.getString("pos");
|
768 |
|
|
|
769 |
|
|
lemmaCont[0] = lemma;
|
770 |
|
|
lemmaCont[1] = pos;
|
771 |
|
|
}
|
772 |
|
|
} catch (SQLException throwables) {
|
773 |
|
|
System.out.println("ERR, " + whichDB + " - retrieving data from dd_lemma.");
|
774 |
|
|
throwables.printStackTrace();
|
775 |
|
|
}
|
776 |
|
|
|
777 |
|
|
return lemmaCont;
|
778 |
|
|
}
|
779 |
|
|
|
780 |
|
|
/**
|
781 |
|
|
* Retrieve tag from table dd_lemma for specific id (typically tag_id from table dd_wordform).
|
782 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
783 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
784 |
|
|
* @param id tag_id from table dd_wordform
|
785 |
|
|
* @return
|
786 |
|
|
*/
|
787 |
|
|
private static String retrieveTagContForId(Connection dalimilXCon, String whichDB, String id){
|
788 |
|
|
String tag = null;
|
789 |
|
|
|
790 |
|
|
try {
|
791 |
|
|
Statement statement = dalimilXCon.createStatement();
|
792 |
|
|
ResultSet results = statement.executeQuery("SELECT * FROM public.dd_tag WHERE id=" + id + ";");
|
793 |
|
|
while(results.next()){
|
794 |
|
|
tag = results.getString("tag");
|
795 |
|
|
}
|
796 |
|
|
} catch (SQLException throwables) {
|
797 |
|
|
System.out.println("ERR, " + whichDB + " - retrieving data from dd_tag.");
|
798 |
|
|
throwables.printStackTrace();
|
799 |
|
|
}
|
800 |
|
|
|
801 |
|
|
return tag;
|
802 |
|
|
}
|
803 |
|
|
|
804 |
|
|
/**
|
805 |
|
|
* Get data from table dd_wordform (from dalimilX DB)
|
806 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
807 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
808 |
|
|
* @return ResultSet, which represents data retrieved from dd_wordform table
|
809 |
|
|
*/
|
810 |
|
|
private static ResultSet retrieveWordformDalimilX(Connection dalimilXCon, String whichDB){
|
811 |
|
|
ResultSet results = null;
|
812 |
|
|
|
813 |
|
|
try {
|
814 |
|
|
Statement statement = dalimilXCon.createStatement();
|
815 |
|
|
results = statement.executeQuery("SELECT * FROM public.dd_wordform;");
|
816 |
|
|
//System.out.println("OK, " + whichDB + " - retrieving data from dd_wordform.");
|
817 |
|
|
} catch (SQLException throwables) {
|
818 |
|
|
System.out.println("ERR, " + whichDB + " - retrieving data from dd_wordform.");
|
819 |
|
|
throwables.printStackTrace();
|
820 |
|
|
}
|
821 |
|
|
|
822 |
|
|
return results;
|
823 |
|
|
}
|
824 |
|
|
|
825 |
|
|
/**
|
826 |
|
|
* Used for retrieving wordform with given id from DB (table dd_wordform).
|
827 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
828 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
829 |
|
|
* @param id id of the entry in table dd_wordform which should be retrieved
|
830 |
|
|
* @return ResultSet contains data about object with the specified id
|
831 |
|
|
*/
|
832 |
|
|
private static ResultSet retrieveWordformIdDalimilX(Connection dalimilXCon, String whichDB, String id){
|
833 |
|
|
ResultSet res = null;
|
834 |
|
|
|
835 |
|
|
try{
|
836 |
|
|
Statement statement = dalimilXCon.createStatement();
|
837 |
|
|
res = statement.executeQuery("SELECT * FROM public.dd_wordform WHERE id=" + id + ";");
|
838 |
|
|
} catch (SQLException throwables) {
|
839 |
|
|
System.out.println("ERR, " + whichDB + " - retrieving data from dd_wordform.");
|
840 |
|
|
throwables.printStackTrace();
|
841 |
|
|
}
|
842 |
|
|
|
843 |
|
|
return res;
|
844 |
|
|
}
|
845 |
|
|
|
846 |
|
|
|
847 |
|
|
|
848 |
|
|
/**
|
849 |
|
|
* Establish connection to dalimilX DB.
|
850 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
851 |
|
|
* @return Connection obj, represents connection to dalimilX DB
|
852 |
|
|
*/
|
853 |
|
|
private static Connection establishDalimilX(String whichDB){
|
854 |
|
|
String jdbcDal1 = "jdbc:postgresql://localhost:5432/" + whichDB;
|
855 |
|
|
String username = "postgres";
|
856 |
efca2344
|
Ondrej Drtina
|
String password = "YOUR_ADMIN_PASS"; //obviously - replace this with your admin password
|
857 |
a4c65df6
|
Ondrej Drtina
|
|
858 |
|
|
Connection connection = null;
|
859 |
|
|
|
860 |
|
|
try {
|
861 |
|
|
connection = DriverManager.getConnection(jdbcDal1, username, password);
|
862 |
|
|
//System.out.println("OK, " + whichDB + " - connect to PostgreSQL server.");
|
863 |
|
|
} catch (SQLException throwables) {
|
864 |
|
|
System.out.println("ERR, " + whichDB + " - connect to PostgreSQL server.");
|
865 |
|
|
throwables.printStackTrace();
|
866 |
|
|
}
|
867 |
|
|
|
868 |
|
|
return connection;
|
869 |
|
|
}
|
870 |
|
|
|
871 |
|
|
/**
|
872 |
|
|
* Close connection to dalimilX DB.
|
873 |
|
|
* @param dalimilXCon Connection obj which represents connection to DB
|
874 |
|
|
* @param whichDB name of DB (dalimil1 or dalimil2)
|
875 |
|
|
* @return true if closed successfully, else false
|
876 |
|
|
*/
|
877 |
|
|
private static boolean closeDalimilX(Connection dalimilXCon, String whichDB){
|
878 |
|
|
try {
|
879 |
|
|
dalimilXCon.close();
|
880 |
|
|
//System.out.println("OK, " + whichDB + " - disconnect from PostgreSQL server.");
|
881 |
|
|
return true;
|
882 |
|
|
} catch (SQLException throwables) {
|
883 |
|
|
throwables.printStackTrace();
|
884 |
|
|
System.out.println("ERR, " + whichDB + " - disconnect from PostgreSQL server.");
|
885 |
|
|
return false;
|
886 |
|
|
}
|
887 |
|
|
}
|
888 |
|
|
}
|