Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 0d303b8f

Přidáno uživatelem Jakub Šmíd před asi 2 roky(ů)

Added two external catalog items sources

re #9752

Zobrazit rozdíly:

backend/src/main/java/cz/zcu/kiv/backendapi/external/ExternalCatalogItemItemServiceImpl.java
9 9
import org.apache.tomcat.util.http.fileupload.FileUtils;
10 10
import org.springframework.stereotype.Service;
11 11
import org.springframework.transaction.annotation.Transactional;
12
import org.springframework.web.multipart.MultipartFile;
12 13

  
13 14
import javax.persistence.Table;
14 15
import java.io.*;
......
40 41
     * Buffer size
41 42
     */
42 43
    public static final int BUFFER_SIZE = 1024;
44

  
43 45
    /**
44 46
     * Directory where files for the external catalog will be stored
45 47
     */
46 48
    private static final String DIRECTORY_FOR_EXTERNAL_FILES = "sources";
49

  
47 50
    /**
48 51
     * URL for Pleiades file
49 52
     */
50 53
    private static final String PLEIADES_FILE_URL = "https://atlantides.org/downloads/pleiades/dumps/pleiades-names-latest.csv.gz";
54

  
51 55
    /**
52 56
     * Name of Pleiades file
53 57
     */
54 58
    private static final String PLEIADES_FILE_NAME = "pleiades-names-latest.csv";
59

  
55 60
    /**
56 61
     * URL for GeoNames file – needs to be formatted (more sources)
57 62
     */
58 63
    private static final String GEONAMES_FILE_URL = "https://download.geonames.org/export/dump/%s.zip";
64

  
59 65
    /**
60 66
     * Name of GeoNames file – needs to be formatted (more sources)
61 67
     */
62 68
    private static final String GEONAMES_FILE_NAME = "%s.txt";
69

  
63 70
    /**
64
     * URL for CIGS file
71
     * Name of CIGS file
65 72
     */
66
    private static final String CIGS_FILE_URL = "https://zenodo.org/record/5642899/files/CIGS_v1_4_20211101.csv";
73
    private static final String CIGS_FILE_NAME = "CIGS.csv";
74

  
67 75
    /**
68
     * Name of CIGS file
76
     * Name of ANE file
69 77
     */
70
    private static final String CIGS_FILE_NAME = "CIGS_v1_4_20211101.csv";
78
    private static final String ANE_FILE = "ANE.csv";
79

  
71 80
    /**
72 81
     * Batch size for saving items
73 82
     */
......
110 119

  
111 120

  
112 121
    @Override
113
    public void updateCatalog() {
122
    public void updateCatalog(MultipartFile file) {
114 123
        log.info("Updating external catalog");
124

  
115 125
        try {
116 126
            Files.createDirectories(Paths.get(DIRECTORY_FOR_EXTERNAL_FILES)); // creates directory if not exists
117 127
            FileUtils.cleanDirectory(new File(DIRECTORY_FOR_EXTERNAL_FILES)); // cleans the directory
118
            externalCatalogItemRepository.deleteAll(); // clears database – updated list will be stored later
119
            addPleiadesSource();
120
            addGeonamesSources();
121
            addCigsSources();
122 128
        } catch (Exception e) {
123 129
            e.printStackTrace();
124 130
        }
131

  
132
        externalCatalogItemRepository.deleteAll(); // clears database – updated list will be stored later
133

  
134
        try {
135
            addPleiadesRecords();
136
        } catch (Exception e) {
137
            e.printStackTrace();
138
        }
139

  
140
        try {
141
            addGeonamesRecords();
142
        } catch (Exception e) {
143
            e.printStackTrace();
144
        }
145

  
146
        try {
147
            if (file.getOriginalFilename() != null) {
148
                File cigsFile = new File(new File(DIRECTORY_FOR_EXTERNAL_FILES), file.getOriginalFilename());
149
                Files.copy(file.getInputStream(), cigsFile.toPath());
150
                addCigsRecords(cigsFile);
151
            }
152
        } catch (Exception e) {
153
            e.printStackTrace();
154
        }
155

  
156
        try {
157
            addAneRecords();
158
        } catch (Exception e) {
159
            e.printStackTrace();
160
        }
161

  
125 162
        log.info("External catalog updated");
126 163
    }
127 164

  
......
134 171
    }
135 172

  
136 173
    /**
137
     * Downloads, extracts and reads Pleiades sources and saves them to database
174
     * Downloads, extracts and reads records from Pleiades and saves them to database
138 175
     */
139
    private void addPleiadesSource() {
176
    private void addPleiadesRecords() {
140 177
        List<ExternalCatalogItem> externalCatalogItems = new ArrayList<>();
141 178
        byte[] buffer = new byte[BUFFER_SIZE];
142 179
        File pleiadesFile = new File(new File(DIRECTORY_FOR_EXTERNAL_FILES), PLEIADES_FILE_NAME);
......
170 207
            ex.printStackTrace();
171 208
        }
172 209
        saveAllWithThreads(externalCatalogItems);
210

  
211
        log.info("Records from Pleiades added");
173 212
    }
174 213

  
175 214
    /**
176
     * Downloads, extracts and reads GeoNames sources and saves them to database
215
     * Downloads, extracts and reads records from GeoNames and saves them to database
177 216
     */
178
    private void addGeonamesSources() {
217
    private void addGeonamesRecords() {
179 218
        byte[] buffer = new byte[BUFFER_SIZE];
180 219
        for (String countryCode : ExternalCatalogItem.COUNTRY_CODES.keySet()) {
181 220
            List<ExternalCatalogItem> externalCatalogItems = new ArrayList<>();
......
211 250
            }
212 251
            saveAllWithThreads(externalCatalogItems);
213 252
        }
253

  
254
        log.info("Records from GeoNames added");
214 255
    }
215 256

  
216 257
    /**
217
     * Downloads and reads CIGS sources and saves them to database
258
     * Saves records from ANE to database
218 259
     */
219
    private void addCigsSources() {
260
    private void addAneRecords() {
220 261
        List<ExternalCatalogItem> externalCatalogItems = new ArrayList<>();
221
        byte[] buffer = new byte[BUFFER_SIZE];
222
        File cigsFile = new File(new File(DIRECTORY_FOR_EXTERNAL_FILES), CIGS_FILE_NAME);
223

  
224
        try (InputStream inputStream = new URL(CIGS_FILE_URL).openStream();
225
             FileOutputStream fileOutputStream = new FileOutputStream(cigsFile)) {
226
            int bytes_read;
227

  
228
            while ((bytes_read = inputStream.read(buffer)) > 0) {
229

  
230
                fileOutputStream.write(buffer, 0, bytes_read);
262
        // Reads file and adds catalog items to list
263
        try (InputStream csvData = getClass().getClassLoader().getResourceAsStream(ANE_FILE)) {
264
            if (csvData == null) {
265
                return;
266
            }
267
            CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.Builder.create(CSVFormat.DEFAULT)
268
                    .setHeader()
269
                    .setSkipHeaderRecord(true)
270
                    .build());
271
            for (CSVRecord csvRecord : parser) {
272
                ExternalCatalogItem e = new ExternalCatalogItem(csvRecord.toList(), ExternalSource.ANE);
273
                externalCatalogItems.add(e);
231 274
            }
232 275
        } catch (IOException e) {
233 276
            e.printStackTrace();
234 277
        }
278
        saveAllWithThreads(externalCatalogItems);
279

  
280
        log.info("Records from ANE added");
281
    }
235 282

  
236
        log.info("The CIGS file was downloaded successfully");
283

  
284
    /**
285
     * Reads records from CIGS and saves them to database
286
     *
287
     * @param cigsFile CIGS file
288
     */
289
    private void addCigsRecords(File cigsFile) {
290
        List<ExternalCatalogItem> externalCatalogItems = new ArrayList<>();
237 291

  
238 292
        try (InputStream csvData = new FileInputStream(cigsFile)) {
239 293
            CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.Builder.create(CSVFormat.DEFAULT)
......
248 302
            e.printStackTrace();
249 303
        }
250 304
        saveAllWithThreads(externalCatalogItems);
305

  
306
        log.info("Records from CIGS added");
251 307
    }
252 308

  
253 309
    /**

Také k dispozici: Unified diff