You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@baremaps.apache.org by bc...@apache.org on 2023/01/06 14:33:53 UTC
[incubator-baremaps] branch main updated: Simplify the design of the geocoder (#563)
This is an automated email from the ASF dual-hosted git repository.
bchapuis pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-baremaps.git
The following commit(s) were added to refs/heads/main by this push:
new 48b1ce7a Simplify the design of the geocoder (#563)
48b1ce7a is described below
commit 48b1ce7a85a1701c37433a3bbac5044244261788
Author: Bertil Chapuis <bc...@gmail.com>
AuthorDate: Fri Jan 6 15:33:48 2023 +0100
Simplify the design of the geocoder (#563)
* Simplify the design of the geocoder
* Close the resources
* Revert some changes
* Format with spotless
---
.run/geocoder-create.run.xml | 2 +-
.run/geocoder-serve.run.xml | 2 +-
.run/iploc-create.run.xml | 2 +-
.run/iploc-serve.run.xml | 2 +-
.../org/apache/baremaps/cli/geocoder/Search.java | 46 +++++--
.../org/apache/baremaps/cli/geocoder/Serve.java | 42 +++---
baremaps-core/pom.xml | 10 +-
.../org/apache/baremaps/geocoder/Geocoder.java | 126 ------------------
.../Response.java => GeocoderConstants.java} | 16 ++-
.../baremaps/geocoder/GeonamesDocumentMapper.java | 53 ++++++++
.../baremaps/geocoder/GeonamesQueryBuilder.java | 78 +++++++++++
.../apache/baremaps/geocoder/GeonamesReader.java | 69 ++++++++++
.../geocoder/{geonames => }/GeonamesRecord.java | 8 +-
.../geocoder/{utils => }/IsoCountriesUtils.java | 13 +-
.../geocoder/geonames/GeonamesGeocoder.java | 118 -----------------
.../apache/baremaps/geocoder/request/Request.java | 53 --------
.../apache/baremaps/geocoder/response/Data.java | 90 -------------
.../apache/baremaps/geocoder/response/Result.java | 16 ---
.../main/java/org/apache/baremaps/iploc/IpLoc.java | 65 ++++++----
.../database/InetnumLocationDaoSqliteImpl.java | 4 -
.../apache/baremaps/openstreetmap/OsmReader.java | 3 +-
.../function/OpenstreetmapDocumentMapper.java | 71 +++++++++++
.../workflow/tasks/CreateGeonamesIndex.java | 47 +++++--
.../baremaps/workflow/tasks/CreateIplocIndex.java | 103 +++++++--------
.../org/apache/baremaps/geocoder/GeocoderTest.java | 79 ------------
.../baremaps/geocoder/GeonamesReaderTest.java | 40 ++++++
.../geocoder/geonames/GeonamesGeocoderTest.java | 73 -----------
.../java/org/apache/baremaps/iploc/IpLocTest.java | 21 +--
.../apache/baremaps/server/GeocoderResources.java | 68 +++++++---
.../src/main/resources/geocoder/index.html | 142 +++++++--------------
examples/geocoding/workflow.js | 4 +-
examples/ip-to-location/workflow.js | 73 ++++++++---
pom.xml | 10 +-
33 files changed, 691 insertions(+), 858 deletions(-)
diff --git a/.run/geocoder-create.run.xml b/.run/geocoder-create.run.xml
index 020a7e9c..0caed5b1 100644
--- a/.run/geocoder-create.run.xml
+++ b/.run/geocoder-create.run.xml
@@ -8,4 +8,4 @@
<option name="Make" enabled="true" />
</method>
</configuration>
-</component>
\ No newline at end of file
+</component>
diff --git a/.run/geocoder-serve.run.xml b/.run/geocoder-serve.run.xml
index 80b59f48..2577524e 100644
--- a/.run/geocoder-serve.run.xml
+++ b/.run/geocoder-serve.run.xml
@@ -10,4 +10,4 @@
<option name="Make" enabled="true" />
</method>
</configuration>
-</component>
\ No newline at end of file
+</component>
diff --git a/.run/iploc-create.run.xml b/.run/iploc-create.run.xml
index 1d11e5f7..456d9ff7 100644
--- a/.run/iploc-create.run.xml
+++ b/.run/iploc-create.run.xml
@@ -8,4 +8,4 @@
<option name="Make" enabled="true" />
</method>
</configuration>
-</component>
\ No newline at end of file
+</component>
diff --git a/.run/iploc-serve.run.xml b/.run/iploc-serve.run.xml
index 0e475ae5..f012f450 100644
--- a/.run/iploc-serve.run.xml
+++ b/.run/iploc-serve.run.xml
@@ -8,4 +8,4 @@
<option name="Make" enabled="true" />
</method>
</configuration>
-</component>
\ No newline at end of file
+</component>
diff --git a/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Search.java b/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Search.java
index 29024ee4..16fae523 100644
--- a/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Search.java
+++ b/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Search.java
@@ -16,8 +16,9 @@ package org.apache.baremaps.cli.geocoder;
import java.nio.file.Path;
import java.util.concurrent.Callable;
-import org.apache.baremaps.geocoder.geonames.GeonamesGeocoder;
-import org.apache.baremaps.geocoder.request.Request;
+import org.apache.baremaps.geocoder.GeonamesQueryBuilder;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.MMapDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine.Command;
@@ -28,21 +29,44 @@ public class Search implements Callable<Integer> {
private static final Logger logger = LoggerFactory.getLogger(Search.class);
- @Option(names = {"--index"}, paramLabel = "INDEX", description = "The path to the lucene index.",
+ @Option(
+ names = {"--index"}, paramLabel = "INDEX", description = "The path to the lucene index.",
required = true)
- private Path indexPath;
+ private Path indexDirectory;
- @Option(names = {"--search"}, paramLabel = "SEARCH",
+ @Option(
+ names = {"--terms"}, paramLabel = "terms",
description = "The terms to search in the index.", required = true)
- private String search;
+ private String terms;
+
+ @Option(
+ names = {"--country"}, paramLabel = "COUNTRY", description = "The country code filter.",
+ required = false)
+ private String countryCode = "";
+
+ @Option(
+ names = {"--limit"}, paramLabel = "LIMIT",
+ description = "The number of result to return.", required = false)
+ private Integer limit = 10;
@Override
public Integer call() throws Exception {
- var geocoder = new GeonamesGeocoder(indexPath, null);
- geocoder.open();
- var request = new Request(search, 20);
- var response = geocoder.search(request);
- logger.info("{}", response);
+ try (
+ var directory = MMapDirectory.open(indexDirectory);
+ var searcherManager = new SearcherManager(directory, new SearcherFactory())) {
+ var query = new GeonamesQueryBuilder().queryText(terms).countryCode(countryCode).build();
+ var searcher = searcherManager.acquire();
+ try {
+ var result = searcher.search(query, limit);
+ for (var hit : result.scoreDocs) {
+ var document = searcher.doc(hit.doc);
+ logger.info("{}", document);
+ }
+ } finally {
+ searcherManager.release(searcher);
+ }
+ }
+
return 0;
}
}
diff --git a/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Serve.java b/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Serve.java
index 097cdb71..6a2af117 100644
--- a/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Serve.java
+++ b/baremaps-cli/src/main/java/org/apache/baremaps/cli/geocoder/Serve.java
@@ -18,10 +18,11 @@ import io.servicetalk.http.netty.HttpServers;
import io.servicetalk.http.router.jersey.HttpJerseyRouterBuilder;
import java.nio.file.Path;
import java.util.concurrent.Callable;
-import org.apache.baremaps.geocoder.Geocoder;
-import org.apache.baremaps.geocoder.geonames.GeonamesGeocoder;
import org.apache.baremaps.server.CorsFilter;
import org.apache.baremaps.server.GeocoderResources;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.MMapDirectory;
import org.glassfish.hk2.utilities.binding.AbstractBinder;
import org.glassfish.jersey.server.ResourceConfig;
import org.slf4j.Logger;
@@ -34,9 +35,10 @@ public class Serve implements Callable<Integer> {
private static final Logger logger = LoggerFactory.getLogger(Serve.class);
- @Option(names = {"--index"}, paramLabel = "INDEX", description = "The path to the lucene index.",
+ @Option(
+ names = {"--index"}, paramLabel = "INDEX", description = "The path to the lucene index.",
required = true)
- private Path indexPath;
+ private Path indexDirectory;
@Option(names = {"--host"}, paramLabel = "HOST", description = "The host of the server.")
private String host = "localhost";
@@ -46,25 +48,25 @@ public class Serve implements Callable<Integer> {
@Override
public Integer call() throws Exception {
+ try (
+ var directory = MMapDirectory.open(indexDirectory);
+ var searcherManager = new SearcherManager(directory, new SearcherFactory())) {
+ // Configure the application
+ var application = new ResourceConfig().register(CorsFilter.class)
+ .register(GeocoderResources.class).register(new AbstractBinder() {
+ @Override
+ protected void configure() {
+ bind(searcherManager).to(SearcherManager.class).named("searcherManager");
+ }
+ });
- var geocoder = new GeonamesGeocoder(indexPath, null);
- geocoder.open();
+ var httpService = new HttpJerseyRouterBuilder().buildBlockingStreaming(application);
+ var serverContext = HttpServers.forPort(port).listenBlockingStreamingAndAwait(httpService);
- // Configure the application
- var application = new ResourceConfig().register(CorsFilter.class)
- .register(GeocoderResources.class).register(new AbstractBinder() {
- @Override
- protected void configure() {
- bind(geocoder).to(Geocoder.class).named("geocoder");
- }
- });
+ logger.info("Listening on {}", serverContext.listenAddress());
+ serverContext.awaitShutdown();
+ }
- var httpService = new HttpJerseyRouterBuilder().buildBlockingStreaming(application);
- var serverContext = HttpServers.forPort(port).listenBlockingStreamingAndAwait(httpService);
-
- logger.info("Listening on {}", serverContext.listenAddress());
-
- serverContext.awaitShutdown();
return 0;
}
}
diff --git a/baremaps-core/pom.xml b/baremaps-core/pom.xml
index c5cc7fb5..6a103615 100644
--- a/baremaps-core/pom.xml
+++ b/baremaps-core/pom.xml
@@ -78,10 +78,6 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
</dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers-common</artifactId>
- </dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
@@ -92,11 +88,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-replicator</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-spatial</artifactId>
+ <artifactId>lucene-spatial-extras</artifactId>
</dependency>
<dependency>
<groupId>org.apache.sis.core</groupId>
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/Geocoder.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/Geocoder.java
deleted file mode 100644
index 2f1fb198..00000000
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/Geocoder.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder;
-
-
-
-import java.io.IOException;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Stream;
-import org.apache.baremaps.geocoder.request.Request;
-import org.apache.baremaps.geocoder.response.Data;
-import org.apache.baremaps.geocoder.response.Response;
-import org.apache.baremaps.geocoder.response.Result;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.SearcherFactory;
-import org.apache.lucene.search.SearcherManager;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.MMapDirectory;
-
-public abstract class Geocoder implements AutoCloseable {
-
- private final Directory directory;
- private SearcherManager searcherManager;
- private Analyzer analyzer = analyzer();
-
- public Geocoder(Path index) throws IOException {
- this.directory = MMapDirectory.open(index);
- }
-
- public boolean indexExists() throws IOException {
- return DirectoryReader.indexExists(directory);
- }
-
- public void open() throws IOException {
- if (!DirectoryReader.indexExists(directory)) {
- throw new IllegalStateException("Invalid Lucene index directory");
- }
- searcherManager = new SearcherManager(directory, new SearcherFactory());
- }
-
- public void build() throws IOException {
- build(documents()::iterator);
- }
-
- private void build(Iterable<Document> documents) throws IOException {
- IndexWriterConfig config = new IndexWriterConfig(analyzer);
- try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
- indexWriter.deleteAll();
- indexWriter.addDocuments(documents);
- } catch (IOException exception) {
- throw new RuntimeException();
- }
- searcherManager = new SearcherManager(directory, new SearcherFactory());
- }
-
- public Response search(Request request) throws IOException, ParseException {
- IndexSearcher searcher = searcherManager.acquire();
- List<Result> results = new ArrayList<>();
- try {
- TopDocs topDocs = searcher.search(query(analyzer, request), request.limit());
- for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
- Document document = searcher.doc(scoreDoc.doc);
- Data data = new Data(document.get("name"), document.get("asciiname"),
- document.get("alternatenames"),
- document.getField("latitude") != null
- ? document.getField("latitude").numericValue().doubleValue()
- : null,
- document.getField("longitude") != null
- ? document.getField("longitude").numericValue().doubleValue()
- : null,
- document.get("featureClass"), document.get("featureCode"), document.get("countryCode"),
- document.get("cc2"), document.get("admin1Code"), document.get("admin2Code"),
- document.get("admin3Code"), document.get("admin4Code"),
- document.getField("population") != null
- ? document.getField("population").numericValue().longValue()
- : null,
- document.getField("elevation") != null
- ? document.getField("elevation").numericValue().intValue()
- : null,
- document.getField("dem") != null ? document.getField("dem").numericValue().intValue()
- : null,
- document.get("timezone"), document.get("modificationDate"));
- results.add(new Result(scoreDoc.score, data));
- }
- return new Response(results);
- } finally {
- searcherManager.release(searcher);
- }
- }
-
- @Override
- public void close() throws IOException {
- analyzer.close();
- directory.close();
- if (searcherManager != null) {
- searcherManager.close();
- }
- }
-
- protected abstract Analyzer analyzer() throws IOException;
-
- protected abstract Stream<Document> documents() throws IOException;
-
- protected abstract Query query(Analyzer analyzer, Request request) throws ParseException;
-}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Response.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeocoderConstants.java
similarity index 65%
rename from baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Response.java
rename to baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeocoderConstants.java
index e1933dcb..395bd4bd 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Response.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeocoderConstants.java
@@ -10,8 +10,18 @@
* the License.
*/
-package org.apache.baremaps.geocoder.response;
+package org.apache.baremaps.geocoder;
-import java.util.List;
-public record Response(List<Result> results) {}
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+/**
+ * Constants used by the geocoder.
+ */
+public class GeocoderConstants {
+
+ public static final Analyzer ANALYZER = new StandardAnalyzer();
+
+}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java
new file mode 100644
index 00000000..d9765ef8
--- /dev/null
+++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesDocumentMapper.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+
+
+import java.util.function.Function;
+import org.apache.lucene.document.*;
+
+/**
+ * Maps a {@link GeonamesRecord} to a Lucene {@link Document}.
+ */
+public class GeonamesDocumentMapper implements Function<GeonamesRecord, Document> {
+
+ @Override
+ public Document apply(GeonamesRecord record) {
+ Document document = new Document();
+ document.add(new TextField("name", record.getName(), Field.Store.YES));
+ document.add(new TextField("country", IsoCountriesUtils.getCountry(record.getCountryCode()),
+ Field.Store.YES));
+ document.add(new StringField("countryCode", record.getCountryCode(), Field.Store.YES));
+ document.add(new LatLonPoint("point", record.getLatitude(), record.getLongitude()));
+ document.add(new StoredField("longitude", record.getLongitude()));
+ document.add(new StoredField("latitude", record.getLatitude()));
+ document.add(new StoredField("asciiname", record.getAsciiname()));
+ document.add(new StoredField("alternatenames", record.getAlternatenames()));
+ document.add(new StoredField("featureClass", record.getFeatureClass()));
+ document.add(new StoredField("featureCode", record.getFeatureCode()));
+ document.add(new StoredField("cc2", record.getCc2()));
+ document.add(new StoredField("admin1Code", record.getAdmin1Code()));
+ document.add(new StoredField("admin2Code", record.getAdmin2Code()));
+ document.add(new StoredField("admin3Code", record.getAdmin3Code()));
+ document.add(new StoredField("admin4Code", record.getAdmin4Code()));
+ document.add(new StoredField("population", record.getPopulation()));
+ if (record.getElevation() != null) {
+ document.add(new StoredField("elevation", record.getElevation()));
+ }
+ document.add(new StoredField("dem", record.getDem()));
+ document.add(new StoredField("timezone", record.getTimezone()));
+ document.add(new StoredField("modificationDate", record.getModificationDate()));
+ return document;
+ }
+}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java
new file mode 100644
index 00000000..57838f80
--- /dev/null
+++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesQueryBuilder.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+
+
+import java.util.Map;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.queryparser.simple.SimpleQueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/**
+ * Utility class to build a Lucene {@link Query}.
+ */
+public class GeonamesQueryBuilder {
+
+ private final Analyzer analyzer;
+
+ private String queryText;
+
+ private String countryCode = "";
+
+ public GeonamesQueryBuilder() {
+ this(GeocoderConstants.ANALYZER);
+ }
+
+ public GeonamesQueryBuilder(Analyzer analyzer) {
+ this.analyzer = analyzer;
+ }
+
+ public GeonamesQueryBuilder queryText(String queryText) {
+ this.queryText = queryText;
+ return this;
+ }
+
+ public GeonamesQueryBuilder countryCode(String countryCode) {
+ this.countryCode = countryCode;
+ return this;
+ }
+
+ public Query build() {
+ var builder = new BooleanQuery.Builder();
+
+ if (queryText != null) {
+ var queryTextEsc = QueryParser.escape(queryText);
+ if (!queryTextEsc.isBlank()) {
+ var fieldWeights = Map.of("name", 1f, "country", 1f);
+ var termsQuery = new SimpleQueryParser(analyzer, fieldWeights).parse(queryTextEsc);
+ builder.add(termsQuery, BooleanClause.Occur.SHOULD);
+ }
+ }
+
+ if (countryCode != null) {
+ var countryCodeEsc = QueryParser.escape(countryCode);
+ if (!countryCodeEsc.isBlank()) {
+ var countryCodeQuery = new TermQuery(new Term("countryCode", countryCodeEsc));
+ builder.add(countryCodeQuery, BooleanClause.Occur.MUST);
+ }
+ }
+
+ return builder.build();
+ }
+}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java
new file mode 100644
index 00000000..9c25f172
--- /dev/null
+++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesReader.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+
+
+import com.fasterxml.jackson.databind.MappingIterator;
+import com.fasterxml.jackson.dataformat.csv.CsvMapper;
+import com.fasterxml.jackson.dataformat.csv.CsvSchema;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+import org.apache.baremaps.openstreetmap.OsmReader;
+
+/**
+ * A reader for the Geonames database.
+ */
+public class GeonamesReader implements OsmReader<GeonamesRecord> {
+
+ @Override
+ public Stream<GeonamesRecord> stream(InputStream inputStream) throws IOException {
+ CsvMapper mapper = new CsvMapper();
+
+ CsvSchema schema = CsvSchema.builder()
+ .addColumn("geonameid")
+ .addColumn("name")
+ .addColumn("asciiname")
+ .addColumn("alternatenames")
+ .addColumn("latitude")
+ .addColumn("longitude")
+ .addColumn("featureClass")
+ .addColumn("featureCode")
+ .addColumn("countryCode")
+ .addColumn("cc2")
+ .addColumn("admin1Code")
+ .addColumn("admin2Code")
+ .addColumn("admin3Code")
+ .addColumn("admin4Code")
+ .addColumn("population")
+ .addColumn("elevation")
+ .addColumn("dem")
+ .addColumn("timezone")
+ .addColumn("modificationDate")
+ .build()
+ .withColumnSeparator('\t')
+ .withoutQuoteChar();
+
+ MappingIterator<GeonamesRecord> recordIterator = mapper.readerFor(GeonamesRecord.class)
+ .with(schema).readValues(new InputStreamReader(inputStream));
+ Spliterator<GeonamesRecord> recordSpliterator =
+ Spliterators.spliteratorUnknownSize(recordIterator, 0);
+
+ return StreamSupport.stream(recordSpliterator, false);
+ }
+}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesRecord.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesRecord.java
similarity index 97%
rename from baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesRecord.java
rename to baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesRecord.java
index 75a964f8..fd64b004 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesRecord.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/GeonamesRecord.java
@@ -10,10 +10,12 @@
* the License.
*/
-package org.apache.baremaps.geocoder.geonames;
+package org.apache.baremaps.geocoder;
-/** Structured of a Geonames record. */
-class GeonamesRecord {
+/**
+ * A record from the Geonames database.
+ */
+public class GeonamesRecord {
// integer id of record in geonames database
private Integer geonameid;
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/utils/IsoCountriesUtils.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/IsoCountriesUtils.java
similarity index 74%
rename from baremaps-core/src/main/java/org/apache/baremaps/geocoder/utils/IsoCountriesUtils.java
rename to baremaps-core/src/main/java/org/apache/baremaps/geocoder/IsoCountriesUtils.java
index 6b269de7..deec622f 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/utils/IsoCountriesUtils.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/IsoCountriesUtils.java
@@ -10,7 +10,7 @@
* the License.
*/
-package org.apache.baremaps.geocoder.utils;
+package org.apache.baremaps.geocoder;
@@ -18,24 +18,27 @@ import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+/**
+ * Utility class to deal with country codes.
+ */
public class IsoCountriesUtils {
- private static Map<String, String> isoCountries = new HashMap<>();
+ private static final Map<String, String> ISO_COUNTRIES = new HashMap<>();
static {
for (String iso : Locale.getISOCountries()) {
Locale l = new Locale("", iso);
- isoCountries.put(iso, l.getDisplayCountry());
+ ISO_COUNTRIES.put(iso, l.getDisplayCountry());
}
}
private IsoCountriesUtils() {}
public static String getCountry(String iso) {
- return isoCountries.getOrDefault(iso, "");
+ return ISO_COUNTRIES.getOrDefault(iso, "");
}
public static boolean containsCountry(String iso) {
- return isoCountries.containsKey(iso);
+ return ISO_COUNTRIES.containsKey(iso);
}
}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesGeocoder.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesGeocoder.java
deleted file mode 100644
index 7ffdf48b..00000000
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/geonames/GeonamesGeocoder.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder.geonames;
-
-
-
-import com.fasterxml.jackson.databind.MappingIterator;
-import com.fasterxml.jackson.dataformat.csv.CsvMapper;
-import com.fasterxml.jackson.dataformat.csv.CsvSchema;
-import java.io.IOException;
-import java.nio.file.Path;
-import java.util.Spliterators;
-import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
-import org.apache.baremaps.geocoder.Geocoder;
-import org.apache.baremaps.geocoder.request.Request;
-import org.apache.baremaps.geocoder.utils.IsoCountriesUtils;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.queryparser.simple.SimpleQueryParser;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BooleanQuery.Builder;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-
-public class GeonamesGeocoder extends Geocoder {
-
- private Path data;
-
- public GeonamesGeocoder(Path index, Path data) throws IOException {
- super(index);
- this.data = data;
- }
-
- @Override
- protected Analyzer analyzer() {
- return new StandardAnalyzer();
- }
-
- @Override
- protected Stream<Document> documents() throws IOException {
- CsvMapper mapper = new CsvMapper();
- CsvSchema schema = CsvSchema.builder().addColumn("geonameid").addColumn("name")
- .addColumn("asciiname").addColumn("alternatenames").addColumn("latitude")
- .addColumn("longitude").addColumn("featureClass").addColumn("featureCode")
- .addColumn("countryCode").addColumn("cc2").addColumn("admin1Code").addColumn("admin2Code")
- .addColumn("admin3Code").addColumn("admin4Code").addColumn("population")
- .addColumn("elevation").addColumn("dem").addColumn("timezone").addColumn("modificationDate")
- .build().withColumnSeparator('\t').withoutQuoteChar();
- MappingIterator<GeonamesRecord> it =
- mapper.readerFor(GeonamesRecord.class).with(schema).readValues(data.toFile());
- return StreamSupport.stream(Spliterators.spliteratorUnknownSize(it, 0), false).map(record -> {
- Document document = new Document();
- document.add(new TextField("name", record.getName(), Store.YES));
- document.add(new TextField("country", IsoCountriesUtils.getCountry(record.getCountryCode()),
- Store.YES));
- document.add(new StringField("countryCode", record.getCountryCode(), Store.YES));
- document.add(new StoredField("longitude", record.getLongitude()));
- document.add(new StoredField("latitude", record.getLatitude()));
- document.add(new StoredField("asciiname", record.getAsciiname()));
- document.add(new StoredField("alternatenames", record.getAlternatenames()));
- document.add(new StoredField("featureClass", record.getFeatureClass()));
- document.add(new StoredField("featureCode", record.getFeatureCode()));
- document.add(new StoredField("cc2", record.getCc2()));
- document.add(new StoredField("admin1Code", record.getAdmin1Code()));
- document.add(new StoredField("admin2Code", record.getAdmin2Code()));
- document.add(new StoredField("admin3Code", record.getAdmin3Code()));
- document.add(new StoredField("admin4Code", record.getAdmin4Code()));
- document.add(new StoredField("population", record.getPopulation()));
- if (record.getElevation() != null) {
- document.add(new StoredField("elevation", record.getElevation()));
- }
- document.add(new StoredField("dem", record.getDem()));
- document.add(new StoredField("timezone", record.getTimezone()));
- document.add(new StoredField("modificationDate", record.getModificationDate()));
- return document;
- });
- }
-
- @Override
- protected Query query(Analyzer analyzer, Request request) throws ParseException {
- BooleanQuery.Builder builder = new Builder();
- String query = QueryParser.escape(request.query());
- if (!query.isBlank()) {
- SimpleQueryParser nameQueryParser = new SimpleQueryParser(analyzer, "name");
- builder.add(nameQueryParser.parse(query), Occur.SHOULD);
-
- SimpleQueryParser countryQueryParser = new SimpleQueryParser(analyzer, "country");
- builder.add(countryQueryParser.parse(query), Occur.SHOULD);
-
- if (request.countryCode() != null) {
- builder.add(
- new TermQuery(new Term("countryCode", QueryParser.escape(request.countryCode()))),
- Occur.MUST);
- }
- }
- return builder.build();
- }
-}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/request/Request.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/request/Request.java
deleted file mode 100644
index 2059d451..00000000
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/request/Request.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder.request;
-
-
-
-import org.apache.baremaps.geocoder.utils.IsoCountriesUtils;
-
-public class Request {
-
- private final String query;
- private final String countryCode;
-
- private final int limit;
-
- public Request(String query, int limit) {
- this.query = query;
- this.limit = limit;
- this.countryCode = null;
- }
-
- public Request(String query, int limit, String countryCode) {
- this.query = query;
- this.limit = limit;
- if (IsoCountriesUtils.containsCountry(countryCode)) {
- this.countryCode = countryCode;
- } else {
- this.countryCode = null;
- }
- }
-
- public String query() {
- return query;
- }
-
- public String countryCode() {
- return countryCode;
- }
-
- public int limit() {
- return limit;
- }
-}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Data.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Data.java
deleted file mode 100644
index 11f0e3b1..00000000
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Data.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder.response;
-
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"), you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Structured of a Geonames record.
- */
-public record Data(
-
- // name of geographical point (utf8) varchar(200)
- String name,
-
- // name of geographical point in plain ascii characters, varchar(200)
- String asciiname,
-
- // alternatenames, comma separated, ascii names automatically transliterated,
- // convenience attribute from alternatename table, varchar(10000)
- String alternatenames,
-
- // latitude in decimal degrees (wgs84)
- Double latitude,
-
- // longitude in decimal degrees (wgs84)
- Double longitude,
-
- // see http://www.geonames.org/export/codes.html, char(1)
- String featureClass,
-
- // see http://www.geonames.org/export/codes.html, varchar(10)
- String featureCode,
-
- // ISO-3166 2-letter country code, 2 characters
- String countryCode,
-
- // alternate country codes, comma separated, ISO-3166 2-letter country code, 200 characters
- String cc2,
-
- // fipscode (subject to change to iso code), see exceptions below, see file
- // admin1Codes.txt for display names of this code, varchar(20)
- // in switzerland usually canton code (ex: VD)
- String admin1Code,
-
- // code for the second administrative division, a county in the US, see file
- // admin2Codes.txt, varchar(80)
- String admin2Code,
-
- // code for third level administrative division, varchar(20)
- String admin3Code,
-
- // code for fourth level administrative division, varchar(20)
- String admin4Code,
-
- // population
- Long population,
-
- // elevation in meters, integer
- Integer elevation,
-
- // digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or
- // 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
- Integer dem,
-
- // the iana timezone id (see file timeZone.txt) varchar(40)
- String timezone,
-
- // date of last modification in yyyy-MM-dd format
- String modificationDate) {
-}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Result.java b/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Result.java
deleted file mode 100644
index b0e5bb36..00000000
--- a/baremaps-core/src/main/java/org/apache/baremaps/geocoder/response/Result.java
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder.response;
-
-
-public record Result(float score, Data data) {}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLoc.java b/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLoc.java
index 6b2fba39..69c15572 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLoc.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/iploc/IpLoc.java
@@ -22,10 +22,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.apache.baremaps.geocoder.Geocoder;
-import org.apache.baremaps.geocoder.request.Request;
-import org.apache.baremaps.geocoder.response.Response;
-import org.apache.baremaps.geocoder.utils.IsoCountriesUtils;
+import org.apache.baremaps.geocoder.GeonamesQueryBuilder;
+import org.apache.baremaps.geocoder.IsoCountriesUtils;
import org.apache.baremaps.iploc.data.InetnumLocation;
import org.apache.baremaps.iploc.data.IpLocStats;
import org.apache.baremaps.iploc.data.Ipv4Range;
@@ -36,6 +34,8 @@ import org.apache.baremaps.iploc.nic.NicAttribute;
import org.apache.baremaps.iploc.nic.NicObject;
import org.apache.baremaps.stream.StreamUtils;
import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.search.TopDocs;
/** Generating pairs of IP address ranges and their locations into an SQLite database */
public class IpLoc {
@@ -43,19 +43,19 @@ public class IpLoc {
private final float SCORE_THRESHOLD = 0.1f;
private final InetnumLocationDao inetnumLocationDao;
- private final Geocoder geocoder;
+ private final SearcherManager searcherManager;
private IpLocStats iplocStats;
/**
* Create a new IpLoc object
*
* @param databaseUrl the jdbc url to the sqlite database
- * @param geocoder the geocoder that will be used to find the locations of the objects
+ * @param searcherManager the Lucene searcher manager opened on the geonames index, used to find the locations of the objects
*/
- public IpLoc(String databaseUrl, Geocoder geocoder) {
- inetnumLocationDao = new InetnumLocationDaoSqliteImpl(databaseUrl);
- iplocStats = new IpLocStats();
- this.geocoder = geocoder;
+ public IpLoc(String databaseUrl, SearcherManager searcherManager) {
+ this.inetnumLocationDao = new InetnumLocationDaoSqliteImpl(databaseUrl);
+ this.iplocStats = new IpLocStats();
+ this.searcherManager = searcherManager;
}
/**
@@ -92,7 +92,6 @@ public class IpLoc {
}
NicAttribute firstAttribute = nicObject.attributes().get(0);
-
if (!Objects.equals(firstAttribute.name(), "inetnum")) {
return Optional.empty();
}
@@ -115,7 +114,7 @@ public class IpLoc {
// If there is an address we use that address to query the geocoder
if (attributes.containsKey("address")) {
Optional<Location> location =
- findLocation(new Request(attributes.get("address"), 1, attributes.get("country")));
+ findLocation(attributes.get("address"), attributes.get("country"));
if (location.isPresent()) {
iplocStats.incrementInsertedByAddressCount();
return Optional.of(new InetnumLocation(attributes.get("address"), ipRange, location.get(),
@@ -125,7 +124,7 @@ public class IpLoc {
// If there is a description we use that description to query the geocoder
if (attributes.containsKey("descr")) {
Optional<Location> location =
- findLocation(new Request(attributes.get("descr"), 1, attributes.get("country")));
+ findLocation(attributes.get("descr"), attributes.get("country"));
if (location.isPresent()) {
iplocStats.incrementInsertedByDescrCount();
return Optional.of(new InetnumLocation(attributes.get("descr"), ipRange, location.get(),
@@ -135,7 +134,7 @@ public class IpLoc {
// If there is a name we use that name to query the geocoder
if (attributes.containsKey("name")) {
Optional<Location> location =
- findLocation(new Request(attributes.get("name"), 1, attributes.get("country")));
+ findLocation(attributes.get("name"), attributes.get("country"));
if (location.isPresent()) {
iplocStats.incrementInsertedByDescrCount();
return Optional.of(new InetnumLocation(attributes.get("name"), ipRange, location.get(),
@@ -147,8 +146,8 @@ public class IpLoc {
if (attributes.containsKey("country")
&& IsoCountriesUtils.containsCountry(attributes.get("country").toUpperCase())) {
String countryUppercase = attributes.get("country").toUpperCase();
- Optional<Location> location = findLocation(
- new Request(IsoCountriesUtils.getCountry(countryUppercase), 1, countryUppercase));
+ Optional<Location> location =
+ findLocation(IsoCountriesUtils.getCountry(countryUppercase), countryUppercase);
if (location.isPresent()) {
iplocStats.incrementInsertedByCountryCodeCount();
return Optional.of(new InetnumLocation(IsoCountriesUtils.getCountry(countryUppercase),
@@ -159,7 +158,7 @@ public class IpLoc {
// If there is a country that did not follow the ISO format we will query using the country
// has plain text
if (attributes.containsKey("country")) {
- Optional<Location> location = findLocation(new Request(attributes.get("country"), 1));
+ Optional<Location> location = findLocation(attributes.get("country"), "");
if (location.isPresent()) {
iplocStats.incrementInsertedByCountryCount();
return Optional.of(new InetnumLocation(attributes.get("country"), ipRange, location.get(),
@@ -178,22 +177,32 @@ public class IpLoc {
/**
* Use the geocoder to find a latitude/longitude with the given query.
*
- * @param request for the location
+ * @param searchTerms the search terms
+ * @param countryCode the country code filter
* @return an optional of the location
* @throws IOException
* @throws ParseException
*/
- private Optional<Location> findLocation(Request request) throws IOException, ParseException {
- Response response = geocoder.search(request);
- if (response.results().size() > 0) {
- var bestResult = response.results().get(0);
- if (bestResult.score() > SCORE_THRESHOLD) {
- double latitude = bestResult.data().latitude();
- double longitude = bestResult.data().longitude();
- return Optional.of(new Location(latitude, longitude));
- }
- return Optional.empty();
+  private Optional<Location> findLocation(String searchTerms, String countryCode)
+      throws IOException, ParseException {
+    // Build the query before acquiring the searcher so that a failure while building it
+    // cannot leave an acquired searcher behind.
+    var geonamesQuery =
+        new GeonamesQueryBuilder().queryText(searchTerms).countryCode(countryCode).build();
+
+    // Every acquire() must be matched by a release(); otherwise the reference held on the
+    // underlying index reader is never given back.
+    var indexSearcher = searcherManager.acquire();
+    try {
+      // Only the best match is of interest.
+      TopDocs topDocs = indexSearcher.search(geonamesQuery, 1);
+      if (topDocs.scoreDocs.length == 0) {
+        return Optional.empty();
+      }
+
+      // Discard matches that score below the threshold.
+      var scoreDoc = topDocs.scoreDocs[0];
+      if (scoreDoc.score < SCORE_THRESHOLD) {
+        return Optional.empty();
+      }
+
+      var document = indexSearcher.doc(scoreDoc.doc);
+      double latitude = document.getField("latitude").numericValue().doubleValue();
+      double longitude = document.getField("longitude").numericValue().doubleValue();
+      return Optional.of(new Location(latitude, longitude));
+    } finally {
+      searcherManager.release(indexSearcher);
+    }
}
/**
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/iploc/database/InetnumLocationDaoSqliteImpl.java b/baremaps-core/src/main/java/org/apache/baremaps/iploc/database/InetnumLocationDaoSqliteImpl.java
index 3437eeb0..cb5c7e93 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/iploc/database/InetnumLocationDaoSqliteImpl.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/iploc/database/InetnumLocationDaoSqliteImpl.java
@@ -23,7 +23,6 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
-import java.util.stream.Collectors;
import org.apache.baremaps.iploc.data.InetnumLocation;
import org.apache.baremaps.iploc.data.Ipv4Range;
import org.apache.baremaps.iploc.data.Location;
@@ -161,7 +160,6 @@ public final class InetnumLocationDaoSqliteImpl implements InetnumLocationDao {
stmt.setString(6, inetnumLocation.getNetwork());
stmt.setString(7, inetnumLocation.getCountry());
stmt.executeUpdate();
- logger.info(String.format("Data Added Successfully %s", inetnumLocation));
} catch (SQLException e) {
logger.error("Unable to save data", e);
}
@@ -185,8 +183,6 @@ public final class InetnumLocationDaoSqliteImpl implements InetnumLocationDao {
}
stmt.executeBatch();
connection.commit();
- logger.info(String.format("Batch executed Successfully \n\t%s", inetnumLocations.stream()
- .map(InetnumLocation::toString).collect(Collectors.joining("\n\t"))));
} catch (SQLException e) {
logger.error("Unable to save data", e);
}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/OsmReader.java b/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/OsmReader.java
index f0356531..cdffed4c 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/OsmReader.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/OsmReader.java
@@ -14,10 +14,11 @@ package org.apache.baremaps.openstreetmap;
+import java.io.IOException;
import java.io.InputStream;
import java.util.stream.Stream;
public interface OsmReader<T> {
- Stream<T> stream(InputStream inputStream);
+ Stream<T> stream(InputStream inputStream) throws IOException;
}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/function/OpenstreetmapDocumentMapper.java b/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/function/OpenstreetmapDocumentMapper.java
new file mode 100644
index 00000000..c794101b
--- /dev/null
+++ b/baremaps-core/src/main/java/org/apache/baremaps/openstreetmap/function/OpenstreetmapDocumentMapper.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.baremaps.openstreetmap.function;
+
+
+
+import java.util.function.Function;
+import org.apache.baremaps.openstreetmap.model.Element;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.LatLonShape;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.geo.Line;
+import org.locationtech.jts.geom.LineString;
+import org.locationtech.jts.geom.MultiPolygon;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.geom.Polygon;
+
+/**
+ * Maps an OpenStreetMap element to a Lucene document.
+ */
+public class OpenstreetmapDocumentMapper implements Function<Element, Document> {
+
+  /**
+   * Creates a document that stores the identifier and the tags of the element and, when its
+   * geometry is a point, a line string or a polygon, a "geometry" shape doc-value field.
+   * Multi-polygons are not indexed yet.
+   *
+   * @param element the element to map
+   * @return the Lucene document
+   */
+  @Override
+  public Document apply(Element element) {
+    var document = new Document();
+
+    document.add(new StoredField("id", element.getId()));
+
+    element.getTags().forEach((key, value) -> {
+      document.add(new StoredField(key, value));
+    });
+
+    var geometry = element.getGeometry();
+    if (geometry instanceof Point point) {
+      document.add(LatLonShape.createDocValueField("geometry", point.getY(), point.getX()));
+    } else if (geometry instanceof LineString lineString) {
+      var coordinates = lineString.getCoordinates();
+      document.add(LatLonShape.createDocValueField("geometry",
+          new Line(latitudes(coordinates), longitudes(coordinates))));
+    } else if (geometry instanceof Polygon polygon) {
+      document.add(LatLonShape.createDocValueField("geometry", toLucenePolygon(polygon)));
+    } else if (geometry instanceof MultiPolygon) {
+      // TODO: Implement MultiPolygon
+    }
+
+    return document;
+  }
+
+  /**
+   * Converts a JTS polygon into a Lucene polygon. The shell and the holes are converted ring by
+   * ring: Polygon#getCoordinates() concatenates the shell and all the holes, which does not form
+   * a single closed ring as soon as the polygon has a hole.
+   */
+  private static org.apache.lucene.geo.Polygon toLucenePolygon(Polygon polygon) {
+    var holes = new org.apache.lucene.geo.Polygon[polygon.getNumInteriorRing()];
+    for (int i = 0; i < holes.length; i++) {
+      var ring = polygon.getInteriorRingN(i).getCoordinates();
+      holes[i] = new org.apache.lucene.geo.Polygon(latitudes(ring), longitudes(ring));
+    }
+    var shell = polygon.getExteriorRing().getCoordinates();
+    return new org.apache.lucene.geo.Polygon(latitudes(shell), longitudes(shell), holes);
+  }
+
+  /** Returns the latitudes (y ordinates) of the coordinates, in order. */
+  private static double[] latitudes(org.locationtech.jts.geom.Coordinate[] coordinates) {
+    var lats = new double[coordinates.length];
+    for (int i = 0; i < coordinates.length; i++) {
+      lats[i] = coordinates[i].y;
+    }
+    return lats;
+  }
+
+  /** Returns the longitudes (x ordinates) of the coordinates, in order. */
+  private static double[] longitudes(org.locationtech.jts.geom.Coordinate[] coordinates) {
+    var lons = new double[coordinates.length];
+    for (int i = 0; i < coordinates.length; i++) {
+      lons[i] = coordinates[i].x;
+    }
+    return lons;
+  }
+}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateGeonamesIndex.java b/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateGeonamesIndex.java
index 95dd0b78..0351ad7b 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateGeonamesIndex.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateGeonamesIndex.java
@@ -12,30 +12,49 @@
package org.apache.baremaps.workflow.tasks;
-import org.apache.baremaps.geocoder.geonames.GeonamesGeocoder;
+import org.apache.baremaps.geocoder.GeonamesDocumentMapper;
+import org.apache.baremaps.geocoder.GeonamesReader;
+import org.apache.baremaps.geocoder.GeocoderConstants;
import org.apache.baremaps.workflow.Task;
import org.apache.baremaps.workflow.WorkflowContext;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.MMapDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.nio.file.Path;
-public record CreateGeonamesIndex(String geonamesDumpPath, String targetGeonamesIndexPath) implements Task {
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+/**
+ * A task that creates a geonames index.
+ */
+public record CreateGeonamesIndex(String dataFile, String indexDirectory) implements Task {
private static final Logger logger = LoggerFactory.getLogger(CreateGeonamesIndex.class);
@Override
public void execute(WorkflowContext context) throws Exception {
- logger.info("Generating geonames from {}", geonamesDumpPath);
- try (GeonamesGeocoder geocoder =
- new GeonamesGeocoder(Path.of(targetGeonamesIndexPath), Path.of(geonamesDumpPath))) {
- if (!geocoder.indexExists()) {
- logger.info("Building the geocoder index");
- geocoder.build();
- }
- } catch(Exception e) {
- logger.error("Error while creating the geocoder index", e);
- return;
- }
- logger.info("Finished creating the Geocoder index {}", targetGeonamesIndexPath);
+    logger.info("Indexing {}", dataFile);
+
+    var dataPath = Paths.get(dataFile);
+    var indexPath = Paths.get(indexDirectory);
+    var config = new IndexWriterConfig(GeocoderConstants.ANALYZER);
+
+    // The directory is managed by the try-with-resources as well, so that it is closed after
+    // the writer (resources are closed in reverse declaration order).
+    try (var directory = MMapDirectory.open(indexPath);
+        var indexWriter = new IndexWriter(directory, config);
+        var inputStream = Files.newInputStream(dataPath)) {
+      // Start from an empty index so that re-running the task does not duplicate documents.
+      indexWriter.deleteAll();
+      var documents = new GeonamesReader()
+          .stream(inputStream)
+          .map(new GeonamesDocumentMapper());
+      indexWriter.addDocuments((Iterable<Document>) documents::iterator);
+    } catch (IOException exception) {
+      // Keep the cause so that the origin of the failure is not lost.
+      throw new RuntimeException("Unable to index " + dataFile, exception);
+    }
+
+    logger.info("Finished indexing {}", indexDirectory);
}
}
diff --git a/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateIplocIndex.java b/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateIplocIndex.java
index 7b655917..31801de1 100644
--- a/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateIplocIndex.java
+++ b/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/CreateIplocIndex.java
@@ -12,7 +12,6 @@
package org.apache.baremaps.workflow.tasks;
-import org.apache.baremaps.geocoder.geonames.GeonamesGeocoder;
import org.apache.baremaps.iploc.IpLoc;
import org.apache.baremaps.iploc.data.IpLocStats;
import org.apache.baremaps.iploc.database.SqliteUtils;
@@ -20,6 +19,9 @@ import org.apache.baremaps.iploc.nic.NicParser;
import org.apache.baremaps.stream.StreamException;
import org.apache.baremaps.workflow.Task;
import org.apache.baremaps.workflow.WorkflowContext;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.MMapDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,66 +30,59 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.List;
-public record CreateIplocIndex(String geonamesIndexPath, List<String> nicPaths,
- String targetIplocIndexPath) implements Task {
+public record CreateIplocIndex(
+ String geonamesIndexPath,
+ List<String> nicPaths,
+ String targetIplocIndexPath
+) implements Task {
- private static final Logger logger = LoggerFactory.getLogger(CreateIplocIndex.class);
+ private static final Logger logger = LoggerFactory.getLogger(CreateIplocIndex.class);
- @Override
- public void execute(WorkflowContext context) throws Exception {
- logger.info("Generating Iploc from {} {}", geonamesIndexPath, nicPaths);
+ @Override
+ public void execute(WorkflowContext context) throws Exception {
+ logger.info("Generating Iploc from {} {}", geonamesIndexPath, nicPaths);
- logger.info("Creating the Geocoder");
- GeonamesGeocoder geocoder;
- try {
- geocoder = new GeonamesGeocoder(Path.of(geonamesIndexPath), null);
- if (!geocoder.indexExists()) {
- logger.error("Geocoder index doesn't exist");
- return;
- }
- geocoder.open();
- } catch (Exception e) {
- logger.error("Error while creating the geocoder index", e);
- return;
- }
-
- logger.info("Creating the Iploc database");
- String jdbcUrl = String.format("JDBC:sqlite:%s", targetIplocIndexPath);
-
- SqliteUtils.executeResource(jdbcUrl, "iploc_init.sql");
- IpLoc ipLoc = new IpLoc(jdbcUrl, geocoder);
-
- logger.info("Generating NIC objects stream");
- nicPaths.stream().parallel().forEach(path -> {
- try (InputStream inputStream = new BufferedInputStream(Files.newInputStream(Path.of(path)));) {
- var nicObjects = NicParser.parse(inputStream);
- logger.info("Inserting the nic objects into the Iploc database");
- ipLoc.insertNicObjects(nicObjects);
- } catch (IOException e) {
- throw new StreamException(e);
- }
- });
+ try (
+ var directory = MMapDirectory.open(Paths.get(geonamesIndexPath));
+ var searcherManager = new SearcherManager(directory, new SearcherFactory())
+ ) {
+ logger.info("Creating the Iploc database");
+ String jdbcUrl = String.format("JDBC:sqlite:%s", targetIplocIndexPath);
- IpLocStats ipLocStats = ipLoc.getIplocStats();
+ SqliteUtils.executeResource(jdbcUrl, "iploc_init.sql");
+ IpLoc ipLoc = new IpLoc(jdbcUrl, searcherManager);
- logger.info(
- """
- IpLoc stats
- -----------
- inetnumInsertedByAddress : {}
- inetnumInsertedByDescr : {}
- inetnumInsertedByCountry : {}
- inetnumInsertedByCountryCode : {}
- inetnumInsertedByGeoloc : {}
- inetnumNotInserted : {}""",
- ipLocStats.getInsertedByAddressCount(), ipLocStats.getInsertedByDescrCount(),
- ipLocStats.getInsertedByCountryCount(), ipLocStats.getInsertedByCountryCodeCount(),
- ipLocStats.getInsertedByGeolocCount(), ipLocStats.getNotInsertedCount());
-
- logger.info("IpLoc database created successfully");
+ logger.info("Generating NIC objects stream");
+ nicPaths.stream().parallel().forEach(path -> {
+ try (InputStream inputStream = new BufferedInputStream(Files.newInputStream(Path.of(path)));) {
+ var nicObjects = NicParser.parse(inputStream);
+ logger.info("Inserting the nic objects into the Iploc database");
+ ipLoc.insertNicObjects(nicObjects);
+ } catch (IOException e) {
+ throw new StreamException(e);
+ }
+ });
- logger.info("Finished creating the Geocoder index {}", targetIplocIndexPath);
+ IpLocStats ipLocStats = ipLoc.getIplocStats();
+ logger.info(
+ """
+ IpLoc stats
+ -----------
+ inetnumInsertedByAddress : {}
+ inetnumInsertedByDescr : {}
+ inetnumInsertedByCountry : {}
+ inetnumInsertedByCountryCode : {}
+ inetnumInsertedByGeoloc : {}
+ inetnumNotInserted : {}""",
+ ipLocStats.getInsertedByAddressCount(), ipLocStats.getInsertedByDescrCount(),
+ ipLocStats.getInsertedByCountryCount(), ipLocStats.getInsertedByCountryCodeCount(),
+ ipLocStats.getInsertedByGeolocCount(), ipLocStats.getNotInsertedCount()
+ );
}
+
+ logger.info("IpLoc database created successfully {}", targetIplocIndexPath);
+ }
}
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeocoderTest.java b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeocoderTest.java
deleted file mode 100644
index 062164ad..00000000
--- a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeocoderTest.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.Comparator;
-import java.util.stream.Stream;
-import org.apache.baremaps.geocoder.request.Request;
-import org.apache.baremaps.geocoder.response.Response;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.Query;
-import org.junit.jupiter.api.Test;
-
-class GeocoderTest {
-
- private static final String v1 = "a simple text";
- private static final String v2 = "a simple test";
-
- @Test
- public void buildAndSearch() throws IOException, ParseException {
- Path path = Files.createTempDirectory(Paths.get("."), "geocoder_");
- Geocoder geocoder = new Geocoder(path) {
- @Override
- protected Analyzer analyzer() {
- return new StandardAnalyzer();
- }
-
- @Override
- protected Stream<Document> documents() {
- Document d1 = new Document();
- d1.add(new Field("name", v1, TextField.TYPE_STORED));
- Document d2 = new Document();
- d2.add(new Field("name", v2, TextField.TYPE_STORED));
- return Stream.of(d1, d2);
- }
-
- @Override
- protected Query query(Analyzer analyzer, Request request) throws ParseException {
- return new QueryParser("name", analyzer).parse(request.query());
- }
- };
- geocoder.build();
-
- Response r3 = geocoder.search(new Request("simple", 10));
- assertEquals(2, r3.results().size());
-
- Response r1 = geocoder.search(new Request("text", 10));
- assertEquals(1, r1.results().size());
- assertEquals(v1, r1.results().get(0).data().name());
-
- Response r2 = geocoder.search(new Request("test", 10));
- assertEquals(1, r2.results().size());
- assertEquals(v2, r2.results().get(0).data().name());
-
- Files.walk(path).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
- }
-}
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeonamesReaderTest.java b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeonamesReaderTest.java
new file mode 100644
index 00000000..76ab91ed
--- /dev/null
+++ b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/GeonamesReaderTest.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.baremaps.geocoder;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.stream.Collectors;
+import org.apache.baremaps.testing.TestFiles;
+import org.junit.jupiter.api.Test;
+
+class GeonamesReaderTest {
+
+ @Test
+ void read() throws IOException {
+ var data = TestFiles.resolve("geonames/geocoder_sample.txt");
+ try (var inputStream = Files.newInputStream(data)) {
+ var reader = new GeonamesReader();
+ var stream = reader.stream(inputStream);
+
+ var list = stream.collect(Collectors.toList());
+ assertEquals(4, list.size());
+
+ var record = list.get(0);
+ assertEquals(1, record.getGeonameid());
+ assertEquals("HEIG", record.getAsciiname());
+ }
+ }
+}
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesGeocoderTest.java b/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesGeocoderTest.java
deleted file mode 100644
index a1830dd0..00000000
--- a/baremaps-core/src/test/java/org/apache/baremaps/geocoder/geonames/GeonamesGeocoderTest.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.baremaps.geocoder.geonames;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URISyntaxException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.Comparator;
-import org.apache.baremaps.geocoder.request.Request;
-import org.apache.baremaps.testing.TestFiles;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.junit.jupiter.api.Test;
-
-class GeonamesGeocoderTest {
-
- @Test
- public void buildAndSearch() throws IOException, URISyntaxException, ParseException {
- var path = Files.createTempDirectory(Paths.get("."), "geocoder_");
- var data = TestFiles.resolve("geonames/LI.txt");
- var geocoder = new GeonamesGeocoder(path, data);
- geocoder.build();
-
- var response = geocoder.search(new Request("Bim Alta Schloss", 1));
- assertEquals(1, response.results().size());
- assertEquals("Bim Alta Schloss", response.results().get(0).data().name());
-
- Files.walk(path).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
- }
-
- @Test
- public void buildAndSearchWithTheRightCountryCode()
- throws IOException, URISyntaxException, ParseException {
- var path = Files.createTempDirectory(Paths.get("."), "geocoder_");
- var data = TestFiles.resolve("geonames/LI.txt");
- var geocoder = new GeonamesGeocoder(path, data);
- geocoder.build();
-
- var response = geocoder.search(new Request("Bim Alta Schloss", 10, "LI"));
- assertEquals(10, response.results().size());
- assertEquals("Bim Alta Schloss", response.results().get(0).data().name());
-
- Files.walk(path).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
- }
-
- @Test
- public void buildAndSearchWithTheWrongCountryCode()
- throws IOException, URISyntaxException, ParseException {
- var path = Files.createTempDirectory(Paths.get("."), "geocoder_");
- var data = TestFiles.resolve("geonames/LI.txt");
- var geocoder = new GeonamesGeocoder(path, data);
- geocoder.build();
-
- var response = geocoder.search(new Request("Bim Alta Schloss", 10, "CH"));
- assertEquals(0, response.results().size());
-
- Files.walk(path).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
- }
-}
diff --git a/baremaps-core/src/test/java/org/apache/baremaps/iploc/IpLocTest.java b/baremaps-core/src/test/java/org/apache/baremaps/iploc/IpLocTest.java
index a38fe88c..489e74f1 100644
--- a/baremaps-core/src/test/java/org/apache/baremaps/iploc/IpLocTest.java
+++ b/baremaps-core/src/test/java/org/apache/baremaps/iploc/IpLocTest.java
@@ -15,7 +15,6 @@ package org.apache.baremaps.iploc;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
-import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
@@ -23,8 +22,6 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.baremaps.collection.utils.FileUtils;
-import org.apache.baremaps.geocoder.Geocoder;
-import org.apache.baremaps.geocoder.geonames.GeonamesGeocoder;
import org.apache.baremaps.iploc.data.InetnumLocation;
import org.apache.baremaps.iploc.data.Ipv4;
import org.apache.baremaps.iploc.data.Ipv4Range;
@@ -35,6 +32,11 @@ import org.apache.baremaps.iploc.database.SqliteUtils;
import org.apache.baremaps.iploc.nic.NicData;
import org.apache.baremaps.iploc.nic.NicObject;
import org.apache.baremaps.testing.TestFiles;
+import org.apache.baremaps.workflow.WorkflowContext;
+import org.apache.baremaps.workflow.tasks.CreateGeonamesIndex;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.MMapDirectory;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
@@ -53,20 +55,23 @@ class IpLocTest {
private static String databaseUrl;
@BeforeAll
- public static void beforeAll() throws IOException, URISyntaxException {
+ public static void beforeAll() throws Exception {
// Load the NIC sample objects
nicObjects = NicData.sample("ripe/simple_nic_sample.txt");
// Init the geocoderservice
directory = Files.createTempDirectory(Paths.get("."), "geocoder_");
+ // Create the geonames index
var data = TestFiles.resolve("geonames/geocoder_sample.txt");
- Geocoder geocoder = new GeonamesGeocoder(directory, data);
- geocoder.build();
+ var task = new CreateGeonamesIndex(data.toString(), directory.toString());
+ task.execute(new WorkflowContext());
// Create the IPLoc service
databaseUrl = String.format("JDBC:sqlite:%s", directory.resolve("test.db"));
- ipLoc = new IpLoc(databaseUrl, geocoder);
+ var dir = MMapDirectory.open(directory);
+ var searcherManager = new SearcherManager(dir, new SearcherFactory());
+ ipLoc = new IpLoc(databaseUrl, searcherManager);
// Accessor for the database
inetnumLocationDao = new InetnumLocationDaoSqliteImpl(databaseUrl);
@@ -78,7 +83,7 @@ class IpLocTest {
}
@BeforeEach
- public void beforeEach() throws IOException, URISyntaxException, SQLException {
+ public void beforeEach() throws IOException, SQLException {
SqliteUtils.executeResource(databaseUrl, "iploc_init.sql");
}
diff --git a/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResources.java b/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResources.java
index 38376945..743ab3ca 100644
--- a/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResources.java
+++ b/baremaps-server/src/main/java/org/apache/baremaps/server/GeocoderResources.java
@@ -18,45 +18,77 @@ import static javax.ws.rs.core.MediaType.APPLICATION_JSON;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import javax.inject.Inject;
import javax.inject.Singleton;
-import javax.ws.rs.GET;
-import javax.ws.rs.PathParam;
-import javax.ws.rs.QueryParam;
-import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.*;
import javax.ws.rs.core.Response;
-import org.apache.baremaps.geocoder.Geocoder;
-import org.apache.baremaps.geocoder.request.Request;
-import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.baremaps.geocoder.GeonamesQueryBuilder;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.SearcherManager;
+
@Singleton
@javax.ws.rs.Path("/")
public class GeocoderResources {
- private final Geocoder geocoder;
+ record GeocoderResponse(List<GeocoderResult> results) {}
+
+
+ record GeocoderResult(float score, Map<String, Object> data) {}
+
+
+ private final SearcherManager searcherManager;
@Inject
- public GeocoderResources(Geocoder geocoder) {
- this.geocoder = geocoder;
+ public GeocoderResources(SearcherManager searcherManager) {
+ this.searcherManager = searcherManager;
}
@GET
@javax.ws.rs.Path("/api/geocoder")
- public Response getIpToLocation(@QueryParam("address") String address) {
- if (address == null) {
+ public Response getIpToLocation(@QueryParam("queryText") String queryText,
+ @QueryParam("countryCode") @DefaultValue("") String countryCode,
+ @QueryParam("limit") @DefaultValue("10") int limit) throws IOException {
+ if (queryText == null) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
- .entity("address parameter is mandatory").build());
+ .entity("The queryText parameter is mandatory").build());
}
-
+ var query = new GeonamesQueryBuilder().queryText(queryText).countryCode(countryCode).build();
+ var searcher = searcherManager.acquire();
try {
- var request = new Request(address, 20);
- var response = geocoder.search(request);
+ var result = searcher.search(query, limit);
+ var results =
+ Arrays.stream(result.scoreDocs).map(scoreDoc -> asResult(searcher, scoreDoc)).toList();
return Response.status(200).header(ACCESS_CONTROL_ALLOW_ORIGIN, "*")
- .header(CONTENT_TYPE, APPLICATION_JSON).entity(response).build();
+ .header(CONTENT_TYPE, APPLICATION_JSON).entity(new GeocoderResponse(results)).build();
} catch (IllegalArgumentException e) {
return Response.status(400).entity(e.getMessage()).build();
- } catch (IOException | ParseException e) {
+ } catch (IOException e) {
return Response.status(500).entity(e.getMessage()).build();
+ } finally {
+ searcherManager.release(searcher);
+ }
+ }
+
+ private GeocoderResult asResult(IndexSearcher indexSearcher, ScoreDoc scoreDoc) {
+ try {
+ var document = indexSearcher.doc(scoreDoc.doc);
+ var data = new HashMap<String, Object>();
+ for (var field : document.getFields()) {
+ if (field.numericValue() != null) {
+ data.put(field.name(), field.numericValue());
+ } else if (field.stringValue() != null) {
+ data.put(field.name(), field.stringValue());
+ }
+ }
+ return new GeocoderResult(scoreDoc.score, data);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
}
diff --git a/baremaps-server/src/main/resources/geocoder/index.html b/baremaps-server/src/main/resources/geocoder/index.html
index 5058f08c..2662e311 100644
--- a/baremaps-server/src/main/resources/geocoder/index.html
+++ b/baremaps-server/src/main/resources/geocoder/index.html
@@ -21,116 +21,62 @@
</head>
<body>
<!-- A form to submit the IP address -->
-<form onsubmit="searchByAddress(event)">
- <label for="address">Address:</label>
- <input type="text" id="address" name="address" value=""/>
+<form id="searchForm">
+ <input type="text" id="queryText" name="queryText" placeholder="Query text" value=""/>
+ <input type="text" id="countryCode" name="countryCode" placeholder="Country code" value=""/>
<input type="submit" value="Submit"/>
</form>
<!-- The table of retrieved locations -->
<table id="results"></table>
+
<script>
- function searchByAddress(event) {
- event.preventDefault();
- // Get the IP address from the form
- var address = document.getElementById('address').value;
+ // Add a listener to submit the search form
+ const searchForm = document.getElementById('searchForm');
+ searchForm.addEventListener('submit', search);
- // Make a query to retrieve the location from the address
- const request = new XMLHttpRequest();
- request.open('GET', `http://localhost:9000/api/geocoder?address=${address}`, true);
+ // Get the table of results
+ const table = document.getElementById('results');
- // Set request Accept header to application/json
- request.setRequestHeader('Accept', 'application/json');
+ function search(event) {
+ event.preventDefault();
- // Display the request result in the map
- request.onload = function () {
- if (request.status >= 200 && request.status < 400) {
- // Success!
- const response = JSON.parse(request.responseText);
+ // Build the query string
+ const formData = new FormData(searchForm);
+ const queryString = new URLSearchParams(formData).toString()
- // Write the result address in a href
- // Fill the table of geo locations from the resulting geoLocations
- // Geo locations contain an address, an ipv4Range, a location, a network and a country
- const table = document.getElementById('results');
+ // Send the search request
+ fetch(`${window.location.origin}/api/geocoder?${queryString}`)
+ .then(response => response.json())
+ .then(response => {
+ // Extract the headers from the first row
+ const headers = response.results && response.results.length > 0
+ ? Object.keys(response.results[0].data).sort()
+ : [];
+
+ // Clear the results table
table.innerHTML = '';
- // Insert header row
- table.insertRow().innerHTML =
- `<th>#</th>
- <th>Score</th>
- <th>Url</th>
- <th>Name</th>
- <th>Ascii name</th>
- <th>Alternate names</th>
- <th>Latitude</th>
- <th>Longitude</th>
- <th>Feature Class</th>
- <th>feature Code</th>
- <th>Country Code</th>
- <th>Country Code 2</th>
- <th>Admin1Code</th>
- <th>Admin2Code</th>
- <th>Admin3Code</th>
- <th>Admin4Code</th>
- <th>Population</th>
- <th>Elevation</th>
- <th>Dem</th>
- <th>Timezone</th>
- <th>Modification Date</th>`;
- for (let i = 0; i < response.results.length; i++) {
- const result = response.results[i];
- const row = table.insertRow(i + 1);
- let pos = 0;
- let cell = row.insertCell(pos++);
- cell.innerHTML = i + 1;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.score;
- cell = row.insertCell(pos++);
- cell.innerHTML = `<a href="http://www.openstreetmap.org/?mlat=${result.data.latitude}&mlon=${result.data.longitude}&zoom=15" target="_blank">OSM</a>`;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.name;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.asciiname;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.alternatenames;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.latitude;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.longitude;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.featureClass;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.featureCode;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.countryCode;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.cc2;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.admin1Code;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.admin2Code;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.admin3Code;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.admin4Code;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.population;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.elevation;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.dem;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.timezone;
- cell = row.insertCell(pos++);
- cell.innerHTML = result.data.modificationDate;
- }
- } else {
- // We reached our target server, but it returned an error
- console.log('Error');
- }
- };
- request.send();
+ // Insert the headers in the table
+ const headerRow = table.insertRow();
+ headerRow.innerHTML = `<th>#</th><th>score</th>${headers.map(header => `<th>${header}</th>`).join('')}`;
+
+ // Insert the results in the table
+ response.results.forEach((result, index) => {
+ const row = table.insertRow();
+ row.insertCell().innerText = index;
+ row.insertCell().innerText = result.score;
+ headers.forEach(header => {
+ if (result.data[header] !== undefined) {
+ row.insertCell().innerText = result.data[header];
+ } else {
+ row.insertCell().innerText = '';
+ }
+ });
+ });
+ })
+ .catch(error => console.error(error));
}
</script>
</body>
-</html>
\ No newline at end of file
+</html>
diff --git a/examples/geocoding/workflow.js b/examples/geocoding/workflow.js
index 6bd3f837..972d4a24 100644
--- a/examples/geocoding/workflow.js
+++ b/examples/geocoding/workflow.js
@@ -8,7 +8,7 @@ const FetchAndUnzipGeonames = {id: "fetch-geonames-allcountries", needs: [], tas
// Create the Geocoder index
const createGeonamesIndex = {id: "geocoder-index", needs: [FetchAndUnzipGeonames.id], tasks: [
- {type: "CreateGeonamesIndex", geonamesDumpPath: "archives/allCountries.txt", targetGeonamesIndexPath: "geocoder-index"}
+ {type: "CreateGeonamesIndex", dataFile: "archives/allCountries.txt", indexDirectory: "geocoder-index"}
]};
-export default {"steps": [FetchAndUnzipGeonames, createGeonamesIndex]};
\ No newline at end of file
+export default {"steps": [FetchAndUnzipGeonames, createGeonamesIndex]};
diff --git a/examples/ip-to-location/workflow.js b/examples/ip-to-location/workflow.js
index cd0b58ff..2c0acecb 100644
--- a/examples/ip-to-location/workflow.js
+++ b/examples/ip-to-location/workflow.js
@@ -27,26 +27,67 @@ const geonamesUrl =
"https://download.geonames.org/export/dump/allCountries.zip";
// Iterate over nic urls to create a list of downloads and ungzip
-const fetchAndUnzipNic = nics.map((nic,index) =>
- ({id: `fetch-nic-${index}`, needs: [], tasks: [
- {type: "DownloadUrl", url: nic.url, path: `downloads/${nic.filename}.gz`},
- {type: "UngzipFile", file: `downloads/${nic.filename}.gz`, directory: "archives"}
- ]}));
+const fetchAndUnzipNic = nics.map((nic, index) => ({
+ id: `fetch-nic-${index}`,
+ needs: [],
+ tasks: [
+ {
+ type: "DownloadUrl",
+ url: nic.url,
+ path: `downloads/${nic.filename}.gz`
+ },
+ {
+ type: "UngzipFile",
+ file: `downloads/${nic.filename}.gz`,
+ directory: "archives"
+ }
+ ]
+}));
// Fetch and unzip Geonames
-const FetchAndUnzipGeonames = {id: "fetch-geonames-allcountries", needs: [], tasks: [
- {type: "DownloadUrl", url: geonamesUrl, path: "downloads/geonames-allcountries.zip", force: true},
- {type: "UnzipFile", file: "downloads/geonames-allcountries.zip", directory: "archives"}
-]};
+const FetchAndUnzipGeonames = {
+ id: "fetch-geonames-allcountries",
+ needs: [],
+ tasks: [
+ {
+ type: "DownloadUrl",
+ url: geonamesUrl,
+ path: "downloads/geonames-allcountries.zip",
+ force: true
+ },
+ {
+ type: "UnzipFile",
+ file: "downloads/geonames-allcountries.zip",
+ directory: "archives"
+ }
+ ]
+};
// Create the Geocoder index
-const createGeonamesIndex = {id: "geocoder-index", needs: [FetchAndUnzipGeonames.id], tasks: [
- {type: "CreateGeonamesIndex", geonamesDumpPath: "archives/allCountries.txt", targetGeonamesIndexPath: "geocoder-index"}
-]};
+const createGeonamesIndex = {
+ id: "geocoder-index",
+ needs: [FetchAndUnzipGeonames.id],
+ tasks: [
+ {
+ type: "CreateGeonamesIndex",
+ dataFile: "archives/allCountries.txt",
+ indexDirectory: "geocoder-index"
+ }
+ ]
+};
// Create the iploc database
-const createIplocIndex = {id: "iploc-index", needs: fetchAndUnzipNic.map(e => e.id).concat([createGeonamesIndex.id]), tasks: [
- {type: "CreateIplocIndex", geonamesIndexPath: "geocoder-index", nicPaths: nics.map(nic => `archives/${nic.filename}`), targetIplocIndexPath: "iploc.db"}
-]};
+const createIplocIndex = {
+ id: "iploc-index",
+ needs: fetchAndUnzipNic.map(e => e.id).concat([createGeonamesIndex.id]),
+ tasks: [
+ {
+ type: "CreateIplocIndex",
+ geonamesIndexPath: "geocoder-index",
+ nicPaths: nics.map(nic => `archives/${nic.filename}`),
+ targetIplocIndexPath: "iploc.db"
+ }
+ ]
+};
-export default {"steps": fetchAndUnzipNic.concat([FetchAndUnzipGeonames, createGeonamesIndex, createIplocIndex])};
\ No newline at end of file
+export default {"steps": fetchAndUnzipNic.concat([FetchAndUnzipGeonames, createGeonamesIndex, createIplocIndex])};
diff --git a/pom.xml b/pom.xml
index ca1e9894..372487cf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,8 +102,8 @@
<version.lib.swagger-parser>2.0.24</version.lib.swagger-parser>
<version.lib.testcontainers>1.17.3</version.lib.testcontainers>
<version.lib.validation>2.0.2</version.lib.validation>
- <version.lucene>8.10.1</version.lucene>
- <version.lucene-spatial>8.4.1</version.lucene-spatial>
+ <version.lucene>9.4.2</version.lucene>
+ <version.lucene-spatial>9.4.2</version.lucene-spatial>
<version.plugin.jacoco-maven-plugin>0.8.8</version.plugin.jacoco-maven-plugin>
<version.plugin.jib-maven-plugin>3.0.0</version.plugin.jib-maven-plugin>
<version.plugin.maven-compiler-plugin>3.10.1</version.plugin.maven-compiler-plugin>
@@ -346,12 +346,12 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-replicator</artifactId>
- <version>${version.lucene-spatial}</version>
+ <version>${version.lucene}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-spatial</artifactId>
- <version>${version.lucene-spatial}</version>
+ <artifactId>lucene-spatial-extras</artifactId>
+ <version>${version.lucene}</version>
</dependency>
<dependency>
<groupId>org.apache.sis.core</groupId>