You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/08/01 12:08:27 UTC
svn commit: r1693700 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/util/
sandbox/src/java/org/apache/lucene/util/
sandbox/src/test/org/apache/lucene/search/
sandbox/src/test/org/apache/lucene/util/
Author: mikemccand
Date: Sat Aug 1 10:08:26 2015
New Revision: 1693700
URL: http://svn.apache.org/r1693700
Log:
LUCENE-6647: add GeoHash string utility APIs
Added:
lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java (with props)
lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/
lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1693700&r1=1693699&r2=1693700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sat Aug 1 10:08:26 2015
@@ -147,6 +147,9 @@ New Features
filtering. Range trees can also handle values larger than 64 bits.
(Adrien Grand, Mike McCandless)
+* LUCENE-6647: Add GeoHash string utility APIs (Nick Knize, Mike
+ McCandless).
+
API Changes
* LUCENE-6508: Simplify Lock api, there is now just
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java?rev=1693700&r1=1693699&r2=1693700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java Sat Aug 1 10:08:26 2015
@@ -26,7 +26,8 @@ public final class BitUtil {
private static final long MAGIC[] = {
0x5555555555555555L, 0x3333333333333333L,
0x0F0F0F0F0F0F0F0FL, 0x00FF00FF00FF00FFL,
- 0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL
+ 0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL,
+ 0xAAAAAAAAAAAAAAAAL
};
// shift values for bit interleaving
private static final short SHIFT[] = {1, 2, 4, 8, 16};
@@ -144,6 +145,13 @@ public final class BitUtil {
return b;
}
+ /**
+ * flip flops odd with even bits
+ */
+ public static final long flipFlop(final long b) {
+ return ((b & MAGIC[6]) >>> 1) | ((b & MAGIC[0]) << 1 );
+ }
+
/** Same as {@link #zigZagEncode(long)} but on integers. */
public static int zigZagEncode(int i) {
return (i >> 31) ^ (i << 1);
Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java?rev=1693700&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java Sat Aug 1 10:08:26 2015
@@ -0,0 +1,153 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utilities for converting to/from the GeoHash standard
+ *
+ * The geohash long format is represented as lon/lat (x/y) interleaved with the 4 least significant bits
+ * representing the level (1-12) [xyxy...xyxyllll]
+ *
+ * This differs from a morton encoded value which interleaves lat/lon (y/x).
+ *
+ * @lucene.experimental
+ */
+public class GeoHashUtils {
+ public static final char[] BASE_32 = {'0', '1', '2', '3', '4', '5', '6',
+ '7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm', 'n',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
+
+ public static final String BASE_32_STRING = new String(BASE_32);
+
+ public static final int PRECISION = 12;
+ private static final short MORTON_OFFSET = (GeoUtils.BITS<<1) - (PRECISION*5);
+
+ /**
+ * Encode lon/lat to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
+ */
+ public static final long longEncode(final double lon, final double lat, final int level) {
+ // shift to appropriate level
+ final short msf = (short)(((12 - level) * 5) + MORTON_OFFSET);
+ return ((BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat)) >>> msf) << 4) | level;
+ }
+
+ /**
+ * Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
+ */
+ public static final long longEncode(final String hash) {
+ int level = hash.length()-1;
+ long b;
+ long l = 0L;
+ for(char c : hash.toCharArray()) {
+ b = (long)(BASE_32_STRING.indexOf(c));
+ l |= (b<<(level--*5));
+ }
+ return (l<<4)|hash.length();
+ }
+
+ /**
+ * Encode to a geohash string from the geohash based long format
+ */
+ public static final String stringEncode(long geoHashLong) {
+ int level = (int)geoHashLong&15;
+ geoHashLong >>>= 4;
+ char[] chars = new char[level];
+ do {
+ chars[--level] = BASE_32[(int)(geoHashLong&31L)];
+ geoHashLong>>>=5;
+ } while(level > 0);
+
+ return new String(chars);
+ }
+
+ /**
+ * Encode to a geohash string from full resolution longitude, latitude)
+ */
+ public static final String stringEncode(final double lon, final double lat) {
+ return stringEncode(lon, lat, 12);
+ }
+
+ /**
+ * Encode to a level specific geohash string from full resolution longitude, latitude
+ */
+ public static final String stringEncode(final double lon, final double lat, final int level) {
+ // bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
+ final long hashedVal = BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat));
+
+ StringBuilder geoHash = new StringBuilder();
+ short precision = 0;
+ final short msf = (GeoUtils.BITS<<1)-5;
+ long mask = 31L<<msf;
+ do {
+ geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
+ // next 5 bits
+ mask >>>= 5;
+ } while (++precision < level);
+ return geoHash.toString();
+ }
+
+ /**
+ * Encode to a full precision geohash string from a given morton encoded long value
+ */
+ public static final String stringEncodeFromMortonLong(final long hashedVal) throws Exception {
+ return stringEncode(hashedVal, PRECISION);
+ }
+
+ /**
+ * Encode to a geohash string at a given level from a morton long
+ */
+ public static final String stringEncodeFromMortonLong(long hashedVal, final int level) {
+ // bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
+ hashedVal = BitUtil.flipFlop(hashedVal);
+
+ StringBuilder geoHash = new StringBuilder();
+ short precision = 0;
+ final short msf = (GeoUtils.BITS<<1)-5;
+ long mask = 31L<<msf;
+ do {
+ geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
+ // next 5 bits
+ mask >>>= 5;
+ } while (++precision < level);
+ return geoHash.toString();
+ }
+
+ /**
+ * Encode to a morton long value from a given geohash string
+ */
+ public static final long mortonEncode(final String hash) {
+ int level = 11;
+ long b;
+ long l = 0L;
+ for(char c : hash.toCharArray()) {
+ b = (long)(BASE_32_STRING.indexOf(c));
+ l |= (b<<((level--*5) + MORTON_OFFSET));
+ }
+ return BitUtil.flipFlop(l);
+ }
+
+ /**
+ * Encode to a morton long value from a given geohash long value
+ */
+ public static final long mortonEncode(final long geoHashLong) {
+ final int level = (int)(geoHashLong&15);
+ final short odd = (short)(level & 1);
+
+ return BitUtil.flipFlop((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd));
+ }
+}
Modified: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java?rev=1693700&r1=1693699&r2=1693700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java Sat Aug 1 10:08:26 2015
@@ -46,6 +46,7 @@ import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SloppyMath;
+import org.apache.lucene.util.TestGeoUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -68,37 +69,10 @@ public class TestGeoPointQuery extends L
// determining the possible haversine error
private static final int DISTANCE_ERR = 1000;
- // Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
- private static double originLat;
- private static double originLon;
-// private static double range;
- private static double lonRange;
- private static double latRange;
-
@BeforeClass
public static void beforeClass() throws Exception {
directory = newDirectory();
- // when we randomly test the full lat/lon space it can result in very very slow query times, this is due to the
- // number of ranges that can be created in degenerate cases.
-
- // Between 1.0 and 3.0:
-// range = 2*(random().nextDouble() + 0.5);
- // Between 1.0 and 90.0
- //lonRange = 1.0 + (90.0 - 1.0) * random().nextDouble();
- //latRange = 1.0 + (45.0 - 1.0) * random().nextDouble();
-
- // Between 1.0 and 3.0:
- lonRange = 2*(random().nextDouble() + 0.5);
- latRange = 2*(random().nextDouble() + 0.5);
-
- originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2*lonRange) * random().nextDouble();
- originLon = GeoUtils.normalizeLon(originLon);
- originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2*latRange) * random().nextDouble();
- originLat = GeoUtils.normalizeLat(originLat);
- if (VERBOSE) {
- System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
- }
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
@@ -306,13 +280,13 @@ public class TestGeoPointQuery extends L
if (x == 0) {
// Identical lat to old point
lats[docID] = lats[oldDocID];
- lons[docID] = randomLon();
+ lons[docID] = TestGeoUtils.randomLon();
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
}
} else if (x == 1) {
// Identical lon to old point
- lats[docID] = randomLat();
+ lats[docID] = TestGeoUtils.randomLat();
lons[docID] = lons[oldDocID];
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
@@ -327,8 +301,8 @@ public class TestGeoPointQuery extends L
}
}
} else {
- lats[docID] = randomLat();
- lons[docID] = randomLon();
+ lats[docID] = TestGeoUtils.randomLat();
+ lons[docID] = TestGeoUtils.randomLon();
haveRealDoc = true;
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
@@ -618,19 +592,11 @@ public class TestGeoPointQuery extends L
|| (tMaxLon - tLon) == 0 || (tMaxLat - tLat) == 0);
}
- private static double randomLat() {
- return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
- }
-
- private static double randomLon() {
- return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
- }
-
private static GeoBoundingBox randomBBox() {
- double lat0 = randomLat();
- double lat1 = randomLat();
- double lon0 = randomLon();
- double lon1 = randomLon();
+ double lat0 = TestGeoUtils.randomLat();
+ double lat1 = TestGeoUtils.randomLat();
+ double lon0 = TestGeoUtils.randomLon();
+ double lon1 = TestGeoUtils.randomLon();
if (lat1 < lat0) {
double x = lat0;
Added: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java?rev=1693700&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java Sat Aug 1 10:08:26 2015
@@ -0,0 +1,100 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests class for methods in GeoUtils
+ *
+ * @lucene.experimental
+ */
+public class TestGeoUtils extends LuceneTestCase {
+
+ // Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
+ private static double originLat;
+ private static double originLon;
+ // private static double range;
+ private static double lonRange;
+ private static double latRange;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // Between 1.0 and 3.0:
+ lonRange = 2 * (random().nextDouble() + 0.5);
+ latRange = 2 * (random().nextDouble() + 0.5);
+
+ originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2 * lonRange) * random().nextDouble();
+ originLon = GeoUtils.normalizeLon(originLon);
+ originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble();
+ originLat = GeoUtils.normalizeLat(originLat);
+
+ if (VERBOSE) {
+ System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
+ }
+ }
+
+ @Test
+ public void testGeoHash() {
+ int numPoints = atLeast(100);
+ String randomGeoHashString;
+ String mortonGeoHash;
+ long mortonLongFromGHLong, geoHashLong, mortonLongFromGHString;
+ int randomLevel;
+ for (int i = 0; i < numPoints; ++i) {
+ // random point
+ double lat = randomLatFullRange();
+ double lon = randomLonFullRange();
+
+ // compute geohash straight from lat/lon and from morton encoded value to ensure they're the same
+ randomGeoHashString = GeoHashUtils.stringEncode(lon, lat, randomLevel = random().nextInt(12 - 1) + 1);
+ mortonGeoHash = GeoHashUtils.stringEncodeFromMortonLong(GeoUtils.mortonHash(lon, lat), randomLevel);
+ assertEquals(randomGeoHashString, mortonGeoHash);
+
+ // v&v conversion from lat/lon or geohashstring to geohash long and back to geohash string
+ geoHashLong = (random().nextBoolean()) ? GeoHashUtils.longEncode(lon, lat, randomLevel) : GeoHashUtils.longEncode(randomGeoHashString);
+ assertEquals(randomGeoHashString, GeoHashUtils.stringEncode(geoHashLong));
+
+ // v&v conversion from geohash long to morton long
+ mortonLongFromGHString = GeoHashUtils.mortonEncode(randomGeoHashString);
+ mortonLongFromGHLong = GeoHashUtils.mortonEncode(geoHashLong);
+ assertEquals(mortonLongFromGHLong, mortonLongFromGHString);
+
+ // v&v lat/lon from geohash string and geohash long
+ assertEquals(GeoUtils.mortonUnhashLat(mortonLongFromGHString), GeoUtils.mortonUnhashLat(mortonLongFromGHLong), 0);
+ assertEquals(GeoUtils.mortonUnhashLon(mortonLongFromGHString), GeoUtils.mortonUnhashLon(mortonLongFromGHLong), 0);
+ }
+ }
+
+ public static double randomLatFullRange() {
+ return (180d * random().nextDouble()) - 90d;
+ }
+
+ public static double randomLonFullRange() {
+ return (360d * random().nextDouble()) - 180d;
+ }
+
+ public static double randomLat() {
+ return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
+ }
+
+ public static double randomLon() {
+ return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
+ }
+}