You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/08/01 12:08:27 UTC

svn commit: r1693700 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/util/ sandbox/src/java/org/apache/lucene/util/ sandbox/src/test/org/apache/lucene/search/ sandbox/src/test/org/apache/lucene/util/

Author: mikemccand
Date: Sat Aug  1 10:08:26 2015
New Revision: 1693700

URL: http://svn.apache.org/r1693700
Log:
LUCENE-6647: add GeoHash string utility APIs

Added:
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java   (with props)
    lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/
    lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
    lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1693700&r1=1693699&r2=1693700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sat Aug  1 10:08:26 2015
@@ -147,6 +147,9 @@ New Features
   filtering.  Range trees can also handle values larger than 64 bits.
   (Adrien Grand, Mike McCandless)
 
+* LUCENE-6647: Add GeoHash string utility APIs (Nick Knize, Mike
+  McCandless).
+
 API Changes
 
 * LUCENE-6508: Simplify Lock api, there is now just 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java?rev=1693700&r1=1693699&r2=1693700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BitUtil.java Sat Aug  1 10:08:26 2015
@@ -26,7 +26,8 @@ public final class BitUtil {
   private static final long MAGIC[] = {
       0x5555555555555555L, 0x3333333333333333L,
       0x0F0F0F0F0F0F0F0FL, 0x00FF00FF00FF00FFL,
-      0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL
+      0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL,
+      0xAAAAAAAAAAAAAAAAL
   };
   // shift values for bit interleaving
   private static final short SHIFT[] = {1, 2, 4, 8, 16};
@@ -144,6 +145,13 @@ public final class BitUtil {
     return b;
   }
 
+  /**
+   * flip flops odd with even bits
+   */
+  public static final long flipFlop(final long b) {
+    return ((b & MAGIC[6]) >>> 1) | ((b & MAGIC[0]) << 1 );
+  }
+
    /** Same as {@link #zigZagEncode(long)} but on integers. */
    public static int zigZagEncode(int i) {
      return (i >> 31) ^ (i << 1);

Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java?rev=1693700&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/util/GeoHashUtils.java Sat Aug  1 10:08:26 2015
@@ -0,0 +1,153 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utilities for converting to/from the GeoHash standard
+ *
+ * The geohash long format is represented as lon/lat (x/y) interleaved with the 4 least significant bits
+ * representing the level (1-12) [xyxy...xyxyllll]
+ *
+ * This differs from a morton encoded value which interleaves lat/lon (y/x).
+ *
+ * @lucene.experimental
+ */
+public class GeoHashUtils {
+  public static final char[] BASE_32 = {'0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm', 'n',
+      'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
+
+  public static final String BASE_32_STRING = new String(BASE_32);
+
+  public static final int PRECISION = 12;
+  private static final short MORTON_OFFSET = (GeoUtils.BITS<<1) - (PRECISION*5);
+
+  /**
+   * Encode lon/lat to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
+   */
+  public static final long longEncode(final double lon, final double lat, final int level) {
+    // shift to appropriate level
+    final short msf = (short)(((12 - level) * 5) + MORTON_OFFSET);
+    return ((BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat)) >>> msf) << 4) | level;
+  }
+
+  /**
+   * Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
+   */
+  public static final long longEncode(final String hash) {
+    int level = hash.length()-1;
+    long b;
+    long l = 0L;
+    for(char c : hash.toCharArray()) {
+      b = (long)(BASE_32_STRING.indexOf(c));
+      l |= (b<<(level--*5));
+    }
+    return (l<<4)|hash.length();
+  }
+
+  /**
+   * Encode to a geohash string from the geohash based long format
+   */
+  public static final String stringEncode(long geoHashLong) {
+    int level = (int)geoHashLong&15;
+    geoHashLong >>>= 4;
+    char[] chars = new char[level];
+    do {
+      chars[--level] = BASE_32[(int)(geoHashLong&31L)];
+      geoHashLong>>>=5;
+    } while(level > 0);
+
+    return new String(chars);
+  }
+
+  /**
+   * Encode to a geohash string from full resolution longitude, latitude)
+   */
+  public static final String stringEncode(final double lon, final double lat) {
+    return stringEncode(lon, lat, 12);
+  }
+
+  /**
+   * Encode to a level specific geohash string from full resolution longitude, latitude
+   */
+  public static final String stringEncode(final double lon, final double lat, final int level) {
+    // bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
+    final long hashedVal = BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat));
+
+    StringBuilder geoHash = new StringBuilder();
+    short precision = 0;
+    final short msf = (GeoUtils.BITS<<1)-5;
+    long mask = 31L<<msf;
+    do {
+      geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
+      // next 5 bits
+      mask >>>= 5;
+    } while (++precision < level);
+    return geoHash.toString();
+  }
+
+  /**
+   * Encode to a full precision geohash string from a given morton encoded long value
+   */
+  public static final String stringEncodeFromMortonLong(final long hashedVal) throws Exception {
+    return stringEncode(hashedVal, PRECISION);
+  }
+
+  /**
+   * Encode to a geohash string at a given level from a morton long
+   */
+  public static final String stringEncodeFromMortonLong(long hashedVal, final int level) {
+    // bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
+    hashedVal = BitUtil.flipFlop(hashedVal);
+
+    StringBuilder geoHash = new StringBuilder();
+    short precision = 0;
+    final short msf = (GeoUtils.BITS<<1)-5;
+    long mask = 31L<<msf;
+    do {
+      geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
+      // next 5 bits
+      mask >>>= 5;
+    } while (++precision < level);
+    return geoHash.toString();
+  }
+
+  /**
+   * Encode to a morton long value from a given geohash string
+   */
+  public static final long mortonEncode(final String hash) {
+    int level = 11;
+    long b;
+    long l = 0L;
+    for(char c : hash.toCharArray()) {
+      b = (long)(BASE_32_STRING.indexOf(c));
+      l |= (b<<((level--*5) + MORTON_OFFSET));
+    }
+    return BitUtil.flipFlop(l);
+  }
+
+  /**
+   * Encode to a morton long value from a given geohash long value
+   */
+  public static final long mortonEncode(final long geoHashLong) {
+    final int level = (int)(geoHashLong&15);
+    final short odd = (short)(level & 1);
+
+    return BitUtil.flipFlop((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd));
+  }
+}

Modified: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java?rev=1693700&r1=1693699&r2=1693700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/search/TestGeoPointQuery.java Sat Aug  1 10:08:26 2015
@@ -46,6 +46,7 @@ import org.apache.lucene.util.GeoUtils;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.SloppyMath;
+import org.apache.lucene.util.TestGeoUtils;
 import org.apache.lucene.util.TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -68,37 +69,10 @@ public class TestGeoPointQuery extends L
   // determining the possible haversine error
   private static final int DISTANCE_ERR = 1000;
 
-  // Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
-  private static double originLat;
-  private static double originLon;
-//  private static double range;
-  private static double lonRange;
-  private static double latRange;
-
   @BeforeClass
   public static void beforeClass() throws Exception {
     directory = newDirectory();
 
-    // when we randomly test the full lat/lon space it can result in very very slow query times, this is due to the
-    // number of ranges that can be created in degenerate cases.
-
-    // Between 1.0 and 3.0:
-//    range = 2*(random().nextDouble() + 0.5);
-    // Between 1.0 and 90.0
-    //lonRange = 1.0 + (90.0 - 1.0) * random().nextDouble();
-    //latRange = 1.0 + (45.0 - 1.0) * random().nextDouble();
-
-    // Between 1.0 and 3.0:
-    lonRange = 2*(random().nextDouble() + 0.5);
-    latRange = 2*(random().nextDouble() + 0.5);
-
-    originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2*lonRange) * random().nextDouble();
-    originLon = GeoUtils.normalizeLon(originLon);
-    originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2*latRange) * random().nextDouble();
-    originLat = GeoUtils.normalizeLat(originLat);
-    if (VERBOSE) {
-      System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
-    }
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
             newIndexWriterConfig(new MockAnalyzer(random()))
                     .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
@@ -306,13 +280,13 @@ public class TestGeoPointQuery extends L
         if (x == 0) {
           // Identical lat to old point
           lats[docID] = lats[oldDocID];
-          lons[docID] = randomLon();
+          lons[docID] = TestGeoUtils.randomLon();
           if (VERBOSE) {
             //System.out.println("  doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
           }
         } else if (x == 1) {
           // Identical lon to old point
-          lats[docID] = randomLat();
+          lats[docID] = TestGeoUtils.randomLat();
           lons[docID] = lons[oldDocID];
           if (VERBOSE) {
             //System.out.println("  doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
@@ -327,8 +301,8 @@ public class TestGeoPointQuery extends L
           }
         }
       } else {
-        lats[docID] = randomLat();
-        lons[docID] = randomLon();
+        lats[docID] = TestGeoUtils.randomLat();
+        lons[docID] = TestGeoUtils.randomLon();
         haveRealDoc = true;
         if (VERBOSE) {
           //System.out.println("  doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
@@ -618,19 +592,11 @@ public class TestGeoPointQuery extends L
          || (tMaxLon - tLon) == 0 || (tMaxLat - tLat) == 0);
   }
 
-  private static double randomLat() {
-    return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
-  }
-
-  private static double randomLon() {
-    return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
-  }
-
   private static GeoBoundingBox randomBBox() {
-    double lat0 = randomLat();
-    double lat1 = randomLat();
-    double lon0 = randomLon();
-    double lon1 = randomLon();
+    double lat0 = TestGeoUtils.randomLat();
+    double lat1 = TestGeoUtils.randomLat();
+    double lon0 = TestGeoUtils.randomLon();
+    double lon1 = TestGeoUtils.randomLon();
 
     if (lat1 < lat0) {
       double x = lat0;

Added: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java?rev=1693700&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/util/TestGeoUtils.java Sat Aug  1 10:08:26 2015
@@ -0,0 +1,100 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests class for methods in GeoUtils
+ *
+ * @lucene.experimental
+ */
+public class TestGeoUtils extends LuceneTestCase {
+
+  // Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
+  private static double originLat;
+  private static double originLon;
+  //  private static double range;
+  private static double lonRange;
+  private static double latRange;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    // Between 1.0 and 3.0:
+    lonRange = 2 * (random().nextDouble() + 0.5);
+    latRange = 2 * (random().nextDouble() + 0.5);
+
+    originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2 * lonRange) * random().nextDouble();
+    originLon = GeoUtils.normalizeLon(originLon);
+    originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble();
+    originLat = GeoUtils.normalizeLat(originLat);
+
+    if (VERBOSE) {
+      System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
+    }
+  }
+
+  @Test
+  public void testGeoHash() {
+    int numPoints = atLeast(100);
+    String randomGeoHashString;
+    String mortonGeoHash;
+    long mortonLongFromGHLong, geoHashLong, mortonLongFromGHString;
+    int randomLevel;
+    for (int i = 0; i < numPoints; ++i) {
+      // random point
+      double lat = randomLatFullRange();
+      double lon = randomLonFullRange();
+
+      // compute geohash straight from lat/lon and from morton encoded value to ensure they're the same
+      randomGeoHashString = GeoHashUtils.stringEncode(lon, lat, randomLevel = random().nextInt(12 - 1) + 1);
+      mortonGeoHash = GeoHashUtils.stringEncodeFromMortonLong(GeoUtils.mortonHash(lon, lat), randomLevel);
+      assertEquals(randomGeoHashString, mortonGeoHash);
+
+      // v&v conversion from lat/lon or geohashstring to geohash long and back to geohash string
+      geoHashLong = (random().nextBoolean()) ? GeoHashUtils.longEncode(lon, lat, randomLevel) : GeoHashUtils.longEncode(randomGeoHashString);
+      assertEquals(randomGeoHashString, GeoHashUtils.stringEncode(geoHashLong));
+
+      // v&v conversion from geohash long to morton long
+      mortonLongFromGHString = GeoHashUtils.mortonEncode(randomGeoHashString);
+      mortonLongFromGHLong = GeoHashUtils.mortonEncode(geoHashLong);
+      assertEquals(mortonLongFromGHLong, mortonLongFromGHString);
+
+      // v&v lat/lon from geohash string and geohash long
+      assertEquals(GeoUtils.mortonUnhashLat(mortonLongFromGHString), GeoUtils.mortonUnhashLat(mortonLongFromGHLong), 0);
+      assertEquals(GeoUtils.mortonUnhashLon(mortonLongFromGHString), GeoUtils.mortonUnhashLon(mortonLongFromGHLong), 0);
+    }
+  }
+
+  public static double randomLatFullRange() {
+    return (180d * random().nextDouble()) - 90d;
+  }
+
+  public static double randomLonFullRange() {
+    return (360d * random().nextDouble()) - 180d;
+  }
+
+  public static double randomLat() {
+    return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
+  }
+
+  public static double randomLon() {
+    return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
+  }
+}