You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/09/07 03:47:00 UTC

[incubator-sedona] branch master updated: [SEDONA-163] Better handle of unsupported types in shapefile reader (#683)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new c4e37bae [SEDONA-163] Better handle of unsupported types in shapefile reader (#683)
c4e37bae is described below

commit c4e37bae0199d4b7fa22e60c68561715b517a8d1
Author: Jia Yu <ji...@apache.org>
AuthorDate: Tue Sep 6 20:46:50 2022 -0700

    [SEDONA-163] Better handle of unsupported types in shapefile reader (#683)
---
 LICENSE                                            |   9 +++
 .../shapefileParser/parseUtils/shp/ShapeType.java  |  66 +++++++++++++--------
 .../parseUtils/shp/TypeUnknownException.java       |   2 +-
 .../shapefileParser/shapes/CombineShapeReader.java |   8 ++-
 .../shapefileParser/shapes/ShpRecord.java          |   6 ++
 .../shapes/ShapefileReaderTest.java                |  19 ++----
 .../unsupported/UrbAdm3D_142166_Bu_Ground.dbf      | Bin 0 -> 6250 bytes
 .../unsupported/UrbAdm3D_142166_Bu_Ground.prj      |   1 +
 .../unsupported/UrbAdm3D_142166_Bu_Ground.shp      | Bin 0 -> 9052 bytes
 .../unsupported/UrbAdm3D_142166_Bu_Ground.shx      | Bin 0 -> 260 bytes
 10 files changed, 68 insertions(+), 43 deletions(-)

diff --git a/LICENSE b/LICENSE
index 9daca346..58c092c0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -213,9 +213,18 @@ BSD 2-Clause License
 --------------------------------------
 zeppelin/index.js (modified based on volume-leaflet: https://github.com/volumeint/helium-volume-leaflet)
 
+
 No-copyright data used in unit tests
 --------------------------------------
 
+UrbIS-Adm 3D from datastore.brussels (Creative Commons CC-0 licence, No copyright)
+---------------
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx
+
+
 TIGER/Line from United States Census Bureau
 ---------------
 core/src/test/resources/arealm-small.csv
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java
index 3b327445..e0036d25 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java
@@ -22,22 +22,45 @@ package org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.shp;
 import org.locationtech.jts.geom.GeometryFactory;
 
 import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
 
 public enum ShapeType
         implements Serializable
 {
-
-    UNDEFINED(0),
-    POINT(1),
-    POLYLINE(3),
-    POLYGON(5),
-    MULTIPOINT(8);
+    // The following IDs are defined in Shapefile specification
+    NULL(0, false),
+    POINT(1, true),
+    POLYLINE(3, true),
+    POLYGON(5, true),
+    MULTIPOINT(8, true),
+    POINTZ(11, false),
+    POLYLINEZ(13, false),
+    POLYGONZ(15, false),
+    MULTIPOINTZ(18, false),
+    POINTM(21, false),
+    POLYLINEM(23, false),
+    POLYGONM(25, false),
+    MULTIPOINTM(28, false),
+    MULTIPATCH(31, false),
+    // A normal shapefile should NOT have UNDEFINED type
+    UNDEFINED(-1, false);
 
     private final int id;
+    private final boolean supported;
+    // A lookup map for getting a Type from its id
+    private static final Map<Integer, ShapeType> lookup = new HashMap<Integer, ShapeType>();
+
+    static {
+        for (ShapeType s : ShapeType.values()) {
+            lookup.put(s.id, s);
+        }
+    }
 
-    ShapeType(int id)
+    ShapeType(int id, boolean supported)
     {
         this.id = id;
+        this.supported = supported;
     }
 
     /**
@@ -48,24 +71,8 @@ public enum ShapeType
      */
     public static ShapeType getType(int id)
     {
-        ShapeType type;
-        switch (id) {
-            case 1:
-                type = POINT;
-                break;
-            case 3:
-                type = POLYLINE;
-                break;
-            case 5:
-                type = POLYGON;
-                break;
-            case 8:
-                type = MULTIPOINT;
-                break;
-            default:
-                type = UNDEFINED;
-        }
-        return type;
+        ShapeType type = lookup.get(id);
+        return type == null ? UNDEFINED : type;
     }
 
     /**
@@ -99,4 +106,13 @@ public enum ShapeType
     {
         return id;
     }
+
+    /**
+     * Returns whether this shape type is supported by Sedona.
+     * @return {@code true} if this shape type is supported, {@code false} otherwise
+     */
+    public boolean isSupported()
+    {
+        return supported;
+    }
 }
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java
index 45f854f9..287f8243 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java
@@ -35,6 +35,6 @@ public class TypeUnknownException
      */
     public TypeUnknownException(int typeID)
     {
-        super("Unknown shape type " + typeID);
+        super("Unknown shape type " + ShapeType.getType(typeID).name());
     }
 }
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java
index fca1fbd5..7d8e0754 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java
@@ -144,11 +144,13 @@ public class CombineShapeReader
         boolean hasNextShp = shapeFileReader.nextKeyValue();
         if (hasDbf) { hasNextDbf = dbfFileReader.nextKeyValue(); }
 
-        int curShapeType = shapeFileReader.getCurrentValue().getTypeID();
-        while (hasNextShp && ShapeType.getType(curShapeType) == ShapeType.UNDEFINED) {
+        ShapeType curShapeType = shapeFileReader.getCurrentValue().getType();
+        while (hasNextShp && !curShapeType.isSupported()) {
+            logger.warn("[SEDONA] Shapefile type " + curShapeType.name() + " is not supported. Skipped this record." +
+                    " Please use QGIS or GeoPandas to convert it to a type listed in ShapeType.java");
             if (hasDbf) { hasNextDbf = dbfFileReader.nextKeyValue(); }
             hasNextShp = shapeFileReader.nextKeyValue();
-            curShapeType = shapeFileReader.getCurrentValue().getTypeID();
+            curShapeType = shapeFileReader.getCurrentValue().getType();
         }
         // check if records match in .shp and .dbf
         if (hasDbf) {
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java
index 65a2a6bc..b4880093 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java
@@ -20,6 +20,7 @@
 package org.apache.sedona.core.formatMapper.shapefileParser.shapes;
 
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.shp.ShapeType;
 
 import java.io.Serializable;
 
@@ -59,4 +60,9 @@ public class ShpRecord
     {
         return typeID;
     }
+
+    public ShapeType getType()
+    {
+        return ShapeType.getType(typeID);
+    }
 }
diff --git a/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java b/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java
index 904381bf..3e2b1cef 100644
--- a/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java
+++ b/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java
@@ -104,23 +104,14 @@ public class ShapefileReaderTest
      *
      * @throws IOException
      */
-    @Ignore
-    public void testShapefileEndWithUndefinedType()
+    @Test
+    public void testShapefileEndWithUnsupportedType()
             throws IOException
     {
-        // load shape with geotool.shapefile
-        String inputLocation = getShapeFilePath("undefined");
-        FeatureCollection<SimpleFeatureType, SimpleFeature> collection = loadFeatures(inputLocation);
-        // load shapes with our tool
+        // Read data that is in PolygonZ format
+        String inputLocation = getShapeFilePath("unsupported");
         SpatialRDD shapeRDD = ShapefileReader.readToGeometryRDD(sc, inputLocation);
-        FeatureIterator<SimpleFeature> features = collection.features();
-        int nullNum = 0;
-        while (features.hasNext()) {
-            SimpleFeature feature = features.next();
-            Geometry g = (Geometry) feature.getDefaultGeometry();
-            if (g == null) { nullNum++; }
-        }
-        assertEquals(shapeRDD.getRawSpatialRDD().count(), collection.size() - nullNum);
+        assertEquals(0, shapeRDD.getRawSpatialRDD().count());
     }
 
     /**
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf
new file mode 100644
index 00000000..936df456
Binary files /dev/null and b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf differ
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
new file mode 100644
index 00000000..2c385eb3
--- /dev/null
+++ b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
@@ -0,0 +1 @@
+PROJCS["Belge_Lambert_1972",GEOGCS["GCS_Belge_1972",DATUM["D_Belge_1972",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",150000.01256],PARAMETER["False_Northing",5400088.4378],PARAMETER["Central_Meridian",4.367486666666666],PARAMETER["Standard_Parallel_1",49.8333339],PARAMETER["Standard_Parallel_2",51.16666723333333],PARAMETER["Latitude_Of_Origin",90.0],UNIT["Meter",1.0]]
\ No newline at end of file
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp
new file mode 100644
index 00000000..055d2c41
Binary files /dev/null and b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp differ
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx
new file mode 100644
index 00000000..46fc6156
Binary files /dev/null and b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx differ