You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/09/07 00:27:28 UTC
[incubator-sedona] branch shapefile-better-explanation updated: Add the code and test
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch shapefile-better-explanation
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/shapefile-better-explanation by this push:
new 62afbd44 Add the code and test
62afbd44 is described below
commit 62afbd4454c40b56c24c16b17984982bee8522e6
Author: Jia Yu <ji...@apache.org>
AuthorDate: Tue Sep 6 17:27:17 2022 -0700
Add the code and test
---
LICENSE | 9 +++
.../shapefileParser/parseUtils/shp/ShapeType.java | 62 ++++++++++++---------
.../parseUtils/shp/TypeUnknownException.java | 2 +-
.../shapefileParser/shapes/CombineShapeReader.java | 8 ++-
.../shapefileParser/shapes/ShpRecord.java | 6 ++
.../shapes/ShapefileReaderTest.java | 19 ++-----
.../unsupported/UrbAdm3D_142166_Bu_Ground.dbf | Bin 0 -> 6250 bytes
.../unsupported/UrbAdm3D_142166_Bu_Ground.prj | 1 +
.../unsupported/UrbAdm3D_142166_Bu_Ground.shp | Bin 0 -> 9052 bytes
.../unsupported/UrbAdm3D_142166_Bu_Ground.shx | Bin 0 -> 260 bytes
10 files changed, 64 insertions(+), 43 deletions(-)
diff --git a/LICENSE b/LICENSE
index 9daca346..58c092c0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -213,9 +213,18 @@ BSD 2-Clause License
--------------------------------------
zeppelin/index.js (modified based on volume-leaflet: https://github.com/volumeint/helium-volume-leaflet)
+
No-copyright data used in unit tests
--------------------------------------
+UrbIS-Adm 3D from datastore.brussels (Creative Commons CC-0 licence, No copyright)
+---------------
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp
+core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx
+
+
TIGER/Line from United States Census Bureau
---------------
core/src/test/resources/arealm-small.csv
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java
index 3b327445..8124a52c 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/ShapeType.java
@@ -22,22 +22,45 @@ package org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.shp;
import org.locationtech.jts.geom.GeometryFactory;
import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
public enum ShapeType
implements Serializable
{
-
- UNDEFINED(0),
- POINT(1),
- POLYLINE(3),
- POLYGON(5),
- MULTIPOINT(8);
+ // The following IDs are defined in Shapefile specification
+ NULL(0, false),
+ POINT(1, true),
+ POLYLINE(3, true),
+ POLYGON(5, true),
+ MULTIPOINT(8, true),
+ POINTZ(11, false),
+ POLYLINEZ(13, false),
+ POLYGONZ(15, false),
+ MULTIPOINTZ(18, false),
+ POINTM(21, false),
+ POLYLINEM(23, false),
+ POLYGONM(25, false),
+ MULTIPOINTM(28, false),
+ MULTIPATCH(31, false),
+ // A normal shapefile should NOT have UNDEFINED type
+ UNDEFINED(-1, false);
private final int id;
+ private final boolean supported;
+ // A lookup map for getting a ShapeType from its numeric id
+ private static final Map<Integer, ShapeType> lookup = new HashMap<Integer, ShapeType>();
- ShapeType(int id)
+ static {
+ for (ShapeType s : ShapeType.values()) {
+ lookup.put(s.id, s);
+ }
+ }
+
+ ShapeType(int id, boolean supported)
{
this.id = id;
+ this.supported = supported;
}
/**
@@ -48,24 +71,8 @@ public enum ShapeType
*/
public static ShapeType getType(int id)
{
- ShapeType type;
- switch (id) {
- case 1:
- type = POINT;
- break;
- case 3:
- type = POLYLINE;
- break;
- case 5:
- type = POLYGON;
- break;
- case 8:
- type = MULTIPOINT;
- break;
- default:
- type = UNDEFINED;
- }
- return type;
+ ShapeType type = lookup.get(id);
+ return type == null ? UNDEFINED : type;
}
/**
@@ -99,4 +106,9 @@ public enum ShapeType
{
return id;
}
+
+ public boolean isSupported()
+ {
+ return supported;
+ }
}
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java
index 45f854f9..287f8243 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/parseUtils/shp/TypeUnknownException.java
@@ -35,6 +35,6 @@ public class TypeUnknownException
*/
public TypeUnknownException(int typeID)
{
- super("Unknown shape type " + typeID);
+ super("Unknown shape type " + ShapeType.getType(typeID).name());
}
}
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java
index fca1fbd5..7d8e0754 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/CombineShapeReader.java
@@ -144,11 +144,13 @@ public class CombineShapeReader
boolean hasNextShp = shapeFileReader.nextKeyValue();
if (hasDbf) { hasNextDbf = dbfFileReader.nextKeyValue(); }
- int curShapeType = shapeFileReader.getCurrentValue().getTypeID();
- while (hasNextShp && ShapeType.getType(curShapeType) == ShapeType.UNDEFINED) {
+ ShapeType curShapeType = shapeFileReader.getCurrentValue().getType();
+ while (hasNextShp && !curShapeType.isSupported()) {
+ logger.warn("[SEDONA] Shapefile type " + curShapeType.name() + " is not supported. Skipped this record." +
+ " Please use QGIS or GeoPandas to convert it to a type listed in ShapeType.java");
if (hasDbf) { hasNextDbf = dbfFileReader.nextKeyValue(); }
hasNextShp = shapeFileReader.nextKeyValue();
- curShapeType = shapeFileReader.getCurrentValue().getTypeID();
+ curShapeType = shapeFileReader.getCurrentValue().getType();
}
// check if records match in .shp and .dbf
if (hasDbf) {
diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java
index 65a2a6bc..b4880093 100644
--- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java
+++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShpRecord.java
@@ -20,6 +20,7 @@
package org.apache.sedona.core.formatMapper.shapefileParser.shapes;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.shp.ShapeType;
import java.io.Serializable;
@@ -59,4 +60,9 @@ public class ShpRecord
{
return typeID;
}
+
+ public ShapeType getType()
+ {
+ return ShapeType.getType(typeID);
+ }
}
diff --git a/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java b/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java
index 904381bf..3e2b1cef 100644
--- a/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java
+++ b/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java
@@ -104,23 +104,14 @@ public class ShapefileReaderTest
*
* @throws IOException
*/
- @Ignore
- public void testShapefileEndWithUndefinedType()
+ @Test
+ public void testShapefileEndWithUnsupportedType()
throws IOException
{
- // load shape with geotool.shapefile
- String inputLocation = getShapeFilePath("undefined");
- FeatureCollection<SimpleFeatureType, SimpleFeature> collection = loadFeatures(inputLocation);
- // load shapes with our tool
+ // Read data that is in PolygonZ format
+ String inputLocation = getShapeFilePath("unsupported");
SpatialRDD shapeRDD = ShapefileReader.readToGeometryRDD(sc, inputLocation);
- FeatureIterator<SimpleFeature> features = collection.features();
- int nullNum = 0;
- while (features.hasNext()) {
- SimpleFeature feature = features.next();
- Geometry g = (Geometry) feature.getDefaultGeometry();
- if (g == null) { nullNum++; }
- }
- assertEquals(shapeRDD.getRawSpatialRDD().count(), collection.size() - nullNum);
+ assertEquals(0, shapeRDD.getRawSpatialRDD().count());
}
/**
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf
new file mode 100644
index 00000000..936df456
Binary files /dev/null and b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf differ
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
new file mode 100644
index 00000000..2c385eb3
--- /dev/null
+++ b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
@@ -0,0 +1 @@
+PROJCS["Belge_Lambert_1972",GEOGCS["GCS_Belge_1972",DATUM["D_Belge_1972",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",150000.01256],PARAMETER["False_Northing",5400088.4378],PARAMETER["Central_Meridian",4.367486666666666],PARAMETER["Standard_Parallel_1",49.8333339],PARAMETER["Standard_Parallel_2",51.16666723333333],PARAMETER["Latitude_Of_Origin",90.0],UNIT["Meter",1.0]]
\ No newline at end of file
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp
new file mode 100644
index 00000000..055d2c41
Binary files /dev/null and b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp differ
diff --git a/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx
new file mode 100644
index 00000000..46fc6156
Binary files /dev/null and b/core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx differ