Posted to commits@sedona.apache.org by ji...@apache.org on 2023/02/16 04:05:14 UTC

[sedona] branch master updated: [SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 9b99d24a [SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
9b99d24a is described below

commit 9b99d24a65b1e4467275fb1d632ef5fee1fefce9
Author: Kristin Cowalcijk <bo...@wherobots.com>
AuthorDate: Thu Feb 16 12:05:08 2023 +0800

    [SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
---
 python/tests/__init__.py                                    |  8 +-------
 python/tests/sql/test_geoparquet.py                         |  7 +++++++
 .../datasources/parquet/GeoParquetSchemaConverter.scala     | 13 ++++++-------
 3 files changed, 14 insertions(+), 14 deletions(-)
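
Why this swap matters: per the commit title, org.apache.spark.sql.AnalysisException
has breaking ABI changes between Spark 3.0 and 3.3, so Sedona bytecode compiled
against one Spark version can fail at runtime (e.g. with NoSuchMethodError) when
run against another. java.lang.IllegalArgumentException(String) is a JDK
constructor and is identical under every Spark version, so one jar works against
all of them. Below is a minimal sketch of the adopted pattern, not the exact
Sedona code; parseGeoMetadata is a hypothetical stand-in for
GeoParquetMetaData.parseKeyValueMetaData, which returns an Option:

    import java.util.{Map => JMap}

    object GeoParquetMetadataSketch {
      // Assumption: GeoParquet keeps its metadata under the "geo" key of the
      // Parquet footer's key-value metadata.
      def parseGeoMetadata(kv: JMap[String, String]): Option[String] =
        Option(kv.get("geo"))

      def requireGeoMetadata(kv: JMap[String, String]): String =
        parseGeoMetadata(kv).getOrElse {
          // The JDK exception's (String) constructor cannot change between
          // Spark releases, unlike AnalysisException's constructors.
          throw new IllegalArgumentException(
            "GeoParquet file does not contain valid geo metadata")
        }
    }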

diff --git a/python/tests/__init__.py b/python/tests/__init__.py
index 1695ee2a..09a5f73e 100644
--- a/python/tests/__init__.py
+++ b/python/tests/__init__.py
@@ -16,16 +16,9 @@
 #  under the License.
 
 import os
-from os import path
 
 from tests.tools import tests_resource
 
-# data_path = path.abspath(path.dirname(__file__))
-#
-#
-# def create_data_path(relative_path: str) -> str:
-#     return os.path.join(data_path, relative_path)
-
 
 mixed_wkb_geometry_input_location = os.path.join(tests_resource, "county_small_wkb.tsv")
 mixed_wkt_geometry_input_location = os.path.join(tests_resource, "county_small.tsv")
@@ -45,3 +38,4 @@ csv_point1_input_location = os.path.join(tests_resource, "equalitycheckfiles/tes
 csv_point2_input_location = os.path.join(tests_resource, "equalitycheckfiles/testequals_point2.csv")
 geojson_id_input_location = os.path.join(tests_resource, "testContainsId.json")
 geoparquet_input_location = os.path.join(tests_resource, "geoparquet/example1.parquet")
+plain_parquet_input_location = os.path.join(tests_resource, "geoparquet/plain.parquet")
diff --git a/python/tests/sql/test_geoparquet.py b/python/tests/sql/test_geoparquet.py
index c12433bd..69a7a9ef 100644
--- a/python/tests/sql/test_geoparquet.py
+++ b/python/tests/sql/test_geoparquet.py
@@ -15,6 +15,7 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+import pytest
 import os.path
 
 from shapely.geometry import Point
@@ -25,6 +26,7 @@ import geopandas
 
 from tests.test_base import TestBase
 from tests import geoparquet_input_location
+from tests import plain_parquet_input_location
 
 
 class TestGeoParquet(TestBase):
@@ -61,3 +63,8 @@ class TestGeoParquet(TestBase):
         rows = df.collect()
         assert len(rows) == 1
         assert rows[0]['name'] == 'Tanzania'
+
+    def test_load_plain_parquet_file(self):
+        with pytest.raises(Exception) as excinfo:
+            self.spark.read.format("geoparquet").load(plain_parquet_input_location)
+        assert "does not contain valid geo metadata" in str(excinfo.value)
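
A note on the test above: it matches on the error message under a broad
pytest.raises(Exception) rather than a specific exception class, likely because
the JVM-side IllegalArgumentException crosses the Py4J bridge and may be
re-raised under a wrapper type that differs across Spark/PySpark versions;
asserting on the message keeps the test version-independent.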
diff --git a/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala b/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala
index 7afdc043..10d5bfd3 100644
--- a/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala
@@ -23,7 +23,6 @@ import org.apache.parquet.schema._
 import org.apache.parquet.schema.OriginalType._
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
 import org.apache.parquet.schema.Type.Repetition._
-import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter.checkConversionRequirement
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
@@ -48,7 +47,7 @@ class GeoParquetToSparkSchemaConverter(
   assumeInt96IsTimestamp: Boolean = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get) {
 
   private val geoParquetMetaData: GeoParquetMetaData = GeoParquetMetaData.parseKeyValueMetaData(keyValueMetaData).getOrElse {
-    throw new AnalysisException("GeoParquet file does not contain valid geo metadata")
+    throw new IllegalArgumentException("GeoParquet file does not contain valid geo metadata")
   }
 
   def this(keyValueMetaData: java.util.Map[String, String], conf: SQLConf) = this(
@@ -106,13 +105,13 @@ class GeoParquetToSparkSchemaConverter(
       if (originalType == null) s"$typeName" else s"$typeName ($originalType)"
 
     def typeNotSupported() =
-      throw new AnalysisException(s"Parquet type not supported: $typeString")
+      throw new IllegalArgumentException(s"Parquet type not supported: $typeString")
 
     def typeNotImplemented() =
-      throw new AnalysisException(s"Parquet type not yet supported: $typeString")
+      throw new IllegalArgumentException(s"Parquet type not yet supported: $typeString")
 
     def illegalType() =
-      throw new AnalysisException(s"Illegal Parquet type: $typeString")
+      throw new IllegalArgumentException(s"Illegal Parquet type: $typeString")
 
     // When maxPrecision = -1, we skip precision range check, and always respect the precision
     // specified in field.getDecimalMetadata.  This is useful when interpreting decimal types stored
@@ -242,7 +241,7 @@ class GeoParquetToSparkSchemaConverter(
           valueContainsNull = valueOptional)
 
       case _ =>
-        throw new AnalysisException(s"Unrecognized Parquet type: $field")
+        throw new IllegalArgumentException(s"Unrecognized Parquet type: $field")
     }
   }
 
@@ -560,7 +559,7 @@ extends SparkToParquetSchemaConverter(writeLegacyParquetFormat, outputTimestampT
         convertField(field.copy(dataType = udt.sqlType))
 
       case _ =>
-        throw new AnalysisException(s"Unsupported data type ${field.dataType.catalogString}")
+        throw new IllegalArgumentException(s"Unsupported data type ${field.dataType.catalogString}")
     }
   }
 }
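
For completeness, a hedged caller-side sketch in Scala (assumptions: a local
SparkSession named spark, the Sedona GeoParquet data source on the classpath,
and a plain Parquet file at /path/to/plain.parquet): after this change, pointing
the "geoparquet" format at a file without geo metadata fails during schema
inference, mirroring the Python test above.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    try {
      // Loading triggers footer/schema inspection; a Parquet file without a
      // valid geo metadata entry now raises IllegalArgumentException.
      spark.read.format("geoparquet").load("/path/to/plain.parquet")
    } catch {
      // Match on the message, as the Python test does: depending on where
      // schema inference runs, the exception may arrive wrapped.
      case e: Exception =>
        assert(e.getMessage.contains("does not contain valid geo metadata"))
    }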