You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2023/02/16 04:05:14 UTC
[sedona] branch master updated: [SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 9b99d24a [SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
9b99d24a is described below
commit 9b99d24a65b1e4467275fb1d632ef5fee1fefce9
Author: Kristin Cowalcijk <bo...@wherobots.com>
AuthorDate: Thu Feb 16 12:05:08 2023 +0800
[SEDONA-226] Replace AnalysisException with IllegalArgumentException in GeoParquet reader since AnalysisException has breaking ABI changes between Spark 3.0 and 3.3 (#771)
---
python/tests/__init__.py | 8 +-------
python/tests/sql/test_geoparquet.py | 7 +++++++
.../datasources/parquet/GeoParquetSchemaConverter.scala | 13 ++++++-------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/python/tests/__init__.py b/python/tests/__init__.py
index 1695ee2a..09a5f73e 100644
--- a/python/tests/__init__.py
+++ b/python/tests/__init__.py
@@ -16,16 +16,9 @@
# under the License.
import os
-from os import path
from tests.tools import tests_resource
-# data_path = path.abspath(path.dirname(__file__))
-#
-#
-# def create_data_path(relative_path: str) -> str:
-# return os.path.join(data_path, relative_path)
-
mixed_wkb_geometry_input_location = os.path.join(tests_resource, "county_small_wkb.tsv")
mixed_wkt_geometry_input_location = os.path.join(tests_resource, "county_small.tsv")
@@ -45,3 +38,4 @@ csv_point1_input_location = os.path.join(tests_resource, "equalitycheckfiles/tes
csv_point2_input_location = os.path.join(tests_resource, "equalitycheckfiles/testequals_point2.csv")
geojson_id_input_location = os.path.join(tests_resource, "testContainsId.json")
geoparquet_input_location = os.path.join(tests_resource, "geoparquet/example1.parquet")
+plain_parquet_input_location = os.path.join(tests_resource, "geoparquet/plain.parquet")
diff --git a/python/tests/sql/test_geoparquet.py b/python/tests/sql/test_geoparquet.py
index c12433bd..69a7a9ef 100644
--- a/python/tests/sql/test_geoparquet.py
+++ b/python/tests/sql/test_geoparquet.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import pytest
import os.path
from shapely.geometry import Point
@@ -25,6 +26,7 @@ import geopandas
from tests.test_base import TestBase
from tests import geoparquet_input_location
+from tests import plain_parquet_input_location
class TestGeoParquet(TestBase):
@@ -61,3 +63,8 @@ class TestGeoParquet(TestBase):
rows = df.collect()
assert len(rows) == 1
assert rows[0]['name'] == 'Tanzania'
+
+ def test_load_plain_parquet_file(self):
+ with pytest.raises(Exception) as excinfo:
+ self.spark.read.format("geoparquet").load(plain_parquet_input_location)
+ assert "does not contain valid geo metadata" in str(excinfo.value)
diff --git a/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala b/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala
index 7afdc043..10d5bfd3 100644
--- a/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSchemaConverter.scala
@@ -23,7 +23,6 @@ import org.apache.parquet.schema._
import org.apache.parquet.schema.OriginalType._
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
import org.apache.parquet.schema.Type.Repetition._
-import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter.checkConversionRequirement
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
@@ -48,7 +47,7 @@ class GeoParquetToSparkSchemaConverter(
assumeInt96IsTimestamp: Boolean = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get) {
private val geoParquetMetaData: GeoParquetMetaData = GeoParquetMetaData.parseKeyValueMetaData(keyValueMetaData).getOrElse {
- throw new AnalysisException("GeoParquet file does not contain valid geo metadata")
+ throw new IllegalArgumentException("GeoParquet file does not contain valid geo metadata")
}
def this(keyValueMetaData: java.util.Map[String, String], conf: SQLConf) = this(
@@ -106,13 +105,13 @@ class GeoParquetToSparkSchemaConverter(
if (originalType == null) s"$typeName" else s"$typeName ($originalType)"
def typeNotSupported() =
- throw new AnalysisException(s"Parquet type not supported: $typeString")
+ throw new IllegalArgumentException(s"Parquet type not supported: $typeString")
def typeNotImplemented() =
- throw new AnalysisException(s"Parquet type not yet supported: $typeString")
+ throw new IllegalArgumentException(s"Parquet type not yet supported: $typeString")
def illegalType() =
- throw new AnalysisException(s"Illegal Parquet type: $typeString")
+ throw new IllegalArgumentException(s"Illegal Parquet type: $typeString")
// When maxPrecision = -1, we skip precision range check, and always respect the precision
// specified in field.getDecimalMetadata. This is useful when interpreting decimal types stored
@@ -242,7 +241,7 @@ class GeoParquetToSparkSchemaConverter(
valueContainsNull = valueOptional)
case _ =>
- throw new AnalysisException(s"Unrecognized Parquet type: $field")
+ throw new IllegalArgumentException(s"Unrecognized Parquet type: $field")
}
}
@@ -560,7 +559,7 @@ extends SparkToParquetSchemaConverter(writeLegacyParquetFormat, outputTimestampT
convertField(field.copy(dataType = udt.sqlType))
case _ =>
- throw new AnalysisException(s"Unsupported data type ${field.dataType.catalogString}")
+ throw new IllegalArgumentException(s"Unsupported data type ${field.dataType.catalogString}")
}
}
}