You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2023/11/16 00:02:52 UTC
(sedona) branch master updated: [SEDONA-425] Add RS_Values and RS_Value to accept grid coordinates (#1122)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e2d3c0208 [SEDONA-425] Add RS_Values and RS_Value to accept grid coordinates (#1122)
e2d3c0208 is described below
commit e2d3c02081d07f558fb40ba7e02ac6b2680b9d45
Author: Pranav Toggi <pr...@gmail.com>
AuthorDate: Wed Nov 15 19:02:47 2023 -0500
[SEDONA-425] Add RS_Values and RS_Value to accept grid coordinates (#1122)
Co-authored-by: Nilesh Gajwani <ni...@gmail.com>
---
.../sedona/common/raster/PixelFunctions.java | 55 ++++++++++++++++++++--
.../apache/sedona/common/raster/FunctionsTest.java | 37 +++++++++++----
docs/api/sql/Raster-operators.md | 36 +++++++++++---
.../expressions/InferredExpression.scala | 26 ++++++++--
.../sql/sedona_sql/expressions/implicits.scala | 38 +++++++++++++++
.../expressions/raster/PixelFunctions.scala | 40 +++-------------
.../org/apache/sedona/sql/rasteralgebraTest.scala | 18 ++++++-
7 files changed, 193 insertions(+), 57 deletions(-)
diff --git a/common/src/main/java/org/apache/sedona/common/raster/PixelFunctions.java b/common/src/main/java/org/apache/sedona/common/raster/PixelFunctions.java
index ce231037c..c4912d0ee 100644
--- a/common/src/main/java/org/apache/sedona/common/raster/PixelFunctions.java
+++ b/common/src/main/java/org/apache/sedona/common/raster/PixelFunctions.java
@@ -19,6 +19,7 @@
package org.apache.sedona.common.raster;
import org.apache.sedona.common.utils.RasterUtils;
+import org.geotools.coverage.grid.GridCoordinates2D;
import org.geotools.coverage.grid.GridCoverage2D;
import org.geotools.geometry.DirectPosition2D;
import org.locationtech.jts.geom.*;
@@ -41,6 +42,18 @@ public class PixelFunctions
return values(rasterGeom, Collections.singletonList(geometry), band).get(0);
}
+ public static Double value(GridCoverage2D rasterGeom, Geometry geometry) throws TransformException
+ {
+ return values(rasterGeom, Collections.singletonList(geometry), 1).get(0);
+ }
+
+ public static Double value(GridCoverage2D rasterGeom, int colX, int rowY, int band) throws TransformException
+ {
+ int[] xCoordinates = {colX};
+ int[] yCoordinates = {rowY};
+ return values(rasterGeom, xCoordinates, yCoordinates, band).get(0);
+ }
+
public static Geometry getPixelAsPolygon(GridCoverage2D raster, int colX, int rowY) throws TransformException, FactoryException {
int srid = RasterAccessors.srid(raster);
Point2D point2D1 = RasterUtils.getWorldCornerCoordinates(raster, colX, rowY);
@@ -77,12 +90,42 @@ public class PixelFunctions
}
return GEOMETRY_FACTORY.createPoint(pointCoord);
}
- public static List<Double> values(GridCoverage2D rasterGeom, List<Geometry> geometries, int band) throws TransformException {
+
+ public static List<Double> values(GridCoverage2D rasterGeom, int[] xCoordinates, int[] yCoordinates, int band) throws TransformException {
+ RasterUtils.ensureBand(rasterGeom, band); // Check for invalid band index
int numBands = rasterGeom.getNumSampleDimensions();
- if (band < 1 || band > numBands) {
- // Invalid band index. Return nulls.
- return geometries.stream().map(geom -> (Double) null).collect(Collectors.toList());
+
+ double noDataValue = RasterUtils.getNoDataValue(rasterGeom.getSampleDimension(band - 1));
+ List<Double> result = new ArrayList<>(xCoordinates.length);
+ double[] pixelBuffer = new double[numBands];
+
+ for (int i = 0; i < xCoordinates.length; i++) {
+ int x = xCoordinates[i];
+ int y = yCoordinates[i];
+
+ GridCoordinates2D gridCoord = new GridCoordinates2D(x, y);
+
+ try {
+ pixelBuffer = rasterGeom.evaluate(gridCoord, pixelBuffer);
+ double pixelValue = pixelBuffer[band - 1];
+ if (Double.compare(noDataValue, pixelValue) == 0) {
+ result.add(null);
+ } else {
+ result.add(pixelValue);
+ }
+ } catch (PointOutsideCoverageException e) {
+ // Points outside the extent should return null
+ result.add(null);
+ }
}
+
+ return result;
+ }
+
+ public static List<Double> values(GridCoverage2D rasterGeom, List<Geometry> geometries, int band) throws TransformException {
+ RasterUtils.ensureBand(rasterGeom, band); // Check for invalid band index
+ int numBands = rasterGeom.getNumSampleDimensions();
+
double noDataValue = RasterUtils.getNoDataValue(rasterGeom.getSampleDimension(band - 1));
double[] pixelBuffer = new double[numBands];
@@ -110,6 +153,10 @@ public class PixelFunctions
return result;
}
+ public static List<Double> values(GridCoverage2D rasterGeom, List<Geometry> geometries) throws TransformException {
+ return values(rasterGeom, geometries, 1);
+ }
+
private static Point ensurePoint(Geometry geometry) {
if (geometry instanceof Point) {
return (Point) geometry;
diff --git a/common/src/test/java/org/apache/sedona/common/raster/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/raster/FunctionsTest.java
index cd76e669e..d141944eb 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/FunctionsTest.java
@@ -60,8 +60,6 @@ public class FunctionsTest extends RasterTestBase {
@Test
public void value() throws TransformException {
assertNull("Points outside of the envelope should return null.", PixelFunctions.value(oneBandRaster, point(1, 1), 1));
- assertNull("Invalid band should return null.", PixelFunctions.value(oneBandRaster, point(378923, 4072346), 0));
- assertNull("Invalid band should return null.", PixelFunctions.value(oneBandRaster, point(378923, 4072346), 2));
Double value = PixelFunctions.value(oneBandRaster, point(378923, 4072346), 1);
assertNotNull(value);
@@ -70,11 +68,30 @@ public class FunctionsTest extends RasterTestBase {
assertNull("Null should be returned for no data values.", PixelFunctions.value(oneBandRaster, point(378923, 4072376), 1));
}
+ @Test
+ public void valueWithGridCoords() throws TransformException {
+ int insideX = 1;
+ int insideY = 0;
+ int outsideX = 4;
+ int outsideY = 4;
+
+ Double insideValue = PixelFunctions.value(oneBandRaster, insideX, insideY, 1);
+ assertNotNull("Value should not be null for points inside the envelope.", insideValue);
+ assertNull("Points outside of the envelope should return null.", PixelFunctions.value(oneBandRaster, outsideX, outsideY, 1));
+
+ int noDataX = 0;
+ int noDataY = 0;
+
+ assertNull("Null should be returned for no data values.", PixelFunctions.value(oneBandRaster, noDataX, noDataY, 1));
+ }
+
@Test
public void valueWithMultibandRaster() throws TransformException {
// Multiband raster
assertEquals(9d, PixelFunctions.value(multiBandRaster, point(4.5d,4.5d), 3), 0.1d);
assertEquals(255d, PixelFunctions.value(multiBandRaster, point(4.5d,4.5d), 4), 0.1d);
+ assertEquals(4d, PixelFunctions.value(multiBandRaster, 2,2, 3), 0.1d);
+ assertEquals(255d, PixelFunctions.value(multiBandRaster, 3,4, 4), 0.1d);
}
@Test
@@ -175,17 +192,19 @@ public class FunctionsTest extends RasterTestBase {
assertEquals(2, values.size());
assertTrue(values.stream().allMatch(Objects::nonNull));
- values = PixelFunctions.values(oneBandRaster, points, 0);
+ values = PixelFunctions.values(oneBandRaster, Arrays.asList(new Geometry[]{point(378923, 4072346), null}), 1);
assertEquals(2, values.size());
- assertTrue("All values should be null for invalid band index.", values.stream().allMatch(Objects::isNull));
+ assertNull("Null geometries should return null values.", values.get(1));
+ }
- values = PixelFunctions.values(oneBandRaster, points, 2);
- assertEquals(2, values.size());
- assertTrue("All values should be null for invalid band index.", values.stream().allMatch(Objects::isNull));
+ @Test
+ public void valuesWithGridCoords() throws TransformException {
+ int[] xCoordinates = {1, 0};
+ int[] yCoordinates = {0, 1};
- values = PixelFunctions.values(oneBandRaster, Arrays.asList(new Geometry[]{point(378923, 4072346), null}), 1);
+ List<Double> values = PixelFunctions.values(oneBandRaster, xCoordinates, yCoordinates, 1);
assertEquals(2, values.size());
- assertNull("Null geometries should return null values.", values.get(1));
+ assertTrue(values.stream().allMatch(Objects::nonNull));
}
private Point point(double x, double y) {
diff --git a/docs/api/sql/Raster-operators.md b/docs/api/sql/Raster-operators.md
index 8eef3a2be..697c21af0 100644
--- a/docs/api/sql/Raster-operators.md
+++ b/docs/api/sql/Raster-operators.md
@@ -1548,8 +1548,7 @@ Output:
### RS_Value
-Introduction: Returns the value at the given point in the raster.
-If no band number is specified it defaults to 1.
+Introduction: Returns the value at the given point in the raster. If no band number is specified it defaults to 1.
Format:
@@ -1557,14 +1556,27 @@ Format:
`RS_Value (raster: Raster, point: Geometry, band: Integer)`
+`RS_Value (raster: Raster, colX: Integer, rowY: Integer, band: Integer)`
+
Since: `v1.4.0`
-Spark SQL Example:
+!!!Note
+ The input geometry points must be in the same CRS as the raster. Ensure that all points' CRS matches the raster's CRS to get accurate values.
+
+Spark SQL Examples:
+
+- For Point Geometry:
```sql
SELECT RS_Value(raster, ST_Point(-13077301.685, 4002565.802)) FROM raster_table
```
+- For Grid Coordinates:
+
+```sql
+SELECT RS_Value(raster, 3, 4, 1) FROM raster_table
+```
+
Output:
```
@@ -1573,10 +1585,9 @@ Output:
### RS_Values
-Introduction: Returns the values at the given points in the raster.
-If no band number is specified it defaults to 1.
+Introduction: Returns the values at the given points or grid coordinates in the raster. If no band number is specified it defaults to 1.
-RS_Values is similar to RS_Value but operates on an array of points.
+RS_Values is similar to RS_Value but operates on an array of points or grid coordinates.
RS_Values can be significantly faster since a raster only has to be loaded once for several points.
Format:
@@ -1585,15 +1596,28 @@ Format:
`RS_Values (raster: Raster, points: ARRAY[Geometry], band: Integer)`
+`RS_Values (raster: Raster, xCoordinates: ARRAY[Integer], yCoordinates: ARRAY[Integer], band: Integer)`
+
Since: `v1.4.0`
+!!!Note
+ The input geometry points must be in the same CRS as the raster. Ensure that all points' CRS matches the raster's CRS to get accurate values.
+
Spark SQL Example:
+- For Array of Point geometries:
+
```sql
SELECT RS_Values(raster, Array(ST_Point(-1307.5, 400.8), ST_Point(-1403.3, 399.1)))
FROM raster_table
```
+- For Arrays of grid coordinates:
+
+```sql
+SELECT RS_Values(raster, Array(4, 5), Array(3, 2), 1) FROM raster_table
+```
+
Output:
```
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala
index 4bb91c876..767d663ce 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.sedona_sql.expressions.implicits._
import org.apache.spark.sql.sedona_sql.expressions.raster.implicits._
import org.geotools.coverage.grid.GridCoverage2D
+import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
import scala.reflect.runtime.universe.TypeTag
import scala.reflect.runtime.universe.Type
import scala.reflect.runtime.universe.typeOf
@@ -114,6 +115,10 @@ object InferrableType {
new InferrableType[Array[java.lang.Long]] {}
implicit val doubleArrayInstance: InferrableType[Array[Double]] =
new InferrableType[Array[Double]] {}
+ implicit val javaDoubleListInstance: InferrableType[java.util.List[java.lang.Double]] =
+ new InferrableType[java.util.List[java.lang.Double]] {}
+ implicit val javaGeomListInstance: InferrableType[java.util.List[Geometry]] =
+ new InferrableType[java.util.List[Geometry]] {}
}
object InferredTypes {
@@ -135,6 +140,10 @@ object InferredTypes {
case null => null
case arrayData: ArrayData => arrayData.toIntArray()
}
+ } else if (t =:= typeOf[java.util.List[Geometry]]) {
+ expr => input => expr.toGeometryList(input)
+ } else if (t =:= typeOf[java.util.List[java.lang.Double]]) {
+ expr => input => expr.toDoubleList(input)
} else {
expr => input => expr.eval(input)
}
@@ -162,14 +171,23 @@ object InferredTypes {
} else {
null
}
- } else if (t =:= typeOf[Array[java.lang.Long]] || t =:= typeOf[Array[Long]] || t =:= typeOf[Array[Double]]) {
+ } else if (t =:= typeOf[Array[java.lang.Long]] || t =:= typeOf[Array[Long]] ||
+ t =:= typeOf[Array[Double]]) {
output =>
if (output != null) {
ArrayData.toArrayData(output)
} else {
null
}
- } else if (t =:= typeOf[Array[Geometry]]) {
+ }else if (t =:= typeOf[java.util.List[java.lang.Double]]) {
+ output =>
+ if (output != null) {
+ ArrayData.toArrayData(output.asInstanceOf[java.util.List[java.lang.Double]].map(elem => elem))
+ }else {
+ null
+ }
+ }
+ else if (t =:= typeOf[Array[Geometry]] || t =:= typeOf[java.util.List[Geometry]]) {
output =>
if (output != null) {
ArrayData.toArrayData(output.asInstanceOf[Array[Geometry]].map(_.toGenericArrayData))
@@ -191,7 +209,7 @@ object InferredTypes {
def inferSparkType(t: Type): DataType = {
if (t =:= typeOf[Geometry]) {
GeometryUDT
- } else if (t =:= typeOf[Array[Geometry]]) {
+ } else if (t =:= typeOf[Array[Geometry]] || t =:= typeOf[java.util.List[Geometry]]) {
DataTypes.createArrayType(GeometryUDT)
} else if (t =:= typeOf[GridCoverage2D]) {
RasterUDT
@@ -215,7 +233,7 @@ object InferredTypes {
DataTypes.createArrayType(IntegerType)
} else if (t =:= typeOf[Array[Long]] || t =:= typeOf[Array[java.lang.Long]]) {
DataTypes.createArrayType(LongType)
- } else if (t =:= typeOf[Array[Double]]) {
+ } else if (t =:= typeOf[Array[Double]] || t =:= typeOf[java.util.List[java.lang.Double]]) {
DataTypes.createArrayType(DoubleType)
} else if (t =:= typeOf[Option[Boolean]]) {
BooleanType
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala
index 5b5f02026..85719ce5a 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala
@@ -27,6 +27,8 @@ import org.apache.spark.sql.types.{ByteType, DataTypes}
import org.apache.spark.unsafe.types.UTF8String
import org.locationtech.jts.geom.{Geometry, GeometryFactory, Point}
+import java.util
+
object implicits {
implicit class InputExpressionEnhancer(inputExpression: Expression) {
@@ -59,6 +61,42 @@ object implicits {
}
}
+ def toDoubleList(input: InternalRow): java.util.List[java.lang.Double] = {
+ inputExpression match {
+ case aware: SerdeAware =>
+ aware.evalWithoutSerialization(input).asInstanceOf[java.util.List[java.lang.Double]]
+ case _ =>
+ inputExpression.eval(input).asInstanceOf[ArrayData] match {
+ case arrayData: ArrayData =>
+ val length = arrayData.numElements()
+ val doubleList = new java.util.ArrayList[java.lang.Double]()
+ for (i <- 0 until length) {
+ doubleList.add(arrayData.getDouble(i))
+ }
+ doubleList.asInstanceOf[java.util.List[java.lang.Double]]
+ case _ => null
+ }
+ }
+ }
+
+ def toGeometryList(input: InternalRow): java.util.List[Geometry] = {
+ inputExpression match {
+ case aware: SerdeAware =>
+ aware.evalWithoutSerialization(input).asInstanceOf[java.util.List[Geometry]]
+ case _ =>
+ inputExpression.eval(input).asInstanceOf[ArrayData] match {
+ case arrayData: ArrayData =>
+ val length = arrayData.numElements()
+ val geometries = new java.util.ArrayList[Geometry]()
+ for (i <- 0 until length) {
+ geometries.add(arrayData.getBinary(i).toGeometry)
+ }
+ geometries.asInstanceOf[java.util.List[Geometry]]
+ case _ => null
+ }
+ }
+ }
+
def toInt(input: InternalRow): Int = {
inputExpression.eval(input).asInstanceOf[Int]
}
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/PixelFunctions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/PixelFunctions.scala
index df89f572c..9cde85f71 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/PixelFunctions.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/PixelFunctions.scala
@@ -18,19 +18,14 @@
*/
package org.apache.spark.sql.sedona_sql.expressions.raster
-import org.apache.sedona.common.geometrySerde.GeometrySerializer
import org.apache.sedona.common.raster.PixelFunctions
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
-import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
-import org.apache.spark.sql.sedona_sql.UDT.{GeometryUDT, RasterUDT}
-import org.apache.spark.sql.sedona_sql.expressions.raster.implicits.RasterInputExpressionEnhancer
-import org.apache.spark.sql.types.{AbstractDataType, ArrayType, DataType, DoubleType, IntegerType}
+import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
import org.apache.spark.sql.sedona_sql.expressions.InferredExpression
-case class RS_Value(inputExpressions: Seq[Expression]) extends InferredExpression(PixelFunctions.value _) {
+case class RS_Value(inputExpressions: Seq[Expression]) extends InferredExpression(
+ inferrableFunction2(PixelFunctions.value), inferrableFunction3(PixelFunctions.value), inferrableFunction4(PixelFunctions.value)
+) {
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
copy(inputExpressions = newChildren)
}
@@ -54,31 +49,10 @@ case class RS_PixelAsCentroid(inputExpressions: Seq[Expression]) extends Inferre
}
}
-case class RS_Values(inputExpressions: Seq[Expression]) extends Expression with CodegenFallback with ExpectsInputTypes {
-
- override def nullable: Boolean = true
-
- override def dataType: DataType = ArrayType(DoubleType)
-
- override def eval(input: InternalRow): Any = {
- val raster = inputExpressions(0).toRaster(input)
- val serializedGeometries = inputExpressions(1).eval(input).asInstanceOf[ArrayData]
- val band = inputExpressions(2).eval(input).asInstanceOf[Int]
- if (raster == null || serializedGeometries == null) {
- null
- } else {
- val geometries = (0 until serializedGeometries.numElements()).map {
- i => Option(serializedGeometries.getBinary(i)).map(GeometrySerializer.deserialize).orNull
- }
- new GenericArrayData(PixelFunctions.values(raster, java.util.Arrays.asList(geometries:_*), band).toArray)
- }
- }
-
- override def children: Seq[Expression] = inputExpressions
-
+case class RS_Values(inputExpressions: Seq[Expression]) extends InferredExpression(
+ inferrableFunction2(PixelFunctions.values), inferrableFunction3(PixelFunctions.values), inferrableFunction4(PixelFunctions.values)
+) {
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
copy(inputExpressions = newChildren)
}
-
- override def inputTypes: Seq[AbstractDataType] = Seq(RasterUDT, ArrayType(GeometryUDT), IntegerType)
}
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
index 6e77a1ba7..d855e145d 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
@@ -497,6 +497,12 @@ class rasteralgebraTest extends TestBaseScala with BeforeAndAfter with GivenWhen
assert(result == 255d)
}
+ it("Passed RS_Value with raster and coordinates") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/test1.tiff")
+ val result = df.selectExpr("RS_Value(RS_FromGeoTiff(content), 4, 4, 1)").first().getDouble(0)
+ assert(result == 123d)
+ }
+
it("Passed RS_Values should handle null values") {
val result = sparkSession.sql("select RS_Values(null, null)").first().get(0)
assert(result == null)
@@ -520,13 +526,23 @@ class rasteralgebraTest extends TestBaseScala with BeforeAndAfter with GivenWhen
.withColumn("point", expr("ST_GeomFromText(point)"))
.groupBy().agg(collect_list("point").alias("point"))
- val result = df.crossJoin(points).selectExpr("RS_Values(RS_FromGeoTiff(content), point)").first().getList[Any](0)
+ val result = df.crossJoin(points).selectExpr("RS_Values(RS_FromGeoTiff(content), point, 1)").first().getList[Any](0)
assert(result.size() == 2)
assert(result.get(0) == 255d)
assert(result.get(1) == null)
}
+ it("Passed RS_Values with raster and Grid Coordinates") {
+ val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/test1.tiff")
+ val result = df.selectExpr("RS_Values(RS_FromGeoTiff(content), array(1,2), array(3,2), 1)")
+ .first().getList[Double](0)
+
+ assert(result.size() == 2)
+ assert(result.get(0) == 132.0)
+ assert(result.get(1) == 132.0)
+ }
+
it("Passed RS_Clip with raster") {
val df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster_geotiff_color/FAA_UTM18N_NAD83.tif")
.selectExpr("RS_FromGeoTiff(content) as raster",