You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2023/11/22 15:54:16 UTC
(sedona) branch master updated: [SEDONA-428] Add RS_ZonalStats & RS_ZonalStatsAll (#1124)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 1030d0b6f [SEDONA-428] Add RS_ZonalStats & RS_ZonalStatsAll (#1124)
1030d0b6f is described below
commit 1030d0b6f2555fc06d16fd93729949efe9224ffe
Author: Furqaan Khan <46...@users.noreply.github.com>
AuthorDate: Wed Nov 22 10:54:09 2023 -0500
[SEDONA-428] Add RS_ZonalStats & RS_ZonalStatsAll (#1124)
---
common/pom.xml | 4 +
.../sedona/common/raster/RasterBandAccessors.java | 191 +++++++++++++++++++++
.../sedona/common/raster/RasterConstructors.java | 87 +++++++---
.../common/raster/RasterBandAccessorsTest.java | 113 ++++++++++++
.../common/raster/RasterConstructorsTest.java | 14 ++
.../sedona/common/raster/RasterTestBase.java | 2 +
docs/api/sql/Raster-operators.md | 125 ++++++++++++++
pom.xml | 5 +
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 2 +
.../expressions/raster/RasterBandAccessors.scala | 19 ++
.../org/apache/sedona/sql/rasteralgebraTest.scala | 54 ++++++
11 files changed, 594 insertions(+), 22 deletions(-)
diff --git a/common/pom.xml b/common/pom.xml
index 5923dc794..2f2a7211e 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -37,6 +37,10 @@
</properties>
<dependencies>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
diff --git a/common/src/main/java/org/apache/sedona/common/raster/RasterBandAccessors.java b/common/src/main/java/org/apache/sedona/common/raster/RasterBandAccessors.java
index 2505ba2f7..d1a0b2f42 100644
--- a/common/src/main/java/org/apache/sedona/common/raster/RasterBandAccessors.java
+++ b/common/src/main/java/org/apache/sedona/common/raster/RasterBandAccessors.java
@@ -18,16 +18,23 @@
*/
package org.apache.sedona.common.raster;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.commons.math3.stat.StatUtils;
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.sedona.common.Functions;
import org.apache.sedona.common.utils.RasterUtils;
import org.geotools.coverage.GridSampleDimension;
import org.geotools.coverage.grid.GridCoverage2D;
+import org.locationtech.jts.geom.Geometry;
import org.opengis.referencing.FactoryException;
import javax.media.jai.RasterFactory;
import java.awt.image.Raster;
import java.awt.image.WritableRaster;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
public class RasterBandAccessors {
@@ -81,6 +88,190 @@ public class RasterBandAccessors {
// return getCount(raster, 1, excludeNoDataValue);
// }
+ /**
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @param band Band to be used for computation
+ * @param excludeNoData Specifies whether to exclude no-data value or not
+ * @return An array with all the stats for the region
+ * @throws FactoryException
+ */
+ public static double[] getZonalStatsAll(GridCoverage2D raster, Geometry roi, int band, boolean excludeNoData) throws FactoryException {
+ List<Object> objects = getStatObjects(raster, roi, band, excludeNoData);
+ DescriptiveStatistics stats = (DescriptiveStatistics) objects.get(0);
+ double[] pixelData = (double[]) objects.get(1);
+
+ // order of stats
+ // count, sum, mean, median, mode, stddev, variance, min, max
+ double[] result = new double[9];
+ result[0] = stats.getN();
+ result[1] = stats.getSum();
+ result[2] = stats.getMean();
+ result[3] = stats.getPercentile(50);
+ result[4] = zonalMode(pixelData);
+ result[5] = stats.getStandardDeviation();
+ result[6] = stats.getVariance();
+ result[7] = stats.getMin();
+ result[8] = stats.getMax();
+
+ return result;
+ }
+
+ /**
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @param band Band to be used for computation
+ * @return An array with all the stats for the region, excludeNoData is set to true
+ * @throws FactoryException
+ */
+ public static double[] getZonalStatsAll(GridCoverage2D raster, Geometry roi, int band) throws FactoryException {
+ return getZonalStatsAll(raster, roi, band, true);
+ }
+
+ /**
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @return An array with all the stats for the region, excludeNoData is set to true and band is set to 1
+ * @throws FactoryException
+ */
+ public static double[] getZonalStatsAll(GridCoverage2D raster, Geometry roi) throws FactoryException {
+ return getZonalStatsAll(raster, roi, 1, true);
+ }
+
+ /**
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @param band Band to be used for computation
+ * @param statType Define the statistic to be computed
+ * @param excludeNoData Specifies whether to exclude no-data value or not
+ * @return A double precision floating point number representing the requested statistic calculated over the specified region.
+ * @throws FactoryException
+ */
+ public static double getZonalStats(GridCoverage2D raster, Geometry roi, int band, String statType, boolean excludeNoData) throws FactoryException {
+
+ List<Object> objects = getStatObjects(raster, roi, band, excludeNoData);
+ DescriptiveStatistics stats = (DescriptiveStatistics) objects.get(0);
+ double[] pixelData = (double[]) objects.get(1);
+
+ switch (statType.toLowerCase()) {
+ case "sum":
+ return stats.getSum();
+ case "average":
+ case "avg":
+ case "mean":
+ return stats.getMean();
+ case "count":
+ return stats.getN();
+ case "max":
+ return stats.getMax();
+ case "min":
+ return stats.getMin();
+ case "stddev":
+ case "sd":
+ return stats.getStandardDeviation();
+ case "median":
+ return stats.getPercentile(50);
+ case "mode":
+ return zonalMode(pixelData);
+ case "variance":
+ return stats.getVariance();
+ default:
+ throw new IllegalArgumentException("Please select from the accepted options. Some of the valid options are sum, mean, stddev, etc.");
+ }
+ }
+
+ /**
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @param band Band to be used for computation
+ * @param statType Define the statistic to be computed
+ * @return A double precision floating point number representing the requested statistic calculated over the specified region. The excludeNoData is set to true.
+ * @throws FactoryException
+ */
+ public static double getZonalStats(GridCoverage2D raster, Geometry roi, int band, String statType) throws FactoryException {
+ return getZonalStats(raster, roi, band, statType, true);
+ }
+
+ /**
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @param statType Define the statistic to be computed
+ * @return A double precision floating point number representing the requested statistic calculated over the specified region. The excludeNoData is set to true and band is set to 1.
+ * @throws FactoryException
+ */
+ public static double getZonalStats(GridCoverage2D raster, Geometry roi, String statType) throws FactoryException {
+ return getZonalStats(raster, roi, 1, statType, true);
+ }
+
+ /**
+ * @param pixelData An array of double with pixel values
+ * @return Mode of the pixel values. If there is multiple with same occurrence, then the largest value will be returned.
+ */
+ private static double zonalMode(double[] pixelData) {
+ double[] modes = StatUtils.mode(pixelData);
+ return modes[modes.length - 1];
+ }
+
+ /**
+ * An intermediate function to compute zonal statistics
+ * @param raster Raster to use for computing stats
+ * @param roi Geometry to define the region of interest
+ * @param band Band to be used for computation
+ * @param excludeNoData Specifies whether to exclude no-data value or not
+ * @return an object of DescriptiveStatistics and an array of double with pixel data.
+ * @throws FactoryException
+ */
+ private static List<Object> getStatObjects(GridCoverage2D raster, Geometry roi, int band, boolean excludeNoData) throws FactoryException {
+ RasterUtils.ensureBand(raster, band);
+
+ if(RasterAccessors.srid(raster) != roi.getSRID()) {
+ // implicitly converting roi geometry CRS to raster CRS
+ roi = RasterUtils.convertCRSIfNeeded(roi, raster.getCoordinateReferenceSystem());
+ // have to set the SRID as RasterUtils.convertCRSIfNeeded doesn't set it even though the geometry is in raster's CRS
+ roi = Functions.setSRID(roi, RasterAccessors.srid(raster));
+ }
+
+ // checking if the raster contains the geometry
+ if (!RasterPredicates.rsIntersects(raster, roi)) {
+ throw new IllegalArgumentException("The provided geometry is not intersecting the raster. Please provide a geometry that is in the raster's extent.");
+ }
+
+ Raster rasterData = RasterUtils.getRaster(raster.getRenderedImage());
+ String datatype = RasterBandAccessors.getBandType(raster, band);
+ Double noDataValue = RasterBandAccessors.getBandNoDataValue(raster, band);
+ // Adding an arbitrary value '150' for the pixels that are under the geometry.
+ GridCoverage2D rasterizedGeom = RasterConstructors.asRasterWithRasterExtent(roi, raster, datatype, 150, null);
+ Raster rasterziedData = RasterUtils.getRaster(rasterizedGeom.getRenderedImage());
+ int width = RasterAccessors.getWidth(rasterizedGeom), height = RasterAccessors.getHeight(rasterizedGeom);
+ double[] rasterizedPixelData = rasterziedData.getSamples(0, 0, width, height, 0, (double[]) null);
+ double[] rasterPixelData = rasterData.getSamples(0, 0, width, height, band - 1, (double[]) null);
+
+ List<Double> pixelData = new ArrayList<>();
+
+ for (int k = 0; k < rasterPixelData.length; k++) {
+
+ // Pixels with a value of 0 in the rasterizedPixelData are skipped during computation,
+ // as they fall outside the geometry specified by 'roi'.
+ // The region of interest defined by 'roi' contains pixel values of 150,
+ // as initialized when constructing the raster via RasterConstructors.asRasterWithRasterExtent.
+ if (rasterizedPixelData[k] == 0 || excludeNoData && noDataValue != null && rasterPixelData[k] == noDataValue) {
+ continue;
+ } else {
+ pixelData.add(rasterPixelData[k]);
+ }
+ }
+
+ double[] pixelsArray = pixelData.stream().mapToDouble(d -> d).toArray();
+
+ DescriptiveStatistics stats = new DescriptiveStatistics(pixelsArray);
+
+ List<Object> statObjects = new ArrayList<>();
+
+ statObjects.add(stats);
+ statObjects.add(pixelsArray);
+ return statObjects;
+ }
+
public static double[] getSummaryStats(GridCoverage2D rasterGeom, int band, boolean excludeNoDataValue) {
RasterUtils.ensureBand(rasterGeom, band);
Raster raster = RasterUtils.getRaster(rasterGeom.getRenderedImage());
diff --git a/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java b/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java
index 29459c943..0cc65b5d1 100644
--- a/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java
+++ b/common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java
@@ -26,6 +26,7 @@ import org.geotools.gce.arcgrid.ArcGridReader;
import org.geotools.gce.geotiff.GeoTiffReader;
import org.geotools.geometry.Envelope2D;
import org.geotools.geometry.jts.JTS;
+import org.geotools.geometry.jts.ReferencedEnvelope;
import org.geotools.process.vector.VectorToRasterProcess;
import org.geotools.referencing.crs.DefaultEngineeringCRS;
import org.geotools.referencing.operation.transform.AffineTransform2D;
@@ -41,6 +42,9 @@ import org.opengis.referencing.operation.MathTransform;
import javax.media.jai.RasterFactory;
import java.awt.image.WritableRaster;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
public class RasterConstructors
{
@@ -67,6 +71,43 @@ public class RasterConstructors
*/
public static GridCoverage2D asRaster(Geometry geom, GridCoverage2D raster, String pixelType, double value, Double noDataValue) throws FactoryException {
+ List<Object> objects = rasterization(geom, raster, pixelType, value, noDataValue, true);
+ WritableRaster writableRaster = (WritableRaster) objects.get(0);
+ GridCoverage2D rasterized = (GridCoverage2D) objects.get(1);
+
+ return RasterUtils.clone(writableRaster, rasterized.getSampleDimensions(), rasterized, noDataValue, false); //no need to original raster metadata since this is a new raster.
+ }
+
+ /**
+ * Returns a raster that is converted from the geometry provided. A convenience function for asRaster.
+ *
+ * @param geom The geometry to convert
+ * @param raster The reference raster
+ * @param pixelType The data type of pixel/cell of resultant raster.
+ *
+ * @return Rasterized Geometry
+ * @throws FactoryException
+ */
+ public static GridCoverage2D asRaster(Geometry geom, GridCoverage2D raster, String pixelType) throws FactoryException {
+ return asRaster(geom, raster, pixelType, 1, null);
+ }
+
+ /**
+ * Returns a raster that is converted from the geometry provided. A convenience function for asRaster.
+ *
+ * @param geom The geometry to convert
+ * @param raster The reference raster
+ * @param pixelType The data type of pixel/cell of resultant raster.
+ * @param value The value of the pixel of the resultant raster
+ *
+ * @return Rasterized Geometry
+ * @throws FactoryException
+ */
+ public static GridCoverage2D asRaster(Geometry geom, GridCoverage2D raster, String pixelType, double value) throws FactoryException {
+ return asRaster(geom, raster, pixelType, value, null);
+ }
+
+ private static List<Object> rasterization(Geometry geom, GridCoverage2D raster, String pixelType, double value, Double noDataValue, boolean useGeometryExtent) throws FactoryException {
DefaultFeatureCollection featureCollection = getFeatureCollection(geom, raster.getCoordinateReferenceSystem());
double[] metadata = RasterAccessors.metadata(raster);
@@ -89,7 +130,14 @@ public class RasterConstructors
throw new IllegalArgumentException(String.format("SkewY %d of the raster is not zero.", metadata[7]));
}
- Envelope2D bound = JTS.getEnvelope2D(geom.getEnvelopeInternal(), raster.getCoordinateReferenceSystem2D());
+ Envelope2D bound = null;
+
+ if (useGeometryExtent) {
+ bound = JTS.getEnvelope2D(geom.getEnvelopeInternal(), raster.getCoordinateReferenceSystem2D());
+ } else {
+ ReferencedEnvelope envelope = ReferencedEnvelope.create(raster.getEnvelope(), raster.getCoordinateReferenceSystem());
+ bound = JTS.getEnvelope2D(envelope, raster.getCoordinateReferenceSystem2D());
+ }
double scaleX = Math.abs(metadata[4]), scaleY = Math.abs(metadata[5]);
int width = (int) bound.getWidth(), height = (int) bound.getHeight();
@@ -117,37 +165,32 @@ public class RasterConstructors
WritableRaster writableRaster = RasterFactory.createBandedRaster(RasterUtils.getDataTypeCode(pixelType), width, height, 1, null);
double [] samples = RasterUtils.getRaster(rasterized.getRenderedImage()).getSamples(0, 0, width, height, 0, (double[]) null);
writableRaster.setSamples(0, 0, width, height, 0, samples);
+
+ List<Object> objects = new ArrayList<>();
+ objects.add(writableRaster);
+ objects.add(rasterized);
- return RasterUtils.clone(writableRaster, rasterized.getSampleDimensions(), rasterized, noDataValue, false); //no need to original raster metadata since this is a new raster.
- }
-
- /**
- * Returns a raster that is converted from the geometry provided. A convenience function for asRaster.
- *
- * @param geom The geometry to convert
- * @param raster The reference raster
- * @param pixelType The data type of pixel/cell of resultant raster.
- *
- * @return Rasterized Geometry
- * @throws FactoryException
- */
- public static GridCoverage2D asRaster(Geometry geom, GridCoverage2D raster, String pixelType) throws FactoryException {
- return asRaster(geom, raster, pixelType, 1, null);
+ return objects;
}
/**
- * Returns a raster that is converted from the geometry provided. A convenience function for asRaster.
- *
+ * For internal use only!
+ * Returns a raster that is converted from the geometry provided with the extent of the reference raster.
* @param geom The geometry to convert
* @param raster The reference raster
- * @param pixelType The data type of pixel/cell of resultant raster.
+ * @param pixelType The data type of pixel/cell of resultant raster
* @param value The value of the pixel of the resultant raster
+ * @param noDataValue The noDataValue of the resultant raster
*
- * @return Rasterized Geometry
+ * @return Rasterized Geometry with reference raster's extent
* @throws FactoryException
*/
- public static GridCoverage2D asRaster(Geometry geom, GridCoverage2D raster, String pixelType, double value) throws FactoryException {
- return asRaster(geom, raster, pixelType, value, null);
+ public static GridCoverage2D asRasterWithRasterExtent(Geometry geom, GridCoverage2D raster, String pixelType, double value, Double noDataValue) throws FactoryException {
+ List<Object> objects = rasterization(geom, raster, pixelType, value, noDataValue, false);
+ WritableRaster writableRaster = (WritableRaster) objects.get(0);
+ GridCoverage2D rasterized = (GridCoverage2D) objects.get(1);
+
+ return RasterUtils.clone(writableRaster, rasterized.getSampleDimensions(), rasterized, noDataValue, false); //no need to original raster metadata since this is a new raster.
}
public static DefaultFeatureCollection getFeatureCollection(Geometry geom, CoordinateReferenceSystem crs) {
diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterBandAccessorsTest.java b/common/src/test/java/org/apache/sedona/common/raster/RasterBandAccessorsTest.java
index 4b1809c5a..1d2e14dcb 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterBandAccessorsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterBandAccessorsTest.java
@@ -19,9 +19,15 @@
package org.apache.sedona.common.raster;
+import org.apache.sedona.common.Constructors;
+import org.apache.sedona.common.Functions;
+import org.apache.sedona.common.FunctionsGeoTools;
import org.geotools.coverage.grid.GridCoverage2D;
import org.junit.Test;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.io.ParseException;
import org.opengis.referencing.FactoryException;
+import org.opengis.referencing.operation.TransformException;
import java.io.IOException;
import java.util.Arrays;
@@ -75,6 +81,113 @@ public class RasterBandAccessorsTest extends RasterTestBase {
assertEquals("Provided band index 2 is not present in the raster", exception.getMessage());
}
+ @Test
+ public void testZonalStats() throws FactoryException, ParseException, IOException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster_geotiff_color/FAA_UTM18N_NAD83.tif");
+ String polygon = "POLYGON ((236722 4204770, 243900 4204770, 243900 4197590, 221170 4197590, 236722 4204770))";
+ Geometry geom = Constructors.geomFromWKT(polygon, RasterAccessors.srid(raster));
+
+ double actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "sum", false);
+ double expected = 1.0690406E7;
+ assertEquals(expected, actual, 0d);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 2, "mean", false);
+ expected = 220.6062;
+ assertEquals(expected, actual, FP_TOLERANCE);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "count");
+ expected = 184792.0;
+ assertEquals(expected, actual, 0.1d);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 3, "variance", false);
+ expected = 13554.5057;
+ assertEquals(expected, actual, FP_TOLERANCE);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, "max");
+ expected = 255.0;
+ assertEquals(expected, actual, 1E-1);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "min", false);
+ expected = 0.0;
+ assertEquals(expected, actual, 1E-1);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "sd", false);
+ expected = 92.1327;
+ assertEquals(expected, actual, FP_TOLERANCE);
+ }
+
+ @Test
+ public void testZonalStatsWithNoData() throws IOException, FactoryException, ParseException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/raster_with_no_data/test5.tiff");
+ String polygon = "POLYGON((-167.750000 87.750000, -155.250000 87.750000, -155.250000 40.250000, -180.250000 40.250000, -167.750000 87.750000))";
+ // Testing implicit CRS transformation
+ Geometry geom = Constructors.geomFromWKT(polygon, 0);
+
+ double actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "sum", true);
+ double expected = 3213526.0;
+ assertEquals(expected, actual, 0d);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "mean", true);
+ expected = 226.5599;
+ assertEquals(expected, actual, FP_TOLERANCE);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "count");
+ expected = 14184.0;
+ assertEquals(expected, actual, 0.1d);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, "variance");
+ expected = 5606.4233;
+ assertEquals(expected, actual, FP_TOLERANCE);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, "max");
+ expected = 255.0;
+ assertEquals(expected, actual, 1E-1);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "min", true);
+ expected = 1.0;
+ assertEquals(expected, actual, 1E-1);
+
+ actual = RasterBandAccessors.getZonalStats(raster, geom, 1, "sd", true);
+ expected = 74.8760;
+ assertEquals(expected, actual, FP_TOLERANCE);
+ }
+
+ @Test
+ public void testZonalStatsAll() throws IOException, FactoryException, ParseException, TransformException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster_geotiff_color/FAA_UTM18N_NAD83.tif");
+ String polygon = "POLYGON ((-8673439.6642 4572993.5327, -8673155.5737 4563873.2099, -8701890.3259 4562931.7093, -8682522.8735 4572703.8908, -8673439.6642 4572993.5327))";
+ Geometry geom = Constructors.geomFromWKT(polygon, 3857);
+
+ double[] actual = RasterBandAccessors.getZonalStatsAll(raster, geom, 1, false);
+ double[] expected = new double[] {184792.0, 1.0690406E7, 57.8510, 0.0, 0.0, 92.1327, 8488.4480, 0.0, 255.0};
+ assertArrayEquals(expected, actual, FP_TOLERANCE);
+ }
+
+ @Test
+ public void testZonalStatsAllWithNoData() throws IOException, FactoryException, ParseException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/raster_with_no_data/test5.tiff");
+ String polygon = "POLYGON((-167.750000 87.750000, -155.250000 87.750000, -155.250000 40.250000, -180.250000 40.250000, -167.750000 87.750000))";
+ Geometry geom = Constructors.geomFromWKT(polygon, RasterAccessors.srid(raster));
+
+ double[] actual = RasterBandAccessors.getZonalStatsAll(raster, geom, 1, true);
+ double[] expected = new double[] {14184.0, 3213526.0, 226.5599, 255.0, 255.0, 74.8760, 5606.4233, 1.0, 255.0};
+ assertArrayEquals(expected, actual, FP_TOLERANCE);
+ }
+
+ @Test
+ public void testZonalStatsAllWithEmptyRaster() throws FactoryException, ParseException {
+ GridCoverage2D raster = RasterConstructors.makeEmptyRaster(1, 6, 6, 1, -1, 1, -1, 0, 0, 4326);
+ double[] bandValue = new double[]{0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 9, 0, 0, 5, 6, 0, 8, 0, 0, 4, 11, 11, 12, 0, 0, 13, 0, 15, 16, 0, 0, 0, 0, 0, 0, 0};
+ raster = MapAlgebra.addBandFromArray(raster, bandValue, 1);
+ raster = RasterBandEditors.setBandNoDataValue(raster, 1, 0d);
+ // Testing implicit CRS transformation
+ Geometry geom = Constructors.geomFromWKT("POLYGON((2 -2, 2 -6, 6 -6, 6 -2, 2 -2))", 0);
+
+ double[] actual = RasterBandAccessors.getZonalStatsAll(raster, geom, 1, true);
+ double[] expected = new double[] {13.0, 114.0, 8.7692, 9.0, 11.0, 4.7285, 22.3589, 1.0, 16.0};
+ assertArrayEquals(expected, actual, FP_TOLERANCE);
+ }
+
@Test
public void testSummaryStatsWithAllNoData() throws FactoryException {
GridCoverage2D emptyRaster = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 0);
diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java b/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java
index 9a3af9198..2487fa75a 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterConstructorsTest.java
@@ -168,6 +168,20 @@ public class RasterConstructorsTest
assertArrayEquals(expected, actual, 0.1d);
}
+ @Test
+ public void testAsRasterWithRasterExtent() throws IOException, ParseException, FactoryException {
+ GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/raster_with_no_data/test5.tiff");
+ Geometry geom = Constructors.geomFromWKT("POLYGON((1.5 1.5, 3.8 3.0, 4.5 4.4, 3.4 3.5, 1.5 1.5))", RasterAccessors.srid(raster));
+ GridCoverage2D rasterized = RasterConstructors.asRasterWithRasterExtent(geom, raster, "d", 612028, 5d);
+
+ int widthActual = RasterAccessors.getWidth(rasterized);
+ int widthExpected = RasterAccessors.getWidth(raster);
+ assertEquals(widthExpected, widthActual);
+
+ int heightActual = RasterAccessors.getHeight(rasterized);
+ int heightExpected = RasterAccessors.getHeight(raster);
+ assertEquals(heightExpected, heightActual);
+ }
@Test
public void makeEmptyRaster() throws FactoryException {
diff --git a/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java b/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java
index 0a4833f86..88eebd58c 100644
--- a/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java
+++ b/common/src/test/java/org/apache/sedona/common/raster/RasterTestBase.java
@@ -47,6 +47,8 @@ public class RasterTestBase {
protected static final String resourceFolder = System.getProperty("user.dir") + "/../spark/common/src/test/resources/";
+ protected static final double FP_TOLERANCE = 1E-4;
+
GridCoverage2D oneBandRaster;
GridCoverage2D multiBandRaster;
byte[] geoTiff;
diff --git a/docs/api/sql/Raster-operators.md b/docs/api/sql/Raster-operators.md
index 55d0a8d57..a002eb046 100644
--- a/docs/api/sql/Raster-operators.md
+++ b/docs/api/sql/Raster-operators.md
@@ -937,6 +937,131 @@ Output:
14.0, 204.0, 14.571428571428571, 11.509091348732502, 1.0, 25.0
```
+### RS_ZonalStats
+
+Introduction: This returns a statistic value specified by `statType` over the region of interest defined by `zone`. It computes the statistic from the pixel values within the ROI geometry and returns the result. If the `excludeNoData` parameter is not specified, it will default to `true`. This excludes NoData values from the statistic calculation. Additionally, if the `band` parameter is not provided, band 1 will be used by default for the statistic computation. The valid options for `st [...]
+
+- `count`: Number of pixels in the region.
+- `sum`: Sum of pixel values.
+- `mean|average|avg`: Arithmetic mean.
+- `median`: Middle value in the region.
+- `mode`: Most occurring value, if there are multiple values with same occurrence then will return the largest number.
+- `stddev|sd`: Standard deviation.
+- `variance`: Variance.
+- `min`: Minimum value in the region.
+- `max`: Maximum value in the region.
+
+!!!note
+ If the coordinate reference system (CRS) of the input `zone` geometry differs from that of the `raster`, then `zone` will be transformed to match the CRS of the `raster` before computation.
+
+ The following conditions will throw an `IllegalArgumentException` if they are not met:
+
+ - The provided `raster` and `zone` geometry should intersect.
+ - The option provided to `statType` should be valid.
+
+Format:
+
+```
+RS_ZonalStats(raster: Raster, zone: Geometry, band: Integer, statType: String, excludeNoData: Boolean)
+```
+
+```
+RS_ZonalStats(raster: Raster, zone: Geometry, band: Integer, statType: String)
+```
+
+```
+RS_ZonalStats(raster: Raster, zone: Geometry, statType: String)
+```
+
+Since: `v1.5.1`
+
+Spark SQL Example:
+
+```sql
+RS_ZonalStats(rast1, geom1, 1, 'sum', false)
+```
+
+Output:
+
+```
+10690406
+```
+
+Spark SQL Example:
+
+```sql
+RS_ZonalStats(rast2, geom2, 1, 'mean', true)
+```
+
+Output:
+
+```
+226.55992667794473
+```
+
+### RS_ZonalStatsAll
+
+Introduction: Returns an array of statistic values, where each statistic is computed over a region defined by the `zone` geometry. The array contains the following values:
+
+- 0: Count of the pixels.
+- 1: Sum of the pixel values.
+- 2: Arithmetic mean.
+- 3: Median.
+- 4: Mode.
+- 5: Standard deviation.
+- 6: Variance.
+- 7: Minimum value of the zone.
+- 8: Maximum value of the zone.
+
+!!!note
+ If the coordinate reference system (CRS) of the input `zone` geometry differs from that of the `raster`, then `zone` will be transformed to match the CRS of the `raster` before computation.
+
+ The following conditions will throw an `IllegalArgumentException` if they are not met:
+
+ - The provided `raster` and `zone` geometry should intersect.
+ - The option provided to `statType` should be valid.
+
+
+Format:
+
+```
+RS_ZonalStatsAll(raster: Raster, zone: Geometry, band: Integer, excludeNodata: Boolean)
+```
+
+```
+RS_ZonalStatsAll(raster: Raster, zone: Geometry, band: Integer)
+```
+
+```
+RS_ZonalStatsAll(raster: Raster, zone: Geometry)
+```
+
+Since: `v1.5.1`
+
+Spark SQL Example:
+
+```sql
+RS_ZonalStatsAll(rast1, geom1, 1, false)
+```
+
+Output:
+
+```
+[184792.0, 1.0690406E7, 57.851021689230684, 0.0, 0.0, 92.13277429243035, 8488.448098819916, 0.0, 255.0]
+```
+
+Spark SQL Example:
+
+```sql
+RS_ZonalStatsAll(rast2, geom2, 1, true)
+```
+
+Output:
+
+```
+[14184.0, 3213526.0, 226.55992667794473, 255.0, 255.0, 74.87605357255357, 5606.423398599913, 1.0, 255.0]
+```
+
## Raster Predicates
### RS_Contains
diff --git a/pom.xml b/pom.xml
index 0e8061467..72f4c9eb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -314,6 +314,11 @@
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <version>3.6.1</version>
+ </dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 3a176db8d..718b014ff 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -251,6 +251,8 @@ object Catalog {
function[RS_MinConvexHull](),
function[RS_AsMatrix](),
function[RS_AsImage](),
+ function[RS_ZonalStats](),
+ function[RS_ZonalStatsAll](),
function[RS_Resample]()
)
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterBandAccessors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterBandAccessors.scala
index 323235819..6405e841c 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterBandAccessors.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterBandAccessors.scala
@@ -42,6 +42,25 @@ case class RS_Count(inputExpressions: Seq[Expression]) extends InferredExpressio
copy(inputExpressions = newChildren)
}
}
+
+case class RS_ZonalStats(inputExpressions: Seq[Expression]) extends InferredExpression(
+ inferrableFunction5(RasterBandAccessors.getZonalStats), inferrableFunction4(RasterBandAccessors.getZonalStats),
+ inferrableFunction3(RasterBandAccessors.getZonalStats)
+) {
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+ copy(inputExpressions = newChildren)
+ }
+}
+
+case class RS_ZonalStatsAll(inputExpressions: Seq[Expression]) extends InferredExpression(
+ inferrableFunction2(RasterBandAccessors.getZonalStatsAll), inferrableFunction4(RasterBandAccessors.getZonalStatsAll),
+ inferrableFunction3(RasterBandAccessors.getZonalStatsAll)
+) {
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+ copy(inputExpressions = newChildren)
+ }
+}
+
case class RS_SummaryStats(inputExpressions: Seq[Expression]) extends InferredExpression(
inferrableFunction1(RasterBandAccessors.getSummaryStats), inferrableFunction2(RasterBandAccessors.getSummaryStats),
inferrableFunction3(RasterBandAccessors.getSummaryStats)) {
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
index 018ed42f6..2493bda2b 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/rasteralgebraTest.scala
@@ -879,6 +879,60 @@ class rasteralgebraTest extends TestBaseScala with BeforeAndAfter with GivenWhen
assertEquals(expected, actual)
}
+ it("Passed RS_ZonalStats") {
+ var df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster_geotiff_color/FAA_UTM18N_NAD83.tif")
+ df = df.selectExpr("RS_FromGeoTiff(content) as raster", "ST_GeomFromWKT('POLYGON ((236722 4204770, 243900 4204770, 243900 4197590, 221170 4197590, 236722 4204770))', 26918) as geom")
+ var actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'sum', true)").first().get(0)
+ assertEquals(1.0690406E7, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'count', false)").first().get(0)
+ assertEquals(184792.0, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'mean', false)").first().get(0)
+ assertEquals(57.851021689230684, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'variance')").first().get(0)
+ assertEquals(8488.448098819916, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 'sd')").first().get(0)
+ assertEquals(92.13277429243035, actual)
+ }
+
+ it("Passed RS_ZonalStats - Raster with no data") {
+ var df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/raster_with_no_data/test5.tiff")
+ df = df.selectExpr("RS_FromGeoTiff(content) as raster", "ST_GeomFromWKT('POLYGON((-167.750000 87.750000, -155.250000 87.750000, -155.250000 40.250000, -180.250000 40.250000, -167.750000 87.750000))', 4326) as geom")
+ var actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'sum', true)").first().get(0)
+ assertEquals(3213526.0, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'count', true)").first().get(0)
+ assertEquals(14184.0, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'mean', false)").first().get(0)
+ assertEquals(226.49605300253515, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 1, 'variance')").first().get(0)
+ assertEquals(5606.423398599913, actual)
+
+ actual = df.selectExpr("RS_ZonalStats(raster, geom, 'sd')").first().get(0)
+ assertEquals(74.87605357255357, actual)
+ }
+
+ it("Passed RS_ZonalStatsAll") {
+ var df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster_geotiff_color/FAA_UTM18N_NAD83.tif")
+ df = df.selectExpr("RS_FromGeoTiff(content) as raster", "ST_GeomFromWKT('POLYGON ((-8673439.6642 4572993.5327, -8673155.5737 4563873.2099, -8701890.3259 4562931.7093, -8682522.8735 4572703.8908, -8673439.6642 4572993.5327))', 3857) as geom")
+ val actual = df.selectExpr("RS_ZonalStatsAll(raster, geom, 1, true)").first().get(0)
+ val expected = Seq(184792.0, 1.0690406E7, 57.851021689230684, 0.0, 0.0, 92.13277429243035, 8488.448098819916, 0.0, 255.0)
+ assertTrue(expected.equals(actual))
+ }
+
+ it("Passed RS_ZonalStatsAll - Raster with no data") {
+ var df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/raster_with_no_data/test5.tiff")
+ df = df.selectExpr("RS_FromGeoTiff(content) as raster", "ST_GeomFromWKT('POLYGON((-167.750000 87.750000, -155.250000 87.750000, -155.250000 40.250000, -180.250000 40.250000, -167.750000 87.750000))', 0) as geom")
+ val actual = df.selectExpr("RS_ZonalStatsAll(raster, geom, 1, true)").first().get(0)
+ val expected = Seq(14184.0, 3213526.0, 226.55992667794473, 255.0, 255.0, 74.87605357255357, 5606.423398599913, 1.0, 255.0)
+ assertTrue(expected.equals(actual))
+ }
+
it("Passed RS_SummaryStats with raster") {
var df = sparkSession.read.format("binaryFile").load(resourceFolder + "raster/raster_with_no_data/test5.tiff")
df = df.selectExpr("RS_FromGeoTiff(content) as raster")