You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2024/02/09 23:00:29 UTC
(sedona) branch master updated: [SEDONA-494] Raster data source cannot write to HDFS (#1235)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 7b3f1f26c [SEDONA-494] Raster data source cannot write to HDFS (#1235)
7b3f1f26c is described below
commit 7b3f1f26cb41f2a67728b0eef14d04e2e0fbc9e4
Author: Jia Yu <ji...@apache.org>
AuthorDate: Fri Feb 9 15:00:24 2024 -0800
[SEDONA-494] Raster data source cannot write to HDFS (#1235)
---
.../spark/sql/sedona_sql/io/raster/RasterFileFormat.scala | 2 +-
.../test/scala/org/apache/sedona/sql/TestBaseScala.scala | 13 +++++++++++++
.../src/test/scala/org/apache/sedona/sql/rasterIOTest.scala | 11 +++++++++--
3 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/raster/RasterFileFormat.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/raster/RasterFileFormat.scala
index abf11c9ed..d7851b11d 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/raster/RasterFileFormat.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/raster/RasterFileFormat.scala
@@ -104,7 +104,7 @@ private class RasterFileWriter(savePath: String,
val rasterFilePath = getRasterFilePath(row, dataSchema, rasterOptions)
// write the image to file
try {
- val out = hfs.create(new Path(Paths.get(savePath, new Path(rasterFilePath).getName).toString))
+ val out = hfs.create(new Path(savePath, new Path(rasterFilePath).getName))
out.write(rasterRaw)
out.close()
} catch {
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala
index 8dd4f743f..fec235696 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala
@@ -19,6 +19,8 @@
package org.apache.sedona.sql
import com.google.common.math.DoubleMath
+import org.apache.hadoop.fs.FileUtil
+import org.apache.hadoop.hdfs.{HdfsConfiguration, MiniDFSCluster}
import org.apache.log4j.{Level, Logger}
import org.apache.sedona.common.Functions.{frechetDistance, hausdorffDistance}
import org.apache.sedona.common.Predicates.dWithin
@@ -28,6 +30,8 @@ import org.apache.spark.sql.DataFrame
import org.locationtech.jts.geom._
import org.scalatest.{BeforeAndAfterAll, FunSpec}
+import java.io.File
+
trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger.setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
@@ -74,10 +78,19 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
val buildingDataLocation: String = resourceFolder + "813_buildings_test.csv"
val smallRasterDataLocation: String = resourceFolder + "raster/test1.tiff"
private val factory = new GeometryFactory()
+ var hdfsURI: String = _
override def beforeAll(): Unit = {
SedonaContext.create(sparkSession)
+ // Set up HDFS minicluster
+ val baseDir = new File("./target/hdfs/").getAbsoluteFile
+ FileUtil.fullyDelete(baseDir)
+ val hdfsConf = new HdfsConfiguration
+ hdfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath)
+ val builder = new MiniDFSCluster.Builder(hdfsConf)
+ val hdfsCluster = builder.build
+ hdfsURI = "hdfs://127.0.0.1:" + hdfsCluster.getNameNodePort + "/"
}
override def afterAll(): Unit = {
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/rasterIOTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/rasterIOTest.scala
index 79d1c6dee..d5203e6a0 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/rasterIOTest.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/rasterIOTest.scala
@@ -19,9 +19,7 @@
package org.apache.sedona.sql
import org.apache.commons.io.FileUtils
-import org.apache.sedona.common.raster.RasterAccessors
import org.apache.spark.sql.SaveMode
-import org.apache.spark.sql.sedona_sql.expressions.raster.RS_Metadata
import org.junit.Assert.assertEquals
import org.scalatest.{BeforeAndAfter, GivenWhenThen}
@@ -149,6 +147,15 @@ class rasterIOTest extends TestBaseScala with BeforeAndAfter with GivenWhenThen
rasterDf = df.selectExpr("RS_FromArcInfoAsciiGrid(content)")
assert(rasterDf.count() == rasterCount)
}
+
+ it("should read geotiff using binary source and write geotiff back to hdfs using raster source") {
+ var rasterDf = sparkSession.read.format("binaryFile").load(rasterdatalocation)
+ val rasterCount = rasterDf.count()
+ rasterDf.write.format("raster").mode(SaveMode.Overwrite).save(hdfsURI + "/raster-written")
+ rasterDf = sparkSession.read.format("binaryFile").load(hdfsURI + "/raster-written/*")
+ rasterDf = rasterDf.selectExpr("RS_FromGeoTiff(content)")
+ assert(rasterDf.count() == rasterCount)
+ }
}
override def afterAll(): Unit = FileUtils.deleteDirectory(new File(tempDir))