You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@carbondata.apache.org by GitBox <gi...@apache.org> on 2021/09/28 05:00:11 UTC
[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #4222: [CARBONDATA-4292] Spatial index creation using spark dataframe

Indhumathi27 commented on a change in pull request #4222:
URL: https://github.com/apache/carbondata/pull/4222#discussion_r717219729



##########
File path: integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
##########
@@ -84,5 +84,22 @@ class CarbonOption(options: Map[String, String]) {
 
   lazy val dateformat: Option[String] = options.get("dateformat")
 
+  lazy val SPATIAL_INDEX: Option[String] = options.get("SPATIAL_INDEX")
+
+  lazy val SPATIAL_INDEX_type: Option[String] = options.get("SPATIAL_INDEX.mygeohash.type")

Review comment:
       looks like the newly added options are case sensitive. please handle with case insensitive options

##########
File path: integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
##########
@@ -917,4 +1009,35 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
     sql(s"""LOAD DATA local inpath '$resourcesPath/geodata2.csv' INTO TABLE $tableName OPTIONS
            |('DELIMITER'= ',')""".stripMargin)
   }
+
+  def createDf(): DataFrame = {
+    val geoSchema = StructType(Seq(StructField("timevalue", LongType, nullable = true),
+      StructField("longitude", LongType, nullable = false),
+      StructField("latitude", LongType, nullable = false)))
+    sqlContext.read.option("delimeter", ",").option("header", "true").schema(geoSchema)
+      .csv(s"$resourcesPath/geodata.csv")
+  }
+
+  def createDf2(): DataFrame = {
+    val geoSchema = StructType(Seq(StructField("timevalue", LongType, nullable = true),
+      StructField("longitude", LongType, nullable = false),
+      StructField("latitude", LongType, nullable = false)))
+    sqlContext.read.option("delimeter", ",").option("header", "true").schema(geoSchema)
+      .csv(s"$resourcesPath/geodata2.csv")
+  }
+
+  def createTableWithDf(geoDf: DataFrame, tableName: String = dfTable1): Unit = {
+    geoDf.write
+      .format("carbondata")
+      .option("tableName", s"$tableName")
+      .option("SPATIAL_INDEX", "mygeohash")
+      .option("SPATIAL_INDEX.mygeohash.type", "geohash")
+      .option("SPATIAL_INDEX.mygeohash.sourcecolumns", "longitude, latitude")
+      .option("SPATIAL_INDEX.mygeohash.originLatitude", "39.832277")

Review comment:
       test some options with case insensitve

##########
File path: integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
##########
@@ -917,4 +1009,35 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
     sql(s"""LOAD DATA local inpath '$resourcesPath/geodata2.csv' INTO TABLE $tableName OPTIONS
            |('DELIMITER'= ',')""".stripMargin)
   }
+
+  def createDf(): DataFrame = {
+    val geoSchema = StructType(Seq(StructField("timevalue", LongType, nullable = true),
+      StructField("longitude", LongType, nullable = false),
+      StructField("latitude", LongType, nullable = false)))
+    sqlContext.read.option("delimeter", ",").option("header", "true").schema(geoSchema)
+      .csv(s"$resourcesPath/geodata.csv")
+  }
+
+  def createDf2(): DataFrame = {
+    val geoSchema = StructType(Seq(StructField("timevalue", LongType, nullable = true),
+      StructField("longitude", LongType, nullable = false),
+      StructField("latitude", LongType, nullable = false)))
+    sqlContext.read.option("delimeter", ",").option("header", "true").schema(geoSchema)
+      .csv(s"$resourcesPath/geodata2.csv")
+  }
+
+  def createTableWithDf(geoDf: DataFrame, tableName: String = dfTable1): Unit = {
+    geoDf.write
+      .format("carbondata")
+      .option("tableName", s"$tableName")
+      .option("SPATIAL_INDEX", "mygeohash")
+      .option("SPATIAL_INDEX.mygeohash.type", "geohash")

Review comment:
       add testcase with invalid options

##########
File path: integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonOption.scala
##########
@@ -84,5 +84,22 @@ class CarbonOption(options: Map[String, String]) {
 
   lazy val dateformat: Option[String] = options.get("dateformat")
 
+  lazy val SPATIAL_INDEX: Option[String] = options.get("SPATIAL_INDEX")
+
+  lazy val SPATIAL_INDEX_type: Option[String] = options.get("SPATIAL_INDEX.mygeohash.type")
+
+  lazy val SPATIAL_INDEX_sourcecolumns: Option[String] = options.get(
+    "SPATIAL_INDEX.mygeohash.sourcecolumns")
+
+  lazy val SPATIAL_INDEX_originLatitude: Option[String] = options.get(
+    "SPATIAL_INDEX.mygeohash.originLatitude")
+
+  lazy val SPATIAL_INDEX_gridSize: Option[String] = options.get("SPATIAL_INDEX.mygeohash.gridSize")

Review comment:
       what if the user doesn't provide some other required options, when spatial index property is provided. Please check and handle

##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/CarbonDataFrameWriter.scala
##########
@@ -91,7 +101,15 @@ class CarbonDataFrameWriter(sqlContext: SQLContext, val dataFrame: DataFrame) {
       "TABLE_PAGE_SIZE_INMB" -> options.tablePageSizeInMb,
       "STREAMING" -> Option(options.isStreaming.toString),
       "DATEFORMAT" -> options.dateformat,
-      "TIMESTAMPFORMAT" -> options.timestampformat
+      "TIMESTAMPFORMAT" -> options.timestampformat,
+      "TIMESTAMPFORMAT" -> options.timestampformat,

Review comment:
       duplicate TIMESTAMPFORMAT definition in line 104 and 105




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@carbondata.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org