You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/01/22 08:46:16 UTC
[carbondata] branch master updated: [CARBONDATA-3260] Fix the Hive stats issue in carbon catalog table

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new cd51220  [CARBONDATA-3260] Fix the Hive stats issue in carbon catalog table
cd51220 is described below

commit cd51220555c1d63d796a9233d03bb16ce8d65fe6
Author: ravipesala <ra...@gmail.com>
AuthorDate: Thu Jan 17 19:20:51 2019 +0530

    [CARBONDATA-3260] Fix the Hive stats issue in carbon catalog table
    
    Problem:
    When a carbon table is created, Hive calculates some junk stats and keeps
    them in the Hive catalog table. From Spark 2.3 onwards, Spark considers the
    Hive stats when deciding on a broadcast join, so broadcast join is not working.
    
    Solution:
    Set the hive stats to None in case of carbon table.
    
    This closes #3082
---
 .../apache/spark/util/CarbonReflectionUtils.scala  |  8 +++++++
 .../apache/spark/sql/hive/CarbonSessionUtil.scala  | 17 ++++++++++++--
 streaming/pom.xml                                  | 27 ----------------------
 3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala b/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala
index 92f35f6..ee635e0 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala
@@ -361,6 +361,14 @@ object CarbonReflectionUtils {
         instanceMirror.reflectField(field.asTerm).set(updatedSerdeMap)
       case _ =>
     }
+  }
 
+  /**
+   * This method updates the field of case class through reflection.
+   */
+  def setFieldToCaseClass(caseObj: Object, fieldName: String, objToSet: Object): Unit = {
+    val nameField = caseObj.getClass.getDeclaredField(fieldName)
+    nameField.setAccessible(true)
+    nameField.set(caseObj, objToSet)
   }
 }
diff --git a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala
index 2accbd6..88a2565 100644
--- a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala
+++ b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala
@@ -70,8 +70,21 @@ object CarbonSessionUtil {
             "tableMeta",
             relation
           ).asInstanceOf[CatalogTable]
-        isRelationRefreshed =
-          CarbonEnv.refreshRelationFromCache(catalogTable.identifier)(sparkSession)
+        catalogTable.provider match {
+          case Some(provider)
+            if provider.equals("org.apache.spark.sql.CarbonSource") ||
+               provider.equalsIgnoreCase("carbondata") =>
+            // Update stats to none in case of carbon table as we are not expecting any stats from
+            // Hive. Hive gives wrong stats for carbon table.
+            catalogTable.stats match {
+              case Some(stats) =>
+                CarbonReflectionUtils.setFieldToCaseClass(catalogTable, "stats", None)
+              case _ =>
+            }
+            isRelationRefreshed =
+              CarbonEnv.refreshRelationFromCache(catalogTable.identifier)(sparkSession)
+          case _ =>
+        }
       case _ =>
     }
     isRelationRefreshed
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 4e8eb3b..0c90750 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -107,33 +107,6 @@
           <failIfNoTests>false</failIfNoTests>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.scalatest</groupId>
-        <artifactId>scalatest-maven-plugin</artifactId>
-        <version>1.0</version>
-        <!-- Note config is repeated in surefire config -->
-        <configuration>
-          <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-          <junitxml>.</junitxml>
-          <filereports>CarbonTestSuite.txt</filereports>
-          <argLine> ${argLine} -ea -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m
-          </argLine>
-          <stderr />
-          <environmentVariables>
-          </environmentVariables>
-          <systemProperties>
-            <java.awt.headless>true</java.awt.headless>
-          </systemProperties>
-        </configuration>
-        <executions>
-          <execution>
-            <id>test</id>
-            <goals>
-              <goal>test</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
     </plugins>
   </build>
 </project>