You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/01/30 10:39:05 UTC

[carbondata] 01/27: [CARBONDATA-3260] Fix the Hive stats issue in carbon catalog table

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 0d4a6513f568c285b8cf5249046c01a89ff4b1e1
Author: ravipesala <ra...@gmail.com>
AuthorDate: Thu Jan 17 19:20:51 2019 +0530

    [CARBONDATA-3260] Fix the Hive stats issue in carbon catalog table
    
    Problem:
    When a carbon table is created, Hive calculates some junk stats and keeps
    them in the Hive catalog table. From Spark 2.3 onwards, Spark considers the
    Hive stats when planning a broadcast join, so broadcast join does not work.
    
    Solution:
    Set the Hive stats to None for carbon tables.
    
    This closes #3082
---
 .../apache/spark/util/CarbonReflectionUtils.scala  |  8 +++++++
 .../apache/spark/sql/hive/CarbonSessionUtil.scala  | 17 ++++++++++++--
 streaming/pom.xml                                  | 27 ----------------------
 3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala b/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala
index 92f35f6..ee635e0 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala
@@ -361,6 +361,14 @@ object CarbonReflectionUtils {
         instanceMirror.reflectField(field.asTerm).set(updatedSerdeMap)
       case _ =>
     }
+  }
 
+  /**
+   * This method updates the field of case class through reflection.
+   */
+  def setFieldToCaseClass(caseObj: Object, fieldName: String, objToSet: Object): Unit = {
+    val nameField = caseObj.getClass.getDeclaredField(fieldName)
+    nameField.setAccessible(true)
+    nameField.set(caseObj, objToSet)
   }
 }
diff --git a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala
index 2accbd6..88a2565 100644
--- a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala
+++ b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionUtil.scala
@@ -70,8 +70,21 @@ object CarbonSessionUtil {
             "tableMeta",
             relation
           ).asInstanceOf[CatalogTable]
-        isRelationRefreshed =
-          CarbonEnv.refreshRelationFromCache(catalogTable.identifier)(sparkSession)
+        catalogTable.provider match {
+          case Some(provider)
+            if provider.equals("org.apache.spark.sql.CarbonSource") ||
+               provider.equalsIgnoreCase("carbondata") =>
+            // Update stats to none in case of carbon table as we are not expecting any stats from
+            // Hive. Hive gives wrong stats for carbon table.
+            catalogTable.stats match {
+              case Some(stats) =>
+                CarbonReflectionUtils.setFieldToCaseClass(catalogTable, "stats", None)
+              case _ =>
+            }
+            isRelationRefreshed =
+              CarbonEnv.refreshRelationFromCache(catalogTable.identifier)(sparkSession)
+          case _ =>
+        }
       case _ =>
     }
     isRelationRefreshed
diff --git a/streaming/pom.xml b/streaming/pom.xml
index c49b5f3..1f7c431 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -105,33 +105,6 @@
           <failIfNoTests>false</failIfNoTests>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.scalatest</groupId>
-        <artifactId>scalatest-maven-plugin</artifactId>
-        <version>1.0</version>
-        <!-- Note config is repeated in surefire config -->
-        <configuration>
-          <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-          <junitxml>.</junitxml>
-          <filereports>CarbonTestSuite.txt</filereports>
-          <argLine> ${argLine} -ea -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m
-          </argLine>
-          <stderr />
-          <environmentVariables>
-          </environmentVariables>
-          <systemProperties>
-            <java.awt.headless>true</java.awt.headless>
-          </systemProperties>
-        </configuration>
-        <executions>
-          <execution>
-            <id>test</id>
-            <goals>
-              <goal>test</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
     </plugins>
   </build>
 </project>