You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/03/12 13:12:41 UTC

[carbondata] branch master updated: [CARBONDATA-3300] Fixed ClassNotFoundException when using UDF in spark-shell

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new dda9c4d  [CARBONDATA-3300] Fixed ClassNotFoundException when using UDF in spark-shell
dda9c4d is described below

commit dda9c4da3d475c86b84ea7a3f73c00768b1b94c2
Author: kunal642 <ku...@gmail.com>
AuthorDate: Fri Feb 22 12:03:42 2019 +0530

    [CARBONDATA-3300] Fixed ClassNotFoundException when using UDF in spark-shell
    
    Analysis:
    When spark-shell is run, a Scala interpreter session is started, which is the main thread for that shell. This session uses TranslatingClassLoader, so the UDF that is defined (the class named in the stack trace) is loaded by TranslatingClassLoader.
    
    When deserialization happens, an ObjectInputStream is created and the application tries to read the object. The ObjectInputStream uses a native method call (sun.misc.VM.latestUserDefinedLoader()) to determine the ClassLoader that will be used to load the class. This native method returns URLClassLoader, which is the parent of the TranslatingClassLoader where the class was loaded.
    Because of this, a ClassNotFoundException is thrown.
    
    Class Loader Hierarchy
    
    ExtClassLoader(head) -> AppClassLoader -> URLClassLoader -> TranslatingClassLoader
    
    This looks like a bug in the Java ObjectInputStream implementation, as suggested by the following post:
    https://stackoverflow.com/questions/1771679/difference-between-threads-context-class-loader-and-normal-classloader
    
    Operation	Thread	Thread ClassLoader	ClassLoader
    Register	Main	Translating	Translating
    Serialize	Main	Translating	Translating
    Deserialize	Thread-1	Translating	URLClassLoader
    Solution:
    Use ClassLoaderObjectInputStream to specify the class loader that should be used to load the class.
    
    This closes #3132
---
 .../org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java    | 4 +++-
 core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java    | 4 +++-
 .../java/org/apache/carbondata/core/util/ObjectSerializationUtil.java | 3 ++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
index 88706b1..104ef1a 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
@@ -31,6 +31,7 @@ import java.util.List;
 import org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk;
 import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
 
+import org.apache.commons.io.input.ClassLoaderObjectInputStream;
 import org.apache.hadoop.io.Writable;
 
 /**
@@ -261,7 +262,8 @@ public class BlockletInfo implements Serializable, Writable {
 
   private DataChunk deserializeDataChunk(byte[] bytes) throws IOException {
     ByteArrayInputStream stream = new ByteArrayInputStream(bytes);
-    ObjectInputStream inputStream = new ObjectInputStream(stream);
+    ObjectInputStream inputStream =
+        new ClassLoaderObjectInputStream(Thread.currentThread().getContextClassLoader(), stream);
     DataChunk dataChunk = null;
     try {
       dataChunk = (DataChunk) inputStream.readObject();
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index ffab9c8..d9f69e3 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -100,6 +100,7 @@ import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.input.ClassLoaderObjectInputStream;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.StringEscapeUtils;
@@ -1536,7 +1537,8 @@ public final class CarbonUtil {
     ValueEncoderMeta meta = null;
     try {
       aos = new ByteArrayInputStream(encoderMeta);
-      objStream = new ObjectInputStream(aos);
+      objStream =
+          new ClassLoaderObjectInputStream(Thread.currentThread().getContextClassLoader(), aos);
       meta = (ValueEncoderMeta) objStream.readObject();
     } catch (ClassNotFoundException e) {
       LOGGER.error(e.getMessage(), e);
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ObjectSerializationUtil.java b/core/src/main/java/org/apache/carbondata/core/util/ObjectSerializationUtil.java
index 48c6e65..169a3da 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/ObjectSerializationUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/ObjectSerializationUtil.java
@@ -26,6 +26,7 @@ import java.util.zip.GZIPOutputStream;
 
 import org.apache.carbondata.common.logging.LogServiceFactory;
 
+import org.apache.commons.io.input.ClassLoaderObjectInputStream;
 import org.apache.log4j.Logger;
 
 /**
@@ -94,7 +95,7 @@ public class ObjectSerializationUtil {
     try {
       bais = new ByteArrayInputStream(bytes);
       gis = new GZIPInputStream(bais);
-      ois = new ObjectInputStream(gis);
+      ois = new ClassLoaderObjectInputStream(Thread.currentThread().getContextClassLoader(), gis);
       return ois.readObject();
     } catch (ClassNotFoundException e) {
       throw new IOException("Could not read object", e);