Posted to user@spark.apache.org by Alex <si...@gmail.com> on 2017/02/01 05:56:45 UTC

Hive Java UDF running on spark-sql issue

Hi,


We have Java Hive UDFs which work perfectly fine in Hive.

For better performance, we are migrating the same UDFs to Spark SQL.

We pass the UDF jar files to spark-sql with the --jars argument
and define temporary functions so they run on spark-sql.
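For reference, the registration looks roughly like this (the jar path, function name
and UDF class below are placeholders, not our real ones):

spark-sql --jars /path/to/our-hive-udfs.jar

-- then, inside the spark-sql shell
CREATE TEMPORARY FUNCTION my_udf AS 'com.example.udf.MyUdf';
SELECT my_udf(some_column) FROM some_table;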

There is one particular Java UDF which works fine on Hive, but when
run on spark-sql it gives the errors below:

Caused by:org.apache.hadoop.hive.ql.metadata.HiveException:
java.lang.ClassCastException: java.lang.Long cannot be cast to
org.apache.hadoop.io.LongWritable
org.apache.hadoop.hive.ql.metadata.HiveException:
java.lang.ClassCastException: java.lang.String cannot be cast to
org.apache.hadoop.io.Text
Caused by:org.apache.hadoop.hive.ql.metadata.HiveException:
java.lang.ClassCastException: java.lang.Double cannot be cast to
org.apache.hadoop.hive.serde2.io.DoubleWritable

The piece of code that throws the error is in the switch case below:

public String getName(int pos) {
    if (pos < 0 || pos >= colnames.size())
        return null;
    return ((StructField) colnames.get(pos)).getFieldName();
}

public int getPos(String name) {
    // System.out.println(name + transactionObject.toString());
    Integer pos = (Integer) transactionObject.get(name.toLowerCase());
    if (pos == null)
        return -1;
    return pos;
}

public Object get(Object name) {
    int pos = getPos((String) name);
    if (pos < 0)
        return null;
    String f = "string";
    Object obj = list.get(pos);
    if (obj == null)
        return null;
    ObjectInspector ins = ((StructField) colnames.get(pos)).getFieldObjectInspector();
    if (ins != null)
        f = ins.getTypeName();
    switch (f) {
    case "double":
        // Fails on Spark SQL: obj arrives as a java.lang.Double, not a DoubleWritable.
        return ((DoubleWritable) obj).get();
    case "bigint":
        // Likewise, obj arrives as a java.lang.Long rather than a LongWritable.
        return ((LongWritable) obj).get();
    case "string":
        // And as a java.lang.String rather than a Text.
        return ((Text) obj).toString();
    default:
        return obj;
    }
}
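
As the stack traces show, under Spark SQL the values reaching this method are plain
java.lang.Double/Long/String rather than the Writable wrappers Hive passes in, which is
why the hard-coded Writable casts fail. Letting the field's ObjectInspector do the
conversion avoids assuming either representation; a minimal sketch in that direction,
reusing the colnames, list and getPos members from above together with Hive's
PrimitiveObjectInspectorUtils converters (an illustration, not our production code):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

public Object get(Object name) {
    int pos = getPos((String) name);
    if (pos < 0)
        return null;
    Object obj = list.get(pos);
    if (obj == null)
        return null;
    ObjectInspector ins = ((StructField) colnames.get(pos)).getFieldObjectInspector();
    // Non-primitive or unknown fields are passed through unchanged.
    if (!(ins instanceof PrimitiveObjectInspector))
        return obj;
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) ins;
    switch (poi.getPrimitiveCategory()) {
    case DOUBLE:
        // The engine-supplied inspector knows whether the value is a
        // DoubleWritable or a java.lang.Double, so no hard-coded cast is needed.
        return PrimitiveObjectInspectorUtils.getDouble(obj, poi);
    case LONG:
        return PrimitiveObjectInspectorUtils.getLong(obj, poi);
    case STRING:
        return PrimitiveObjectInspectorUtils.getString(obj, poi);
    default:
        return obj;
    }
}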

So I changed the code to the following:

public int getPos(String name) {
    // System.out.println(name + transactionObject.toString());
    Integer pos = (Integer) transactionObject.get(name.toLowerCase());
    if (pos == null)
        return -1;
    return pos;
}

public Object get(Object name) {
    int pos = getPos((String) name);
    if (pos < 0)
        return null;
    String f = "string";
    Object obj = list.get(pos);
    Object result = null;
    if (obj == null)
        return null;
    ObjectInspector ins = ((StructField) colnames.get(pos)).getFieldObjectInspector();
    if (ins != null)
        f = ins.getTypeName();

    // Let the field's ObjectInspector do the conversion instead of casting to Writables.
    PrimitiveObjectInspector ins2 = (PrimitiveObjectInspector) ins;
    switch (ins2.getPrimitiveCategory()) {
    case DOUBLE:
        Double res = (Double) (((DoubleObjectInspector) ins2).get(obj));
        result = (double) res;
        System.out.println("printlog when double" + result);
        return result;

    case LONG:
        Long res1 = (Long) (((LongObjectInspector) ins2).get(obj));
        result = (long) res1;
        System.out.println("printlog when long" + result);
        return result;

    case STRING:
        result = (((StringObjectInspector) ins2).getPrimitiveJavaObject(obj)).toString();
        System.out.println("printlog when String" + result);
        return result;

    default:
        result = obj;
        return result;
    }
}
After making these changes, the Java Hive UDF started working fine on
Spark SQL.

But it gives different results when the UDF is used in a query.

If you think you can give this issue a shot, please reach out to me
on Hangouts or reply here.

Re: Hive Java UDF running on spark-sql issue

Posted by Alex <si...@gmail.com>.
Yes...

It's taking values from a record, which is a JSON, and converting it into
multiple columns after typecasting...
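
Something roughly like this, although the function name, field names and the
struct-returning shape here are invented purely for illustration:

SELECT rec.customer_id,
       rec.amount
FROM (SELECT json_to_cols(raw_json) AS rec FROM raw_events) x;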

On Wed, Feb 1, 2017 at 4:07 PM, Marco Mistroni <mm...@gmail.com> wrote:

> Hi
>  What is the UDF supposed to do? Are you trying to write a generic
> function to convert values to another type depending on the type of
> the original value?
> Kr

Re: Hive Java UDF running on spark-sql issue

Posted by Marco Mistroni <mm...@gmail.com>.
Hi
What is the UDF supposed to do? Are you trying to write a generic function
to convert values to another type depending on the type of the
original value?
Kr


