Posted to user@spark.apache.org by Nomii5007 <in...@gmail.com> on 2016/07/21 17:58:48 UTC

How to resolve the "You must build Spark with Hive" exception?

Hello, I know this question has been asked before, but no one answered it, which is why I am asking again.
I am using the Anaconda 3.5 distribution and Spark 1.6.2.
I have been following this blog post:
http://nbviewer.jupyter.org/github/bensadeghi/pyspark-churn-prediction/blob/master/churn-prediction.ipynb
It was running fine until I reached the 7th cell:
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import UserDefinedFunction

binary_map = {'Yes':1.0, 'No':0.0, 'True':1.0, 'False':0.0}
toNum = UserDefinedFunction(lambda k: binary_map[k], DoubleType())

CV_data = CV_data.drop('State').drop('Area code') \
    .drop('Total day charge').drop('Total eve charge') \
    .drop('Total night charge').drop('Total intl charge') \
    .withColumn('Churn', toNum(CV_data['Churn'])) \
    .withColumn('International plan', toNum(CV_data['International plan'])) \
    .withColumn('Voice mail plan', toNum(CV_data['Voice mail plan'])).cache()

final_test_data = final_test_data.drop('State').drop('Area code') \
    .drop('Total day charge').drop('Total eve charge') \
    .drop('Total night charge').drop('Total intl charge') \
    .withColumn('Churn', toNum(final_test_data['Churn'])) \
    .withColumn('International plan', toNum(final_test_data['International plan'])) \
    .withColumn('Voice mail plan', toNum(final_test_data['Voice mail plan'])).cache()
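
For reference, here is a minimal, untested sketch of the same Yes/No -> 1.0/0.0 mapping written with Spark's built-in when/otherwise column functions instead of a Python UDF. The column names are taken from the snippet above; note that, unlike binary_map, it maps any unlisted value to 0.0 rather than raising a KeyError, and it never constructs a UserDefinedFunction, which is the call that fails in the traceback below.

from pyspark.sql.functions import when, col

def to_num(name):
    # Map 'Yes'/'True' to 1.0 and anything else (e.g. 'No'/'False') to 0.0
    # using built-in column expressions, so no Python UDF is created.
    return when(col(name).isin('Yes', 'True'), 1.0).otherwise(0.0)

CV_data = CV_data.withColumn('Churn', to_num('Churn')) \
    .withColumn('International plan', to_num('International plan')) \
    .withColumn('Voice mail plan', to_num('Voice mail plan'))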


Here I am getting this exception:

You must build Spark with Hive. Export 'SPARK_HIVE=true' and run build/sbt assembly
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-7-6db2287430d4> in <module>()
      3 
      4 binary_map = {'Yes':1.0, 'No':0.0, 'True':1.0, 'False':0.0}
----> 5 toNum = UserDefinedFunction(lambda k: binary_map[k], DoubleType())
      6 
      7 CV_data = CV_data.drop('State').drop('Area code')     .drop('Total day charge').drop('Total eve charge')     .drop('Total night charge').drop('Total intl charge')     .withColumn('Churn', toNum(CV_data['Churn']))     .withColumn('International plan', toNum(CV_data['International plan']))     .withColumn('Voice mail plan', toNum(CV_data['Voice mail plan'])).cache()

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\functions.py in __init__(self, func, returnType, name)
   1556         self.returnType = returnType
   1557         self._broadcast = None
-> 1558         self._judf = self._create_judf(name)
   1559 
   1560     def _create_judf(self, name):

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\functions.py in _create_judf(self, name)
   1567         pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command, self)
   1568         ctx = SQLContext.getOrCreate(sc)
-> 1569         jdt = ctx._ssql_ctx.parseDataType(self.returnType.json())
   1570         if name is None:
   1571             name = f.__name__ if hasattr(f, '__name__') else f.__class__.__name__

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\context.py in _ssql_ctx(self)
    681         try:
    682             if not hasattr(self, '_scala_HiveContext'):
--> 683                 self._scala_HiveContext = self._get_hive_ctx()
    684             return self._scala_HiveContext
    685         except Py4JError as e:

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\context.py in _get_hive_ctx(self)
    690 
    691     def _get_hive_ctx(self):
--> 692         return self._jvm.HiveContext(self._jsc.sc())
    693 
    694     def refreshTable(self, tableName):

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\lib\py4j-0.9-src.zip\py4j\java_gateway.py in __call__(self, *args)
   1062         answer = self._gateway_client.send_command(command)
   1063         return_value = get_return_value(
-> 1064             answer, self._gateway_client, None, self._fqn)
   1065 
   1066         for temp_arg in temp_args:

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\utils.py in deco(*a, **kw)
     43     def deco(*a, **kw):
     44         try:
---> 45             return f(*a, **kw)
     46         except py4j.protocol.Py4JJavaError as e:
     47             s = e.java_exception.toString()

C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\lib\py4j-0.9-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    306                 raise Py4JJavaError(
    307                     "An error occurred while calling {0}{1}{2}.\n".
--> 308                     format(target_id, ".", name), value)
    309             else:
    310                 raise Py4JError(

Py4JJavaError: An error occurred while calling None.org.apache.spark.sql.hive.HiveContext.
: java.lang.RuntimeException: java.lang.NullPointerException
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
	at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:204)
	at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:238)
	at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:218)
	at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:208)
	at org.apache.spark.sql.hive.HiveContext.functionRegistry$lzycompute(HiveContext.scala:462)
	at org.apache.spark.sql.hive.HiveContext.functionRegistry(HiveContext.scala:461)
	at org.apache.spark.sql.UDFRegistration.<init>(UDFRegistration.scala:40)
	at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:330)
	at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:90)
	at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
	at py4j.Gateway.invoke(Gateway.java:214)
	at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79)
	at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68)
	at py4j.GatewayConnection.run(GatewayConnection.java:209)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.NullPointerException
	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
	at org.apache.hadoop.util.Shell.runCommand(Shell.java:445)
	at org.apache.hadoop.util.Shell.run(Shell.java:418)
	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:739)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:722)
	at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
	at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:559)
	at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:534)
	at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
	at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
	... 21 more
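
For what it's worth, the "Caused by" section points at the likely root cause: the NullPointerException is thrown from java.lang.ProcessBuilder.start via org.apache.hadoop.util.Shell while Hive tries to set permissions on its scratch directory. On Windows this usually means Hadoop's winutils.exe cannot be found, not that Spark was actually built without Hive (the prebuilt 1.6.2 binaries ship with Hive support, so the exception message is misleading here). A commonly suggested workaround, with C:\hadoop used below only as an example path, is:

import os

# Assumed example path: winutils.exe must exist in %HADOOP_HOME%\bin.
# This has to run before the SparkContext is created, because the
# environment is read when the JVM is launched.
os.environ['HADOOP_HOME'] = r'C:\hadoop'

# Then, once, from a cmd prompt, make Hive's scratch directory writable:
#   C:\hadoop\bin\winutils.exe chmod 777 C:\tmp\hive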



