You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Patrick Wendell (JIRA)" <ji...@apache.org> on 2015/12/03 01:40:10 UTC
[jira] [Updated] (SPARK-12110) spark-1.5.1-bin-hadoop2.6;
pyspark.ml.feature Exception: ("You must build Spark with Hive
[ https://issues.apache.org/jira/browse/SPARK-12110?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Patrick Wendell updated SPARK-12110:
------------------------------------
Component/s: (was: ML)
(was: SQL)
(was: PySpark)
EC2
> spark-1.5.1-bin-hadoop2.6; pyspark.ml.feature Exception: ("You must build Spark with Hive
> --------------------------------------------------------------------------------------------
>
> Key: SPARK-12110
> URL: https://issues.apache.org/jira/browse/SPARK-12110
> Project: Spark
> Issue Type: Bug
> Components: EC2
> Affects Versions: 1.5.1
> Environment: cluster created using spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2
> Reporter: Andrew Davidson
>
> I am using spark-1.5.1-bin-hadoop2.6. I used spark-1.5.1-bin-hadoop2.6/ec2/spark-ec2 to create a cluster and configured spark-env to use python3. I cannot run the tokenizer sample code. Is there a workaround?
> Kind regards
> Andy
> /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self)
> 658 raise Exception("You must build Spark with Hive. "
> 659 "Export 'SPARK_HIVE=true' and run "
> --> 660 "build/sbt assembly", e)
> 661
> 662 def _get_hive_ctx(self):
> Exception: ("You must build Spark with Hive. Export 'SPARK_HIVE=true' and run build/sbt assembly", Py4JJavaError('An error occurred while calling None.org.apache.spark.sql.hive.HiveContext.\n', JavaObject id=o38))
> http://spark.apache.org/docs/latest/ml-features.html#tokenizer
> from pyspark.ml.feature import Tokenizer, RegexTokenizer
> sentenceDataFrame = sqlContext.createDataFrame([
> (0, "Hi I heard about Spark"),
> (1, "I wish Java could use case classes"),
> (2, "Logistic,regression,models,are,neat")
> ], ["label", "sentence"])
> tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
> wordsDataFrame = tokenizer.transform(sentenceDataFrame)
> for words_label in wordsDataFrame.select("words", "label").take(3):
> print(words_label)
> ---------------------------------------------------------------------------
> Py4JJavaError Traceback (most recent call last)
> /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self)
> 654 if not hasattr(self, '_scala_HiveContext'):
> --> 655 self._scala_HiveContext = self._get_hive_ctx()
> 656 return self._scala_HiveContext
> /root/spark/python/pyspark/sql/context.py in _get_hive_ctx(self)
> 662 def _get_hive_ctx(self):
> --> 663 return self._jvm.HiveContext(self._jsc.sc())
> 664
> /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
> 700 return_value = get_return_value(answer, self._gateway_client, None,
> --> 701 self._fqn)
> 702
> /root/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
> 35 try:
> ---> 36 return f(*a, **kw)
> 37 except py4j.protocol.Py4JJavaError as e:
> /root/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
> 299 'An error occurred while calling {0}{1}{2}.\n'.
> --> 300 format(target_id, '.', name), value)
> 301 else:
> Py4JJavaError: An error occurred while calling None.org.apache.spark.sql.hive.HiveContext.
> : java.lang.RuntimeException: java.io.IOException: Filesystem closed
> at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
> at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:171)
> at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162)
> at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160)
> at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:167)
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
> at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234)
> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
> at py4j.Gateway.invoke(Gateway.java:214)
> at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79)
> at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68)
> at py4j.GatewayConnection.run(GatewayConnection.java:207)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: java.io.IOException: Filesystem closed
> at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:323)
> at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1057)
> at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:554)
> at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
> at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
> at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
> ... 15 more
> During handling of the above exception, another exception occurred:
> Exception Traceback (most recent call last)
> <ipython-input-1-0beb490d573c> in <module>()
> 5 (1, "I wish Java could use case classes"),
> 6 (2, "Logistic,regression,models,are,neat")
> ----> 7 ], ["label", "sentence"])
> 8 tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
> 9 wordsDataFrame = tokenizer.transform(sentenceDataFrame)
> /root/spark/python/pyspark/sql/context.py in createDataFrame(self, data, schema, samplingRatio)
> 406 rdd, schema = self._createFromLocal(data, schema)
> 407 jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
> --> 408 jdf = self._ssql_ctx.applySchemaToPythonRDD(jrdd.rdd(), schema.json())
> 409 df = DataFrame(jdf, self)
> 410 df._schema = schema
> /root/spark/python/pyspark/sql/context.py in _ssql_ctx(self)
> 658 raise Exception("You must build Spark with Hive. "
> 659 "Export 'SPARK_HIVE=true' and run "
> --> 660 "build/sbt assembly", e)
> 661
> 662 def _get_hive_ctx(self):
> Exception: ("You must build Spark with Hive. Export 'SPARK_HIVE=true' and run build/sbt assembly", Py4JJavaError('An error occurred while calling None.org.apache.spark.sql.hive.HiveContext.\n', JavaObject id=o38))
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org