You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Anwar AliKhan <an...@gmail.com> on 2020/06/20 10:17:08 UTC
Re: Hey good looking toPandas () error stack
Two versions of Spark running against same code
https://towardsdatascience.com/your-first-apache-spark-ml-model-d2bb82b599dd
version spark-2.4.6-bin-hadoop2.7 is producing error for toPandas(). See
error stack below
Jupyter Notebook
import findspark
findspark.init('/home/spark-3.0.0-bin-hadoop2.7')
cell "spark"
cell output
SparkSession - in-memory
SparkContext
Spark UI
Version
v3.0.0
Master
local[*]
AppName
Titanic Data
import findspark
findspark.init('/home/spark-2.4.6-bin-hadoop2.7')
cell "spark"
cell output
SparkSession - in-memory
SparkContext
Spark UI
Version
v2.4.6
Master
local[*]
AppName
Titanic Data
cell "df.show(5)"
+-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
|PassengerId|Survived|Pclass| Name| Sex|Age|SibSp|Parch|
Ticket| Fare|Cabin|Embarked|
+-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
| 1| 0| 3|Braund, Mr. Owen ...| male| 22| 1| 0|
A/5 21171| 7.25| null| S|
| 2| 1| 1|Cumings, Mrs. Joh...|female| 38| 1| 0|
PC 17599|71.2833| C85| C|
| 3| 1| 3|Heikkinen, Miss. ...|female| 26| 0|
0|STON/O2. 3101282| 7.925| null| S|
| 4| 1| 1|Futrelle, Mrs. Ja...|female| 35| 1| 0|
113803| 53.1| C123| S|
| 5| 0| 3|Allen, Mr. Willia...| male| 35| 0| 0|
373450| 8.05| null| S|
+-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
only showing top 5 rows
cell "df.toPandas()"
cell output
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
/home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
**kw)
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
/home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py
in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
Py4JJavaError: An error occurred while calling o33.collectToPython.
: java.lang.IllegalArgumentException: Unsupported class file major version
55
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:166)
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:148)
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:136)
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:237)
at
org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:50)
at
org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:845)
at
org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:828)
at
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
at
scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
at
scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
at
scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
at scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:134)
at
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
at
org.apache.spark.util.FieldAccessFinder$$anon$4.visitMethodInsn(ClosureCleaner.scala:828)
at org.apache.xbean.asm6.ClassReader.readCode(ClassReader.java:2175)
at org.apache.xbean.asm6.ClassReader.readMethod(ClassReader.java:1238)
at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:631)
at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:355)
at
org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:272)
at
org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:271)
at scala.collection.immutable.List.foreach(List.scala:392)
at
org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:271)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:163)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2100)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
at
org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
at
org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)
at
org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
at
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:834)
During handling of the above exception, another exception occurred:
IllegalArgumentException Traceback (most recent call last)
<ipython-input-10-a516097529d7> in <module>
----> 1 df.toPandas()
/home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
toPandas(self)
2153
2154 # Below is toPandas without Arrow optimization.
-> 2155 pdf = pd.DataFrame.from_records(self.collect(),
columns=self.columns)
2156 column_counter = Counter(self.columns)
2157
/home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
collect(self)
533 """
534 with SCCallSiteSync(self._sc) as css:
--> 535 sock_info = self._jdf.collectToPython()
536 return list(_load_from_socket(sock_info,
BatchedSerializer(PickleSerializer())))
537
/home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py
in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
**kw)
77 raise QueryExecutionException(s.split(': ', 1)[1],
stackTrace)
78 if s.startswith('java.lang.IllegalArgumentException: '):
---> 79 raise IllegalArgumentException(s.split(': ', 1)[1],
stackTrace)
80 raise
81 return deco
IllegalArgumentException: 'Unsupported class file major version 55'
On Fri, 19 Jun 2020, 08:06 Stephen Boesch, <ja...@gmail.com> wrote:
> afaik It has been there since Spark 2.0 in 2015. Not certain about
> Spark 1.5/1.6
>
> On Thu, 18 Jun 2020 at 23:56, Anwar AliKhan <an...@gmail.com>
> wrote:
>
>> I first ran the command
>> df.show()
>>
>> For sanity check of my dataFrame.
>>
>> I wasn't impressed with the display.
>>
>> I then ran
>> df.toPandas() in Jupyter Notebook.
>>
>> Now the display is really good looking .
>>
>> Is toPandas() a new function which became available in Spark 3.0 ?
>>
>>
>>
>>
>>
>>
Re: Hey good looking toPandas () error stack
Posted by Sean Owen <sr...@gmail.com>.
That part isn't related to Spark. It means you have some code compiled for
Java 11, but are running Java 8.
On Sun, Jun 21, 2020 at 1:51 PM randy clinton <ra...@gmail.com>
wrote:
> You can see from the GitHub history for "toPandas()" that the function has
> been in the code for 5 years.
>
> https://github.com/apache/spark/blame/a075cd5b700f88ef447b559c6411518136558d78/python/pyspark/sql/dataframe.py#L923
>
> When I google IllegalArgumentException: 'Unsupported class file major
> version 55'
>
> I see posts about the Java version being used. Are you sure your configs
> are right?
>
> https://stackoverflow.com/questions/53583199/pyspark-error-unsupported-class-file-major-version
>
> On Sat, Jun 20, 2020 at 6:17 AM Anwar AliKhan <an...@gmail.com>
> wrote:
>
>>
>> Two versions of Spark running against same code
>>
>>
>> https://towardsdatascience.com/your-first-apache-spark-ml-model-d2bb82b599dd
>>
>> version spark-2.4.6-bin-hadoop2.7 is producing error for toPandas(). See
>> error stack below
>>
>> Jupyter Notebook
>>
>> import findspark
>>
>> findspark.init('/home/spark-3.0.0-bin-hadoop2.7')
>>
>> cell "spark"
>>
>> cell output
>>
>> SparkSession - in-memory
>>
>> SparkContext
>>
>> Spark UI
>>
>> Version
>>
>> v3.0.0
>>
>> Master
>>
>> local[*]
>>
>> AppName
>>
>> Titanic Data
>>
>>
>> import findspark
>>
>> findspark.init('/home/spark-2.4.6-bin-hadoop2.7')
>>
>> cell "spark"
>>
>>
>>
>> cell output
>>
>> SparkSession - in-memory
>>
>> SparkContext
>>
>> Spark UI
>>
>> Version
>>
>> v2.4.6
>>
>> Master
>>
>> local[*]
>>
>> AppName
>>
>> Titanic Data
>>
>> cell "df.show(5)"
>>
>>
>> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>>
>> |PassengerId|Survived|Pclass| Name|
>> Sex|Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|
>>
>>
>> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>>
>> | 1| 0| 3|Braund, Mr. Owen ...| male| 22| 1| 0|
>> A/5 21171| 7.25| null| S|
>>
>> | 2| 1| 1|Cumings, Mrs. Joh...|female| 38| 1|
>> 0| PC 17599|71.2833| C85| C|
>>
>> | 3| 1| 3|Heikkinen, Miss. ...|female| 26| 0|
>> 0|STON/O2. 3101282| 7.925| null| S|
>>
>> | 4| 1| 1|Futrelle, Mrs. Ja...|female| 35| 1|
>> 0| 113803| 53.1| C123| S|
>>
>> | 5| 0| 3|Allen, Mr. Willia...| male| 35| 0|
>> 0| 373450| 8.05| null| S|
>>
>>
>> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>>
>> only showing top 5 rows
>>
>> cell "df.toPandas()"
>>
>> cell output
>>
>>
>> ---------------------------------------------------------------------------
>>
>> Py4JJavaError Traceback (most recent call
>> last)
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
>> **kw)
>>
>> 62 try:
>>
>> ---> 63 return f(*a, **kw)
>>
>> 64 except py4j.protocol.Py4JJavaError as e:
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py
>> in get_return_value(answer, gateway_client, target_id, name)
>>
>> 327 "An error occurred while calling
>> {0}{1}{2}.\n".
>>
>> --> 328 format(target_id, ".", name), value)
>>
>> 329 else:
>>
>> Py4JJavaError: An error occurred while calling o33.collectToPython.
>>
>> : java.lang.IllegalArgumentException: Unsupported class file major
>> version 55
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:166)
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:148)
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:136)
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:237)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:50)
>>
>> at
>> org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:845)
>>
>> at
>> org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:828)
>>
>> at
>> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
>>
>> at
>> scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
>>
>> at
>> scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
>>
>> at
>> scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
>>
>> at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
>>
>> at
>> scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:134)
>>
>> at
>> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
>>
>> at
>> org.apache.spark.util.FieldAccessFinder$$anon$4.visitMethodInsn(ClosureCleaner.scala:828)
>>
>> at org.apache.xbean.asm6.ClassReader.readCode(ClassReader.java:2175)
>>
>> at
>> org.apache.xbean.asm6.ClassReader.readMethod(ClassReader.java:1238)
>>
>> at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:631)
>>
>> at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:355)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:272)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:271)
>>
>> at scala.collection.immutable.List.foreach(List.scala:392)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:271)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:163)
>>
>> at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
>>
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2100)
>>
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
>>
>> at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
>>
>> at
>> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>>
>> at
>> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>>
>> at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>>
>> at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
>>
>> at
>> org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
>>
>> at
>> org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)
>>
>> at
>> org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)
>>
>> at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
>>
>> at
>> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
>>
>> at
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
>>
>> at
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
>>
>> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
>>
>> at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
>>
>> at
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> Method)
>>
>> at
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>
>> at
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>
>> at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>>
>> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>>
>> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>>
>> at py4j.Gateway.invoke(Gateway.java:282)
>>
>> at
>> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>>
>> at py4j.commands.CallCommand.execute(CallCommand.java:79)
>>
>> at py4j.GatewayConnection.run(GatewayConnection.java:238)
>>
>> at java.base/java.lang.Thread.run(Thread.java:834)
>>
>>
>> During handling of the above exception, another exception occurred:
>>
>> IllegalArgumentException Traceback (most recent call
>> last)
>>
>> <ipython-input-10-a516097529d7> in <module>
>>
>> ----> 1 df.toPandas()
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
>> toPandas(self)
>>
>> 2153
>>
>> 2154 # Below is toPandas without Arrow optimization.
>>
>> -> 2155 pdf = pd.DataFrame.from_records(self.collect(),
>> columns=self.columns)
>>
>> 2156 column_counter = Counter(self.columns)
>>
>> 2157
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
>> collect(self)
>>
>> 533 """
>>
>> 534 with SCCallSiteSync(self._sc) as css:
>>
>> --> 535 sock_info = self._jdf.collectToPython()
>>
>> 536 return list(_load_from_socket(sock_info,
>> BatchedSerializer(PickleSerializer())))
>>
>> 537
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py
>> in __call__(self, *args)
>>
>> 1255 answer = self.gateway_client.send_command(command)
>>
>> 1256 return_value = get_return_value(
>>
>> -> 1257 answer, self.gateway_client, self.target_id,
>> self.name)
>>
>> 1258
>>
>> 1259 for temp_arg in temp_args:
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
>> **kw)
>>
>> 77 raise QueryExecutionException(s.split(': ',
>> 1)[1], stackTrace)
>>
>> 78 if s.startswith('java.lang.IllegalArgumentException:
>> '):
>>
>> ---> 79 raise IllegalArgumentException(s.split(': ',
>> 1)[1], stackTrace)
>>
>> 80 raise
>>
>> 81 return deco
>>
>> IllegalArgumentException: 'Unsupported class file major version 55'
>>
>>
>> On Fri, 19 Jun 2020, 08:06 Stephen Boesch, <ja...@gmail.com> wrote:
>>
>>> afaik It has been there since Spark 2.0 in 2015. Not certain about
>>> Spark 1.5/1.6
>>>
>>> On Thu, 18 Jun 2020 at 23:56, Anwar AliKhan <an...@gmail.com>
>>> wrote:
>>>
>>>> I first ran the command
>>>> df.show()
>>>>
>>>> For sanity check of my dataFrame.
>>>>
>>>> I wasn't impressed with the display.
>>>>
>>>> I then ran
>>>> df.toPandas() in Jupyter Notebook.
>>>>
>>>> Now the display is really good looking .
>>>>
>>>> Is toPandas() a new function which became available in Spark 3.0 ?
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>
> --
> I appreciate your time,
>
> ~Randy
>
Re: Hey good looking toPandas () error stack
Posted by Anwar AliKhan <an...@gmail.com>.
The only change I am making is spark directory name.
It keeps failing in this same cell. df.toPandas()
findspark.init('/home/spark-2.4.6-bin-hadoop2.7') FAIL
findspark.init('/home/spark-3.0.0-bin-hadoop2.7'). PASS
On Sun, 21 Jun 2020, 19:51 randy clinton, <ra...@gmail.com> wrote:
> You can see from the GitHub history for "toPandas()" that the function has
> been in the code for 5 years.
>
> https://github.com/apache/spark/blame/a075cd5b700f88ef447b559c6411518136558d78/python/pyspark/sql/dataframe.py#L923
>
> When I google IllegalArgumentException: 'Unsupported class file major
> version 55'
>
> I see posts about the Java version being used. Are you sure your configs
> are right?
>
> https://stackoverflow.com/questions/53583199/pyspark-error-unsupported-class-file-major-version
>
> On Sat, Jun 20, 2020 at 6:17 AM Anwar AliKhan <an...@gmail.com>
> wrote:
>
>>
>> Two versions of Spark running against same code
>>
>>
>> https://towardsdatascience.com/your-first-apache-spark-ml-model-d2bb82b599dd
>>
>> version spark-2.4.6-bin-hadoop2.7 is producing error for toPandas(). See
>> error stack below
>>
>> Jupyter Notebook
>>
>> import findspark
>>
>> findspark.init('/home/spark-3.0.0-bin-hadoop2.7')
>>
>> cell "spark"
>>
>> cell output
>>
>> SparkSession - in-memory
>>
>> SparkContext
>>
>> Spark UI
>>
>> Version
>>
>> v3.0.0
>>
>> Master
>>
>> local[*]
>>
>> AppName
>>
>> Titanic Data
>>
>>
>> import findspark
>>
>> findspark.init('/home/spark-2.4.6-bin-hadoop2.7')
>>
>> cell "spark"
>>
>>
>>
>> cell output
>>
>> SparkSession - in-memory
>>
>> SparkContext
>>
>> Spark UI
>>
>> Version
>>
>> v2.4.6
>>
>> Master
>>
>> local[*]
>>
>> AppName
>>
>> Titanic Data
>>
>> cell "df.show(5)"
>>
>>
>> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>>
>> |PassengerId|Survived|Pclass| Name|
>> Sex|Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|
>>
>>
>> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>>
>> | 1| 0| 3|Braund, Mr. Owen ...| male| 22| 1| 0|
>> A/5 21171| 7.25| null| S|
>>
>> | 2| 1| 1|Cumings, Mrs. Joh...|female| 38| 1|
>> 0| PC 17599|71.2833| C85| C|
>>
>> | 3| 1| 3|Heikkinen, Miss. ...|female| 26| 0|
>> 0|STON/O2. 3101282| 7.925| null| S|
>>
>> | 4| 1| 1|Futrelle, Mrs. Ja...|female| 35| 1|
>> 0| 113803| 53.1| C123| S|
>>
>> | 5| 0| 3|Allen, Mr. Willia...| male| 35| 0|
>> 0| 373450| 8.05| null| S|
>>
>>
>> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>>
>> only showing top 5 rows
>>
>> cell "df.toPandas()"
>>
>> cell output
>>
>>
>> ---------------------------------------------------------------------------
>>
>> Py4JJavaError Traceback (most recent call
>> last)
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
>> **kw)
>>
>> 62 try:
>>
>> ---> 63 return f(*a, **kw)
>>
>> 64 except py4j.protocol.Py4JJavaError as e:
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py
>> in get_return_value(answer, gateway_client, target_id, name)
>>
>> 327 "An error occurred while calling
>> {0}{1}{2}.\n".
>>
>> --> 328 format(target_id, ".", name), value)
>>
>> 329 else:
>>
>> Py4JJavaError: An error occurred while calling o33.collectToPython.
>>
>> : java.lang.IllegalArgumentException: Unsupported class file major
>> version 55
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:166)
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:148)
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:136)
>>
>> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:237)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:50)
>>
>> at
>> org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:845)
>>
>> at
>> org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:828)
>>
>> at
>> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
>>
>> at
>> scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
>>
>> at
>> scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
>>
>> at
>> scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
>>
>> at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
>>
>> at
>> scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:134)
>>
>> at
>> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
>>
>> at
>> org.apache.spark.util.FieldAccessFinder$$anon$4.visitMethodInsn(ClosureCleaner.scala:828)
>>
>> at org.apache.xbean.asm6.ClassReader.readCode(ClassReader.java:2175)
>>
>> at
>> org.apache.xbean.asm6.ClassReader.readMethod(ClassReader.java:1238)
>>
>> at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:631)
>>
>> at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:355)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:272)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:271)
>>
>> at scala.collection.immutable.List.foreach(List.scala:392)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:271)
>>
>> at
>> org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:163)
>>
>> at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
>>
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2100)
>>
>> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
>>
>> at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
>>
>> at
>> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>>
>> at
>> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>>
>> at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>>
>> at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
>>
>> at
>> org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
>>
>> at
>> org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)
>>
>> at
>> org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)
>>
>> at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
>>
>> at
>> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
>>
>> at
>> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
>>
>> at
>> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
>>
>> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
>>
>> at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
>>
>> at
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
>> Method)
>>
>> at
>> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>
>> at
>> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>
>> at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>>
>> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>>
>> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>>
>> at py4j.Gateway.invoke(Gateway.java:282)
>>
>> at
>> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>>
>> at py4j.commands.CallCommand.execute(CallCommand.java:79)
>>
>> at py4j.GatewayConnection.run(GatewayConnection.java:238)
>>
>> at java.base/java.lang.Thread.run(Thread.java:834)
>>
>>
>> During handling of the above exception, another exception occurred:
>>
>> IllegalArgumentException Traceback (most recent call
>> last)
>>
>> <ipython-input-10-a516097529d7> in <module>
>>
>> ----> 1 df.toPandas()
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
>> toPandas(self)
>>
>> 2153
>>
>> 2154 # Below is toPandas without Arrow optimization.
>>
>> -> 2155 pdf = pd.DataFrame.from_records(self.collect(),
>> columns=self.columns)
>>
>> 2156 column_counter = Counter(self.columns)
>>
>> 2157
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
>> collect(self)
>>
>> 533 """
>>
>> 534 with SCCallSiteSync(self._sc) as css:
>>
>> --> 535 sock_info = self._jdf.collectToPython()
>>
>> 536 return list(_load_from_socket(sock_info,
>> BatchedSerializer(PickleSerializer())))
>>
>> 537
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py
>> in __call__(self, *args)
>>
>> 1255 answer = self.gateway_client.send_command(command)
>>
>> 1256 return_value = get_return_value(
>>
>> -> 1257 answer, self.gateway_client, self.target_id,
>> self.name)
>>
>> 1258
>>
>> 1259 for temp_arg in temp_args:
>>
>> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
>> **kw)
>>
>> 77 raise QueryExecutionException(s.split(': ',
>> 1)[1], stackTrace)
>>
>> 78 if s.startswith('java.lang.IllegalArgumentException:
>> '):
>>
>> ---> 79 raise IllegalArgumentException(s.split(': ',
>> 1)[1], stackTrace)
>>
>> 80 raise
>>
>> 81 return deco
>>
>> IllegalArgumentException: 'Unsupported class file major version 55'
>>
>>
>> On Fri, 19 Jun 2020, 08:06 Stephen Boesch, <ja...@gmail.com> wrote:
>>
>>> afaik It has been there since Spark 2.0 in 2015. Not certain about
>>> Spark 1.5/1.6
>>>
>>> On Thu, 18 Jun 2020 at 23:56, Anwar AliKhan <an...@gmail.com>
>>> wrote:
>>>
>>>> I first ran the command
>>>> df.show()
>>>>
>>>> For sanity check of my dataFrame.
>>>>
>>>> I wasn't impressed with the display.
>>>>
>>>> I then ran
>>>> df.toPandas() in Jupyter Notebook.
>>>>
>>>> Now the display is really good looking .
>>>>
>>>> Is toPandas() a new function which became available in Spark 3.0 ?
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>
> --
> I appreciate your time,
>
> ~Randy
>
Re: Hey good looking toPandas () error stack
Posted by randy clinton <ra...@gmail.com>.
You can see from the GitHub history for "toPandas()" that the function has
been in the code for 5 years.
https://github.com/apache/spark/blame/a075cd5b700f88ef447b559c6411518136558d78/python/pyspark/sql/dataframe.py#L923
When I google IllegalArgumentException: 'Unsupported class file major
version 55'
I see posts about the Java version being used. Are you sure your configs
are right?
https://stackoverflow.com/questions/53583199/pyspark-error-unsupported-class-file-major-version
On Sat, Jun 20, 2020 at 6:17 AM Anwar AliKhan <an...@gmail.com>
wrote:
>
> Two versions of Spark running against same code
>
>
> https://towardsdatascience.com/your-first-apache-spark-ml-model-d2bb82b599dd
>
> version spark-2.4.6-bin-hadoop2.7 is producing error for toPandas(). See
> error stack below
>
> Jupyter Notebook
>
> import findspark
>
> findspark.init('/home/spark-3.0.0-bin-hadoop2.7')
>
> cell "spark"
>
> cell output
>
> SparkSession - in-memory
>
> SparkContext
>
> Spark UI
>
> Version
>
> v3.0.0
>
> Master
>
> local[*]
>
> AppName
>
> Titanic Data
>
>
> import findspark
>
> findspark.init('/home/spark-2.4.6-bin-hadoop2.7')
>
> cell "spark"
>
>
>
> cell output
>
> SparkSession - in-memory
>
> SparkContext
>
> Spark UI
>
> Version
>
> v2.4.6
>
> Master
>
> local[*]
>
> AppName
>
> Titanic Data
>
> cell "df.show(5)"
>
>
> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>
> |PassengerId|Survived|Pclass| Name| Sex|Age|SibSp|Parch|
> Ticket| Fare|Cabin|Embarked|
>
>
> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>
> | 1| 0| 3|Braund, Mr. Owen ...| male| 22| 1| 0|
> A/5 21171| 7.25| null| S|
>
> | 2| 1| 1|Cumings, Mrs. Joh...|female| 38| 1| 0|
> PC 17599|71.2833| C85| C|
>
> | 3| 1| 3|Heikkinen, Miss. ...|female| 26| 0|
> 0|STON/O2. 3101282| 7.925| null| S|
>
> | 4| 1| 1|Futrelle, Mrs. Ja...|female| 35| 1| 0|
> 113803| 53.1| C123| S|
>
> | 5| 0| 3|Allen, Mr. Willia...| male| 35| 0| 0|
> 373450| 8.05| null| S|
>
>
> +-----------+--------+------+--------------------+------+---+-----+-----+----------------+-------+-----+--------+
>
> only showing top 5 rows
>
> cell "df.toPandas()"
>
> cell output
>
> ---------------------------------------------------------------------------
>
> Py4JJavaError Traceback (most recent call last)
>
> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
> **kw)
>
> 62 try:
>
> ---> 63 return f(*a, **kw)
>
> 64 except py4j.protocol.Py4JJavaError as e:
>
> /home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py
> in get_return_value(answer, gateway_client, target_id, name)
>
> 327 "An error occurred while calling {0}{1}{2}.\n".
>
> --> 328 format(target_id, ".", name), value)
>
> 329 else:
>
> Py4JJavaError: An error occurred while calling o33.collectToPython.
>
> : java.lang.IllegalArgumentException: Unsupported class file major version
> 55
>
> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:166)
>
> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:148)
>
> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:136)
>
> at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:237)
>
> at
> org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:50)
>
> at
> org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:845)
>
> at
> org.apache.spark.util.FieldAccessFinder$$anon$4$$anonfun$visitMethodInsn$7.apply(ClosureCleaner.scala:828)
>
> at
> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
>
> at
> scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
>
> at
> scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
>
> at
> scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
>
> at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
>
> at scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:134)
>
> at
> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
>
> at
> org.apache.spark.util.FieldAccessFinder$$anon$4.visitMethodInsn(ClosureCleaner.scala:828)
>
> at org.apache.xbean.asm6.ClassReader.readCode(ClassReader.java:2175)
>
> at org.apache.xbean.asm6.ClassReader.readMethod(ClassReader.java:1238)
>
> at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:631)
>
> at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:355)
>
> at
> org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:272)
>
> at
> org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:271)
>
> at scala.collection.immutable.List.foreach(List.scala:392)
>
> at
> org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:271)
>
> at
> org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:163)
>
> at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
>
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2100)
>
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
>
> at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
>
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>
> at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
>
> at
> org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
>
> at
> org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)
>
> at
> org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)
>
> at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
>
> at
> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
>
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
>
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
>
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
>
> at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
>
> at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
>
> at
> java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>
> at
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>
> at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>
> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>
> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>
> at py4j.Gateway.invoke(Gateway.java:282)
>
> at
> py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>
> at py4j.commands.CallCommand.execute(CallCommand.java:79)
>
> at py4j.GatewayConnection.run(GatewayConnection.java:238)
>
> at java.base/java.lang.Thread.run(Thread.java:834)
>
>
> During handling of the above exception, another exception occurred:
>
> IllegalArgumentException Traceback (most recent call last)
>
> <ipython-input-10-a516097529d7> in <module>
>
> ----> 1 df.toPandas()
>
> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
> toPandas(self)
>
> 2153
>
> 2154 # Below is toPandas without Arrow optimization.
>
> -> 2155 pdf = pd.DataFrame.from_records(self.collect(),
> columns=self.columns)
>
> 2156 column_counter = Counter(self.columns)
>
> 2157
>
> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/dataframe.py in
> collect(self)
>
> 533 """
>
> 534 with SCCallSiteSync(self._sc) as css:
>
> --> 535 sock_info = self._jdf.collectToPython()
>
> 536 return list(_load_from_socket(sock_info,
> BatchedSerializer(PickleSerializer())))
>
> 537
>
> /home/spark-2.4.6-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py
> in __call__(self, *args)
>
> 1255 answer = self.gateway_client.send_command(command)
>
> 1256 return_value = get_return_value(
>
> -> 1257 answer, self.gateway_client, self.target_id, self.name
> )
>
> 1258
>
> 1259 for temp_arg in temp_args:
>
> /home/spark-2.4.6-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a,
> **kw)
>
> 77 raise QueryExecutionException(s.split(': ', 1)[1],
> stackTrace)
>
> 78 if s.startswith('java.lang.IllegalArgumentException:
> '):
>
> ---> 79 raise IllegalArgumentException(s.split(': ',
> 1)[1], stackTrace)
>
> 80 raise
>
> 81 return deco
>
> IllegalArgumentException: 'Unsupported class file major version 55'
>
>
> On Fri, 19 Jun 2020, 08:06 Stephen Boesch, <ja...@gmail.com> wrote:
>
>> AFAIK it has been there since Spark 2.0 in 2015. Not certain about
>> Spark 1.5/1.6
>>
>> On Thu, 18 Jun 2020 at 23:56, Anwar AliKhan <an...@gmail.com>
>> wrote:
>>
>>> I first ran the command
>>> df.show()
>>>
>>> For sanity check of my dataFrame.
>>>
>>> I wasn't impressed with the display.
>>>
>>> I then ran
>>> df.toPandas() in Jupyter Notebook.
>>>
>>> Now the display is really good-looking.
>>>
>>> Is toPandas() a new function which became available in Spark 3.0 ?
>>>
>>>
>>>
>>>
>>>
>>>
--
I appreciate your time,
~Randy