You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@spark.apache.org by "Prabeesh K." <pr...@gmail.com> on 2015/09/08 10:45:03 UTC
Pyspark DataFrame TypeError
I am trying to run the RandomForestClassifier example code from the PySpark
1.4.1 documentation:
https://spark.apache.org/docs/1.4.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier
Below is a screenshot of the IPython notebook.
But for df.columns, it shows the following error.
TypeError Traceback (most recent call
last)<ipython-input-79-6a4642092433> in <module>()----> 1 df.columns
/home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in
columns(self) 484 ['age', 'name'] 485 """--> 486
return [f.name for f in self.schema.fields] 487 488
@ignore_unicode_prefix
/home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in
schema(self) 194 """ 195 if self._schema is
None:--> 196 self._schema =
_parse_datatype_json_string(self._jdf.schema().json()) 197
return self._schema 198
/home/datasci/src/spark/python/pyspark/sql/types.pyc in
_parse_datatype_json_string(json_string) 519 >>>
check_datatype(structtype_with_udt) 520 """--> 521 return
_parse_datatype_json_value(json.loads(json_string)) 522 523
/home/datasci/src/spark/python/pyspark/sql/types.pyc in
_parse_datatype_json_value(json_value) 539 tpe =
json_value["type"] 540 if tpe in _all_complex_types:--> 541
return _all_complex_types[tpe].fromJson(json_value) 542
elif tpe == 'udt': 543 return
UserDefinedType.fromJson(json_value)
/home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
json) 386 @classmethod 387 def fromJson(cls, json):-->
388 return StructType([StructField.fromJson(f) for f in
json["fields"]]) 389 390
/home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
json) 347 def fromJson(cls, json): 348 return
StructField(json["name"],--> 349
_parse_datatype_json_value(json["type"]), 350
json["nullable"], 351
json["metadata"])
/home/datasci/src/spark/python/pyspark/sql/types.pyc in
_parse_datatype_json_value(json_value) 541 return
_all_complex_types[tpe].fromJson(json_value) 542 elif tpe
== 'udt':--> 543 return
UserDefinedType.fromJson(json_value) 544 else: 545
raise ValueError("not supported type: %s" % tpe)
/home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
json) 453 pyModule = pyUDT[:split] 454 pyClass =
pyUDT[split+1:]--> 455 m = __import__(pyModule, globals(),
locals(), [pyClass]) 456 UDT = getattr(m, pyClass) 457
return UDT()
TypeError: Item in ``from list'' not a string
Re: Pyspark DataFrame TypeError
Posted by "Prabeesh K." <pr...@gmail.com>.
Thanks for the reply. After rebuilding, it now works as expected.
On 8 September 2015 at 22:38, Davies Liu <da...@databricks.com> wrote:
> I tried with Python 2.7/3.4 and Spark 1.4.1/1.5-RC3, they all work as
> expected:
>
> ```
> >>> from pyspark.mllib.linalg import Vectors
> >>> df = sqlContext.createDataFrame([(1.0, Vectors.dense([1.0])), (0.0,
> Vectors.sparse(1, [], []))], ["label", "featuers"])
> >>> df.show()
> +-----+---------+
> |label| featuers|
> +-----+---------+
> | 1.0| [1.0]|
> | 0.0|(1,[],[])|
> +-----+---------+
>
> >>> df.columns
> ['label', 'featuers']
> ```
>
> On Tue, Sep 8, 2015 at 1:45 AM, Prabeesh K. <pr...@gmail.com> wrote:
> > I am trying to run the code RandomForestClassifier example in the PySpark
> > 1.4.1 documentation,
> >
> https://spark.apache.org/docs/1.4.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier
> .
> >
> > Below is screen shot of ipython notebook
> >
> >
> >
> > But for df.columns. It shows following error.
> >
> >
> > TypeError Traceback (most recent call
> last)
> > <ipython-input-79-6a4642092433> in <module>()
> > ----> 1 df.columns
> >
> > /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in columns(self)
> > 484 ['age', 'name']
> > 485 """
> > --> 486 return [f.name for f in self.schema.fields]
> > 487
> > 488 @ignore_unicode_prefix
> >
> > /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in schema(self)
> > 194 """
> > 195 if self._schema is None:
> > --> 196 self._schema =
> > _parse_datatype_json_string(self._jdf.schema().json())
> > 197 return self._schema
> > 198
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> > _parse_datatype_json_string(json_string)
> > 519 >>> check_datatype(structtype_with_udt)
> > 520 """
> > --> 521 return _parse_datatype_json_value(json.loads(json_string))
> > 522
> > 523
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> > _parse_datatype_json_value(json_value)
> > 539 tpe = json_value["type"]
> > 540 if tpe in _all_complex_types:
> > --> 541 return _all_complex_types[tpe].fromJson(json_value)
> > 542 elif tpe == 'udt':
> > 543 return UserDefinedType.fromJson(json_value)
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
> json)
> > 386 @classmethod
> > 387 def fromJson(cls, json):
> > --> 388 return StructType([StructField.fromJson(f) for f in
> > json["fields"]])
> > 389
> > 390
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
> json)
> > 347 def fromJson(cls, json):
> > 348 return StructField(json["name"],
> > --> 349
> _parse_datatype_json_value(json["type"]),
> > 350 json["nullable"],
> > 351 json["metadata"])
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> > _parse_datatype_json_value(json_value)
> > 541 return _all_complex_types[tpe].fromJson(json_value)
> > 542 elif tpe == 'udt':
> > --> 543 return UserDefinedType.fromJson(json_value)
> > 544 else:
> > 545 raise ValueError("not supported type: %s" % tpe)
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
> json)
> > 453 pyModule = pyUDT[:split]
> > 454 pyClass = pyUDT[split+1:]
> > --> 455 m = __import__(pyModule, globals(), locals(), [pyClass])
> > 456 UDT = getattr(m, pyClass)
> > 457 return UDT()
> >
> > TypeError: Item in ``from list'' not a string
> >
> >
> >
> >
> >
>
Re: Pyspark DataFrame TypeError
Posted by Davies Liu <da...@databricks.com>.
I tried with Python 2.7/3.4 and Spark 1.4.1/1.5-RC3, they all work as expected:
```
>>> from pyspark.mllib.linalg import Vectors
>>> df = sqlContext.createDataFrame([(1.0, Vectors.dense([1.0])), (0.0, Vectors.sparse(1, [], []))], ["label", "featuers"])
>>> df.show()
+-----+---------+
|label| featuers|
+-----+---------+
| 1.0| [1.0]|
| 0.0|(1,[],[])|
+-----+---------+
>>> df.columns
['label', 'featuers']
```
On Tue, Sep 8, 2015 at 1:45 AM, Prabeesh K. <pr...@gmail.com> wrote:
> I am trying to run the code RandomForestClassifier example in the PySpark
> 1.4.1 documentation,
> https://spark.apache.org/docs/1.4.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier.
>
> Below is screen shot of ipython notebook
>
>
>
> But for df.columns. It shows following error.
>
>
> TypeError Traceback (most recent call last)
> <ipython-input-79-6a4642092433> in <module>()
> ----> 1 df.columns
>
> /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in columns(self)
> 484 ['age', 'name']
> 485 """
> --> 486 return [f.name for f in self.schema.fields]
> 487
> 488 @ignore_unicode_prefix
>
> /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in schema(self)
> 194 """
> 195 if self._schema is None:
> --> 196 self._schema =
> _parse_datatype_json_string(self._jdf.schema().json())
> 197 return self._schema
> 198
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> _parse_datatype_json_string(json_string)
> 519 >>> check_datatype(structtype_with_udt)
> 520 """
> --> 521 return _parse_datatype_json_value(json.loads(json_string))
> 522
> 523
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> _parse_datatype_json_value(json_value)
> 539 tpe = json_value["type"]
> 540 if tpe in _all_complex_types:
> --> 541 return _all_complex_types[tpe].fromJson(json_value)
> 542 elif tpe == 'udt':
> 543 return UserDefinedType.fromJson(json_value)
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls, json)
> 386 @classmethod
> 387 def fromJson(cls, json):
> --> 388 return StructType([StructField.fromJson(f) for f in
> json["fields"]])
> 389
> 390
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls, json)
> 347 def fromJson(cls, json):
> 348 return StructField(json["name"],
> --> 349 _parse_datatype_json_value(json["type"]),
> 350 json["nullable"],
> 351 json["metadata"])
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> _parse_datatype_json_value(json_value)
> 541 return _all_complex_types[tpe].fromJson(json_value)
> 542 elif tpe == 'udt':
> --> 543 return UserDefinedType.fromJson(json_value)
> 544 else:
> 545 raise ValueError("not supported type: %s" % tpe)
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls, json)
> 453 pyModule = pyUDT[:split]
> 454 pyClass = pyUDT[split+1:]
> --> 455 m = __import__(pyModule, globals(), locals(), [pyClass])
> 456 UDT = getattr(m, pyClass)
> 457 return UDT()
>
> TypeError: Item in ``from list'' not a string
>
>
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@spark.apache.org
For additional commands, e-mail: dev-help@spark.apache.org