You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@spark.apache.org by "Prabeesh K." <pr...@gmail.com> on 2015/09/08 10:45:03 UTC

Pyspark DataFrame TypeError

I am trying to run the RandomForestClassifier example code from the PySpark
1.4.1 documentation,
https://spark.apache.org/docs/1.4.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier
.

Below is a screenshot of the IPython notebook.



But for df.columns, it shows the following error.


TypeError                                 Traceback (most recent call
last)<ipython-input-79-6a4642092433> in <module>()----> 1 df.columns
/home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in
columns(self)    484         ['age', 'name']    485         """--> 486
        return [f.name for f in self.schema.fields]    487     488
@ignore_unicode_prefix
/home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in
schema(self)    194         """    195         if self._schema is
None:--> 196             self._schema =
_parse_datatype_json_string(self._jdf.schema().json())    197
return self._schema    198
/home/datasci/src/spark/python/pyspark/sql/types.pyc in
_parse_datatype_json_string(json_string)    519     >>>
check_datatype(structtype_with_udt)    520     """--> 521     return
_parse_datatype_json_value(json.loads(json_string))    522     523
/home/datasci/src/spark/python/pyspark/sql/types.pyc in
_parse_datatype_json_value(json_value)    539         tpe =
json_value["type"]    540         if tpe in _all_complex_types:--> 541
            return _all_complex_types[tpe].fromJson(json_value)    542
        elif tpe == 'udt':    543             return
UserDefinedType.fromJson(json_value)
/home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
json)    386     @classmethod    387     def fromJson(cls, json):-->
388         return StructType([StructField.fromJson(f) for f in
json["fields"]])    389     390
/home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
json)    347     def fromJson(cls, json):    348         return
StructField(json["name"],--> 349
_parse_datatype_json_value(json["type"]),    350
     json["nullable"],    351
json["metadata"])
/home/datasci/src/spark/python/pyspark/sql/types.pyc in
_parse_datatype_json_value(json_value)    541             return
_all_complex_types[tpe].fromJson(json_value)    542         elif tpe
== 'udt':--> 543             return
UserDefinedType.fromJson(json_value)    544         else:    545
      raise ValueError("not supported type: %s" % tpe)
/home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
json)    453         pyModule = pyUDT[:split]    454         pyClass =
pyUDT[split+1:]--> 455         m = __import__(pyModule, globals(),
locals(), [pyClass])    456         UDT = getattr(m, pyClass)    457
      return UDT()
TypeError: Item in ``from list'' not a string

Re: Pyspark DataFrame TypeError

Posted by "Prabeesh K." <pr...@gmail.com>.
Thanks for the reply. After rebuilding, it now looks good.

On 8 September 2015 at 22:38, Davies Liu <da...@databricks.com> wrote:

> I tried with Python 2.7/3.4 and Spark 1.4.1/1.5-RC3, they all work as
> expected:
>
> ```
> >>> from pyspark.mllib.linalg import Vectors
> >>> df = sqlContext.createDataFrame([(1.0, Vectors.dense([1.0])), (0.0,
> Vectors.sparse(1, [], []))], ["label", "featuers"])
> >>> df.show()
> +-----+---------+
> |label| featuers|
> +-----+---------+
> |  1.0|    [1.0]|
> |  0.0|(1,[],[])|
> +-----+---------+
>
> >>> df.columns
> ['label', 'featuers']
> ```
>
> On Tue, Sep 8, 2015 at 1:45 AM, Prabeesh K. <pr...@gmail.com> wrote:
> > I am trying to run the code RandomForestClassifier example in the PySpark
> > 1.4.1 documentation,
> >
> https://spark.apache.org/docs/1.4.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier
> .
> >
> > Below is screen shot of ipython notebook
> >
> >
> >
> > But for df.columns. It shows following error.
> >
> >
> > TypeError                                 Traceback (most recent call
> last)
> > <ipython-input-79-6a4642092433> in <module>()
> > ----> 1 df.columns
> >
> > /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in columns(self)
> >     484         ['age', 'name']
> >     485         """
> > --> 486         return [f.name for f in self.schema.fields]
> >     487
> >     488     @ignore_unicode_prefix
> >
> > /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in schema(self)
> >     194         """
> >     195         if self._schema is None:
> > --> 196             self._schema =
> > _parse_datatype_json_string(self._jdf.schema().json())
> >     197         return self._schema
> >     198
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> > _parse_datatype_json_string(json_string)
> >     519     >>> check_datatype(structtype_with_udt)
> >     520     """
> > --> 521     return _parse_datatype_json_value(json.loads(json_string))
> >     522
> >     523
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> > _parse_datatype_json_value(json_value)
> >     539         tpe = json_value["type"]
> >     540         if tpe in _all_complex_types:
> > --> 541             return _all_complex_types[tpe].fromJson(json_value)
> >     542         elif tpe == 'udt':
> >     543             return UserDefinedType.fromJson(json_value)
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
> json)
> >     386     @classmethod
> >     387     def fromJson(cls, json):
> > --> 388         return StructType([StructField.fromJson(f) for f in
> > json["fields"]])
> >     389
> >     390
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
> json)
> >     347     def fromJson(cls, json):
> >     348         return StructField(json["name"],
> > --> 349
> _parse_datatype_json_value(json["type"]),
> >     350                            json["nullable"],
> >     351                            json["metadata"])
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> > _parse_datatype_json_value(json_value)
> >     541             return _all_complex_types[tpe].fromJson(json_value)
> >     542         elif tpe == 'udt':
> > --> 543             return UserDefinedType.fromJson(json_value)
> >     544         else:
> >     545             raise ValueError("not supported type: %s" % tpe)
> >
> > /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls,
> json)
> >     453         pyModule = pyUDT[:split]
> >     454         pyClass = pyUDT[split+1:]
> > --> 455         m = __import__(pyModule, globals(), locals(), [pyClass])
> >     456         UDT = getattr(m, pyClass)
> >     457         return UDT()
> >
> > TypeError: Item in ``from list'' not a string
> >
> >
> >
> >
> >
>

Re: Pyspark DataFrame TypeError

Posted by Davies Liu <da...@databricks.com>.
I tried with Python 2.7/3.4 and Spark 1.4.1/1.5-RC3, they all work as expected:

```
>>> from pyspark.mllib.linalg import Vectors
>>> df = sqlContext.createDataFrame([(1.0, Vectors.dense([1.0])), (0.0, Vectors.sparse(1, [], []))], ["label", "featuers"])
>>> df.show()
+-----+---------+
|label| featuers|
+-----+---------+
|  1.0|    [1.0]|
|  0.0|(1,[],[])|
+-----+---------+

>>> df.columns
['label', 'featuers']
```

On Tue, Sep 8, 2015 at 1:45 AM, Prabeesh K. <pr...@gmail.com> wrote:
> I am trying to run the code RandomForestClassifier example in the PySpark
> 1.4.1 documentation,
> https://spark.apache.org/docs/1.4.1/api/python/pyspark.ml.html#pyspark.ml.classification.RandomForestClassifier.
>
> Below is screen shot of ipython notebook
>
>
>
> But for df.columns. It shows following error.
>
>
> TypeError                                 Traceback (most recent call last)
> <ipython-input-79-6a4642092433> in <module>()
> ----> 1 df.columns
>
> /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in columns(self)
>     484         ['age', 'name']
>     485         """
> --> 486         return [f.name for f in self.schema.fields]
>     487
>     488     @ignore_unicode_prefix
>
> /home/datasci/src/spark/python/pyspark/sql/dataframe.pyc in schema(self)
>     194         """
>     195         if self._schema is None:
> --> 196             self._schema =
> _parse_datatype_json_string(self._jdf.schema().json())
>     197         return self._schema
>     198
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> _parse_datatype_json_string(json_string)
>     519     >>> check_datatype(structtype_with_udt)
>     520     """
> --> 521     return _parse_datatype_json_value(json.loads(json_string))
>     522
>     523
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> _parse_datatype_json_value(json_value)
>     539         tpe = json_value["type"]
>     540         if tpe in _all_complex_types:
> --> 541             return _all_complex_types[tpe].fromJson(json_value)
>     542         elif tpe == 'udt':
>     543             return UserDefinedType.fromJson(json_value)
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls, json)
>     386     @classmethod
>     387     def fromJson(cls, json):
> --> 388         return StructType([StructField.fromJson(f) for f in
> json["fields"]])
>     389
>     390
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls, json)
>     347     def fromJson(cls, json):
>     348         return StructField(json["name"],
> --> 349                            _parse_datatype_json_value(json["type"]),
>     350                            json["nullable"],
>     351                            json["metadata"])
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in
> _parse_datatype_json_value(json_value)
>     541             return _all_complex_types[tpe].fromJson(json_value)
>     542         elif tpe == 'udt':
> --> 543             return UserDefinedType.fromJson(json_value)
>     544         else:
>     545             raise ValueError("not supported type: %s" % tpe)
>
> /home/datasci/src/spark/python/pyspark/sql/types.pyc in fromJson(cls, json)
>     453         pyModule = pyUDT[:split]
>     454         pyClass = pyUDT[split+1:]
> --> 455         m = __import__(pyModule, globals(), locals(), [pyClass])
>     456         UDT = getattr(m, pyClass)
>     457         return UDT()
>
> TypeError: Item in ``from list'' not a string
>
>
>
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@spark.apache.org
For additional commands, e-mail: dev-help@spark.apache.org