You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Cristian Garcia <cg...@gmail.com> on 2017/07/29 19:57:13 UTC

ALSModel.load not working on pyspark 2.1.0

This code is not working:

================
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS, ALSModel
from pyspark.sql import Row

als = ALS(maxIter=10, regParam=0.01, userCol="user_id", itemCol="movie_id",
ratingCol="rating")
model = als.fit(training)

model.save("/models/als")

model2 = ALSModel.load("/models/als") # <-- error here
=================



Gives rise to this error:
=================

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-24-c0454f47bb1d> in <module>()
----> 1 m2 = ALSModel.load("/models/als")

/usr/local/spark/python/pyspark/ml/util.py in load(cls, path)
    251     def load(cls, path):
    252         """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""
--> 253         return cls.read().load(path)
    254
    255

/usr/local/spark/python/pyspark/ml/util.py in load(self, path)
    192         if not isinstance(path, basestring):
    193             raise TypeError("path should be a basestring, got type %s" % type(path))
--> 194         java_obj = self._jread.load(path)
    195         if not hasattr(self._clazz, "_from_java"):
    196             raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"

/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134
   1135         for temp_arg in temp_args:

/usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    317                 raise Py4JJavaError(
    318                     "An error occurred while calling {0}{1}{2}.\n".
--> 319                     format(target_id, ".", name), value)
    320             else:
    321                 raise Py4JError(

Py4JJavaError: An error occurred while calling o337.load.
: java.lang.UnsupportedOperationException: empty collection
	at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1370)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
	at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
	at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:379)
	at org.apache.spark.ml.recommendation.ALSModel$ALSModelReader.load(ALS.scala:317)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:748)

=================

Re: ALSModel.load not working on pyspark 2.1.0

Posted by Cristian Garcia <cg...@gmail.com>.
Thanks Irving,

The problem was that I was running Spark in cluster mode, so I had to save
the model to HDFS and load it from there for save/load to work properly.

On Mon, Jul 31, 2017 at 9:09 AM Irving Duran <ir...@gmail.com> wrote:

> I think the problem is because you are calling "model2 =
> ALSModel.load("/models/als")" instead of "model2 = *model*.load("/models/als")".
> See my working sample below.
>
> >>> model.save('/models/als.test')
> SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
> SLF4J: Defaulting to no-operation (NOP) logger implementation
> SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further
> details.
> >>> model2 = model.load('/models/als.test')
> >>> model
> ALS_4324a1082d889dd1f0e4
> >>> model2
> ALS_4324a1082d889dd1f0e4
>
>
> Thank You,
>
> Irving Duran
>
> On Sat, Jul 29, 2017 at 2:57 PM, Cristian Garcia <cg...@gmail.com>
> wrote:
>
>> This code is not working:
>>
>> ================
>> from pyspark.ml.evaluation import RegressionEvaluator
>> from pyspark.ml.recommendation import ALS, ALSModel
>> from pyspark.sql import Row
>>
>> als = ALS(maxIter=10, regParam=0.01, userCol="user_id",
>> itemCol="movie_id", ratingCol="rating")
>> model = als.fit(training)
>>
>> model.save("/models/als")
>>
>> model2 = ALSModel.load("/models/als") # <-- error here
>> =================
>>
>>
>>
>> Gives rise to this error:
>> =================
>>
>> ---------------------------------------------------------------------------Py4JJavaError                             Traceback (most recent call last)<ipython-input-24-c0454f47bb1d> in <module>()----> 1 m2 = ALSModel.load("/models/als")
>> /usr/local/spark/python/pyspark/ml/util.py in load(cls, path)    251     def load(cls, path):    252         """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""--> 253         return cls.read().load(path)    254     255
>> /usr/local/spark/python/pyspark/ml/util.py in load(self, path)    192         if not isinstance(path, basestring):    193             raise TypeError("path should be a basestring, got type %s" % type(path))--> 194         java_obj = self._jread.load(path)    195         if not hasattr(self._clazz, "_from_java"):    196             raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"
>> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)   1131         answer = self.gateway_client.send_command(command)   1132         return_value = get_return_value(-> 1133             answer, self.gateway_client, self.target_id, self.name)   1134    1135         for temp_arg in temp_args:
>> /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)     61     def deco(*a, **kw):     62         try:---> 63             return f(*a, **kw)     64         except py4j.protocol.Py4JJavaError as e:     65             s = e.java_exception.toString()
>> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)    317                 raise Py4JJavaError(    318                     "An error occurred while calling {0}{1}{2}.\n".--> 319                     format(target_id, ".", name), value)    320             else:    321                 raise Py4JError(
>> Py4JJavaError: An error occurred while calling o337.load.
>> : java.lang.UnsupportedOperationException: empty collection
>> 	at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1370)
>> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>> 	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
>> 	at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
>> 	at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:379)
>> 	at org.apache.spark.ml.recommendation.ALSModel$ALSModelReader.load(ALS.scala:317)
>> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> 	at java.lang.reflect.Method.invoke(Method.java:498)
>> 	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> 	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> 	at py4j.Gateway.invoke(Gateway.java:280)
>> 	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> 	at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> 	at py4j.GatewayConnection.run(GatewayConnection.java:214)
>> 	at java.lang.Thread.run(Thread.java:748)
>>
>> =================
>>
>
>

Re: ALSModel.load not working on pyspark 2.1.0

Posted by Irving Duran <ir...@gmail.com>.
I think the problem is that you are calling "model2 =
ALSModel.load("/models/als")" instead of "model2 =
*model*.load("/models/als")".
See my working sample below.

>>> model.save('/models/als.test')
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further
details.
>>> model2 = model.load('/models/als.test')
>>> model
ALS_4324a1082d889dd1f0e4
>>> model2
ALS_4324a1082d889dd1f0e4


Thank You,

Irving Duran

On Sat, Jul 29, 2017 at 2:57 PM, Cristian Garcia <cg...@gmail.com>
wrote:

> This code is not working:
>
> ================
> from pyspark.ml.evaluation import RegressionEvaluator
> from pyspark.ml.recommendation import ALS, ALSModel
> from pyspark.sql import Row
>
> als = ALS(maxIter=10, regParam=0.01, userCol="user_id",
> itemCol="movie_id", ratingCol="rating")
> model = als.fit(training)
>
> model.save("/models/als")
>
> model2 = ALSModel.load("/models/als") # <-- error here
> =================
>
>
>
> Gives rise to this error:
> =================
>
> ---------------------------------------------------------------------------Py4JJavaError                             Traceback (most recent call last)<ipython-input-24-c0454f47bb1d> in <module>()----> 1 m2 = ALSModel.load("/models/als")
> /usr/local/spark/python/pyspark/ml/util.py in load(cls, path)    251     def load(cls, path):    252         """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""--> 253         return cls.read().load(path)    254     255
> /usr/local/spark/python/pyspark/ml/util.py in load(self, path)    192         if not isinstance(path, basestring):    193             raise TypeError("path should be a basestring, got type %s" % type(path))--> 194         java_obj = self._jread.load(path)    195         if not hasattr(self._clazz, "_from_java"):    196             raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"
> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)   1131         answer = self.gateway_client.send_command(command)   1132         return_value = get_return_value(-> 1133             answer, self.gateway_client, self.target_id, self.name)   1134    1135         for temp_arg in temp_args:
> /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)     61     def deco(*a, **kw):     62         try:---> 63             return f(*a, **kw)     64         except py4j.protocol.Py4JJavaError as e:     65             s = e.java_exception.toString()
> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)    317                 raise Py4JJavaError(    318                     "An error occurred while calling {0}{1}{2}.\n".--> 319                     format(target_id, ".", name), value)    320             else:    321                 raise Py4JError(
> Py4JJavaError: An error occurred while calling o337.load.
> : java.lang.UnsupportedOperationException: empty collection
> 	at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1370)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> 	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
> 	at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
> 	at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:379)
> 	at org.apache.spark.ml.recommendation.ALSModel$ALSModelReader.load(ALS.scala:317)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> 	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> 	at py4j.Gateway.invoke(Gateway.java:280)
> 	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> 	at py4j.commands.CallCommand.execute(CallCommand.java:79)
> 	at py4j.GatewayConnection.run(GatewayConnection.java:214)
> 	at java.lang.Thread.run(Thread.java:748)
>
> =================
>