You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Cristian Garcia <cg...@gmail.com> on 2017/07/29 19:57:13 UTC
ALSModel.load not working on pyspark 2.1.0
This code is not working:
================
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS, ALSModel
from pyspark.sql import Row
als = ALS(maxIter=10, regParam=0.01, userCol="user_id", itemCol="movie_id",
ratingCol="rating")
model = als.fit(training)
model.save("/models/als")
model2 = ALSModel.load("/models/als") # <-- error here
=================
Gives rise to this error:
=================
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-24-c0454f47bb1d> in <module>()
----> 1 m2 = ALSModel.load("/models/als")

/usr/local/spark/python/pyspark/ml/util.py in load(cls, path)
    251     def load(cls, path):
    252         """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""
--> 253         return cls.read().load(path)
    254
    255

/usr/local/spark/python/pyspark/ml/util.py in load(self, path)
    192         if not isinstance(path, basestring):
    193             raise TypeError("path should be a basestring, got type %s" % type(path))
--> 194         java_obj = self._jread.load(path)
    195         if not hasattr(self._clazz, "_from_java"):
    196             raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"

/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134
   1135         for temp_arg in temp_args:

/usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    317                 raise Py4JJavaError(
    318                     "An error occurred while calling {0}{1}{2}.\n".
--> 319                     format(target_id, ".", name), value)
    320             else:
    321                 raise Py4JError(

Py4JJavaError: An error occurred while calling o337.load.
: java.lang.UnsupportedOperationException: empty collection
at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1370)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:379)
at org.apache.spark.ml.recommendation.ALSModel$ALSModelReader.load(ALS.scala:317)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
=================
Re: ALSModel.load not working on pyspark 2.1.0
Posted by Cristian Garcia <cg...@gmail.com>.
Thanks Irving,
The problem was that I was running Spark in cluster mode and had to use
HDFS to save and load the model properly.
On Mon, Jul 31, 2017 at 9:09 AM Irving Duran <ir...@gmail.com> wrote:
> I think the problem is because you are calling "model2 =
> ALSModel.load("/models/als")" instead of "model2 = *model*.load("/models/als")".
> See my working sample below.
>
> >>> model.save('/models/als.test')
> SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
> SLF4J: Defaulting to no-operation (NOP) logger implementation
> SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further
> details.
> >>> model2 = model.load('/models/als.test')
> >>> model
> ALS_4324a1082d889dd1f0e4
> >>> model2
> ALS_4324a1082d889dd1f0e4
>
>
> Thank You,
>
> Irving Duran
>
> On Sat, Jul 29, 2017 at 2:57 PM, Cristian Garcia <cg...@gmail.com>
> wrote:
>
>> This code is not working:
>>
>> ================
>> from pyspark.ml.evaluation import RegressionEvaluator
>> from pyspark.ml.recommendation import ALS, ALSModel
>> from pyspark.sql import Row
>>
>> als = ALS(maxIter=10, regParam=0.01, userCol="user_id",
>> itemCol="movie_id", ratingCol="rating")
>> model = als.fit(training)
>>
>> model.save("/models/als")
>>
>> model2 = ALSModel.load("/models/als") # <-- error here
>> =================
>>
>>
>>
>> Gives rise to this error:
>> =================
>>
>> ---------------------------------------------------------------------------Py4JJavaError Traceback (most recent call last)<ipython-input-24-c0454f47bb1d> in <module>()----> 1 m2 = ALSModel.load("/models/als")
>> /usr/local/spark/python/pyspark/ml/util.py in load(cls, path) 251 def load(cls, path): 252 """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""--> 253 return cls.read().load(path) 254 255
>> /usr/local/spark/python/pyspark/ml/util.py in load(self, path) 192 if not isinstance(path, basestring): 193 raise TypeError("path should be a basestring, got type %s" % type(path))--> 194 java_obj = self._jread.load(path) 195 if not hasattr(self._clazz, "_from_java"): 196 raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"
>> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args) 1131 answer = self.gateway_client.send_command(command) 1132 return_value = get_return_value(-> 1133 answer, self.gateway_client, self.target_id, self.name) 1134 1135 for temp_arg in temp_args:
>> /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 61 def deco(*a, **kw): 62 try:---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString()
>> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 317 raise Py4JJavaError( 318 "An error occurred while calling {0}{1}{2}.\n".--> 319 format(target_id, ".", name), value) 320 else: 321 raise Py4JError(
>> Py4JJavaError: An error occurred while calling o337.load.
>> : java.lang.UnsupportedOperationException: empty collection
>> at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1370)
>> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>> at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
>> at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
>> at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:379)
>> at org.apache.spark.ml.recommendation.ALSModel$ALSModelReader.load(ALS.scala:317)
>> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> at java.lang.reflect.Method.invoke(Method.java:498)
>> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> at py4j.Gateway.invoke(Gateway.java:280)
>> at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> at py4j.GatewayConnection.run(GatewayConnection.java:214)
>> at java.lang.Thread.run(Thread.java:748)
>>
>> =================
>>
>
>
Re: ALSModel.load not working on pyspark 2.1.0
Posted by Irving Duran <ir...@gmail.com>.
I think the problem is that you are calling "model2 =
ALSModel.load("/models/als")" instead of "model2 =
*model*.load("/models/als")".
See my working sample below.
>>> model.save('/models/als.test')
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further
details.
>>> model2 = model.load('/models/als.test')
>>> model
ALS_4324a1082d889dd1f0e4
>>> model2
ALS_4324a1082d889dd1f0e4
Thank You,
Irving Duran
On Sat, Jul 29, 2017 at 2:57 PM, Cristian Garcia <cg...@gmail.com>
wrote:
> This code is not working:
>
> ================
> from pyspark.ml.evaluation import RegressionEvaluator
> from pyspark.ml.recommendation import ALS, ALSModel
> from pyspark.sql import Row
>
> als = ALS(maxIter=10, regParam=0.01, userCol="user_id",
> itemCol="movie_id", ratingCol="rating")
> model = als.fit(training)
>
> model.save("/models/als")
>
> model2 = ALSModel.load("/models/als") # <-- error here
> =================
>
>
>
> Gives rise to this error:
> =================
>
> ---------------------------------------------------------------------------Py4JJavaError Traceback (most recent call last)<ipython-input-24-c0454f47bb1d> in <module>()----> 1 m2 = ALSModel.load("/models/als")
> /usr/local/spark/python/pyspark/ml/util.py in load(cls, path) 251 def load(cls, path): 252 """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""--> 253 return cls.read().load(path) 254 255
> /usr/local/spark/python/pyspark/ml/util.py in load(self, path) 192 if not isinstance(path, basestring): 193 raise TypeError("path should be a basestring, got type %s" % type(path))--> 194 java_obj = self._jread.load(path) 195 if not hasattr(self._clazz, "_from_java"): 196 raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"
> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args) 1131 answer = self.gateway_client.send_command(command) 1132 return_value = get_return_value(-> 1133 answer, self.gateway_client, self.target_id, self.name) 1134 1135 for temp_arg in temp_args:
> /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 61 def deco(*a, **kw): 62 try:---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString()
> /usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 317 raise Py4JJavaError( 318 "An error occurred while calling {0}{1}{2}.\n".--> 319 format(target_id, ".", name), value) 320 else: 321 raise Py4JError(
> Py4JJavaError: An error occurred while calling o337.load.
> : java.lang.UnsupportedOperationException: empty collection
> at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1370)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
> at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
> at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:379)
> at org.apache.spark.ml.recommendation.ALSModel$ALSModelReader.load(ALS.scala:317)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
> at py4j.Gateway.invoke(Gateway.java:280)
> at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
> at py4j.commands.CallCommand.execute(CallCommand.java:79)
> at py4j.GatewayConnection.run(GatewayConnection.java:214)
> at java.lang.Thread.run(Thread.java:748)
>
> =================
>