You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hadoop.apache.org by mw <mw...@plista.com> on 2015/01/09 10:30:01 UTC
Using Mahout 1.0-SNAPSHOT with yarn cluster
Hello,
I am working on a web application that should execute LDA on an external
YARN cluster.
I am uploading all the relevant sequence files onto the YARN cluster.
This is how I try to remotely execute LDA on the cluster.
try {
ugi.doAs(new PrivilegedExceptionAction<Void>() {
public Void run() throws Exception {
Configuration hdoopConf = new Configuration();
hdoopConf.set("fs.defaultFS",
"hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
hdoopConf.set("yarn.resourcemanager.hostname",
"xxx.xxx.xxx.xxx");
hdoopConf.set("mapreduce.framework.name", "yarn");
hdoopConf.set("mapred.framework.name", "yarn");
hdoopConf.set("mapred.job.tracker",
"xxx.xxx.xxx.xxx");
hdoopConf.set("dfs.permissions.enabled", "false");
hdoopConf.set("hadoop.job.ugi", "xx");
hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
CVB0Driver driver = new CVB0Driver();
try {
driver.run(hdoopConf,
sparseVectorIn.suffix("/matrix"),
topicsOut, k, numTerms,
doc_topic_smoothening, term_topic_smoothening,
maxIter, iteration_block_size,
convergenceDelta,
sparseVectorIn.suffix("/dictionary.file-0"),
topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
seed, testFraction, numTrainThreads,
numUpdateThreads, maxItersPerDoc,
numReduceTasks, backfillPerplexity);
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
return null;
}
});
} catch (InterruptedException e) {
e.printStackTrace();
}
I am getting the following error message:
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:344)
at
org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
at
org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
at
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
at
org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
at
org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:344)
at
org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
at
org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
at
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
at
org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
at
org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:344)
at
org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
at
org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
at
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
at
org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
at
org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:344)
at
org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
at
org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
at
org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
at
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
at
org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
at
org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
java.lang.InterruptedException: Failed to complete iteration 1 stage 1
at
org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
at
org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
...
I was told on the user-mahout mailing list that the LDA jar Mahout sends
to YARN should contain all relevant classes. Any idea why this error is
thrown nevertheless?
Best,
Max
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
According to http://www.java2s.com/Code/Jar/m/Downloadmahoutmath07jar.htm ,
the Vector class is in the mahout-math-<version> jar.
You can pass the location of the mahout-math-<version> jar through the
"tmpjars" config, as shown below.
Cheers
On Fri, Jan 9, 2015 at 7:59 AM, mw <mw...@plista.com> wrote:
> Ty, I figured out that the jar file that the CVB0Driver of Mahout sends
> to YARN does not contain the class org.apache.mahout.math.Vector.
> Is there a way to include it in the jar through the config, or maybe
> another way?
>
> Best,
> Max
>
>
> On 01/09/2015 04:23 PM, Ted Yu wrote:
>
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
>
>> In your driver, you can call:
>>
>> job.setJarByClass(theclass.class);
>>
>> where theclass can be one of the classes in mahout jar.
>>
>> FYI
>>
>> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>>
>>> Hello,
>>>
>>> I am working on a web application that should execute LDA on an external
>>> YARN cluster.
>>>
>>> I am uploading all the relevant sequence files onto the YARN cluster.
>>> This is how I try to remotely execute LDA on the cluster.
>>>
>>> try {
>>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>>> public Void run() throws Exception {
>>> Configuration hdoopConf = new Configuration();
>>> hdoopConf.set("fs.defaultFS",
>>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>>> hdoopConf.set("yarn.resourcemanager.hostname",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("mapreduce.framework.name", "yarn");
>>> hdoopConf.set("mapred.framework.name", "yarn");
>>> hdoopConf.set("mapred.job.tracker",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("dfs.permissions.enabled", "false");
>>> hdoopConf.set("hadoop.job.ugi", "xx");
>>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>>> CVB0Driver driver = new CVB0Driver();
>>> try {
>>> driver.run(hdoopConf,
>>> sparseVectorIn.suffix("/matrix"),
>>> topicsOut, k, numTerms,
>>> doc_topic_smoothening, term_topic_smoothening,
>>> maxIter, iteration_block_size,
>>> convergenceDelta,
>>> sparseVectorIn.suffix("/dictionary.file-0"), topicsOut.suffix("
>>> */DocumentTopics/*"), sparseVectorIn,
>>> seed, testFraction, numTrainThreads,
>>> numUpdateThreads, maxItersPerDoc,
>>> numReduceTasks, backfillPerplexity);
>>> } catch (ClassNotFoundException e) {
>>> e.printStackTrace();
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>> return null;
>>> }
>>> });
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>>
>>> I am getting the following error message:
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>>> ...
>>>
>>> I was told on the user-mahout mailing list that the LDA jar Mahout sends
>>> to YARN should contain all relevant classes. Any idea why this error is
>>> thrown nevertheless?
>>>
>>> Best,
>>> Max
>>>
>>
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
According to http://www.java2s.com/Code/Jar/m/Downloadmahoutmath07jar.htm ,
the Vector class is in the mahout-math-<version> jar.
You can pass the location of the mahout-math-<version> jar through the
"tmpjars" config, as shown below.
Cheers
On Fri, Jan 9, 2015 at 7:59 AM, mw <mw...@plista.com> wrote:
> Ty, I figured out that the jar file that the CVB0Driver of Mahout sends
> to YARN does not contain the class org.apache.mahout.math.Vector.
> Is there a way to include it in the jar through the config, or maybe
> another way?
>
> Best,
> Max
>
>
> On 01/09/2015 04:23 PM, Ted Yu wrote:
>
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
>
>> In your driver, you can call:
>>
>> job.setJarByClass(theclass.class);
>>
>> where theclass can be one of the classes in mahout jar.
>>
>> FYI
>>
>> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>>
>>> Hello,
>>>
>>> I am working on a web application that should execute LDA on an external
>>> YARN cluster.
>>>
>>> I am uploading all the relevant sequence files onto the YARN cluster.
>>> This is how I try to remotely execute LDA on the cluster.
>>>
>>> try {
>>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>>> public Void run() throws Exception {
>>> Configuration hdoopConf = new Configuration();
>>> hdoopConf.set("fs.defaultFS",
>>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>>> hdoopConf.set("yarn.resourcemanager.hostname",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("mapreduce.framework.name", "yarn");
>>> hdoopConf.set("mapred.framework.name", "yarn");
>>> hdoopConf.set("mapred.job.tracker",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("dfs.permissions.enabled", "false");
>>> hdoopConf.set("hadoop.job.ugi", "xx");
>>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>>> CVB0Driver driver = new CVB0Driver();
>>> try {
>>> driver.run(hdoopConf,
>>> sparseVectorIn.suffix("/matrix"),
>>> topicsOut, k, numTerms,
>>> doc_topic_smoothening, term_topic_smoothening,
>>> maxIter, iteration_block_size,
>>> convergenceDelta,
>>> sparseVectorIn.suffix("/dictionary.file-0"), topicsOut.suffix("
>>> */DocumentTopics/*"), sparseVectorIn,
>>> seed, testFraction, numTrainThreads,
>>> numUpdateThreads, maxItersPerDoc,
>>> numReduceTasks, backfillPerplexity);
>>> } catch (ClassNotFoundException e) {
>>> e.printStackTrace();
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>> return null;
>>> }
>>> });
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>>
>>> I am getting the following error message:
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>>> ...
>>>
>>> I was told on the user-mahout mailing list that the LDA jar Mahout sends
>>> to YARN should contain all relevant classes. Any idea why this error is
>>> thrown nevertheless?
>>>
>>> Best,
>>> Max
>>>
>>
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
According to http://www.java2s.com/Code/Jar/m/Downloadmahoutmath07jar.htm ,
the Vector class is in the mahout-math-<version> jar.
You can pass the location of the mahout-math-<version> jar through the
"tmpjars" config, as shown below.
Cheers
On Fri, Jan 9, 2015 at 7:59 AM, mw <mw...@plista.com> wrote:
> Ty, I figured out that the jar file that the CVB0Driver of Mahout sends
> to YARN does not contain the class org.apache.mahout.math.Vector.
> Is there a way to include it in the jar through the config, or maybe
> another way?
>
> Best,
> Max
>
>
> On 01/09/2015 04:23 PM, Ted Yu wrote:
>
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
>
>> In your driver, you can call:
>>
>> job.setJarByClass(theclass.class);
>>
>> where theclass can be one of the classes in mahout jar.
>>
>> FYI
>>
>> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>>
>>> Hello,
>>>
>>> I am working on a web application that should execute LDA on an external
>>> YARN cluster.
>>>
>>> I am uploading all the relevant sequence files onto the YARN cluster.
>>> This is how I try to remotely execute LDA on the cluster.
>>>
>>> try {
>>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>>> public Void run() throws Exception {
>>> Configuration hdoopConf = new Configuration();
>>> hdoopConf.set("fs.defaultFS",
>>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>>> hdoopConf.set("yarn.resourcemanager.hostname",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("mapreduce.framework.name", "yarn");
>>> hdoopConf.set("mapred.framework.name", "yarn");
>>> hdoopConf.set("mapred.job.tracker",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("dfs.permissions.enabled", "false");
>>> hdoopConf.set("hadoop.job.ugi", "xx");
>>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>>> CVB0Driver driver = new CVB0Driver();
>>> try {
>>> driver.run(hdoopConf,
>>> sparseVectorIn.suffix("/matrix"),
>>> topicsOut, k, numTerms,
>>> doc_topic_smoothening, term_topic_smoothening,
>>> maxIter, iteration_block_size,
>>> convergenceDelta,
>>> sparseVectorIn.suffix("/dictionary.file-0"), topicsOut.suffix("
>>> */DocumentTopics/*"), sparseVectorIn,
>>> seed, testFraction, numTrainThreads,
>>> numUpdateThreads, maxItersPerDoc,
>>> numReduceTasks, backfillPerplexity);
>>> } catch (ClassNotFoundException e) {
>>> e.printStackTrace();
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>> return null;
>>> }
>>> });
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>>
>>> I am getting the following error message:
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>>> ...
>>>
>>> I was told on the user-mahout mailing list that the LDA jar Mahout sends
>>> to YARN should contain all relevant classes. Any idea why this error is
>>> thrown nevertheless?
>>>
>>> Best,
>>> Max
>>>
>>
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
According to http://www.java2s.com/Code/Jar/m/Downloadmahoutmath07jar.htm ,
the Vector class is in the mahout-math jar.
You can pass the location of the mahout-math jar through the "tmpjars" config
as shown below.
Cheers
On Fri, Jan 9, 2015 at 7:59 AM, mw <mw...@plista.com> wrote:
> Ty, I figured out that the jar file that Mahout's CVB0Driver sends
> to YARN does not contain the class org.apache.mahout.math.Vector.
> Is there a way to include it in the jar through the config, or maybe
> some other way?
>
> Best,
> Max
>
>
> On 01/09/2015 04:23 PM, Ted Yu wrote:
>
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
>
>> In your driver, you can call:
>>
>> job.setJarByClass(theclass.class);
>>
>> where theclass can be one of the classes in mahout jar.
>>
>> FYI
>>
>> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>>
>>> Hello,
>>>
>>> I am working on a web application that should execute LDA on an external
>>> YARN cluster.
>>>
>>> I am uploading all the relevant sequence files onto the YARN cluster.
>>> This is how I try to remotely execute LDA on the cluster.
>>>
>>> try {
>>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>>> public Void run() throws Exception {
>>> Configuration hdoopConf = new Configuration();
>>> hdoopConf.set("fs.defaultFS",
>>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>>> hdoopConf.set("yarn.resourcemanager.hostname",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("mapreduce.framework.name", "yarn");
>>> hdoopConf.set("mapred.framework.name", "yarn");
>>> hdoopConf.set("mapred.job.tracker",
>>> "xxx.xxx.xxx.xxx");
>>> hdoopConf.set("dfs.permissions.enabled", "false");
>>> hdoopConf.set("hadoop.job.ugi", "xx");
>>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>>> CVB0Driver driver = new CVB0Driver();
>>> try {
>>> driver.run(hdoopConf,
>>> sparseVectorIn.suffix("/matrix"),
>>> topicsOut, k, numTerms,
>>> doc_topic_smoothening, term_topic_smoothening,
>>> maxIter, iteration_block_size,
>>> convergenceDelta,
>>> sparseVectorIn.suffix("/dictionary.file-0"), topicsOut.suffix("
>>> */DocumentTopics/*"), sparseVectorIn,
>>> seed, testFraction, numTrainThreads,
>>> numUpdateThreads, maxItersPerDoc,
>>> numReduceTasks, backfillPerplexity);
>>> } catch (ClassNotFoundException e) {
>>> e.printStackTrace();
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>> return null;
>>> }
>>> });
>>> } catch (InterruptedException e) {
>>> e.printStackTrace();
>>> }
>>>
>>> I am getting the following error message:
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>>> at java.lang.Class.forName0(Native Method)
>>> at java.lang.Class.forName(Class.java:344)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>>> at
>>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>>> at
>>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>>> at
>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>>> at
>>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>>> at
>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>>> at java.security.AccessController.doPrivileged(Native Method)
>>> at javax.security.auth.Subject.doAs(Subject.java:422)
>>> at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>>
>>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>>> at
>>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>>> ...
>>>
>>> I was told on the user-mahout mailing list that the LDA jar Mahout sends
>>> to YARN should contain all relevant classes. Any idea why this error is
>>> thrown nevertheless?
>>>
>>> Best,
>>> Max
>>>
>>
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
Ty, I figured out that the jar file that Mahout's CVB0Driver sends
to YARN does not contain the class org.apache.mahout.math.Vector.
Is there a way to include it in the jar through the config, or maybe
some other way?
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute LDA on an
> external YARN cluster.
>
> I am uploading all the relevant sequence files onto the YARN
> cluster.
> This is how I try to remotely execute LDA on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name
> <http://mapreduce.framework.name>", "yarn");
> hdoopConf.set("mapred.framework.name
> <http://mapred.framework.name>", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("//DocumentTopics//"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the LDA jar
> Mahout sends to YARN should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
I got it working. It took me a moment to figure out that tmpjars is
actually an attribute :)
Thank you very much!
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute LDA on an
> external YARN cluster.
>
> I am uploading all the relevant sequence files onto the YARN
> cluster.
> This is how I try to remotely execute LDA on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name
> <http://mapreduce.framework.name>", "yarn");
> hdoopConf.set("mapred.framework.name
> <http://mapred.framework.name>", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("//DocumentTopics//"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the lda jar
> mahout sends to yarn should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
Ty, I figured out that the jar file that the CVB0Driver of mahout sends
to yarn does not contain the class org.apache.mahout.math.Vector.
Is there a way to include it in the jar through the config or maybe
another way?
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute lda on an
> external yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn
> cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the lda jar
> mahout sends to yarn should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
Ty, I figured out that the jar file that the CVB0Driver of mahout sends
to yarn does not contain the class org.apache.mahout.math.Vector.
Is there a way to include it in the jar through the config or maybe
another way?
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute lda on an
> external yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn
> cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the lda jar
> mahout sends to yarn should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
I got it working. It took me a moment to figure out that tmpjars is
actually an attribute :)
Thank you very much!
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute lda on an
> external yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn
> cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the lda jar
> mahout sends to yarn should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
I got it working. It took me a moment to figure out that tmpjars is
actually an attribute :)
Thank you very much!
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute lda on an
> external yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn
> cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name
> <http://mapreduce.framework.name>", "yarn");
> hdoopConf.set("mapred.framework.name
> <http://mapred.framework.name>", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("//DocumentTopics//"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the lda jar
> mahout sends to yarn should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
Ty, I figured out that the jar file that the CVB0Driver of mahout sends
to yarn does not contain the class org.apache.mahout.math.Vector.
Is there a way to include it in the jar through the config or maybe
another way?
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute lda on an
> external yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn
> cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name
> <http://mapreduce.framework.name>", "yarn");
> hdoopConf.set("mapred.framework.name
> <http://mapred.framework.name>", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("//DocumentTopics//"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the lda jar
> mahout sends to yarn should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by mw <mw...@plista.com>.
I got it working. It took me a moment to figure out that tmpjars is
actually an attribute :)
Thank you very much!
Best,
Max
On 01/09/2015 04:23 PM, Ted Yu wrote:
> Pardon me, setJarByClass() should point to your class.
> You can pass comma separated list of jars (including mahout jar) this way:
>
> conf.set("tmpjars", list-of-jars);
>
> Cheers
>
>
> On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yuzhihong@gmail.com
> <ma...@gmail.com>> wrote:
>
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw@plista.com
> <ma...@plista.com>> wrote:
>
> Hello,
>
> I am working on a web application that should execute lda on an
> external yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn
> cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new
> Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name
> <http://mapreduce.framework.name>", "yarn");
> hdoopConf.set("mapred.framework.name
> <http://mapred.framework.name>", "yarn");
> hdoopConf.set("mapred.job.tracker", "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020"
> );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("//DocumentTopics//"), sparseVectorIn,
> seed, testFraction,
> numTrainThreads, numUpdateThreads, maxItersPerDoc,
> numReduceTasks,
> backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException:
> org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at
> org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at
> org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at
> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at
> org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1
> stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>
> ...
>
> I was told on the user-mahout mailing list that the LDA jar
> Mahout sends to YARN should contain all relevant classes. Any
> idea why this error is thrown nevertheless?
>
> Best,
> Max
>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
Pardon me, setJarByClass() should point to your class.
You can pass a comma-separated list of jars (including the mahout jar) this way:
conf.set("tmpjars", list-of-jars);
Cheers
On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>
>> Hello,
>>
>> I am working on a web application that should execute LDA on an external
>> YARN cluster.
>>
>> I am uploading all the relevant sequence files onto the YARN cluster.
>> This is how I try to remotely execute LDA on the cluster.
>>
>> try {
>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>> public Void run() throws Exception {
>> Configuration hdoopConf = new Configuration();
>> hdoopConf.set("fs.defaultFS",
>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>> hdoopConf.set("yarn.resourcemanager.hostname",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("mapreduce.framework.name", "yarn");
>> hdoopConf.set("mapred.framework.name", "yarn");
>> hdoopConf.set("mapred.job.tracker",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("dfs.permissions.enabled", "false");
>> hdoopConf.set("hadoop.job.ugi", "xx");
>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>> CVB0Driver driver = new CVB0Driver();
>> try {
>> driver.run(hdoopConf,
>> sparseVectorIn.suffix("/matrix"),
>> topicsOut, k, numTerms,
>> doc_topic_smoothening, term_topic_smoothening,
>> maxIter, iteration_block_size,
>> convergenceDelta,
>> sparseVectorIn.suffix("/dictionary.file-0"),
>> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
>> seed, testFraction, numTrainThreads,
>> numUpdateThreads, maxItersPerDoc,
>> numReduceTasks, backfillPerplexity);
>> } catch (ClassNotFoundException e) {
>> e.printStackTrace();
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>> return null;
>> }
>> });
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>>
>> I am getting the following error message:
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>> ...
>>
>> I was told on the user-mahout mailing list that the LDA jar Mahout sends
>> to YARN should contain all relevant classes. Any idea why this error is
>> thrown nevertheless?
>>
>> Best,
>> Max
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
Pardon me, setJarByClass() should point to your class.
You can pass a comma-separated list of jars (including the mahout jar) this way:
conf.set("tmpjars", list-of-jars);
Cheers
On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>
>> Hello,
>>
>> I am working on a web application that should execute LDA on an external
>> YARN cluster.
>>
>> I am uploading all the relevant sequence files onto the YARN cluster.
>> This is how I try to remotely execute LDA on the cluster.
>>
>> try {
>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>> public Void run() throws Exception {
>> Configuration hdoopConf = new Configuration();
>> hdoopConf.set("fs.defaultFS",
>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>> hdoopConf.set("yarn.resourcemanager.hostname",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("mapreduce.framework.name", "yarn");
>> hdoopConf.set("mapred.framework.name", "yarn");
>> hdoopConf.set("mapred.job.tracker",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("dfs.permissions.enabled", "false");
>> hdoopConf.set("hadoop.job.ugi", "xx");
>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>> CVB0Driver driver = new CVB0Driver();
>> try {
>> driver.run(hdoopConf,
>> sparseVectorIn.suffix("/matrix"),
>> topicsOut, k, numTerms,
>> doc_topic_smoothening, term_topic_smoothening,
>> maxIter, iteration_block_size,
>> convergenceDelta,
>> sparseVectorIn.suffix("/dictionary.file-0"),
>> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
>> seed, testFraction, numTrainThreads,
>> numUpdateThreads, maxItersPerDoc,
>> numReduceTasks, backfillPerplexity);
>> } catch (ClassNotFoundException e) {
>> e.printStackTrace();
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>> return null;
>> }
>> });
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>>
>> I am getting the following error message:
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>> ...
>>
>> I was told on the user-mahout mailing list that the LDA jar Mahout sends
>> to YARN should contain all relevant classes. Any idea why this error is
>> thrown nevertheless?
>>
>> Best,
>> Max
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
Pardon me, setJarByClass() should point to your class.
You can pass a comma-separated list of jars (including the mahout jar) this way:
conf.set("tmpjars", list-of-jars);
Cheers
On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>
>> Hello,
>>
>> I am working on a web application that should execute LDA on an external
>> YARN cluster.
>>
>> I am uploading all the relevant sequence files onto the YARN cluster.
>> This is how I try to remotely execute LDA on the cluster.
>>
>> try {
>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>> public Void run() throws Exception {
>> Configuration hdoopConf = new Configuration();
>> hdoopConf.set("fs.defaultFS",
>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>> hdoopConf.set("yarn.resourcemanager.hostname",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("mapreduce.framework.name", "yarn");
>> hdoopConf.set("mapred.framework.name", "yarn");
>> hdoopConf.set("mapred.job.tracker",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("dfs.permissions.enabled", "false");
>> hdoopConf.set("hadoop.job.ugi", "xx");
>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>> CVB0Driver driver = new CVB0Driver();
>> try {
>> driver.run(hdoopConf,
>> sparseVectorIn.suffix("/matrix"),
>> topicsOut, k, numTerms,
>> doc_topic_smoothening, term_topic_smoothening,
>> maxIter, iteration_block_size,
>> convergenceDelta,
>> sparseVectorIn.suffix("/dictionary.file-0"),
>> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
>> seed, testFraction, numTrainThreads,
>> numUpdateThreads, maxItersPerDoc,
>> numReduceTasks, backfillPerplexity);
>> } catch (ClassNotFoundException e) {
>> e.printStackTrace();
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>> return null;
>> }
>> });
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>>
>> I am getting the following error message:
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>> ...
>>
>> I was told on the user-mahout mailing list that the lda jar mahout sends
>> to yarn should contain all relevant classes. Any idea why this error is
>> thrown nevertheless?
>>
>> Best,
>> Max
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
Pardon me, setJarByClass() should point to your class.
You can pass a comma-separated list of jars (including the mahout jar) this way:
conf.set("tmpjars", list-of-jars);
Cheers
On Fri, Jan 9, 2015 at 7:17 AM, Ted Yu <yu...@gmail.com> wrote:
> In your driver, you can call:
>
> job.setJarByClass(theclass.class);
>
> where theclass can be one of the classes in mahout jar.
>
> FYI
>
> On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
>
>> Hello,
>>
>> I am working on a web application that should execute lda on an external
>> yarn cluster.
>>
>> I am uploading all the relevant sequence files onto the yarn cluster.
>> This is how I try to remotely execute lda on the cluster.
>>
>> try {
>> ugi.doAs(new PrivilegedExceptionAction<Void>() {
>> public Void run() throws Exception {
>> Configuration hdoopConf = new Configuration();
>> hdoopConf.set("fs.defaultFS",
>> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
>> hdoopConf.set("yarn.resourcemanager.hostname",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("mapreduce.framework.name", "yarn");
>> hdoopConf.set("mapred.framework.name", "yarn");
>> hdoopConf.set("mapred.job.tracker",
>> "xxx.xxx.xxx.xxx");
>> hdoopConf.set("dfs.permissions.enabled", "false");
>> hdoopConf.set("hadoop.job.ugi", "xx");
>> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
>> CVB0Driver driver = new CVB0Driver();
>> try {
>> driver.run(hdoopConf,
>> sparseVectorIn.suffix("/matrix"),
>> topicsOut, k, numTerms,
>> doc_topic_smoothening, term_topic_smoothening,
>> maxIter, iteration_block_size,
>> convergenceDelta,
>> sparseVectorIn.suffix("/dictionary.file-0"),
>> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
>> seed, testFraction, numTrainThreads,
>> numUpdateThreads, maxItersPerDoc,
>> numReduceTasks, backfillPerplexity);
>> } catch (ClassNotFoundException e) {
>> e.printStackTrace();
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>> return null;
>> }
>> });
>> } catch (InterruptedException e) {
>> e.printStackTrace();
>> }
>>
>> I am getting the following error message:
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
>> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
>> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
>> at java.lang.Class.forName0(Native Method)
>> at java.lang.Class.forName(Class.java:344)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
>> at
>> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
>> at
>> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
>> at
>> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
>> at
>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
>> at
>> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
>> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
>> at
>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
>> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
>> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
>> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
>> at java.security.AccessController.doPrivileged(Native Method)
>> at javax.security.auth.Subject.doAs(Subject.java:422)
>> at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
>> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>>
>> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
>> at
>> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
>> ...
>>
>> I was told on the user-mahout mailing list that the lda jar mahout sends
>> to yarn should contain all relevant classes. Any idea why this error is
>> thrown nevertheless?
>>
>> Best,
>> Max
>>
>
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
In your driver, you can call:
job.setJarByClass(theclass.class);
where theclass can be one of the classes in mahout jar.
FYI
On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
> Hello,
>
> I am working on a web application that should execute lda on an external
> yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction, numTrainThreads,
> numUpdateThreads, maxItersPerDoc,
> numReduceTasks, backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
> ...
>
> I was told on the user-mahout mailing list that the lda jar mahout sends
> to yarn should contain all relevant classes. Any idea why this error is
> thrown nevertheless?
>
> Best,
> Max
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
In your driver, you can call:
job.setJarByClass(theclass.class);
where theclass can be one of the classes in mahout jar.
FYI
On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
> Hello,
>
> I am working on a web application that should execute lda on an external
> yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction, numTrainThreads,
> numUpdateThreads, maxItersPerDoc,
> numReduceTasks, backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
> ...
>
> I was told on the user-mahout mailing list that the lda jar mahout sends
> to yarn should contain all relevant classes. Any idea why this error is
> thrown nevertheless?
>
> Best,
> Max
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
In your driver, you can call:
job.setJarByClass(theclass.class);
where theclass can be one of the classes in mahout jar.
FYI
On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
> Hello,
>
> I am working on a web application that should execute lda on an external
> yarn cluster.
>
> I am uploading all the relevant sequence files onto the yarn cluster.
> This is how I try to remotely execute lda on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction, numTrainThreads,
> numUpdateThreads, maxItersPerDoc,
> numReduceTasks, backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
> ...
>
> I was told on the user-mahout mailing list that the LDA jar Mahout sends
> to YARN should contain all relevant classes. Any idea why this error is
> thrown nevertheless?
>
> Best,
> Max
>
Re: Using Mahout 1.0-SNAPSHOT with yarn cluster
Posted by Ted Yu <yu...@gmail.com>.
In your driver, you can call:
job.setJarByClass(theclass.class);
where theclass can be any one of the classes in the Mahout jar.
FYI
On Fri, Jan 9, 2015 at 1:30 AM, mw <mw...@plista.com> wrote:
> Hello,
>
> I am working on a web application that should execute LDA on an external
> YARN cluster.
>
> I am uploading all the relevant sequence files onto the YARN cluster.
> This is how I try to remotely execute LDA on the cluster.
>
> try {
> ugi.doAs(new PrivilegedExceptionAction<Void>() {
> public Void run() throws Exception {
> Configuration hdoopConf = new Configuration();
> hdoopConf.set("fs.defaultFS",
> "hdfs://xxx.xxx.xxx.xxx:9000/user/xx");
> hdoopConf.set("yarn.resourcemanager.hostname",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("mapreduce.framework.name", "yarn");
> hdoopConf.set("mapred.framework.name", "yarn");
> hdoopConf.set("mapred.job.tracker",
> "xxx.xxx.xxx.xxx");
> hdoopConf.set("dfs.permissions.enabled", "false");
> hdoopConf.set("hadoop.job.ugi", "xx");
> hdoopConf.set("mapreduce.jobhistory.address","xxx.xxx.xxx.xxx:10020" );
> CVB0Driver driver = new CVB0Driver();
> try {
> driver.run(hdoopConf,
> sparseVectorIn.suffix("/matrix"),
> topicsOut, k, numTerms,
> doc_topic_smoothening, term_topic_smoothening,
> maxIter, iteration_block_size,
> convergenceDelta,
> sparseVectorIn.suffix("/dictionary.file-0"),
> topicsOut.suffix("/DocumentTopics/"), sparseVectorIn,
> seed, testFraction, numTrainThreads,
> numUpdateThreads, maxItersPerDoc,
> numReduceTasks, backfillPerplexity);
> } catch (ClassNotFoundException e) {
> e.printStackTrace();
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
> return null;
> }
> });
> } catch (InterruptedException e) {
> e.printStackTrace();
> }
>
> I am getting the following error message:
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector
> at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:344)
> at
> org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1844)
> at
> org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1809)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1903)
> at
> org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1929)
> at
> org.apache.hadoop.mapred.JobConf.getMapOutputValueClass(JobConf.java:837)
> at
> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.init(MapTask.java:983)
> at
> org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:391)
> at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:80)
> at
> org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:675)
> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:747)
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
> at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
> at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
>
> java.lang.InterruptedException: Failed to complete iteration 1 stage 1
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.runIteration(CVB0Driver.java:502)
> at
> org.apache.mahout.clustering.lda.cvb.CVB0Driver.run(CVB0Driver.java:319)
> ...
>
> I was told on the user-mahout mailing list that the LDA jar Mahout sends
> to YARN should contain all relevant classes. Any idea why this error is
> thrown nevertheless?
>
> Best,
> Max
>