Posted to dev@mahout.apache.org by "Martin Provencher (JIRA)" <ji...@apache.org> on 2011/03/31 20:20:05 UTC

[jira] [Created] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)
--------------------------------------------------------------

                 Key: MAHOUT-646
                 URL: https://issues.apache.org/jira/browse/MAHOUT-646
             Project: Mahout
          Issue Type: Bug
          Components: Classification
    Affects Versions: 0.5
            Reporter: Martin Provencher
            Priority: Minor


When I tried to run the Wikipedia example on EMR with all the categories in the Wikipedia dump, I got this error:

org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException: failed to create file /yatter.tagger/wikipedia/input/_temporary/_attempt__0000_r_000000_0/part-r-00000 for DFSClient_attempt_201103292134_0010_r_000000_0 on client 10.240.10.157 because current leaseholder is trying to recreate file.
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:1045)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:981)
    at org.apache.hadoop.hdfs.server.namenode.NameNode.create(NameNode.java:377)
    at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:961)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:957)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:955)
    at org.apache.hadoop.ipc.Client.call(Client.java:740)
    at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
    at $Proxy1.create(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
    at $Proxy1.create(Unknown Source)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.<init>(DFSClient.java:2709)
    at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:491)
    at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:195)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:524)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:505)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:412)
    at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:128)
    at org.apache.mahout.classifier.bayes.MultipleTextOutputFormat.getBaseRecordWriter(MultipleTextOutputFormat.java:41)
    at org.apache.mahout.classifier.bayes.MultipleOutputFormat$1.write(MultipleOutputFormat.java:81)
    at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:517)
    at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
    at org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:35)
    at org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:28)
    at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176)
    at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:575)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:412)
    at org.apache.hadoop.mapred.Child.main(Child.java:170)

org.apache.hadoop.ipc.RemoteException: java.io.IOException: failed to create file /yatter.tagger/wikipedia/input/_temporary/_attempt__0000_r_000000_0/part-r-00000 on client 10.240.10.157 either because the filename is invalid or the file exists
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:1092)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:981)
    at org.apache.hadoop.hdfs.server.namenode.NameNode.create(NameNode.java:377)
    at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:961)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:957)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:955)
    at org.apache.hadoop.ipc.Client.call(Client.java:740)
    at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
    at $Proxy1.create(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
    at $Proxy1.create(Unknown Source)
    at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.<init>(DFSClient.java:2709)
    at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:491)
    at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:195)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:524)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:505)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:412)
    at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:128)
    at org.apache.mahout.classifier.bayes.MultipleTextOutputFormat.getBaseRecordWriter(MultipleTextOutputFormat.java:41)
    at org.apache.mahout.classifier.bayes.MultipleOutputFormat$1.write(MultipleOutputFormat.java:81)
    at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:517)
    at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
    at org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:35)
    at org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:28)
    at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176)
    at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:575)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:412)
    at org.apache.hadoop.mapred.Child.main(Child.java:170)

The same exception was then reported 4 more times:

org.apache.hadoop.ipc.RemoteException: java.io.IOException: failed to create file /yatter.tagger/wikipedia/input/_temporary/_attempt__0000_r_000000_0/part-r-00000 on client 10.240.10.157 either because the filename is invalid or the file exists

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

[jira] [Updated] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Sean Owen (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Sean Owen updated MAHOUT-646:
-----------------------------

       Resolution: Fixed
    Fix Version/s: 0.5
         Assignee: Sean Owen
           Status: Resolved  (was: Patch Available)

OK done.


[jira] [Commented] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Sean Owen (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13014144#comment-13014144 ] 

Sean Owen commented on MAHOUT-646:
----------------------------------

OK. Commenting out the line means the output format is not being used at all. Is that valid? (I don't know.)
But in any event there seems to be an actual problem here:

{{...
at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:128)
at org.apache.mahout.classifier.bayes.MultipleTextOutputFormat.getBaseRecordWriter(MultipleTextOutputFormat.java:41)
at org.apache.mahout.classifier.bayes.MultipleOutputFormat$1.write(MultipleOutputFormat.java:81)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:517)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:35)
...}}

It's this attempt to create a file that's resulting in the "already being created" exception. Anyone have any ideas?
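
For what it's worth, both failure modes in the traces can be reproduced directly against HDFS with two create() calls on the same still-open path. A minimal sketch (hypothetical path, not Mahout code; it must run against HDFS, since the local filesystem doesn't enforce leases):

{code}
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LeaseCollisionSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path part = new Path("/tmp/_attempt_x_r_000000_0/part-r-00000"); // hypothetical

    fs.create(part); // first create: this client now holds the HDFS lease
    fs.create(part); // same leaseholder recreating the still-open file ->
                     // AlreadyBeingCreatedException, as in the first trace
    // fs.create(part, false); // overwrite=false on an existing file instead fails
                               // with "the filename is invalid or the file exists",
                               // as in the second trace
  }
}
{code}

So if the MultipleOutputFormat record-writer logic ever resolves two writes to the same part file, the second create() is exactly what the NameNode rejects.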


[jira] [Updated] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Martin Provencher (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Martin Provencher updated MAHOUT-646:
-------------------------------------

    Status: Patch Available  (was: Open)

To fix it, I've applied this patch:

===================================================================
--- examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java	(revision 1087334)
+++ examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java	(working copy)
@@ -185,7 +185,7 @@
     //TODO: job.setNumMapTasks(100);
     job.setInputFormatClass(XmlInputFormat.class);
     job.setReducerClass(WikipediaDatasetCreatorReducer.class);
-    job.setOutputFormatClass(WikipediaDatasetCreatorOutputFormat.class);
+    //job.setOutputFormatClass(WikipediaDatasetCreatorOutputFormat.class);
     
     FileInputFormat.setInputPaths(job, new Path(input));
     Path outPath = new Path(output);
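
In other words (a sketch of the resulting configuration, not the committed code), the job falls back to Hadoop's default TextOutputFormat and writes one part-r-NNNNN file per reducer instead of one file per category:

{code}
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class PatchedDriverSketch {
  static Job configure(Configuration conf, String input, String output) throws IOException {
    Job job = new Job(conf, "wikipedia dataset creator");
    // input format and reducer are set as in the real driver (omitted here)
    job.setOutputFormatClass(TextOutputFormat.class); // the default, stated explicitly;
    // the commented-out WikipediaDatasetCreatorOutputFormat would instead have
    // routed each category to its own output file
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    return job;
  }
}
{code}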


[jira] [Commented] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Hudson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13015233#comment-13015233 ] 

Hudson commented on MAHOUT-646:
-------------------------------

Integrated in Mahout-Quality #715 (See [https://hudson.apache.org/hudson/job/Mahout-Quality/715/])
    MAHOUT-646 Just output one text file for Wikipedia example to avoid some bug in MultipleOutputFormat subclass



[jira] [Commented] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Sean Owen (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13014526#comment-13014526 ] 

Sean Owen commented on MAHOUT-646:
----------------------------------

Could be... but somehow I doubt it. I note that in MAHOUT-614 we fixed up an apparent problem with this class. The change might have uncovered a different issue (or, er, actually messed it up in a different way).

One way forward is to fix it. Another way is to delete it. I say that's an option since WikipediaDatasetCreatorOutputFormat is the one and only class which depends on two classes copied and modified from Hadoop. Going forward we'd want to go back to the main-line version somehow.

And if removing use of this custom output format doesn't "hurt", as the issue implies, well, why not just remove it?

But the question is... is it really just as well to let this dump to a single text output, or does that defeat the purpose? Robin?
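
If we did want to keep per-category files without the copied classes, the main-line route would presumably be Hadoop's own MultipleOutputs. A sketch under that assumption (the new-API MultipleOutputs only appeared after 0.20, so this is a possible direction, not what was committed):

{code}
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class PerCategoryReducerSketch extends Reducer<Text, Text, Text, Text> {
  private MultipleOutputs<Text, Text> mos;

  @Override
  protected void setup(Context ctx) {
    mos = new MultipleOutputs<Text, Text>(ctx);
  }

  @Override
  protected void reduce(Text category, Iterable<Text> docs, Context ctx)
      throws IOException, InterruptedException {
    for (Text doc : docs) {
      // One base output path per category; MultipleOutputs caches the
      // underlying record writers, so no file is ever created twice.
      mos.write(category, doc, category.toString());
    }
  }

  @Override
  protected void cleanup(Context ctx) throws IOException, InterruptedException {
    mos.close();
  }
}
{code}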


[jira] [Commented] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Mat Kelcey (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13014179#comment-13014179 ] 

Mat Kelcey commented on MAHOUT-646:
-----------------------------------

Could this have anything to do with running on EMR?



[jira] [Commented] (MAHOUT-646) Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)

Posted by "Robin Anil (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13015071#comment-13015071 ] 

Robin Anil commented on MAHOUT-646:
-----------------------------------

Unfortunately, the Bayes classifier reads from a text input format, and for the Wikipedia example I wanted to split the output of the different categories into multiple files. That split is not actually necessary for Bayes, which reads all of the files anyway, so dropping it wouldn't cause any problems; we would just have to update all the tutorials and references that mention it. A rough sketch of the change follows.
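For illustration only, here is a minimal sketch (an assumption about the shape of the fix, not an actual Mahout patch) of a dataset-creator driver with the per-category split dropped. It sends reducer output through Hadoop's stock TextOutputFormat, so each reducer writes a single part-r-NNNNN file instead of calling FileSystem.create() once per category, which is what trips the HDFS lease errors quoted above. The class name and placeholder paths are invented for the example.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    // Hypothetical driver for the Wikipedia dataset-creator job with the
    // per-category MultipleTextOutputFormat removed.
    public class WikipediaDatasetJobSketch {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "wikipedia dataset creator");
        job.setJarByClass(WikipediaDatasetJobSketch.class);

        // Plain text output: one part-r-NNNNN file per reducer. Bayes
        // training reads every file under the input directory, so the
        // per-category file names bought nothing.
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Mapper/reducer classes omitted; args[0]/args[1] are placeholders.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }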

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira