Posted to user@spark.apache.org by bstempi <br...@gmail.com> on 2015/08/18 17:20:22 UTC

Issues with S3 paths that contain colons

Hi,
I'm running Spark on Amazon EMR (Spark 1.4.1, Hadoop 2.6.0).  I'm seeing the
exception below when encountering file names that contain colons.  Any idea
on how to get around this?

scala> val files = sc.textFile("s3a://redactedbucketname/*")

2015-08-18 04:38:34,567 INFO  [main] storage.MemoryStore (Logging.scala:logInfo(59)) - ensureFreeSpace(242224) called with curMem=669367, maxMem=285203496
2015-08-18 04:38:34,568 INFO  [main] storage.MemoryStore (Logging.scala:logInfo(59)) - Block broadcast_3 stored as values in memory (estimated size 236.5 KB, free 271.1 MB)
2015-08-18 04:38:34,663 INFO  [main] storage.MemoryStore (Logging.scala:logInfo(59)) - ensureFreeSpace(21533) called with curMem=911591, maxMem=285203496
2015-08-18 04:38:34,664 INFO  [main] storage.MemoryStore (Logging.scala:logInfo(59)) - Block broadcast_3_piece0 stored as bytes in memory (estimated size 21.0 KB, free 271.1 MB)
2015-08-18 04:38:34,665 INFO  [sparkDriver-akka.actor.default-dispatcher-19] storage.BlockManagerInfo (Logging.scala:logInfo(59)) - Added broadcast_3_piece0 in memory on 10.182.184.26:60338 (size: 21.0 KB, free: 271.9 MB)
2015-08-18 04:38:34,667 INFO  [main] spark.SparkContext (Logging.scala:logInfo(59)) - Created broadcast 3 from textFile at <console>:21

files: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[7] at textFile at <console>:21

scala> files.count

2015-08-18 04:38:37,262 INFO  [main] s3a.S3AFileSystem (S3AFileSystem.java:listStatus(533)) - List status for path: s3a://redactedbucketname/
2015-08-18 04:38:37,262 INFO  [main] s3a.S3AFileSystem (S3AFileSystem.java:getFileStatus(684)) - Getting path status for s3a://redactedbucketname/ ()

java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: [922-212-4438]-[119]-[1]-[2015-08-13T15:43:12.346193%5D-%5B2015-01-01T00:00:00%5D-redacted.csv
    at org.apache.hadoop.fs.Path.initialize(Path.java:206)
    at org.apache.hadoop.fs.Path.<init>(Path.java:172)
    at org.apache.hadoop.fs.Path.<init>(Path.java:94)
    at org.apache.hadoop.fs.Globber.glob(Globber.java:240)
    at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1700)
    at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:229)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:200)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:279)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:207)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:219)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:217)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:217)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:219)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:217)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:217)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1781)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1099)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:24)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:29)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
    at $iwC$$iwC$$iwC.<init>(<console>:37)
    at $iwC$$iwC.<init>(<console>:39)
    at $iwC.<init>(<console>:41)
    at <init>(<console>:43)
    at .<init>(<console>:47)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:665)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:170)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:193)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:112)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.URISyntaxException: Relative path in absolute URI: [922-212-4438]-[119]-[1]-[2015-08-13T15:43:12.346193%5D-%5B2015-01-01T00:00:00%5D-redacted.csv
    at java.net.URI.checkPath(URI.java:1804)
    at java.net.URI.<init>(URI.java:752)
    at org.apache.hadoop.fs.Path.initialize(Path.java:203)
    ... 65 more



--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Issues-with-S3-paths-that-contain-colons-tp24320.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.



Re: Issues with S3 paths that contain colons

Posted by Romi Kuntsman <ro...@totango.com>.
I had the exact same issue, and overcame it by overriding NativeS3FileSystem
with my own class, where I replaced the implementation of globStatus. It's a
hack, but it works.
Then I set the Hadoop config fs.myschema.impl to my class name, and accessed
the files through myschema:// instead of s3n://. Something like the following
(the class wrapper and imports are filled in here; the class name is just an
example):

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.s3native.NativeS3FileSystem;
import com.google.common.collect.Lists;

public class ColonTolerantS3FileSystem extends NativeS3FileSystem {  // example name
  @Override
  public FileStatus[] globStatus(final Path pathPattern, final PathFilter filter)
      throws IOException {
    // Skip Hadoop's glob expansion (it builds a java.net.URI per matched
    // child name and fails on colons); list the path and filter directly.
    final FileStatus[] statusList = super.listStatus(pathPattern);
    final List<FileStatus> result = Lists.newLinkedList();
    for (FileStatus fileStatus : statusList) {
      if (filter.accept(fileStatus.getPath())) {
        result.add(fileStatus);
      }
    }
    return result.toArray(new FileStatus[] {});
  }
}
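
On the Spark side, the wiring looks roughly like this (a sketch: the
"myschema" scheme and the fully qualified class name are placeholders for
whatever you actually register):

// Map the placeholder scheme "myschema" to the custom FileSystem class,
// then read through myschema:// instead of s3n://.
sc.hadoopConfiguration.set("fs.myschema.impl",
  "com.example.ColonTolerantS3FileSystem")
val files = sc.textFile("myschema://redactedbucketname/*")
files.count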



On Wed, Aug 19, 2015 at 9:14 PM Steve Loughran <st...@hortonworks.com> wrote:

> [...]

Re: Issues with S3 paths that contain colons

Posted by Steve Loughran <st...@hortonworks.com>.
You might want to think about filing a JIRA on issues.apache.org against HADOOP, with the component being fs/s3. That doesn't mean it is fixable, only that it becomes a known issue.

Every FS has its own set of forbidden characters & filenames: Unix doesn't allow files named "."; Windows doesn't allow files called COM1, ...; so hitting some filesystem rule is sometimes a problem. Here, though, you've got the file in S3 and the listing finds it, but other bits of the codepath are failing, which implies that it is something in the Hadoop libs.
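
Specifically, the trace points at org.apache.hadoop.fs.Path, which treats everything before the first colon of a schemeless path string as a URI scheme, so a bare file name containing a colon is rejected before S3 is ever contacted. A minimal sketch of that failure in a spark-shell, no S3 involved (an assumed repro; the exact message can vary by Hadoop version):

scala> new org.apache.hadoop.fs.Path("2015-08-13T15:43:12-redacted.csv")
// "2015-08-13T15" is parsed as the scheme, so java.net.URI rejects the rest:
// java.lang.IllegalArgumentException: java.net.URISyntaxException:
//   Relative path in absolute URI: 2015-08-13T15:43:12-redacted.csv

Globber.glob constructs a Path like this for every child name it matches, which is presumably why the wildcard listing blows up.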


> On 18 Aug 2015, at 08:20, Brian Stempin <br...@gmail.com> wrote:
>
> [...]


---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@spark.apache.org
For additional commands, e-mail: user-help@spark.apache.org