Posted to user@flink.apache.org by Sourigna Phetsarath <gn...@teamaol.com> on 2016/03/17 22:05:39 UTC

S3 Timeouts with lots of Files Using Flink 0.10.2

All:

I'm trying to read lots of files from S3 and I am getting timeouts from S3:

java.io.IOException: Error opening the Input Split <FILE> [0,558574890]: Input opening request timed out. Opener was  alive. Stack of split open thread:
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.parkUntil(LockSupport.java:267)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUntil(AbstractQueuedSynchronizer.java:2130)
at org.apache.http.pool.PoolEntryFuture.await(PoolEntryFuture.java:131)
at org.apache.http.pool.AbstractConnPool.getPoolEntryBlocking(AbstractConnPool.java:282)
at org.apache.http.pool.AbstractConnPool.access$000(AbstractConnPool.java:64)
at org.apache.http.pool.AbstractConnPool$2.getPoolEntry(AbstractConnPool.java:177)
at org.apache.http.pool.AbstractConnPool$2.getPoolEntry(AbstractConnPool.java:170)
at org.apache.http.pool.PoolEntryFuture.get(PoolEntryFuture.java:102)
at org.apache.http.impl.conn.PoolingClientConnectionManager.leaseConnection(PoolingClientConnectionManager.java:208)
at org.apache.http.impl.conn.PoolingClientConnectionManager$1.getConnection(PoolingClientConnectionManager.java:195)
at sun.reflect.GeneratedMethodAccessor49.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70)
at com.amazonaws.http.conn.$Proxy24.getConnection(Unknown Source)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:423)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:863)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:57)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:822)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:576)
at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:362)
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:328)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:307)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1162)
at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:357)
at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:343)
at sun.reflect.GeneratedMethodAccessor53.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy23.retrievePair(Unknown Source)
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:1157)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:767)
at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.open(EmrFileSystem.java:172)
at org.apache.flink.runtime.fs.hdfs.HadoopFileSystem.open(HadoopFileSystem.java:387)
at org.apache.flink.runtime.fs.hdfs.HadoopFileSystem.open(HadoopFileSystem.java:48)
at org.apache.flink.api.common.io.FileInputFormat$InputSplitOpenThread.run(FileInputFormat.java:841)
at org.apache.flink.api.common.io.FileInputFormat.open(FileInputFormat.java:680)
at com.aol.ds.arc.ml.flink.AvroGenericRecordInputFormat.open(AvroGenericRecordInputFormat.java:56)
at org.apache.flink.api.common.io.FileInputFormat.open(FileInputFormat.java:57)
at org.apache.flink.runtime.operators.DataSourceTask.invoke(DataSourceTask.java:146)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:584)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Input opening request timed out. Opener was  alive. Stack of split open thread:
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.parkUntil(LockSupport.java:267)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUntil(AbstractQueuedSynchronizer.java:2130)
at org.apache.http.pool.PoolEntryFuture.await(PoolEntryFuture.java:131)
at org.apache.http.pool.AbstractConnPool.getPoolEntryBlocking(AbstractConnPool.java:282)
at org.apache.http.pool.AbstractConnPool.access$000(AbstractConnPool.java:64)
at org.apache.http.pool.AbstractConnPool$2.getPoolEntry(AbstractConnPool.java:177)
at org.apache.http.pool.AbstractConnPool$2.getPoolEntry(AbstractConnPool.java:170)
at org.apache.http.pool.PoolEntryFuture.get(PoolEntryFuture.java:102)
at org.apache.http.impl.conn.PoolingClientConnectionManager.leaseConnection(PoolingClientConnectionManager.java:208)
at org.apache.http.impl.conn.PoolingClientConnectionManager$1.getConnection(PoolingClientConnectionManager.java:195)
at sun.reflect.GeneratedMethodAccessor49.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70)
at com.amazonaws.http.conn.$Proxy24.getConnection(Unknown Source)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:423)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:863)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:57)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:822)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:576)
at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:362)
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:328)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:307)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1162)
at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:357)
at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:343)
at sun.reflect.GeneratedMethodAccessor53.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy23.retrievePair(Unknown Source)
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:1157)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:767)
at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.open(EmrFileSystem.java:172)
at org.apache.flink.runtime.fs.hdfs.HadoopFileSystem.open(HadoopFileSystem.java:387)
at org.apache.flink.runtime.fs.hdfs.HadoopFileSystem.open(HadoopFileSystem.java:48)
at org.apache.flink.api.common.io.FileInputFormat$InputSplitOpenThread.run(FileInputFormat.java:841)
at org.apache.flink.api.common.io.FileInputFormat$InputSplitOpenThread.waitForCompletion(FileInputFormat.java:890)
at org.apache.flink.api.common.io.FileInputFormat.open(FileInputFormat.java:676)


Is there a Flink configuration setting to address this?

I've also tried setting:

avroInputFormat.setUnsplittable(true)

Are there any S3 properties that we should set?

Thanks in advance.
-- 


Gna Phetsarath
System Architect // AOL Platforms // Data Services // Applied Research Chapter
770 Broadway, 5th Floor, New York, NY 10003
o: 212.402.4871 // m: 917.373.7363
vvmr: 8890237 aim: sphetsarath20 t: @sourigna

<http://www.aolplatforms.com>

Re: S3 Timeouts with lots of Files Using Flink 0.10.2

Posted by Robert Metzger <rm...@apache.org>.
The default timeout for opening a split is 5 minutes. You can set a higher
value with "taskmanager.runtime.fs_timeout" (milliseconds), but I believe
that 5 minutes is already way too long.
It would be interesting to find out the root cause of this.
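
In flink-conf.yaml that would look something like the following (the value is
in milliseconds; 600000 here is only an illustration, not a recommendation):

  taskmanager.runtime.fs_timeout: 600000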

On Thu, Mar 17, 2016 at 11:00 PM, Sourigna Phetsarath <gna.phetsarath@teamaol.com> wrote:

> [quoted text trimmed -- the reply and the original message appear in full
> elsewhere in this thread]

Re: S3 Timeouts with lots of Files Using Flink 0.10.2

Posted by Sourigna Phetsarath <gn...@teamaol.com>.
Thanks for the info, will give it a try.

BTW - We're using Hadoop 2.7 on AWS EMR 4.4.0.

On Thu, Mar 17, 2016 at 5:55 PM, Ken Krugler <kk...@transpac.com> wrote:

> [quoted text trimmed -- Ken Krugler's full reply and the original message
> appear in full elsewhere in this thread]


-- 


Gna Phetsarath
System Architect // AOL Platforms // Data Services // Applied Research Chapter
770 Broadway, 5th Floor, New York, NY 10003
o: 212.402.4871 // m: 917.373.7363
vvmr: 8890237 aim: sphetsarath20 t: @sourigna

<http://www.aolplatforms.com>

RE: S3 Timeouts with lots of Files Using Flink 0.10.2

Posted by Ken Krugler <kk...@transpac.com>.
With Hadoop 2.6 or later, you can use the s3a:// protocol (instead of s3n://),
which should be more reliable (though some bug fixes aren't available until
2.7; see https://issues.apache.org/jira/browse/HADOOP-11571).

You can then also set these properties to control the S3A connection timeouts:

> <property>
>   <name>fs.s3a.connection.establish.timeout</name>
>   <value>5000</value>
>   <description>Socket connection setup timeout in milliseconds.</description>
> </property>
> 
> <property>
>   <name>fs.s3a.connection.timeout</name>
>   <value>50000</value>
>   <description>Socket connection timeout in milliseconds.</description>
> </property>
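
For the Flink side, I believe one way to get these picked up is to point Flink
at a Hadoop conf directory whose core-site.xml contains the fs.s3a.* properties
above (e.g. via fs.hdfs.hadoopconf in flink-conf.yaml), and then switch the
input paths from s3n:// to s3a://. Roughly, as an untested sketch with
placeholder bucket/paths (and assuming the Avro input format from the stack
trace is a FileInputFormat<GenericRecord>):

  # flink-conf.yaml: Hadoop conf dir containing the core-site.xml that holds
  # the fs.s3a.* properties shown above
  fs.hdfs.hadoopconf: /etc/hadoop/conf

  // in the job: read through the s3a scheme instead of s3n
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<GenericRecord> records =
      env.readFile(avroInputFormat, "s3a://<bucket>/<prefix>/");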

-- Ken


> From: Sourigna Phetsarath
> Sent: March 17, 2016 2:05:39pm PDT
> To: user@flink.apache.org
> Subject: S3 Timeouts with lots of Files Using Flink 0.10.2
> 
> All:
> 
> I'm trying to read lots of files from S3 and I am getting timeouts from S3:
> 
> [stack trace trimmed -- see the original message at the top of this thread]
> 
> 
> 
> 
> Is there  a Flink Configuration to address this? 
> 
> I've also tried setting:
> avroInputFormat.setUnsplittable(true)
> 
> Are there any S3 properties that we should set?
> 
> Thanks in advance. 
> -- 
> Gna Phetsarath
> System Architect // AOL Platforms // Data Services // Applied Research Chapter
> 770 Broadway, 5th Floor, New York, NY 10003
> o: 212.402.4871 // m: 917.373.7363
> vvmr: 8890237 aim: sphetsarath20 t: @sourigna
> 
> 
> 

--------------------------
Ken Krugler
+1 530-210-6378
http://www.scaleunlimited.com
custom big data solutions & training
Hadoop, Cascading, Cassandra & Solr




