You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "satish (Jira)" <ji...@apache.org> on 2020/11/09 20:23:00 UTC

[jira] [Updated] (HUDI-1382) Bloated wire format for file slices causing OOM in Timeline Server

     [ https://issues.apache.org/jira/browse/HUDI-1382?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

satish updated HUDI-1382:
-------------------------
    Description: 
A FileSystem call to list all files in a partition is failing with OOM at scale. BaseFileDTO contains a lot of unnecessary information. Can we change the on-wire format to transfer only essential information, such as the file path?

20/11/04 22:04:37 ERROR javalin.Javalin: Exception occurred while servicing http-request
java.lang.OutOfMemoryError: Java heap space
	at java.util.Arrays.copyOfRange(Arrays.java:3664)
	at java.lang.String.<init>(String.java:207)
	at java.lang.StringBuilder.toString(StringBuilder.java:407)
	at com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:356)
	at com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
	at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2933)
	at org.apache.hudi.timeline.service.FileSystemViewHandler.writeValueAsString(FileSystemViewHandler.java:135)
	at org.apache.hudi.timeline.service.FileSystemViewHandler.lambda$registerFileSlicesAPI$17(FileSystemViewHandler.java:284)
	at org.apache.hudi.timeline.service.FileSystemViewHandler$$Lambda$162/1136467149.handle(Unknown Source)
	at org.apache.hudi.timeline.service.FileSystemViewHandler$ViewHandler.handle(FileSystemViewHandler.java:329)
	at io.javalin.security.SecurityUtil.noopAccessManager(SecurityUtil.kt:22)
	at io.javalin.Javalin$$Lambda$129/2119409741.manage(Unknown Source)
	at io.javalin.Javalin.lambda$addHandler$0(Javalin.java:606)
	at io.javalin.Javalin$$Lambda$134/1556401990.handle(Unknown Source)
	at io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:46)
	at io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:17)
	at io.javalin.core.JavalinServlet$service$1.invoke(JavalinServlet.kt:143)
	at io.javalin.core.JavalinServlet$service$2.invoke(JavalinServlet.kt:41)
	at io.javalin.core.JavalinServlet.service(JavalinServlet.kt:107)
	at io.javalin.core.util.JettyServerUtil$initialize$httpHandler$1.doHandle(JettyServerUtil.kt:72)
	at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1668)
	at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
	at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:61)
	at org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:174)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
	at org.eclipse.jetty.server.Server.handle(Server.java:502)
	at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:370)
	at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:267)
20/11/04 22:05:05 WARN hdfs.DataStreamer: Exception for BP-1936994480-10.13.11.3-1486766945414:blk_27596768430_29703227810
java.io.EOFException: Unexpected EOF while trying to read response from server
	at org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:402)
	at org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:213)
	at org.apache.hadoop.hdfs.DataStreamer$ResponseProcessor.run(DataStreamer.java:1073)

> Bloated wire format for file slices causing OOM in Timeline Server
> ------------------------------------------------------------------
>
>                 Key: HUDI-1382
>                 URL: https://issues.apache.org/jira/browse/HUDI-1382
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: Common Core
>            Reporter: Nishith Agarwal
>            Priority: Major
>
> A FileSystem call to list all files in a partition is failing with OOM at scale. BaseFileDTO contains a lot of unnecessary information. Can we change the on-wire format to transfer only essential information, such as the file path?
> 20/11/04 22:04:37 ERROR javalin.Javalin: Exception occurred while servicing http-request
> java.lang.OutOfMemoryError: Java heap space
> 	at java.util.Arrays.copyOfRange(Arrays.java:3664)
> 	at java.lang.String.<init>(String.java:207)
> 	at java.lang.StringBuilder.toString(StringBuilder.java:407)
> 	at com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:356)
> 	at com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
> 	at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2933)
> 	at org.apache.hudi.timeline.service.FileSystemViewHandler.writeValueAsString(FileSystemViewHandler.java:135)
> 	at org.apache.hudi.timeline.service.FileSystemViewHandler.lambda$registerFileSlicesAPI$17(FileSystemViewHandler.java:284)
> 	at org.apache.hudi.timeline.service.FileSystemViewHandler$$Lambda$162/1136467149.handle(Unknown Source)
> 	at org.apache.hudi.timeline.service.FileSystemViewHandler$ViewHandler.handle(FileSystemViewHandler.java:329)
> 	at io.javalin.security.SecurityUtil.noopAccessManager(SecurityUtil.kt:22)
> 	at io.javalin.Javalin$$Lambda$129/2119409741.manage(Unknown Source)
> 	at io.javalin.Javalin.lambda$addHandler$0(Javalin.java:606)
> 	at io.javalin.Javalin$$Lambda$134/1556401990.handle(Unknown Source)
> 	at io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:46)
> 	at io.javalin.core.JavalinServlet$service$2$1.invoke(JavalinServlet.kt:17)
> 	at io.javalin.core.JavalinServlet$service$1.invoke(JavalinServlet.kt:143)
> 	at io.javalin.core.JavalinServlet$service$2.invoke(JavalinServlet.kt:41)
> 	at io.javalin.core.JavalinServlet.service(JavalinServlet.kt:107)
> 	at io.javalin.core.util.JettyServerUtil$initialize$httpHandler$1.doHandle(JettyServerUtil.kt:72)
> 	at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
> 	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
> 	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1668)
> 	at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
> 	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
> 	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
> 	at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:61)
> 	at org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:174)
> 	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
> 	at org.eclipse.jetty.server.Server.handle(Server.java:502)
> 	at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:370)
> 	at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:267)
> 20/11/04 22:05:05 WARN hdfs.DataStreamer: Exception for BP-1936994480-10.13.11.3-1486766945414:blk_27596768430_29703227810
> java.io.EOFException: Unexpected EOF while trying to read response from server
> 	at org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:402)
> 	at org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:213)
> 	at org.apache.hadoop.hdfs.DataStreamer$ResponseProcessor.run(DataStreamer.java:1073)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)