You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Rahul Challapalli (JIRA)" <ji...@apache.org> on 2016/12/14 23:36:58 UTC

[jira] [Created] (DRILL-5131) Parquet Writer fails with heap space not available error on TPCDS 1TB data set

Rahul Challapalli created DRILL-5131:
----------------------------------------

             Summary: Parquet Writer fails with heap space not available error on TPCDS 1TB data set
                 Key: DRILL-5131
                 URL: https://issues.apache.org/jira/browse/DRILL-5131
             Project: Apache Drill
          Issue Type: Bug
          Components: Storage - Parquet
    Affects Versions: 1.9.0
            Reporter: Rahul Challapalli


git.commit.id.abbrev=cf2b7c7

The query below fails with an "Out of Heap Space" error and brings down the drillbit.

{code}
create table store_sales as select
case when (columns[0]='') then cast(null as integer) else cast(columns[0] as integer) end as ss_sold_date_sk,
case when (columns[1]='') then cast(null as integer) else cast(columns[1] as integer) end as ss_sold_time_sk,
case when (columns[2]='') then cast(null as integer) else cast(columns[2] as integer) end as ss_item_sk,
case when (columns[3]='') then cast(null as integer) else cast(columns[3] as integer) end as ss_customer_sk,
case when (columns[4]='') then cast(null as integer) else cast(columns[4] as integer) end as ss_cdemo_sk,
case when (columns[5]='') then cast(null as integer) else cast(columns[5] as integer) end as ss_hdemo_sk,
case when (columns[6]='') then cast(null as integer) else cast(columns[6] as integer) end as ss_addr_sk,
case when (columns[7]='') then cast(null as integer) else cast(columns[7] as integer) end as ss_store_sk,
case when (columns[8]='') then cast(null as integer) else cast(columns[8] as integer) end as ss_promo_sk,
case when (columns[9]='') then cast(null as integer) else cast(columns[9] as integer) end as ss_ticket_number,
case when (columns[10]='') then cast(null as integer) else cast(columns[10] as integer) end as ss_quantity,
case when (columns[11]='') then cast(null as decimal(7,2)) else cast(columns[11] as decimal(7,2)) end as ss_wholesale_cost,
case when (columns[12]='') then cast(null as decimal(7,2)) else cast(columns[12] as decimal(7,2)) end as ss_list_price,
case when (columns[13]='') then cast(null as decimal(7,2)) else cast(columns[13] as decimal(7,2)) end as ss_sales_price,
case when (columns[14]='') then cast(null as decimal(7,2)) else cast(columns[14] as decimal(7,2)) end as ss_ext_discount_amt,
case when (columns[15]='') then cast(null as decimal(7,2)) else cast(columns[15] as decimal(7,2)) end as ss_ext_sales_price,
case when (columns[16]='') then cast(null as decimal(7,2)) else cast(columns[16] as decimal(7,2)) end as ss_ext_wholesale_cost,
case when (columns[17]='') then cast(null as decimal(7,2)) else cast(columns[17] as decimal(7,2)) end as ss_ext_list_price,
case when (columns[18]='') then cast(null as decimal(7,2)) else cast(columns[18] as decimal(7,2)) end as ss_ext_tax,
case when (columns[19]='') then cast(null as decimal(7,2)) else cast(columns[19] as decimal(7,2)) end as ss_coupon_amt,
case when (columns[20]='') then cast(null as decimal(7,2)) else cast(columns[20] as decimal(7,2)) end as ss_net_paid,
case when (columns[21]='') then cast(null as decimal(7,2)) else cast(columns[21] as decimal(7,2)) end as ss_net_paid_inc_tax,
case when (columns[22]='') then cast(null as decimal(7,2)) else cast(columns[22] as decimal(7,2)) end as ss_net_profit
from dfs.`/drill/testdata/tpcds/text/sf1000/store_sales.dat`;
{code}

Exception from the logs:
{code}
2016-12-14 14:23:49,303 [27ae4152-0fd4-aa0f-56db-a21e2f54d6c2:frag:1:14] ERROR o.a.drill.common.CatastrophicFailure - Catastrophic Failure Occurred, exiting. Information message: Unable to handle out of memory condition in FragmentExecutor.
java.lang.OutOfMemoryError: Java heap space
        at org.apache.parquet.bytes.CapacityByteArrayOutputStream.writeToOutput(CapacityByteArrayOutputStream.java:223) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.CapacityByteArrayOutputStream.writeTo(CapacityByteArrayOutputStream.java:239) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput$CapacityBAOSBytesInput.writeAllTo(BytesInput.java:355) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput$SequenceBytesIn.writeAllTo(BytesInput.java:266) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput.toByteArray(BytesInput.java:174) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput.toByteBuffer(BytesInput.java:185) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.hadoop.DirectCodecFactory$SnappyCompressor.compress(DirectCodecFactory.java:291) ~[parquet-hadoop-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.hadoop.ColumnChunkPageWriteStore$ColumnChunkPageWriter.writePage(ColumnChunkPageWriteStore.java:94) ~[parquet-hadoop-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.column.impl.ColumnWriterV1.writePage(ColumnWriterV1.java:154) ~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.column.impl.ColumnWriterV1.accountForValueWritten(ColumnWriterV1.java:115) ~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.column.impl.ColumnWriterV1.write(ColumnWriterV1.java:227) ~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.io.MessageColumnIO$MessageColumnIORecordConsumer.addInteger(MessageColumnIO.java:433) ~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.drill.exec.store.ParquetOutputRecordWriter$NullableIntParquetConverter.writeField(ParquetOutputRecordWriter.java:377) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.store.EventBasedRecordWriter.write(EventBasedRecordWriter.java:65) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.WriterRecordBatch.innerNext(WriterRecordBatch.java:106) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:104) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext(SingleSenderCreator.java:92) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:94) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:232) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:226) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at java.security.AccessController.doPrivileged(Native Method) ~[na:1.8.0_92]
        at javax.security.auth.Subject.doAs(Subject.java:422) ~[na:1.8.0_92]
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595) ~[hadoop-common-2.7.0-mapr-1607.jar:na]
        at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:226) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38) [drill-common-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_92]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_92]
        at java.lang.Thread.run(Thread.java:745) [na:1.8.0_92]
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)