You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hawq.apache.org by "Ming LI (JIRA)" <ji...@apache.org> on 2016/02/01 06:24:40 UTC

[jira] [Assigned] (HAWQ-376) QE core dump at HdfsCloseFile() when run stress test with COPY

     [ https://issues.apache.org/jira/browse/HAWQ-376?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ming LI reassigned HAWQ-376:
----------------------------

    Assignee: Ming LI  (was: Lei Chang)

> QE core dump at HdfsCloseFile() when run stress test with COPY
> --------------------------------------------------------------
>
>                 Key: HAWQ-376
>                 URL: https://issues.apache.org/jira/browse/HAWQ-376
>             Project: Apache HAWQ
>          Issue Type: Bug
>          Components: Core
>            Reporter: Ming LI
>            Assignee: Ming LI
>
> HAWQ core dumped while running a stress test with COPY. The QE core dumped while running concurrent COPY operations:
> Core was generated by `postgres: port  5532, gpadmin gpsqltest_... 172.28.8.250(48940) con60 seg103 cm'.
> Program terminated with signal 11, Segmentation fault.
> #0  0x0000003aeac0f5db in raise () from /lib64/libpthread.so.0
> Missing separate debuginfos, use: debuginfo-install hawq-2.0.0.0_beta-19836.x86_64
> (gdb) bt
> #0  0x0000003aeac0f5db in raise () from /lib64/libpthread.so.0
> #1  0x00000000009d7a4c in SafeHandlerForSegvBusIll (processName=0xd44f82 "Segment process", postgres_signal_arg=11) at elog.c:4515
> #2  0x00000000009d7c79 in StandardHandlerForSigillSigsegvSigbus_OnMainThread (processName=0xd44f82 "Segment process", postgres_signal_arg=11) at elog.c:4593
> #3  0x00000000008eb32a in CdbProgramErrorHandler (postgres_signal_arg=11) at postgres.c:3423
> #4  <signal handler called>
> #5  Hdfs::InputStream::close (this=<value optimized out>)
>     at /data1/pulse2-agent/agents/agent1/work/LIBHDFS3-2.0-stash/rhel5_x86_64/src/client/InputStream.cpp:104
> #6  0x00007f76e064b88e in hdfsCloseFile (fs=<value optimized out>, file=0x342df40)
>     at /data1/pulse2-agent/agents/agent1/work/LIBHDFS3-2.0-stash/rhel5_x86_64/src/client/Hdfs.cpp:705
> #7  0x00007f76a8829f13 in gpfs_hdfs_closefile (fcinfo=0x7fffb1ac0670) at gpfshdfs.c:336
> #8  0x00000000008c920d in HdfsCloseFile (protocol=0xd3c40b0 "hdfs", fileSystem=0x342f1f0, file=0x342df40) at filesystem.c:440
> #9  0x00000000008c2309 in HdfsFileClose (file=206, canReportError=0 '\000') at fd.c:2684
> #10 0x00000000008c0ecd in CleanupTempFiles (isProcExit=0 '\000') at fd.c:2113
> #11 0x00000000008c0d52 in AtEOXact_Files () at fd.c:2063
> #12 0x000000000051f492 in AbortTransaction () at xact.c:2897
> #13 0x000000000052164e in AbortOutOfAnyTransaction () at xact.c:4490
> #14 0x00000000009e70ff in ShutdownPostgres (code=1, arg=0) at postinit.c:702
> #15 0x00000000008ccdda in shmem_exit (code=1) at ipc.c:205
> #16 0x00000000008cccdc in proc_exit_prepare (code=1) at ipc.c:165
> #17 0x00000000008ccc59 in proc_exit (code=1) at ipc.c:93
> #18 0x00000000009cfb86 in errfinish (dummy=0) at elog.c:667
> #19 0x00000000008eb4fd in ProcessInterrupts () at postgres.c:3506
> #20 0x00000000009cfbc9 in errfinish (dummy=0) at elog.c:692
> #21 0x00000000008bdd5c in LruDelete (file=206) at fd.c:646
> #22 0x00000000008be2a8 in ReleaseLruFile () at fd.c:816
> #23 0x00000000008c2031 in HdfsPathNameOpenFile (fileName=0x370ae078 "hdfs://gphd-cluster/hawq/hawq-1453533506/16385/44891/45005/3688", fileFlags=0,
>     fileMode=384) at fd.c:2615
> #24 0x00000000008c3ab1 in PathNameOpenFile (fileName=0x370ae078 "hdfs://gphd-cluster/hawq/hawq-1453533506/16385/44891/45005/3688", fileFlags=0, fileMode=384)
>     at fd.c:3135
> #25 0x0000000000af2461 in MirroredAppendOnly_DoOpen (open=0x36df9060, relFileNode=0x20b4c180, segmentFileNum=3688,
>     relationName=0x36918510 "lineitem_copy_ao_parquet_snappy_part_part_1_prt_p1_19", logicalEof=0, create=0 '\000', readOnly=1 '\001',
>     primaryError=0x7fffb1ac13d0) at cdbmirroredappendonly.c:129
> #26 0x0000000000af2796 in MirroredAppendOnly_OpenReadWrite (open=0x36df9060, relFileNode=0x20b4c180, segmentFileNum=3688,
>     relationName=0x36918510 "lineitem_copy_ao_parquet_snappy_part_part_1_prt_p1_19", logicalEof=0, readOnly=1 '\001', primaryError=0x7fffb1ac13d0)
>     at cdbmirroredappendonly.c:270
> #27 0x000000000059aed6 in OpenSegmentFile (mirroredOpen=0x36df9060,
>     filePathName=0x36df88f8 "hdfs://gphd-cluster/hawq/hawq-1453533506/16385/44891/45005/3688", logicalEof=0, relFileNode=0x20b4c180, segmentFileNum=3688,
>     relname=0x36918510 "lineitem_copy_ao_parquet_snappy_part_part_1_prt_p1_19", parquet_file=0x36df89c8, parquet_file_previous=0x36df89cc,
>     protocol_read=0x36df89d0, tableAttrs=0x20b4c4f0, parquetMetadata=0x36df8a38, fileLen=0x36df8a28, fileLen_uncompressed=0x36df8a20,
>     previous_rowgroupcnt=0x36df8a18) at parquetam.c:950
> #28 0x000000000059ae2a in SetCurrentFileSegForWrite (parquetInsertDesc=0x36df89b0, segfileinfo=0x7f76a80bf000) at parquetam.c:894
> #29 0x000000000059a2d4 in parquet_insert_init (rel=0x20b4c180, segfileinfo=0x7f76a80bf000) at parquetam.c:603
> #30 0x0000000000686d9b in CopyFrom (cstate=0x33c9858) at copy.c:4159
> #31 0x000000000067d1f9 in DoCopy (stmt=0x32eb018,
>     queryString=0x335c75a "COPY public.lineitem_copy(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipm"...) at copy.c:1644
> #32 0x00000000008f5d81 in ProcessUtility (parsetree=0x32eb018,
>     queryString=0x334c488 "COPY public.lineitem_copy(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipm"..., params=0x0, isTopLevel=1 '\001', dest=0x7f76a1f700e8,
>     completionTag=0x7fffb1ac2020 "") at utility.c:1076
> #33 0x00000000008f3007 in PortalRunUtility (portal=0x3309028, utilityStmt=0x32eb018, isTopLevel=1 '\001', dest=0x7f76a1f700e8,
>     completionTag=0x7fffb1ac2020 "") at pquery.c:1887
> #34 0x00000000008f32d2 in PortalRunMulti (portal=0x3309028, isTopLevel=1 '\001', dest=0x7f76a1f700e8, altdest=0x7f76a1f700e8,
>     completionTag=0x7fffb1ac2020 "") at pquery.c:1997
> #35 0x00000000008f26dc in PortalRun (portal=0x3309028, count=9223372036854775807, isTopLevel=1 '\001', dest=0x7f76a1f700e8, altdest=0x7f76a1f700e8,
>     completionTag=0x7fffb1ac2020 "") at pquery.c:1514
> #36 0x00000000008e7d77 in exec_mpp_query (
>     query_string=0x335c75a "COPY public.lineitem_copy(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipm"..., serializedQuerytree=0x335c927 "$\264\032", serializedQuerytreelen=107624,
>     serializedPlantree=0x0, serializedPlantreelen=0, serializedParams=0x0, serializedParamslen=0, serializedSliceInfo=0x0, serializedSliceInfolen=0,
>     serializedResource=0x3376de0 " \004", serializedResourceLen=120, seqServerHost=0x3376e58 "172.28.8.250", seqServerPort=16444, localSlice=0)
>     at postgres.c:1404
> #37 0x00000000008ed9a5 in PostgresMain (argc=266, argv=0x3243a58, username=0x321f810 "gpadmin") at postgres.c:4886
> #38 0x0000000000893749 in BackendRun (port=0x31d2c80) at postmaster.c:5875
> #39 0x0000000000892bd3 in BackendStartup (port=0x31d2c80) at postmaster.c:5468
> #40 0x000000000088cc92 in ServerLoop () at postmaster.c:2147
> #41 0x000000000088bd68 in PostmasterMain (argc=9, argv=0x31e9940) at postmaster.c:1439
> #42 0x00000000007a5cca in main (argc=9, argv=0x31e9940) at main.c:226	



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)