You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hawq.apache.org by "Ming LI (JIRA)" <ji...@apache.org> on 2016/02/01 06:19:39 UTC

[jira] [Created] (HAWQ-376) QE core dump at HdfsCloseFile() when run stress test with COPY

Ming LI created HAWQ-376:
----------------------------

             Summary: QE core dump at HdfsCloseFile() when run stress test with COPY
                 Key: HAWQ-376
                 URL: https://issues.apache.org/jira/browse/HAWQ-376
             Project: Apache HAWQ
          Issue Type: Bug
          Components: Core
            Reporter: Ming LI
            Assignee: Lei Chang


HAWQ core dumped when running a stress test with COPY. The QE core dumped while running concurrent COPY statements:

Core was generated by `postgres: port  5532, gpadmin gpsqltest_... 172.28.8.250(48940) con60 seg103 cm'.
Program terminated with signal 11, Segmentation fault.
#0  0x0000003aeac0f5db in raise () from /lib64/libpthread.so.0
Missing separate debuginfos, use: debuginfo-install hawq-2.0.0.0_beta-19836.x86_64
(gdb) bt
#0  0x0000003aeac0f5db in raise () from /lib64/libpthread.so.0
#1  0x00000000009d7a4c in SafeHandlerForSegvBusIll (processName=0xd44f82 "Segment process", postgres_signal_arg=11) at elog.c:4515
#2  0x00000000009d7c79 in StandardHandlerForSigillSigsegvSigbus_OnMainThread (processName=0xd44f82 "Segment process", postgres_signal_arg=11) at elog.c:4593
#3  0x00000000008eb32a in CdbProgramErrorHandler (postgres_signal_arg=11) at postgres.c:3423
#4  <signal handler called>
#5  Hdfs::InputStream::close (this=<value optimized out>)
    at /data1/pulse2-agent/agents/agent1/work/LIBHDFS3-2.0-stash/rhel5_x86_64/src/client/InputStream.cpp:104
#6  0x00007f76e064b88e in hdfsCloseFile (fs=<value optimized out>, file=0x342df40)
    at /data1/pulse2-agent/agents/agent1/work/LIBHDFS3-2.0-stash/rhel5_x86_64/src/client/Hdfs.cpp:705
#7  0x00007f76a8829f13 in gpfs_hdfs_closefile (fcinfo=0x7fffb1ac0670) at gpfshdfs.c:336
#8  0x00000000008c920d in HdfsCloseFile (protocol=0xd3c40b0 "hdfs", fileSystem=0x342f1f0, file=0x342df40) at filesystem.c:440
#9  0x00000000008c2309 in HdfsFileClose (file=206, canReportError=0 '\000') at fd.c:2684
#10 0x00000000008c0ecd in CleanupTempFiles (isProcExit=0 '\000') at fd.c:2113
#11 0x00000000008c0d52 in AtEOXact_Files () at fd.c:2063
#12 0x000000000051f492 in AbortTransaction () at xact.c:2897
#13 0x000000000052164e in AbortOutOfAnyTransaction () at xact.c:4490
#14 0x00000000009e70ff in ShutdownPostgres (code=1, arg=0) at postinit.c:702
#15 0x00000000008ccdda in shmem_exit (code=1) at ipc.c:205
#16 0x00000000008cccdc in proc_exit_prepare (code=1) at ipc.c:165
#17 0x00000000008ccc59 in proc_exit (code=1) at ipc.c:93
#18 0x00000000009cfb86 in errfinish (dummy=0) at elog.c:667
#19 0x00000000008eb4fd in ProcessInterrupts () at postgres.c:3506
#20 0x00000000009cfbc9 in errfinish (dummy=0) at elog.c:692
#21 0x00000000008bdd5c in LruDelete (file=206) at fd.c:646
#22 0x00000000008be2a8 in ReleaseLruFile () at fd.c:816
#23 0x00000000008c2031 in HdfsPathNameOpenFile (fileName=0x370ae078 "hdfs://gphd-cluster/hawq/hawq-1453533506/16385/44891/45005/3688", fileFlags=0,
    fileMode=384) at fd.c:2615
#24 0x00000000008c3ab1 in PathNameOpenFile (fileName=0x370ae078 "hdfs://gphd-cluster/hawq/hawq-1453533506/16385/44891/45005/3688", fileFlags=0, fileMode=384)
    at fd.c:3135
#25 0x0000000000af2461 in MirroredAppendOnly_DoOpen (open=0x36df9060, relFileNode=0x20b4c180, segmentFileNum=3688,
    relationName=0x36918510 "lineitem_copy_ao_parquet_snappy_part_part_1_prt_p1_19", logicalEof=0, create=0 '\000', readOnly=1 '\001',
    primaryError=0x7fffb1ac13d0) at cdbmirroredappendonly.c:129
#26 0x0000000000af2796 in MirroredAppendOnly_OpenReadWrite (open=0x36df9060, relFileNode=0x20b4c180, segmentFileNum=3688,
    relationName=0x36918510 "lineitem_copy_ao_parquet_snappy_part_part_1_prt_p1_19", logicalEof=0, readOnly=1 '\001', primaryError=0x7fffb1ac13d0)
    at cdbmirroredappendonly.c:270
#27 0x000000000059aed6 in OpenSegmentFile (mirroredOpen=0x36df9060,
    filePathName=0x36df88f8 "hdfs://gphd-cluster/hawq/hawq-1453533506/16385/44891/45005/3688", logicalEof=0, relFileNode=0x20b4c180, segmentFileNum=3688,
    relname=0x36918510 "lineitem_copy_ao_parquet_snappy_part_part_1_prt_p1_19", parquet_file=0x36df89c8, parquet_file_previous=0x36df89cc,
    protocol_read=0x36df89d0, tableAttrs=0x20b4c4f0, parquetMetadata=0x36df8a38, fileLen=0x36df8a28, fileLen_uncompressed=0x36df8a20,
    previous_rowgroupcnt=0x36df8a18) at parquetam.c:950
#28 0x000000000059ae2a in SetCurrentFileSegForWrite (parquetInsertDesc=0x36df89b0, segfileinfo=0x7f76a80bf000) at parquetam.c:894
#29 0x000000000059a2d4 in parquet_insert_init (rel=0x20b4c180, segfileinfo=0x7f76a80bf000) at parquetam.c:603
#30 0x0000000000686d9b in CopyFrom (cstate=0x33c9858) at copy.c:4159
#31 0x000000000067d1f9 in DoCopy (stmt=0x32eb018,
    queryString=0x335c75a "COPY public.lineitem_copy(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipm"...) at copy.c:1644
#32 0x00000000008f5d81 in ProcessUtility (parsetree=0x32eb018,
    queryString=0x334c488 "COPY public.lineitem_copy(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipm"..., params=0x0, isTopLevel=1 '\001', dest=0x7f76a1f700e8,
    completionTag=0x7fffb1ac2020 "") at utility.c:1076
#33 0x00000000008f3007 in PortalRunUtility (portal=0x3309028, utilityStmt=0x32eb018, isTopLevel=1 '\001', dest=0x7f76a1f700e8,
    completionTag=0x7fffb1ac2020 "") at pquery.c:1887
#34 0x00000000008f32d2 in PortalRunMulti (portal=0x3309028, isTopLevel=1 '\001', dest=0x7f76a1f700e8, altdest=0x7f76a1f700e8,
    completionTag=0x7fffb1ac2020 "") at pquery.c:1997
#35 0x00000000008f26dc in PortalRun (portal=0x3309028, count=9223372036854775807, isTopLevel=1 '\001', dest=0x7f76a1f700e8, altdest=0x7f76a1f700e8,
    completionTag=0x7fffb1ac2020 "") at pquery.c:1514
#36 0x00000000008e7d77 in exec_mpp_query (
    query_string=0x335c75a "COPY public.lineitem_copy(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipm"..., serializedQuerytree=0x335c927 "$\264\032", serializedQuerytreelen=107624,
    serializedPlantree=0x0, serializedPlantreelen=0, serializedParams=0x0, serializedParamslen=0, serializedSliceInfo=0x0, serializedSliceInfolen=0,
    serializedResource=0x3376de0 " \004", serializedResourceLen=120, seqServerHost=0x3376e58 "172.28.8.250", seqServerPort=16444, localSlice=0)
    at postgres.c:1404
#37 0x00000000008ed9a5 in PostgresMain (argc=266, argv=0x3243a58, username=0x321f810 "gpadmin") at postgres.c:4886
#38 0x0000000000893749 in BackendRun (port=0x31d2c80) at postmaster.c:5875
#39 0x0000000000892bd3 in BackendStartup (port=0x31d2c80) at postmaster.c:5468
#40 0x000000000088cc92 in ServerLoop () at postmaster.c:2147
#41 0x000000000088bd68 in PostmasterMain (argc=9, argv=0x31e9940) at postmaster.c:1439
#42 0x00000000007a5cca in main (argc=9, argv=0x31e9940) at main.c:226	



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)