You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2017/03/10 07:59:47 UTC
[1/2] incubator-hawq git commit: HAWQ-1371. Fix QE process hang in
shared input scan
Repository: incubator-hawq
Updated Branches:
refs/heads/master 9d0ea4699 -> 914cbc1ab
HAWQ-1371. Fix QE process hang in shared input scan
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/914cbc1a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/914cbc1a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/914cbc1a
Branch: refs/heads/master
Commit: 914cbc1ab43abd421dfe0e139c23ede03dd40f95
Parents: 61780e9
Author: amyrazz44 <ab...@pivotal.io>
Authored: Tue Mar 7 16:22:18 2017 +0800
Committer: Ruilong Huo <rh...@pivotal.io>
Committed: Fri Mar 10 15:59:16 2017 +0800
----------------------------------------------------------------------
src/backend/executor/nodeShareInputScan.c | 39 ++++++++++++++++++++------
1 file changed, 30 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/914cbc1a/src/backend/executor/nodeShareInputScan.c
----------------------------------------------------------------------
diff --git a/src/backend/executor/nodeShareInputScan.c b/src/backend/executor/nodeShareInputScan.c
index 0f08848..049943b 100644
--- a/src/backend/executor/nodeShareInputScan.c
+++ b/src/backend/executor/nodeShareInputScan.c
@@ -40,7 +40,6 @@
#include "postgres.h"
-#include "access/xact.h"
#include "cdb/cdbvars.h"
#include "executor/executor.h"
#include "executor/nodeShareInputScan.h"
@@ -641,10 +640,6 @@ read_retry:
goto read_retry;
else
{
- if(fd >= 0)
- {
- gp_retry_close(fd);
- }
elog(ERROR, "could not read from fifo: %m");
}
Assert(!"Never be here");
@@ -664,10 +659,6 @@ write_retry:
goto write_retry;
else
{
- if(fd >= 0)
- {
- gp_retry_close(fd);
- }
elog(ERROR, "could not write to fifo: %m");
}
@@ -751,6 +742,21 @@ shareinput_reader_waitready(int share_id, PlanGenerator planGen)
{
CHECK_FOR_INTERRUPTS();
+ /*
+ * Readers won't wait for data writing done notification from writer if transaction is
+ * aborting. Writer may fail to send data writing done notification to readers in two
+ * cases:
+ *
+ * 1. The transaction is aborted due to interrupts or exceptions, e.g., the user cancels
+ * the query, or a division by zero occurs on some segment
+ *
+ * 2. Logic errors in the writer that cause it to exit unexpectedly, e.g., a segmentation fault
+ */
+ if (IsAbortInProgress())
+ {
+ break;
+ }
+
MPP_FD_ZERO(&rset);
MPP_FD_SET(pctxt->readyfd, &rset);
@@ -888,6 +894,21 @@ writer_wait_for_acks(ShareInput_Lk_Context *pctxt, int share_id, int xslice)
{
CHECK_FOR_INTERRUPTS();
+ /*
+ * Writer won't wait for ack notification from readers if transaction is
+ * aborting. Readers may fail to send ack notification to writer in two
+ * cases:
+ *
+ * 1. The transaction is aborted due to interrupts or exceptions, e.g., the user cancels
+ * the query, or a division by zero occurs on some segment
+ *
+ * 2. Logic errors in a reader that cause it to exit unexpectedly, e.g., a segmentation fault
+ */
+ if (IsAbortInProgress())
+ {
+ break;
+ }
+
MPP_FD_ZERO(&rset);
MPP_FD_SET(pctxt->donefd, &rset);
[2/2] incubator-hawq git commit: Revert "HAWQ-1342. Fixed QE process
hang in shared input scan on segment node"
Posted by hu...@apache.org.
Revert "HAWQ-1342. Fixed QE process hang in shared input scan on segment node"
The fix introduced a hang regression in shared input scan queries, as described in HAWQ-1371.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/61780e99
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/61780e99
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/61780e99
Branch: refs/heads/master
Commit: 61780e999c508973e3dccd460e80f47853928277
Parents: 9d0ea46
Author: amyrazz44 <ab...@pivotal.io>
Authored: Tue Mar 7 16:02:39 2017 +0800
Committer: Ruilong Huo <rh...@pivotal.io>
Committed: Fri Mar 10 15:59:16 2017 +0800
----------------------------------------------------------------------
src/backend/executor/nodeShareInputScan.c | 49 +++++++++++++++-----------
1 file changed, 28 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/61780e99/src/backend/executor/nodeShareInputScan.c
----------------------------------------------------------------------
diff --git a/src/backend/executor/nodeShareInputScan.c b/src/backend/executor/nodeShareInputScan.c
index 74dbcb5..0f08848 100644
--- a/src/backend/executor/nodeShareInputScan.c
+++ b/src/backend/executor/nodeShareInputScan.c
@@ -40,6 +40,7 @@
#include "postgres.h"
+#include "access/xact.h"
#include "cdb/cdbvars.h"
#include "executor/executor.h"
#include "executor/nodeShareInputScan.h"
@@ -640,6 +641,10 @@ read_retry:
goto read_retry;
else
{
+ if(fd >= 0)
+ {
+ gp_retry_close(fd);
+ }
elog(ERROR, "could not read from fifo: %m");
}
Assert(!"Never be here");
@@ -659,6 +664,10 @@ write_retry:
goto write_retry;
else
{
+ if(fd >= 0)
+ {
+ gp_retry_close(fd);
+ }
elog(ERROR, "could not write to fifo: %m");
}
@@ -785,14 +794,7 @@ shareinput_reader_waitready(int share_id, PlanGenerator planGen)
{
int save_errno = errno;
elog(LOG, "SISC READER (shareid=%d, slice=%d): Wait ready try again, errno %d ... ",
- share_id, currentSliceId, save_errno);
- if(save_errno == EBADF)
- {
- /* The file description is invalid, maybe this FD has been already closed by writer in some cases
- * we need to break here to avoid endless loop and continue to run CHECK_FOR_INTERRUPTS.
- */
- break;
- }
+ share_id, currentSliceId, save_errno);
}
}
return (void *) pctxt;
@@ -923,12 +925,9 @@ writer_wait_for_acks(ShareInput_Lk_Context *pctxt, int share_id, int xslice)
int save_errno = errno;
elog(LOG, "SISC WRITER (shareid=%d, slice=%d): notify still wait for an answer, errno %d",
share_id, currentSliceId, save_errno);
- if(save_errno == EBADF)
- {
- /* The file description is invalid, maybe this FD has been already closed by writer in some cases
- * we need to break here to avoid endless loop and continue to run CHECK_FOR_INTERRUPTS.
- */
- break;
+ /* If select fails with any error other than EINTR, just return to avoid an endless loop. */
+ if(errno != EINTR){
+ return;
}
}
}
@@ -980,6 +979,21 @@ shareinput_writer_waitdone(void *ctxt, int share_id, int nsharer_xslice)
while(ack_needed > 0)
{
CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Writer won't wait for data reading done notification from readers if transaction is
+ * aborting. Readers may fail to send data reading done notification to writer in two
+ * cases:
+ *
+ * 1. The transaction is aborted due to interrupts or exceptions, e.g., the user cancels
+ * the query, or a division by zero occurs on some segment
+ *
+ * 2. Logic errors in a reader that cause it to exit unexpectedly, e.g., a segmentation fault
+ */
+ if (IsAbortInProgress())
+ {
+ break;
+ }
MPP_FD_ZERO(&rset);
MPP_FD_SET(pctxt->donefd, &rset);
@@ -1010,13 +1024,6 @@ shareinput_writer_waitdone(void *ctxt, int share_id, int nsharer_xslice)
int save_errno = errno;
elog(LOG, "SISC WRITER (shareid=%d, slice=%d): wait done time out once, errno %d",
share_id, currentSliceId, save_errno);
- if(save_errno == EBADF)
- {
- /* The file description is invalid, maybe this FD has been already closed by writer in some cases
- * we need to break here to avoid endless loop and continue to run CHECK_FOR_INTERRUPTS.
- */
- break;
- }
}
}