You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2015/12/25 07:17:41 UTC

incubator-hawq git commit: HAWQ-280. Fix error accessing external table or copying from file with bad rows

Repository: incubator-hawq
Updated Branches:
  refs/heads/master d926fbf42 -> fc89b4519


HAWQ-280. Fix error accessing external table or copying from file with bad rows


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/fc89b451
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/fc89b451
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/fc89b451

Branch: refs/heads/master
Commit: fc89b45197d6bd4c4abb6b2442627c200ecee5dc
Parents: d926fbf
Author: Ruilong Huo <rh...@pivotal.io>
Authored: Thu Dec 24 20:24:43 2015 -0800
Committer: Ruilong Huo <rh...@pivotal.io>
Committed: Fri Dec 25 14:17:27 2015 +0800

----------------------------------------------------------------------
 src/backend/access/external/fileam.c | 11 +----------
 src/backend/cdb/cdbsreh.c            |  4 ----
 src/backend/commands/copy.c          | 11 +----------
 src/include/cdb/cdbsreh.h            |  7 -------
 4 files changed, 2 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fc89b451/src/backend/access/external/fileam.c
----------------------------------------------------------------------
diff --git a/src/backend/access/external/fileam.c b/src/backend/access/external/fileam.c
index 82abb9d..645e6dc 100644
--- a/src/backend/access/external/fileam.c
+++ b/src/backend/access/external/fileam.c
@@ -824,19 +824,10 @@ else \
 if (IsRejectLimitReached(pstate->cdbsreh)) \
 { \
 	char *rejectmsg_normal = "Segment reject limit reached. Aborting operation. Last error was:";\
-	char *rejectmsg_allbad = "All 1000 first rows in this segment were rejected. Aborting operation regardless of REJECT LIMIT value. Last error was:";\
  	char *rejectmsg_csv_unparsable = "Input includes invalid CSV data that corrupts the ability to parse data rows. This usually means several unescaped embedded QUOTE characters. Data is not parsable.Last error was:";\
 	char *finalmsg;\
 \
- 	if (FIRST_1000_BAD(pstate->cdbsreh))\
- 	{\
- 		/* the special "first 1000 are bad" case */\
- 		finalmsg = (char *) palloc((strlen(pstate->cdbsreh->errmsg) + \
- 									strlen(rejectmsg_allbad) + 12 + 1)\
- 								   * sizeof(char)); \
- 		sprintf(finalmsg, "%s %s", rejectmsg_allbad, pstate->cdbsreh->errmsg);\
- 	}\
- 	else if (CSV_IS_UNPARSABLE(pstate->cdbsreh))\
+ 	if (CSV_IS_UNPARSABLE(pstate->cdbsreh))\
   	{\
  		/* the special "csv un-parsable" case */\
   		finalmsg = (char *) palloc((strlen(pstate->cdbsreh->errmsg) + \

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fc89b451/src/backend/cdb/cdbsreh.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbsreh.c b/src/backend/cdb/cdbsreh.c
index 4e08967..87048c3 100644
--- a/src/backend/cdb/cdbsreh.c
+++ b/src/backend/cdb/cdbsreh.c
@@ -581,10 +581,6 @@ bool IsRejectLimitReached(CdbSreh *cdbsreh)
 {
 	bool	limit_reached = false;
 	
-	/* special case: check for the case that we are rejecting every single row */
-	if(FIRST_1000_BAD(cdbsreh))
-		return true;
-	
 	/* special case: check for un-parsable csv format errors */
 	if(CSV_IS_UNPARSABLE(cdbsreh))
 		return true;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fc89b451/src/backend/commands/copy.c
----------------------------------------------------------------------
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 19d388d..d454d80 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -277,19 +277,10 @@ continue; /* move on to the next data line */
 if (IsRejectLimitReached(cstate->cdbsreh)) \
 {\
 	char *rejectmsg_normal = "Segment reject limit reached. Aborting operation. Last error was:";\
-	char *rejectmsg_allbad = "All 1000 first rows in this segment were rejected. Aborting operation regardless of REJECT LIMIT value. Last error was:";\
 	char *rejectmsg_csv_unparsable = "Input includes invalid CSV data that corrupts the ability to parse data rows. This usually means several unescaped embedded QUOTE characters. Data is not parsable.Last error was:";\
 	char *finalmsg;\
 \
-	if (FIRST_1000_BAD(cstate->cdbsreh))\
-	{\
-		/* the special "first 1000 are bad" case */\
-		finalmsg = (char *) palloc((strlen(cstate->cdbsreh->errmsg) + \
-									strlen(rejectmsg_allbad) + 12 + 1)\
-									* sizeof(char)); \
-		sprintf(finalmsg, "%s %s", rejectmsg_allbad, cstate->cdbsreh->errmsg);\
-	}\
-	else if (CSV_IS_UNPARSABLE(cstate->cdbsreh))\
+	if (CSV_IS_UNPARSABLE(cstate->cdbsreh))\
 	{\
 		/* the special "csv un-parsable" case */\
 		finalmsg = (char *) palloc((strlen(cstate->cdbsreh->errmsg) + \

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fc89b451/src/include/cdb/cdbsreh.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbsreh.h b/src/include/cdb/cdbsreh.h
index dfe5e2d..85c15c2 100644
--- a/src/include/cdb/cdbsreh.h
+++ b/src/include/cdb/cdbsreh.h
@@ -56,13 +56,6 @@
 #define errtable_rawbytes 8   
 
 /*
- * If the 1st 1000 rows got rejected (ALL of them) - there's no point
- * in resuming the load. Better if we abort even if REJECT LIMIT is more
- * than 1000.
- */
-#define FIRST_1000_BAD(sreh) ((sreh->processed == 1000 && sreh->rejectcount >= 1000) ? (true) : (false))
-
-/*
  * In cases of invalid csv input data we end up with not being able to parse the
  * data, resulting in very large data rows. In copy.c we throw an error ("line
  * too long") and continue to try and parse. In some cases this is enough to