You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by zt...@apache.org on 2021/11/18 01:51:41 UTC

[hawq] branch ztao updated (b21c7d3 -> c82cf9f)

This is an automated email from the ASF dual-hosted git repository.

ztao1987 pushed a change to branch ztao
in repository https://gitbox.apache.org/repos/asf/hawq.git.


 discard b21c7d3  HAWQ-1812. fix bug of proxy dispatcher decode bytes as string
    omit aade665  HAWQ-1811. Sync with OushuDB - Phase I
     add e3181da  HAWQ-1811. Sync with OushuDB - Phase I
     add 39f96c0  HAWQ-1812. fix bug of proxy dispatcher decode bytes as string
     add c1adfc2  HAWQ-1813. fix wrong judgment of union query
     new 6c7c752  HAWQ-1814. handle multiple dir in upgrade script
     new c82cf9f  HAWQ-1815. native orc supports udt

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (b21c7d3)
            \
             N -- N -- N   refs/heads/ztao (c82cf9f)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 contrib/orc/orc.c                     |  66 +++++++-------
 src/backend/access/orc/orcam.c        | 158 ++++++++++++++++++++++------------
 src/backend/parser/parse_clause.c     |  28 +-----
 src/backend/utils/hawq_type_mapping.c |  61 ++++++-------
 src/include/utils/hawq_type_mapping.h |   2 +
 tools/bin/upgrade.sh                  |   2 +-
 6 files changed, 180 insertions(+), 137 deletions(-)

[hawq] 01/02: HAWQ-1814. handle multiple dir in upgrade script

Posted by zt...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ztao1987 pushed a commit to branch ztao
in repository https://gitbox.apache.org/repos/asf/hawq.git

commit 6c7c752c083dc175c73b8d4dba9f07e2c8a43736
Author: ztao1987 <zh...@gmail.com>
AuthorDate: Thu Nov 18 09:34:13 2021 +0800

    HAWQ-1814. handle multiple dir in upgrade script
---
 tools/bin/upgrade.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bin/upgrade.sh b/tools/bin/upgrade.sh
index 8452d57..90e77ea 100755
--- a/tools/bin/upgrade.sh
+++ b/tools/bin/upgrade.sh
@@ -102,7 +102,7 @@ SEGMENT_HOSTS=`cat $GPHOME/etc/slaves`
 OPTIONS='-c gp_maintenance_conn=true'
 
 # check whether all tmp dir exsits
-ls $MASTER_TEMP_DIR
+echo $MASTER_TEMP_DIR | sed "s/,/ /g" | xargs ls
 check_error "check master and segment temp dir on master"
 
 # check whether all segments replaced with new binary

[hawq] 02/02: HAWQ-1815. native orc supports udt

Posted by zt...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ztao1987 pushed a commit to branch ztao
in repository https://gitbox.apache.org/repos/asf/hawq.git

commit c82cf9f2a2f61859918f34401c6cad26cd912490
Author: ztao1987 <zh...@gmail.com>
AuthorDate: Thu Nov 18 09:45:04 2021 +0800

    HAWQ-1815. native orc supports udt
---
 contrib/orc/orc.c                     |  66 +++++++-------
 src/backend/access/orc/orcam.c        | 158 ++++++++++++++++++++++------------
 src/backend/utils/hawq_type_mapping.c |  61 ++++++-------
 src/include/utils/hawq_type_mapping.h |   2 +
 4 files changed, 175 insertions(+), 112 deletions(-)

diff --git a/contrib/orc/orc.c b/contrib/orc/orc.c
index 465e4f3..b58943d 100644
--- a/contrib/orc/orc.c
+++ b/contrib/orc/orc.c
@@ -337,36 +337,44 @@ Datum orc_validate_encodings(PG_FUNCTION_ARGS)
  * void
  * orc_validate_datatypes(TupleDesc tupDesc)
  */
-Datum orc_validate_datatypes(PG_FUNCTION_ARGS)
-{
-	PlugStorageValidator psv = (PlugStorageValidator) (fcinfo->context);
-	TupleDesc tup_desc = psv->tuple_desc;
-
-	for (int i = 0; i < tup_desc->natts; ++i)
-	{
-		int32_t datatype =
-				(int32_t) (((Form_pg_attribute) (tup_desc->attrs[i]))->atttypid);
-		int4 	typmod = ((Form_pg_attribute) (tup_desc->attrs[i]))->atttypmod;
-
-		if (checkORCUnsupportedDataType(datatype))
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_SYNTAX_ERROR), errmsg("unsupported data types %s for columns of external ORC table is specified.", TypeNameToString(makeTypeNameFromOid(datatype, -1))), errOmitLocation(true)));
-		}
-		if (HAWQ_TYPE_NUMERIC == datatype)
-		{
-			int4 tmp_typmod = typmod - VARHDRSZ;
-			int precision = (tmp_typmod >> 16) & 0xffff;
-			int scale = tmp_typmod & 0xffff;
-			if (precision < 1 || 38 < precision)
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ORC DECIMAL precision must be between 1 and 38")));
-			if (scale == 0)
-				ereport(NOTICE, (errmsg("Using a scale of zero for ORC DECIMAL")));
-		}
-	}
+Datum orc_validate_datatypes(PG_FUNCTION_ARGS) {
+  PlugStorageValidator psv = (PlugStorageValidator)(fcinfo->context);
+  TupleDesc tup_desc = psv->tuple_desc;
+
+  for (int i = 0; i < tup_desc->natts; ++i) {
+    int32_t datatype =
+        (int32_t)(((Form_pg_attribute)(tup_desc->attrs[i]))->atttypid);
+    int4 typmod = ((Form_pg_attribute)(tup_desc->attrs[i]))->atttypmod;
+
+    if (checkORCUnsupportedDataType(datatype)) {
+      ereport(ERROR,
+              (errcode(ERRCODE_SYNTAX_ERROR),
+               errmsg("unsupported data types %s for columns of external ORC "
+                      "table is specified.",
+                      TypeNameToString(makeTypeNameFromOid(datatype, -1))),
+               errOmitLocation(true)));
+    }
+    if (HAWQ_TYPE_NUMERIC == datatype) {
+      int4 tmp_typmod = typmod - VARHDRSZ;
+      int precision = (tmp_typmod >> 16) & 0xffff;
+      int scale = tmp_typmod & 0xffff;
+      if (precision < 1 || 38 < precision)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                 errmsg("ORC DECIMAL precision must be between 1 and 38")));
+      if (scale == 0)
+        ereport(NOTICE, (errmsg("Using a scale of zero for ORC DECIMAL")));
+    }
+    if (HAEQ_TYPE_UDT(datatype))
+      ereport(ERROR,
+              (errcode(ERRCODE_SYNTAX_ERROR),
+               errmsg("unsupported data types %s for columns of external ORC "
+                      "table is specified.",
+                      TypeNameToString(makeTypeNameFromOid(datatype, -1))),
+               errOmitLocation(true)));
+  }
 
-	PG_RETURN_VOID() ;
+  PG_RETURN_VOID();
 }
 
 /*
diff --git a/src/backend/access/orc/orcam.c b/src/backend/access/orc/orcam.c
index df7242e..2439704 100644
--- a/src/backend/access/orc/orcam.c
+++ b/src/backend/access/orc/orcam.c
@@ -74,6 +74,7 @@ typedef struct OrcFormatData {
   char **colRawValues;
   uint64 *colValLength;
   TimestampType *colTimestamp;
+  struct varlena **colFixedLenUDT;
 } OrcFormatData;
 
 static void initOrcFormatUserData(TupleDesc tup_desc,
@@ -86,8 +87,16 @@ static void initOrcFormatUserData(TupleDesc tup_desc,
   orcFormatData->colRawValues = palloc0(sizeof(char *) * natts);
   orcFormatData->colValLength = palloc0(sizeof(uint64) * natts);
   orcFormatData->colTimestamp = palloc0(sizeof(TimestampType) * natts);
+  orcFormatData->colFixedLenUDT = palloc0(sizeof(struct varlena *) * natts);
 
   for (int i = 0; i < orcFormatData->numberOfColumns; ++i) {
+    // allocate memory for colFixedLenUDT[i] of fixed-length type in advance
+    bool isFixedLengthType = tup_desc->attrs[i]->attlen > 0 ? true : false;
+    if (isFixedLengthType) {
+      orcFormatData->colFixedLenUDT[i] = (struct valena *)palloc0(
+          tup_desc->attrs[i]->attlen + sizeof(uint32_t));
+    }
+
     orcFormatData->colNames[i] = palloc0(NAMEDATALEN);
     strcpy(orcFormatData->colNames[i], tup_desc->attrs[i]->attname.data);
 
@@ -105,8 +114,12 @@ static void initOrcFormatUserData(TupleDesc tup_desc,
 }
 
 static freeOrcFormatUserData(OrcFormatData *orcFormatData) {
-  for (int i = 0; i < orcFormatData->numberOfColumns; ++i)
+  for (int i = 0; i < orcFormatData->numberOfColumns; ++i) {
     pfree(orcFormatData->colNames[i]);
+    if (orcFormatData->colFixedLenUDT[i])
+      pfree(orcFormatData->colFixedLenUDT[i]);
+  }
+
   pfree(orcFormatData->colTimestamp);
   pfree(orcFormatData->colValLength);
   pfree(orcFormatData->colRawValues);
@@ -235,17 +248,37 @@ static void convertAndFillIntoOrcFormatData(OrcFormatData *orcFormatData,
       int *date = (int *)(&(values[i]));
       *date += POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE;
       orcFormatData->colRawValues[i] = (char *)(&(values[i]));
-    } else if (dataType == HAWQ_TYPE_TEXT || dataType == HAWQ_TYPE_BPCHAR ||
-               dataType == HAWQ_TYPE_VARCHAR) {
-      struct varlena *data = PG_DETOAST_DATUM(values[i]);
-      orcFormatData->colRawValues[i] = (char *)data;
-    } else if (dataType == HAWQ_TYPE_BYTE) {
-      orcFormatData->colRawValues[i] = (char *)PG_DETOAST_DATUM(values[i]);
     } else if (dataType == HAWQ_TYPE_NUMERIC) {
       Numeric num = DatumGetNumeric(values[i]);
       orcFormatData->colRawValues[i] = (char *)num;
-      if (NUMERIC_IS_NAN(num))
-        nulls[i] = true;
+      if (NUMERIC_IS_NAN(num)) nulls[i] = true;
+    } else {
+      // Check whether values[i] is a fixed-length UDT.
+      bool isFixedLengthType = tupleDesc->attrs[i]->attlen > 0 ? true : false;
+      bool isPassByVal = tupleDesc->attrs[i]->attbyval;
+      if (isFixedLengthType) {
+        uint32_t dataLen = tupleDesc->attrs[i]->attlen;
+        uint32_t totalLen = dataLen + sizeof(uint32_t);
+
+        uint32_t tmpLen = __builtin_bswap32(totalLen);
+        char *lenArr = (char *)(&tmpLen);
+        memcpy(orcFormatData->colFixedLenUDT[i]->vl_len_, lenArr,
+               sizeof(uint32_t));
+
+        if (isPassByVal) {  // pass by val
+          char *data = (char *)(&values[i]);
+          memcpy(orcFormatData->colFixedLenUDT[i]->vl_dat, data, dataLen);
+          orcFormatData->colRawValues[i] =
+              (char *)(orcFormatData->colFixedLenUDT[i]);
+        } else {  // pass by pointer
+          char *data = (char *)(values[i]);
+          memcpy(orcFormatData->colFixedLenUDT[i]->vl_dat, data, dataLen);
+          orcFormatData->colRawValues[i] =
+              (char *)(orcFormatData->colFixedLenUDT[i]);
+        }
+      } else {
+        orcFormatData->colRawValues[i] = (char *)PG_DETOAST_DATUM(values[i]);
+      }
     }
   }
 }
@@ -402,51 +435,68 @@ void orcReadNext(OrcScanDescData *scanData, TupleTableSlot *slot) {
         continue;
 
       switch (tupleDesc->attrs[i]->atttypid) {
-      case HAWQ_TYPE_BOOL: {
-        values[i] = BoolGetDatum(*(bool *)(orcFormatData->colRawValues[i]));
-        break;
-      }
-      case HAWQ_TYPE_INT2: {
-        values[i] = Int16GetDatum(*(int16_t *)(orcFormatData->colRawValues[i]));
-        break;
-      }
-      case HAWQ_TYPE_INT4: {
-        values[i] = Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]));
-        break;
-      }
-      case HAWQ_TYPE_INT8:
-      case HAWQ_TYPE_TIME:
-      case HAWQ_TYPE_TIMESTAMP:
-      case HAWQ_TYPE_TIMESTAMPTZ: {
-        values[i] = Int64GetDatum(*(int64_t *)(orcFormatData->colRawValues[i]));
-        break;
-      }
-      case HAWQ_TYPE_FLOAT4: {
-        values[i] = Float4GetDatum(*(float *)(orcFormatData->colRawValues[i]));
-        break;
-      }
-      case HAWQ_TYPE_FLOAT8: {
-        values[i] = Float8GetDatum(*(double *)(orcFormatData->colRawValues[i]));
-        break;
-      }
-      case HAWQ_TYPE_VARCHAR:
-      case HAWQ_TYPE_TEXT:
-      case HAWQ_TYPE_BPCHAR:
-      case HAWQ_TYPE_BYTE:
-      case HAWQ_TYPE_NUMERIC: {
-        SET_VARSIZE((struct varlena *)(orcFormatData->colRawValues[i]),
-                    orcFormatData->colValLength[i]);
-        values[i] = PointerGetDatum(orcFormatData->colRawValues[i]);
-        break;
-      }
-      case HAWQ_TYPE_DATE: {
-        values[i] = Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]) -
-                                  POSTGRES_EPOCH_JDATE + UNIX_EPOCH_JDATE);
-        break;
-      }
-      default: {
-        break;
-      }
+        case HAWQ_TYPE_BOOL: {
+          values[i] = BoolGetDatum(*(bool *)(orcFormatData->colRawValues[i]));
+          break;
+        }
+        case HAWQ_TYPE_INT2: {
+          values[i] =
+              Int16GetDatum(*(int16_t *)(orcFormatData->colRawValues[i]));
+          break;
+        }
+        case HAWQ_TYPE_INT4: {
+          values[i] =
+              Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]));
+          break;
+        }
+        case HAWQ_TYPE_INT8:
+        case HAWQ_TYPE_TIME:
+        case HAWQ_TYPE_TIMESTAMP:
+        case HAWQ_TYPE_TIMESTAMPTZ: {
+          values[i] =
+              Int64GetDatum(*(int64_t *)(orcFormatData->colRawValues[i]));
+          break;
+        }
+        case HAWQ_TYPE_FLOAT4: {
+          values[i] =
+              Float4GetDatum(*(float *)(orcFormatData->colRawValues[i]));
+          break;
+        }
+        case HAWQ_TYPE_FLOAT8: {
+          values[i] =
+              Float8GetDatum(*(double *)(orcFormatData->colRawValues[i]));
+          break;
+        }
+        case HAWQ_TYPE_DATE: {
+          values[i] =
+              Int32GetDatum(*(int32_t *)(orcFormatData->colRawValues[i]) -
+                            POSTGRES_EPOCH_JDATE + UNIX_EPOCH_JDATE);
+          break;
+        }
+        default: {
+          // Check whether values[i] is a fixed-length UDT.
+          bool isFixedLengthType =
+              tupleDesc->attrs[i]->attlen > 0 ? true : false;
+          bool isPassByVal = tupleDesc->attrs[i]->attbyval;
+          if (isFixedLengthType) {
+            if (isPassByVal) {  // pass by val
+              struct varlena *var =
+                  (struct varlena *)(orcFormatData->colRawValues[i]);
+              uint32 valLen = *(uint32 *)(var->vl_len_);
+              memcpy((void *)&values[i], var->vl_dat, valLen);
+            } else {  // pass by pointer
+              SET_VARSIZE((struct varlena *)(orcFormatData->colRawValues[i]),
+                          orcFormatData->colValLength[i]);
+              values[i] = PointerGetDatum(orcFormatData->colRawValues[i] +
+                                          sizeof(uint32_t));
+            }
+          } else {
+            SET_VARSIZE((struct varlena *)(orcFormatData->colRawValues[i]),
+                        orcFormatData->colValLength[i]);
+            values[i] = PointerGetDatum(orcFormatData->colRawValues[i]);
+          }
+          break;
+        }
       }
     }
     TupSetVirtualTupleNValid(slot, slot->tts_tupleDescriptor->natts);
diff --git a/src/backend/utils/hawq_type_mapping.c b/src/backend/utils/hawq_type_mapping.c
index be9eb4d..844bf5f 100644
--- a/src/backend/utils/hawq_type_mapping.c
+++ b/src/backend/utils/hawq_type_mapping.c
@@ -1,28 +1,29 @@
 /*-------------------------------------------------------------------------
-*
-* hawq_type_mapping.c
-*     Definitions for hawq type mapping function
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*-------------------------------------------------------------------------
-*/
+ *
+ * hawq_type_mapping.c
+ *     Definitions for hawq type mapping function
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *-------------------------------------------------------------------------
+ */
 
+#include "catalog/pg_magic_oid.h"
 #include "utils/hawq_type_mapping.h"
 #include "miscadmin.h"
 
@@ -129,10 +130,13 @@ int32_t map_hawq_type_to_common_plan(int32_t hawqTypeID) {
     case HAWQ_TYPE_POLYGON:
     case HAWQ_TYPE_CIRCLE:
     default:
-      return type_is_rowtype(hawqTypeID)
-                 ? (STRUCTEXID)
-                 : (type_is_basetype(hawqTypeID) ? IOBASETYPEID
-                                                 : INVALIDTYPEID);
+      if (HAEQ_TYPE_UDT(hawqTypeID))
+        return BINARYID;
+      else
+        return type_is_rowtype(hawqTypeID)
+                   ? (STRUCTEXID)
+                   : (type_is_basetype(hawqTypeID) ? IOBASETYPEID
+                                                   : INVALIDTYPEID);
   }
 }
 
@@ -189,7 +193,6 @@ bool checkORCUnsupportedDataType(int32_t hawqTypeID) {
     case HAWQ_TYPE_INT2:
     case HAWQ_TYPE_INT4:
     case HAWQ_TYPE_INT8:
-    case HAWQ_TYPE_TID:
     case HAWQ_TYPE_FLOAT4:
     case HAWQ_TYPE_FLOAT8:
     case HAWQ_TYPE_TEXT:
@@ -211,6 +214,6 @@ bool checkORCUnsupportedDataType(int32_t hawqTypeID) {
     case HAWQ_TYPE_UNKNOWN:
       return false;
     default:
-      return true;
+      return !HAEQ_TYPE_UDT(hawqTypeID);
   }
 }
diff --git a/src/include/utils/hawq_type_mapping.h b/src/include/utils/hawq_type_mapping.h
index 79320ea..d3e0f20 100644
--- a/src/include/utils/hawq_type_mapping.h
+++ b/src/include/utils/hawq_type_mapping.h
@@ -81,6 +81,8 @@
 
 #define HAWQ_TYPE_UNKNOWN 705
 
+#define HAEQ_TYPE_UDT(x) ( x > FirstNormalObjectId)
+
 extern int32_t map_hawq_type_to_common_plan(int32_t hawqTypeID);
 
 // if hawq type unsupported, return true