You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2019/04/21 13:32:20 UTC

[hawq] branch master updated: HAWQ-1704. Add ORC protocol validators in hawq

This is an automated email from the ASF dual-hosted git repository.

huor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hawq.git


The following commit(s) were added to refs/heads/master by this push:
     new d9a45b4  HAWQ-1704. Add ORC protocol validators in hawq
d9a45b4 is described below

commit d9a45b423807b649f522dbb2cc08d9c896b3135f
Author: oushu1tuyu1 <tu...@oushu.io>
AuthorDate: Fri Apr 19 13:14:41 2019 +0800

    HAWQ-1704. Add ORC protocol validators in hawq
---
 contrib/orc/Makefile                  |  15 ++
 contrib/orc/README                    |  84 +++++++++++
 contrib/orc/hive_install.sql          |  15 ++
 contrib/orc/orc.c                     | 257 ++++++++++++++++++++++++++++++++++
 contrib/orc/orc_init.sql              |  60 ++++++++
 contrib/orc/orc_install.sql           |  85 +++++++++++
 contrib/orc/orc_uninstall.sql         |  36 +++++
 src/backend/utils/hawq_type_mapping.c | 209 +++++++++++++++++++++++++++
 src/include/utils/hawq_type_mapping.h |  86 ++++++++++++
 9 files changed, 847 insertions(+)

diff --git a/contrib/orc/Makefile b/contrib/orc/Makefile
new file mode 100644
index 0000000..f432146
--- /dev/null
+++ b/contrib/orc/Makefile
@@ -0,0 +1,15 @@
+# Builds the shared module "orc" from the single object orc.o.
+MODULE_big = orc
+OBJS       = orc.o
+
+ifdef USE_PGXS
+# Out-of-tree build: locate the PGXS makefile through pg_config.
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+# In-tree build from contrib/orc, two levels below the top build directory.
+subdir = contrib/orc
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+# NOTE(review): -l/-L are linker options but are appended to CFLAGS here,
+# together with the -I include paths. PGXS normally takes these through
+# SHLIB_LINK and PG_CPPFLAGS -- confirm the link step really picks them up.
+# NOTE(review): the flags are added only in this in-tree branch, so a
+# USE_PGXS build gets none of them -- confirm that is intended.
+override CFLAGS += -lstorage -ljson-c -luuid -I${top_builddir}/src/backend/utils \
+				   -L${top_builddir}/depends/storage/build/install/usr/local/hawq/lib \
+				   -I${top_builddir}/depends/storage/build/install/usr/local/hawq/include 
+endif
diff --git a/contrib/orc/README b/contrib/orc/README
new file mode 100644
index 0000000..fa41fbf
--- /dev/null
+++ b/contrib/orc/README
@@ -0,0 +1,84 @@
+Fresh installation from 3.0.0.0 source code
+-------------------------------------------
+1. Compile ORC format in pluggable storage framework
+   $ ./configure --with-orc; make -j8; make install
+
+2. Configure and initialize cluster
+   $ hawq init cluster -a
+
+
+
+Fresh installation from 3.0.0.0 rpm package
+-------------------------------------------
+1. Setup 3.0.0.0 yum repository
+   $ cd /etc/yum.repos.d
+   $ sudo wget  http://yum.oushu.io/oushurepo/yumrepo/release/oushu-database/centos7/3.0.0.0/oushu-database.repo
+   $ sudo wget http://yum.oushu.io/oushurepo/yumrepo/oushu-database-utils/centos7/1.1.0.0/oushu-database-utils.repo
+
+2. Install rpm package
+   $ sudo yum install -y hawq
+
+3. Configure and initialize cluster
+   $ hawq init cluster -a
+
+
+
+Upgrade from 2.2.0.0 to 3.0.0.0
+-------------------------------
+1. Stop 2.2.0.0 cluster on master node
+   $ hawq stop cluster -a
+
+2. Backup 2.2.0.0 configuration on each node
+   $ cp -rf $GPHOME/etc ~/
+
+3. Backup 2.2.0.0 yum repository on each node
+   $ sudo mv /etc/yum.repos.d/oushu-hawq++.repo /etc/yum.repos.d/oushu-hawq++.repo.bak
+   $ sudo mv /etc/yum.repos.d/oushu-hawq++-utils.repo /etc/yum.repos.d/oushu-hawq++-utils.repo.bak
+
+4. Setup 3.0.0.0 yum repository on each node
+   $ sudo wget -O /etc/yum.repos.d/oushu-database.repo http://yum.oushu.io/oushurepo/yumrepo/test/oushu-database/centos7/3.0.0.0/oushu-database.repo
+   $ sudo wget -O /etc/yum.repos.d/oushu-database-utils.repo http://yum.oushu.io/oushurepo/yumrepo/oushu-database-utils/centos7/1.1.0.0/oushu-database-utils.repo
+   $ sudo yum makecache
+
+5. Install 3.0.0.0 rpm package on each node
+   $ sudo yum remove -y hawq
+   $ sudo yum install -y hawq
+
+6. Configure 3.0.0.0 on each node
+   $ cp -rf ~/etc/* $GPHOME/etc/
+
+7. Install ORC format
+
+   1) Configure the cluster to upgrade mode
+   $ hawq start cluster 
+   $ hawq config -c upgrade_mode -v on --skipvalidation
+   $ hawq restart cluster -a  
+
+   2) Install ORC format in pg_catalog.pg_proc table in template1 on master node
+      Please find $hawq_master_address_port in $GPHOME/etc/hawq-site.xml
+   $ PGOPTIONS='-c gp_session_role=utility' psql -a -p $hawq_master_address_port -d template1 -f $GPHOME/share/postgresql/orc_install.sql > orc_install.out 2>&1
+
+   3) Install ORC format in pg_catalog.pg_proc table in template1 on segment node
+      Please find $hawq_segment_address_port in $GPHOME/etc/hawq-site.xml
+   $ PGOPTIONS='-c gp_session_role=utility' psql -a -p $hawq_segment_address_port -d template1 -f $GPHOME/share/postgresql/orc_install.sql > orc_install.out 2>&1
+
+   4) Configure the cluster to normal mode
+   $ hawq config -c upgrade_mode -v off --skipvalidation
+   $ hawq restart cluster -a 
+
+   5) Get user database name on master node
+   $ psql -a -d template1 -c "select datname from pg_database where datname not in ('hcatalog', 'template0', 'template1') order by datname;"
+
+   6) Install ORC format in pg_catalog.pg_proc table in each user database on master node
+   $ psql -a -d $user_database_name -f $GPHOME/share/postgresql/orc_install.sql > orc_install.out 2>&1
+
+   7) Restart cluster on master node
+   $ hawq restart cluster -a
+
+8. Uninstall ORC format if necessary
+   Follow the ORC format installation steps above, but use /usr/local/hawq/share/postgresql/orc_uninstall.sql instead of /usr/local/hawq/share/postgresql/orc_install.sql
+
+9. Install PostGIS if necessary
+
+10. Install MADlib if necessary
+
diff --git a/contrib/orc/hive_install.sql b/contrib/orc/hive_install.sql
new file mode 100644
index 0000000..624a499
--- /dev/null
+++ b/contrib/orc/hive_install.sql
@@ -0,0 +1,15 @@
+-- --------------------------------------------------------------------
+--
+-- hive_install.sql
+--
+-- Support HIVE protocol in pluggable storage framework
+--
+-- --------------------------------------------------------------------
+
+-- Registers hive_validate(), bound to symbol hiveprotocol_validate in exthive.so.
+-- NOTE(review): created without a schema qualifier, unlike the pg_catalog.orc_*
+-- functions in orc_install.sql -- confirm the target schema is intended.
+CREATE OR REPLACE FUNCTION hive_validate() RETURNS void
+AS '$libdir/exthive.so', 'hiveprotocol_validate'
+LANGUAGE C STABLE;
+
+-- Registers hive_blocklocation(), bound to symbol hiveprotocol_blocklocation
+-- in exthive.so.
+CREATE OR REPLACE FUNCTION hive_blocklocation() RETURNS void
+AS '$libdir/exthive.so', 'hiveprotocol_blocklocation'
+LANGUAGE C STABLE;
diff --git a/contrib/orc/orc.c b/contrib/orc/orc.c
new file mode 100644
index 0000000..ac2f878
--- /dev/null
+++ b/contrib/orc/orc.c
@@ -0,0 +1,257 @@
+#include <json-c/json.h>
+
+#include "c.h"
+#include "port.h"
+#include "postgres.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "nodes/pg_list.h"
+#include "utils/hawq_type_mapping.h"
+#include "utils/memutils.h"
+#include "utils/relcache.h"
+#include "utils/uri.h"
+#include "utils/formatting.h"
+#include "utils/lsyscache.h"
+#include "utils/datetime.h"
+#include "mb/pg_wchar.h"
+#include "commands/defrem.h"
+#include "commands/copy.h"
+#include "access/tupdesc.h"
+#include "access/filesplit.h"
+#include "access/fileam.h"
+#include "access/plugstorage.h"
+#include "cdb/cdbvars.h"
+#include "catalog/pg_exttable.h"
+#include "catalog/namespace.h"
+#include "postmaster/identity.h"
+#include "nodes/makefuncs.h"
+#include "nodes/plannodes.h"
+#include "utils/uri.h"
+#include "cdb/cdbfilesystemcredential.h"
+
+#include "storage/cwrapper/orc-format-c.h"
+#include "storage/cwrapper/hdfs-file-system-c.h"
+#include "cdb/cdbvars.h"
+/*
+ * ORC-related limits.
+ * NOTE(review): none of these three constants is referenced in this file;
+ * confirm whether they are still needed here.
+ */
+#define ORC_TIMESTAMP_EPOCH_JDATE 2457024 /* == date2j(2015, 1, 1) */
+#define MAX_ORC_ARRAY_DIMS        10000
+#define ORC_NUMERIC_MAX_PRECISION 38
+
+/* Do the module magic dance */
+PG_MODULE_MAGIC
+;
+
+/*
+ * Validators for pluggable storage format ORC.
+ * Each name declared here is defined below as Datum f(PG_FUNCTION_ARGS).
+ */
+PG_FUNCTION_INFO_V1(orc_validate_interfaces);
+PG_FUNCTION_INFO_V1(orc_validate_options);
+PG_FUNCTION_INFO_V1(orc_validate_encodings);
+PG_FUNCTION_INFO_V1(orc_validate_datatypes);
+
+/* Implementation of validators for pluggable storage format ORC */
+
+/*
+ * orc_validate_interfaces
+ *
+ * Checks the format name carried by the PlugStorageValidator found in
+ * fcinfo->context. The name is accepted when its first three characters
+ * equal "orc", ignoring case; otherwise an ERROR with
+ * ERRCODE_SYNTAX_ERROR is raised. Returns void.
+ */
+Datum orc_validate_interfaces(PG_FUNCTION_ARGS)
+{
+  PlugStorageValidator validator = (PlugStorageValidator) (fcinfo->context);
+  const char *name = validator->format_name;
+  bool is_orc = (pg_strncasecmp(name, "orc", strlen("orc")) == 0);
+
+  if (is_orc)
+  {
+    PG_RETURN_VOID();
+  }
+
+  ereport(ERROR,
+      (errcode(ERRCODE_SYNTAX_ERROR),
+       errmsg("orc_validate_interface : incorrect format name \'%s\'", name)));
+
+  PG_RETURN_VOID();
+}
+
+/*
+ * orc_validate_options
+ *
+ * Validates the format options of an ORC table and serializes them.
+ *
+ * Inputs are taken from the PlugStorageValidator in fcinfo->context:
+ * format_opts (list of DefElem), format_str (output buffer), is_writable
+ * and tuple_desc.
+ *
+ * Option names are matched case-insensitively by prefix, so any key that
+ * merely starts with one of the names below is treated as that option:
+ *   formatter    - must be "orc"; mandatory
+ *   compresstype - one of "none", "snappy", "lz4"
+ *   bloomfilter  - comma-separated list, each item is checked against the
+ *                  column names of tuple_desc
+ *   dicthreshold - number within [0, 1]
+ *   bucketnum    - integer > 0
+ *   category     - one of "internal", "external"
+ * Any other key, a malformed value, or a missing formatter raises ERROR
+ * with ERRCODE_SYNTAX_ERROR.
+ *
+ * Every accepted option is appended to format_str as "key 'val' ". The
+ * size limit is checked BEFORE writing, so format_str never receives more
+ * than maxlen characters plus the terminating NUL.
+ * NOTE(review): this assumes the caller's format_str buffer holds at least
+ * maxlen + 1 (8192) bytes -- confirm against the caller.
+ *
+ * Returns void.
+ */
+Datum orc_validate_options(PG_FUNCTION_ARGS)
+{
+  PlugStorageValidator psv = (PlugStorageValidator) (fcinfo->context);
+
+  List *format_opts = psv->format_opts;
+  char *format_str = psv->format_str;
+  bool is_writable = psv->is_writable;
+  TupleDesc tup_desc = psv->tuple_desc;
+
+  /* Out-parameters of checkPlugStorageFormatOption, one per option. */
+  char *formatter = NULL;
+  char *compresstype = NULL;
+  char *bloomfilter = NULL;
+  char *dicthreshold = NULL;
+  char *bucketnum = NULL;
+  char *category = NULL;
+
+  ListCell *opt;
+
+  const int maxlen = 8 * 1024 - 1;
+  int len = 0;
+
+  foreach(opt, format_opts)
+  {
+    DefElem *defel = (DefElem *) lfirst(opt);
+    char *key = defel->defname;
+    bool need_free_value = false;
+    char *val = (char *) defGetString(defel, &need_free_value);
+    size_t entry_len;
+
+    /*
+     * No option name is a prefix of another, so a key can match at most
+     * one branch of this chain.
+     */
+    if (strncasecmp(key, "formatter", strlen("formatter")) == 0)
+    {
+      char *formatter_values[] =
+      { "orc" };
+      checkPlugStorageFormatOption(&formatter, key, val,
+      true, 1, formatter_values);
+    }
+    else if (strncasecmp(key, "compresstype", strlen("compresstype")) == 0)
+    {
+      char *compresstype_values[] =
+      { "none", "snappy", "lz4" };
+      checkPlugStorageFormatOption(&compresstype, key, val, is_writable,
+          3, compresstype_values);
+    }
+    else if (strncasecmp(key, "bloomfilter", strlen("bloomfilter")) == 0)
+    {
+      int attnum = tup_desc->natts;
+      char **attribute_names = palloc0(attnum * sizeof(char *));
+      char *dup_val = pstrdup(val); /* strtok_r modifies its input */
+      char *saveptr = NULL;
+      char *token;
+
+      /* The allowed values are the column names of the table. */
+      for (int i = 0; i < attnum; ++i)
+      {
+        attribute_names[i] =
+            pstrdup(((Form_pg_attribute) (tup_desc->attrs[i]))->attname.data);
+      }
+
+      /*
+       * strtok_r keeps its state in saveptr, so the callee cannot disturb
+       * the tokenization.
+       */
+      for (token = strtok_r(dup_val, ",", &saveptr);
+           token != NULL;
+           token = strtok_r(NULL, ",", &saveptr))
+      {
+        checkPlugStorageFormatOption(&bloomfilter, key, token, true, attnum, attribute_names);
+        /* Reset so the next list item is checked like a first occurrence. */
+        bloomfilter = NULL;
+      }
+    }
+    else if (strncasecmp(key, "dicthreshold", strlen("dicthreshold")) == 0)
+    {
+      checkPlugStorageFormatOption(&dicthreshold, key, val,
+      true, 0, NULL);
+      char *end;
+      double threshold = strtod(val, &end);
+      if (end == val || *end != '\0' || threshold < 0 || threshold > 1)
+      {
+        ereport(ERROR,
+            (errcode(ERRCODE_SYNTAX_ERROR), errmsg("dicthreshold \"%s\" must be within [0-1]", val), errOmitLocation(true)));
+      }
+    }
+    else if (strncasecmp(key, "bucketnum", strlen("bucketnum")) == 0)
+    {
+      checkPlugStorageFormatOption(&bucketnum, key, val,
+      true, 0, NULL);
+      char *end;
+      long bucketnumber = strtol(val, &end, 10);
+      if (end == val || *end != '\0' || bucketnumber <= 0)
+      {
+        ereport(ERROR,
+            (errcode(ERRCODE_SYNTAX_ERROR), errmsg("bucketnum \"%s\" must be > 0", val), errOmitLocation(true)));
+      }
+    }
+    else if (strncasecmp(key, "category", strlen("category")) == 0)
+    {
+      char *category_values[] =
+      { "internal", "external" };
+      checkPlugStorageFormatOption(&category, key, val,
+      true, 2, category_values);
+    }
+    else
+    {
+      ereport(ERROR,
+          (errcode(ERRCODE_SYNTAX_ERROR), errmsg("Option \"%s\" for ORC table is invalid", key), errhint("Format options for ORC table must be one of " "formatter, compresstype, bloomfilter, dicthreshold, bucketnum or category"), errOmitLocation(true)));
+    }
+
+    /*
+     * "key 'val' " occupies strlen(key) + strlen(val) + 4 characters.
+     * Reject the option before writing it, so format_str is never overrun.
+     * len <= maxlen holds on every iteration, so maxlen - len is >= 0.
+     */
+    entry_len = strlen(key) + strlen(val) + 4;
+    if (entry_len > (size_t) (maxlen - len))
+    {
+      ereport(ERROR,
+          (errcode(ERRCODE_SYNTAX_ERROR), errmsg("format options must be less than %d bytes in size", maxlen), errOmitLocation(true)));
+    }
+
+    snprintf(format_str + len, (size_t) (maxlen - len) + 1, "%s '%s' ", key, val);
+    len += (int) entry_len;
+
+    if (need_free_value)
+    {
+      pfree(val);
+      val = NULL;
+    }
+
+    AssertImply(need_free_value, NULL == val);
+  }
+
+  if (!formatter)
+  {
+    ereport(ERROR,
+        (errcode(ERRCODE_SYNTAX_ERROR), errmsg("no formatter function specified"), errOmitLocation(true)));
+  }
+
+  PG_RETURN_VOID() ;
+}
+
+/*
+ * orc_validate_encodings
+ *
+ * Checks the encoding name carried by the PlugStorageValidator found in
+ * fcinfo->context. The name is accepted when its first four characters
+ * equal "utf8", ignoring case; otherwise an ERROR with
+ * ERRCODE_SYNTAX_ERROR is raised. Returns void.
+ */
+Datum orc_validate_encodings(PG_FUNCTION_ARGS)
+{
+  PlugStorageValidator validator = (PlugStorageValidator) (fcinfo->context);
+  char *encoding_name = validator->encoding_name;
+  int mismatch = strncasecmp(encoding_name, "utf8", strlen("utf8"));
+
+  if (mismatch == 0)
+  {
+    PG_RETURN_VOID();
+  }
+
+  ereport(ERROR,
+      (errcode(ERRCODE_SYNTAX_ERROR),
+       errmsg("\"%s\" is not a valid encoding for ORC external table. " "Encoding for ORC external table must be UTF8.", encoding_name),
+       errOmitLocation(true)));
+
+  PG_RETURN_VOID();
+}
+
+/*
+ * orc_validate_datatypes
+ *
+ * Walks every attribute of the tuple descriptor carried by the
+ * PlugStorageValidator in fcinfo->context and raises an ERROR with
+ * ERRCODE_SYNTAX_ERROR for the first column whose type id is reported as
+ * unsupported by checkORCUnsupportedDataType(). Returns void.
+ */
+Datum orc_validate_datatypes(PG_FUNCTION_ARGS)
+{
+  PlugStorageValidator validator = (PlugStorageValidator) (fcinfo->context);
+  TupleDesc desc = validator->tuple_desc;
+  int col = 0;
+
+  while (col < desc->natts)
+  {
+    Form_pg_attribute attr = (Form_pg_attribute) (desc->attrs[col]);
+    int32_t datatype = (int32_t) (attr->atttypid);
+
+    if (checkORCUnsupportedDataType(datatype))
+    {
+      ereport(ERROR,
+          (errcode(ERRCODE_SYNTAX_ERROR),
+           errmsg("unsupported data types %d for columns of external ORC table is specified.", datatype),
+           errOmitLocation(true)));
+    }
+    /*
+     * TODO(wshao): additional check for orc decimal type
+     * orc format currently does not support decimal precision larger than 38
+     */
+    ++col;
+  }
+
+  PG_RETURN_VOID();
+}
diff --git a/contrib/orc/orc_init.sql b/contrib/orc/orc_init.sql
new file mode 100644
index 0000000..fa720b7
--- /dev/null
+++ b/contrib/orc/orc_init.sql
@@ -0,0 +1,60 @@
+-- --------------------------------------------------------------------
+--
+-- orc_init.sql
+--
+-- Support ORC format in pluggable storage framework at initialization
+--
+-- --------------------------------------------------------------------
+  
+-- Validators: each function is bound to the same-named symbol in orc.so.
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_interfaces() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_interfaces'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_options() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_options'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_encodings() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_encodings'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_datatypes() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_datatypes'
+LANGUAGE C STABLE;
+
+-- Scan functions.
+-- NOTE(review): contrib/orc/orc.c as added by this commit defines only the four
+-- orc_validate_* functions; confirm orc.so also exports the scan and insert
+-- symbols registered below.
+CREATE OR REPLACE FUNCTION pg_catalog.orc_beginscan() RETURNS bytea
+AS '$libdir/orc.so', 'orc_beginscan'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_getnext_init() RETURNS bytea
+AS '$libdir/orc.so', 'orc_getnext_init'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_getnext() RETURNS bytea
+AS '$libdir/orc.so', 'orc_getnext'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_rescan() RETURNS void
+AS '$libdir/orc.so', 'orc_rescan'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_endscan() RETURNS void
+AS '$libdir/orc.so', 'orc_endscan'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_stopscan() RETURNS void
+AS '$libdir/orc.so', 'orc_stopscan'
+LANGUAGE C STABLE;
+
+-- Insert functions.
+CREATE OR REPLACE FUNCTION pg_catalog.orc_insert_init() RETURNS bytea
+AS '$libdir/orc.so', 'orc_insert_init'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_insert() RETURNS bytea
+AS '$libdir/orc.so', 'orc_insert'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_insert_finish() RETURNS void
+AS '$libdir/orc.so', 'orc_insert_finish'
+LANGUAGE C STABLE;
+
diff --git a/contrib/orc/orc_install.sql b/contrib/orc/orc_install.sql
new file mode 100644
index 0000000..21f9c09
--- /dev/null
+++ b/contrib/orc/orc_install.sql
@@ -0,0 +1,85 @@
+-- --------------------------------------------------------------------
+--
+-- orc_install.sql
+--
+-- Support ORC format in pluggable storage framework
+--
+-- --------------------------------------------------------------------
+
+SET allow_system_table_mods=ddl;
+  
+-- Validators: each function is bound to the same-named symbol in orc.so.
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_interfaces() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_interfaces'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_options() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_options'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_encodings() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_encodings'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_datatypes() RETURNS void
+AS '$libdir/orc.so', 'orc_validate_datatypes'
+LANGUAGE C STABLE;
+
+-- Scan functions.
+-- NOTE(review): contrib/orc/orc.c as added by this commit defines only the four
+-- orc_validate_* functions; confirm orc.so also exports the scan and insert
+-- symbols registered below.
+CREATE OR REPLACE FUNCTION pg_catalog.orc_beginscan() RETURNS bytea
+AS '$libdir/orc.so', 'orc_beginscan'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_getnext_init() RETURNS bytea
+AS '$libdir/orc.so', 'orc_getnext_init'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_getnext() RETURNS bytea
+AS '$libdir/orc.so', 'orc_getnext'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_rescan() RETURNS void
+AS '$libdir/orc.so', 'orc_rescan'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_endscan() RETURNS void
+AS '$libdir/orc.so', 'orc_endscan'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_stopscan() RETURNS void
+AS '$libdir/orc.so', 'orc_stopscan'
+LANGUAGE C STABLE;
+
+-- Insert functions.
+CREATE OR REPLACE FUNCTION pg_catalog.orc_insert_init() RETURNS bytea
+AS '$libdir/orc.so', 'orc_insert_init'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_insert() RETURNS bytea
+AS '$libdir/orc.so', 'orc_insert'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION pg_catalog.orc_insert_finish() RETURNS void
+AS '$libdir/orc.so', 'orc_insert_finish'
+LANGUAGE C STABLE;
+
+-- HDFS protocol functions, loaded from exthdfs.so.
+-- NOTE(review): from here on the functions are created without the pg_catalog
+-- qualifier, and orc_uninstall.sql does not drop them -- confirm both are intended.
+CREATE OR REPLACE FUNCTION hdfs_validate() RETURNS void
+AS '$libdir/exthdfs.so', 'hdfsprotocol_validate'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION hdfs_blocklocation() RETURNS void
+AS '$libdir/exthdfs.so', 'hdfsprotocol_blocklocation'
+LANGUAGE C STABLE;
+
+-- CSV and TEXT formatter functions; all four are loaded from extfmtcsv.so.
+CREATE OR REPLACE FUNCTION csv_in() RETURNS record
+AS '$libdir/extfmtcsv.so', 'extfmtcsv_in'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION csv_out(record) RETURNS bytea
+AS '$libdir/extfmtcsv.so', 'extfmtcsv_out'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION text_in() RETURNS record
+AS '$libdir/extfmtcsv.so', 'extfmttext_in'
+LANGUAGE C STABLE;
+
+CREATE OR REPLACE FUNCTION text_out(record) RETURNS bytea
+AS '$libdir/extfmtcsv.so', 'extfmttext_out'
+LANGUAGE C STABLE;
diff --git a/contrib/orc/orc_uninstall.sql b/contrib/orc/orc_uninstall.sql
new file mode 100644
index 0000000..1ad03b4
--- /dev/null
+++ b/contrib/orc/orc_uninstall.sql
@@ -0,0 +1,36 @@
+-- --------------------------------------------------------------------
+--
+-- orc_uninstall.sql
+--
+-- Remove ORC format in pluggable storage framework
+--
+-- --------------------------------------------------------------------
+
+SET allow_system_table_mods=ddl;
+  
+-- Drops the pg_catalog.orc_* functions: validators first, then scan, then insert.
+-- NOTE(review): the hdfs_*, csv_* and text_* functions that orc_install.sql also
+-- creates are not dropped here -- confirm that is intended.
+DROP FUNCTION IF EXISTS pg_catalog.orc_validate_interfaces();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_validate_options();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_validate_encodings();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_validate_datatypes();
+
+-- Scan functions.
+DROP FUNCTION IF EXISTS pg_catalog.orc_beginscan();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_getnext_init();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_getnext();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_rescan();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_endscan();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_stopscan();
+
+-- Insert functions.
+DROP FUNCTION IF EXISTS pg_catalog.orc_insert_init();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_insert();
+
+DROP FUNCTION IF EXISTS pg_catalog.orc_insert_finish();
+
diff --git a/src/backend/utils/hawq_type_mapping.c b/src/backend/utils/hawq_type_mapping.c
new file mode 100644
index 0000000..9e19076
--- /dev/null
+++ b/src/backend/utils/hawq_type_mapping.c
@@ -0,0 +1,209 @@
+/*-------------------------------------------------------------------------
+*
+* hawq_type_mapping.c
+*     Definitions for hawq type mapping function
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*-------------------------------------------------------------------------
+*/
+
+#include "hawq_type_mapping.h"
+
+#include "miscadmin.h"
+
+/*
+ * map_hawq_type_to_common_plan
+ *
+ * Translates a HAWQ type id (HAWQ_TYPE_*) into the matching *ID type-kind
+ * constant. Ids without an explicit case, and the geometric types, are
+ * resolved at run time: STRUCTEXID when type_is_rowtype() holds,
+ * IOBASETYPEID when type_is_basetype() holds, INVALIDTYPEID otherwise.
+ */
+int32_t map_hawq_type_to_common_plan(int32_t hawqTypeID) {
+  switch (hawqTypeID) {
+    /* boolean and integer types */
+    case HAWQ_TYPE_BOOL:
+      return BOOLEANID;
+    case HAWQ_TYPE_CHAR:
+      return TINYINTID;
+    case HAWQ_TYPE_INT2:
+      return SMALLINTID;
+    case HAWQ_TYPE_INT4:
+      return INTID;
+    case HAWQ_TYPE_INT8:
+    case HAWQ_TYPE_TID:
+      return BIGINTID;
+
+    /* floating point and decimal types */
+    case HAWQ_TYPE_FLOAT4:
+      return FLOATID;
+    case HAWQ_TYPE_FLOAT8:
+      return DOUBLEID;
+    case HAWQ_TYPE_NUMERIC:
+      return DECIMALID;
+
+    /* character types; the unknown type is handled as a string as well */
+    case HAWQ_TYPE_BPCHAR:
+      return CHARID;
+    case HAWQ_TYPE_VARCHAR:
+      return VARCHARID;
+    case HAWQ_TYPE_NAME:
+    case HAWQ_TYPE_TEXT:
+    case HAWQ_TYPE_UNKNOWN:
+      return STRINGID;
+
+    /* date and time types */
+    case HAWQ_TYPE_DATE:
+      return DATEID;
+    case HAWQ_TYPE_TIME:
+      return TIMEID;
+    case HAWQ_TYPE_TIMESTAMP:
+    case HAWQ_TYPE_TIMETZ:
+      return TIMESTAMPID;
+    case HAWQ_TYPE_TIMESTAMPTZ:
+      return TIMESTAMPTZID;
+    case HAWQ_TYPE_INTERVAL:
+      return INTERVALID;
+
+    /* types that are all mapped to the binary kind */
+    case HAWQ_TYPE_MONEY:
+    case HAWQ_TYPE_BIT:
+    case HAWQ_TYPE_VARBIT:
+    case HAWQ_TYPE_BYTE:
+    case HAWQ_TYPE_XML:
+    case HAWQ_TYPE_MACADDR:
+    case HAWQ_TYPE_INET:
+    case HAWQ_TYPE_CIDR:
+      return BINARYID;
+
+    /* array types */
+    case HAWQ_TYPE_INT2_ARRAY:
+      return SMALLINTARRAYID;
+    case HAWQ_TYPE_INT4_ARRAY:
+      return INTARRAYID;
+    case HAWQ_TYPE_INT8_ARRAY:
+      return BIGINTARRAYID;
+    case HAWQ_TYPE_FLOAT4_ARRAY:
+      return FLOATARRAYID;
+    case HAWQ_TYPE_FLOAT8_ARRAY:
+      return DOUBLEARRAYID;
+    case HAWQ_TYPE_TEXT_ARRAY:
+      return STRINGARRAYID;
+    case HAWQ_TYPE_BPCHAR_ARRAY:
+      return BPCHARARRAYID;
+    case HAWQ_TYPE_NUMERIC_ARRAY:
+      return DECIMAL128ARRAYID;
+
+    /* geometric types share the run-time classification of the default */
+    case HAWQ_TYPE_POINT:
+    case HAWQ_TYPE_LSEG:
+    case HAWQ_TYPE_PATH:
+    case HAWQ_TYPE_BOX:
+    case HAWQ_TYPE_POLYGON:
+    case HAWQ_TYPE_CIRCLE:
+    default:
+      if (type_is_rowtype(hawqTypeID)) {
+        return STRUCTEXID;
+      }
+      if (type_is_basetype(hawqTypeID)) {
+        return IOBASETYPEID;
+      }
+      return INVALIDTYPEID;
+  }
+}
+
+/*
+ * checkUnsupportedDataType
+ *
+ * Returns false for the HAWQ type ids listed below and true for every
+ * other id. HAWQ_TYPE_TIMESTAMPTZ is the one conditional entry: it is
+ * reported as supported only when dateStyle equals USE_ISO_DATES.
+ */
+bool checkUnsupportedDataType(int32_t hawqTypeID, int32_t dateStyle) {
+  bool unsupported = true;
+
+  switch (hawqTypeID) {
+    case HAWQ_TYPE_TIMESTAMPTZ:
+      unsupported = (dateStyle != USE_ISO_DATES);
+      break;
+
+    case HAWQ_TYPE_BOOL:
+    case HAWQ_TYPE_CHAR:
+    case HAWQ_TYPE_INT2:
+    case HAWQ_TYPE_INT4:
+    case HAWQ_TYPE_INT8:
+    case HAWQ_TYPE_TID:
+    case HAWQ_TYPE_FLOAT4:
+    case HAWQ_TYPE_FLOAT8:
+    case HAWQ_TYPE_NUMERIC:
+    case HAWQ_TYPE_TEXT:
+    case HAWQ_TYPE_BYTE:
+    case HAWQ_TYPE_BPCHAR:
+    case HAWQ_TYPE_VARCHAR:
+    case HAWQ_TYPE_DATE:
+    case HAWQ_TYPE_TIME:
+    case HAWQ_TYPE_TIMESTAMP:
+    case HAWQ_TYPE_INTERVAL:
+    case HAWQ_TYPE_INT2_ARRAY:
+    case HAWQ_TYPE_INT4_ARRAY:
+    case HAWQ_TYPE_INT8_ARRAY:
+    case HAWQ_TYPE_FLOAT4_ARRAY:
+    case HAWQ_TYPE_FLOAT8_ARRAY:
+    case HAWQ_TYPE_TEXT_ARRAY:
+    case HAWQ_TYPE_BPCHAR_ARRAY:
+    case HAWQ_TYPE_NUMERIC_ARRAY:
+    case HAWQ_TYPE_UNKNOWN:
+      unsupported = false;
+      break;
+
+    default:
+      break;
+  }
+
+  return unsupported;
+}
+
+/*
+ * checkORCUnsupportedDataType
+ *
+ * Type check used by the ORC format: returns false for the HAWQ type ids
+ * listed below and true for every other id.
+ * Some of the included types are enabled in old executor
+ * while not supported in new executor.
+ */
+bool checkORCUnsupportedDataType(int32_t hawqTypeID) {
+  bool supported = false;
+
+  switch (hawqTypeID) {
+    case HAWQ_TYPE_BOOL:
+    case HAWQ_TYPE_CHAR:
+    case HAWQ_TYPE_INT2:
+    case HAWQ_TYPE_INT4:
+    case HAWQ_TYPE_INT8:
+    case HAWQ_TYPE_TID:
+    case HAWQ_TYPE_FLOAT4:
+    case HAWQ_TYPE_FLOAT8:
+    case HAWQ_TYPE_NUMERIC:
+    case HAWQ_TYPE_TEXT:
+    case HAWQ_TYPE_BYTE:
+    case HAWQ_TYPE_BPCHAR:
+    case HAWQ_TYPE_VARCHAR:
+    case HAWQ_TYPE_DATE:
+    case HAWQ_TYPE_TIME:
+    case HAWQ_TYPE_TIMESTAMP:
+    case HAWQ_TYPE_TIMESTAMPTZ:
+    case HAWQ_TYPE_INT2_ARRAY:
+    case HAWQ_TYPE_INT4_ARRAY:
+    case HAWQ_TYPE_INT8_ARRAY:
+    case HAWQ_TYPE_FLOAT4_ARRAY:
+    case HAWQ_TYPE_FLOAT8_ARRAY:
+    case HAWQ_TYPE_TEXT_ARRAY:
+    case HAWQ_TYPE_BPCHAR_ARRAY:
+    case HAWQ_TYPE_UNKNOWN:
+      supported = true;
+      break;
+
+    default:
+      break;
+  }
+
+  return !supported;
+}
diff --git a/src/include/utils/hawq_type_mapping.h b/src/include/utils/hawq_type_mapping.h
new file mode 100644
index 0000000..d21c480
--- /dev/null
+++ b/src/include/utils/hawq_type_mapping.h
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * hawq_type_mapping.h
+ *     Definitions for hawq type and its mapping
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HAWQTYPE_H
+#define HAWQTYPE_H
+
+#include "postgres.h"
+#include "dbcommon/type/type-kind.h"
+
+/*
+ * Numeric ids of the HAWQ types known to the type mapping.
+ * NOTE(review): the values look like the built-in pg_type OIDs -- confirm
+ * before changing any of them.
+ */
+
+/* primitive types */
+#define HAWQ_TYPE_BOOL          16
+#define HAWQ_TYPE_CHAR          18
+#define HAWQ_TYPE_NAME          19
+#define HAWQ_TYPE_INT8          20
+#define HAWQ_TYPE_INT2          21
+#define HAWQ_TYPE_INT4          23
+#define HAWQ_TYPE_TID           27
+#define HAWQ_TYPE_FLOAT4        700
+#define HAWQ_TYPE_FLOAT8        701
+#define HAWQ_TYPE_MONEY         790
+#define HAWQ_TYPE_NUMERIC       1700
+#define HAWQ_TYPE_BYTE          17
+#define HAWQ_TYPE_TEXT          25
+#define HAWQ_TYPE_XML           142
+#define HAWQ_TYPE_MACADDR       829
+#define HAWQ_TYPE_INET          869
+#define HAWQ_TYPE_CIDR          650
+#define HAWQ_TYPE_BPCHAR        1042
+#define HAWQ_TYPE_VARCHAR       1043
+#define HAWQ_TYPE_DATE          1082
+#define HAWQ_TYPE_TIME          1083
+#define HAWQ_TYPE_TIMESTAMP     1114
+#define HAWQ_TYPE_TIMETZ        1266
+#define HAWQ_TYPE_TIMESTAMPTZ   1184
+#define HAWQ_TYPE_INTERVAL      1186
+#define HAWQ_TYPE_BIT           1560
+#define HAWQ_TYPE_VARBIT        1562
+
+/* group types: geometric types and arrays */
+#define HAWQ_TYPE_POINT         600
+#define HAWQ_TYPE_LSEG          601
+#define HAWQ_TYPE_PATH          602
+#define HAWQ_TYPE_BOX           603
+#define HAWQ_TYPE_POLYGON       604
+#define HAWQ_TYPE_CIRCLE        718
+#define HAWQ_TYPE_INT2_ARRAY    1005
+#define HAWQ_TYPE_INT4_ARRAY    1007
+#define HAWQ_TYPE_INT8_ARRAY    1016
+#define HAWQ_TYPE_FLOAT4_ARRAY  1021
+#define HAWQ_TYPE_FLOAT8_ARRAY  1022
+#define HAWQ_TYPE_TEXT_ARRAY    1009
+#define HAWQ_TYPE_BPCHAR_ARRAY  1014
+#define HAWQ_TYPE_NUMERIC_ARRAY 1231
+
+/* Parenthesized so the sign stays attached wherever the macro expands. */
+#define HAWQ_TYPE_INVALID       (-1)
+
+#define HAWQ_TYPE_UNKNOWN       705
+
+/*
+ * Map a HAWQ type id (one of the HAWQ_TYPE_* values) to a type-kind id
+ * (presumably the constants declared in dbcommon/type/type-kind.h).
+ */
+int32_t map_hawq_type_to_common_plan(int32_t hawqTypeID);
+
+/*
+ * Both checks return true when the HAWQ type id is unsupported and false
+ * when it is supported. checkUnsupportedDataType additionally takes the
+ * date style; checkORCUnsupportedDataType is the check used by the ORC
+ * format validators.
+ */
+bool checkUnsupportedDataType(int32_t hawqTypeID, int32_t dateStyle);
+bool checkORCUnsupportedDataType(int32_t hawqTypeID);
+
+#endif   /* HAWQTYPE_H */