You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hawq.apache.org by oushu1wangziming1 <gi...@git.apache.org> on 2018/08/24 10:45:44 UTC

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

GitHub user oushu1wangziming1 opened a pull request:

    https://github.com/apache/incubator-hawq/pull/1394

    HAWQ-1629. Add ORC format using pluggable storage framework.

    HAWQ-1629. Add ORC format using pluggable storage framework.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/oushu1wangziming1/incubator-hawq HAWQ-1629

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/incubator-hawq/pull/1394.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #1394
    
----
commit 98918702b6d0f53762e23731fc6009272cce976e
Author: oushu1wangziming1 <wa...@...>
Date:   2018-08-24T02:40:02Z

    HAWQ-1629. Add ORC format using pluggable storage framework.

----


---

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

Posted by huor <gi...@git.apache.org>.
Github user huor commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1394#discussion_r212932343
  
    --- Diff: contrib/orc/orc.c ---
    @@ -0,0 +1,369 @@
    +/*-------------------------------------------------------------------------
    +*
    +* hawq_type_mapping.c
    +*     Definitions for hawq type mapping function
    +*
    +* Licensed to the Apache Software Foundation (ASF) under one
    +* or more contributor license agreements.  See the NOTICE file
    +* distributed with this work for additional information
    +* regarding copyright ownership.  The ASF licenses this file
    +* to you under the Apache License, Version 2.0 (the
    +* "License"); you may not use this file except in compliance
    +* with the License.  You may obtain a copy of the License at
    +*
    +*   http://www.apache.org/licenses/LICENSE-2.0
    +*
    +* Unless required by applicable law or agreed to in writing,
    +* software distributed under the License is distributed on an
    +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    +* KIND, either express or implied.  See the License for the
    +* specific language governing permissions and limitations
    +* under the License.
    +*
    +*-------------------------------------------------------------------------
    +*/
    +
    +
    +#include <json-c/json.h>
    +
    +#include "c.h"
    +#include "port.h"
    +#include "postgres.h"
    +#include "fmgr.h"
    +#include "funcapi.h"
    +#include "nodes/pg_list.h"
    +#include "utils/hawq_type_mapping.h"
    +#include "utils/memutils.h"
    +#include "utils/relcache.h"
    +#include "utils/uri.h"
    +#include "utils/formatting.h"
    +#include "utils/lsyscache.h"
    +#include "utils/datetime.h"
    +#include "mb/pg_wchar.h"
    +#include "commands/defrem.h"
    +#include "commands/copy.h"
    +#include "access/tupdesc.h"
    +#include "access/filesplit.h"
    +#include "access/plugstorage.h"
    +#include "cdb/cdbvars.h"
    +#include "catalog/pg_exttable.h"
    +#include "catalog/namespace.h"
    +#include "postmaster/identity.h"
    +#include "nodes/makefuncs.h"
    +#include "nodes/plannodes.h"
    +#include "utils/uri.h"
    +
    +
    +#define ORC_TIMESTAMP_EPOCH_JDATE	2457024 /* == date2j(2015, 1, 1) */
    +#define MAX_ORC_ARRAY_DIMS        10000
    +
    +/* Do the module magic dance */
    +PG_MODULE_MAGIC;
    +
    +/* Validators for pluggable storage format ORC */
    +PG_FUNCTION_INFO_V1(orc_validate_interfaces);
    +PG_FUNCTION_INFO_V1(orc_validate_options);
    +PG_FUNCTION_INFO_V1(orc_validate_encodings);
    +PG_FUNCTION_INFO_V1(orc_validate_datatypes);
    +
    +/* Accessors for pluggable storage format ORC */
    +PG_FUNCTION_INFO_V1(orc_beginscan);
    +PG_FUNCTION_INFO_V1(orc_getnext_init);
    +PG_FUNCTION_INFO_V1(orc_getnext);
    +PG_FUNCTION_INFO_V1(orc_rescan);
    +PG_FUNCTION_INFO_V1(orc_endscan);
    +PG_FUNCTION_INFO_V1(orc_stopscan);
    +PG_FUNCTION_INFO_V1(orc_insert_init);
    +PG_FUNCTION_INFO_V1(orc_insert);
    +PG_FUNCTION_INFO_V1(orc_insert_finish);
    +
    +/* Definitions of validators for pluggable storage format ORC */
    +Datum orc_validate_interfaces(PG_FUNCTION_ARGS);
    +Datum orc_validate_options(PG_FUNCTION_ARGS);
    +Datum orc_validate_encodings(PG_FUNCTION_ARGS);
    +Datum orc_validate_datatypes(PG_FUNCTION_ARGS);
    +
    +/* Definitions of accessors for pluggable storage format ORC */
    +Datum orc_beginscan(PG_FUNCTION_ARGS);
    +Datum orc_getnext_init(PG_FUNCTION_ARGS);
    +Datum orc_getnext(PG_FUNCTION_ARGS);
    +Datum orc_rescan(PG_FUNCTION_ARGS);
    +Datum orc_endscan(PG_FUNCTION_ARGS);
    +Datum orc_stopscan(PG_FUNCTION_ARGS);
    +Datum orc_insert_init(PG_FUNCTION_ARGS);
    +Datum orc_insert(PG_FUNCTION_ARGS);
    +Datum orc_insert_finish(PG_FUNCTION_ARGS);
    +
    +typedef struct ORCFormatFileSplit {
    +
    +
    +} ORCFormatFileSplit;
    +
    +
    +typedef struct ORCFormatC {
    +
    +}ORCFormatC;
    +
    +
    +typedef struct {
    +
    +} TimestampType;
    +
    +typedef struct ORCFormatUserData
    +{
    +
    +} ORCFormatUserData;
    +
    +
    +static FmgrInfo *get_orc_function(char *formatter_name, char *function_name);
    +static void get_scan_functions(FileScanDesc file_scan_desc);
    +static void get_insert_functions(ExternalInsertDesc ext_insert_desc);
    +static void init_format_user_data_for_read(TupleDesc tup_desc, ORCFormatUserData *user_data);
    +static void init_format_user_data_for_write(TupleDesc tup_desc, ORCFormatUserData *user_data);
    +static void build_options_in_json(List *fmt_opts_defelem, int encoding, char **json_str);
    +static ORCFormatC *create_formatter_instance(List *fmt_opts_defelem, int encoding, int segno);
    +static void build_file_splits(Uri *uri, ScanState *scan_state, ORCFormatUserData *user_data);
    +static void build_tuple_descrition_for_read(Plan *plan, Relation relation, ORCFormatUserData *user_data);
    +static void build_tuple_descrition_for_write(Relation relation, ORCFormatUserData *user_data);
    +static void orc_scan_error_callback(void *arg);
    +static void orc_parse_format_string(CopyState pstate, char *fmtstr);
    +static char *orc_strtokx2(const char *s, const char *whitespace, const char *delim,
    +                          const char *quote, char escape, bool e_strings,
    +                          bool del_quotes, int encoding);
    +static void orc_strip_quotes(char *source, char quote, char escape, int encoding);
    +
    +/* Implementation of validators for pluggable storage format ORC */
    +
    +/*
    + * void
    + * orc_validate_interfaces(char *formatName)
    + */
    +Datum orc_validate_interfaces(PG_FUNCTION_ARGS)
    +{
    +	elog(ERROR, "Funtion orc_validate_interfaces has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * void
    + * orc_validate_options(List *formatOptions,
    + *                      char *formatStr,
    + *                      bool isWritable)
    + */
    +Datum orc_validate_options(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_validate_options has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * void
    + * orc_validate_encodings(char *encodingName)
    + */
    +Datum orc_validate_encodings(PG_FUNCTION_ARGS)
    +{
    +	elog(ERROR, "Funtion orc_validate_encodings has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * void
    + * orc_validate_datatypes(TupleDesc tupDesc)
    + */
    +Datum orc_validate_datatypes(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_validate_datatypes has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * FileScanDesc
    + * orc_beginscan(ExternalScan *extScan,
    + *               ScanState *scanState,
    + *               Relation relation,
    + *               int formatterType,
    + *               char *formatterName)
    + */
    +Datum orc_beginscan(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_beginscan has not be completed, please fill it");
    +	PG_RETURN_POINTER(NULL);
    +}
    +
    +/*
    + * ExternalSelectDesc
    + * orc_getnext_init(PlanState *planState,
    + *                  ExternalScanState *extScanState)
    + */
    +Datum orc_getnext_init(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_getnext_init has not be completed, please fill it");
    +	PG_RETURN_POINTER(NULL);
    +}
    +
    +/*
    + * bool
    + * orc_getnext(FileScanDesc fileScanDesc,
    + *             ScanDirection direction,
    + *             ExternalSelectDesc extSelectDesc,
    + *             ScanState *scanState,
    + *             TupleTableSlot *tupTableSlot)
    + */
    +Datum orc_getnext(PG_FUNCTION_ARGS)
    +{
    +	elog(ERROR, "Funtion orc_getnext has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * void
    + * orc_rescan(FileScanDesc scan)
    + */
    +Datum orc_rescan(PG_FUNCTION_ARGS)
    +{
    +	elog(ERROR, "Funtion orc_rescan has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * void
    + * orc_endscan(FileScanDesc scan)
    + */
    +Datum orc_endscan(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_endscan has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * void
    + * orc_stopscan(FileScanDesc scan)
    + */
    +Datum orc_stopscan(PG_FUNCTION_ARGS)
    +{
    +	elog(ERROR, "Funtion orc_stopscan has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +/*
    + * ExternalInsertDesc
    + * orc_insert_init(Relation relation,
    + *                 int formatterType,
    + *                 char *formatterName)
    + */
    +Datum orc_insert_init(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_insert_init has not be completed, please fill it");
    +	PG_RETURN_POINTER(NULL);
    +}
    +
    +/*
    + * Oid
    + * orc_insert(ExternalInsertDesc extInsertDesc,
    + *            TupleTableSlot *tupTableSlot)
    + */
    +Datum orc_insert(PG_FUNCTION_ARGS)
    +{
    +
    +	elog(ERROR, "Funtion orc_insert has not be completed, please fill it");
    +	PG_RETURN_OID(InvalidOid);
    +}
    +
    +/*
    + * void
    + * orc_insert_finish(ExternalInsertDesc extInsertDesc)
    + */
    +Datum orc_insert_finish(PG_FUNCTION_ARGS)
    +{
    +	elog(ERROR, "Funtion orc_insert_finish has not be completed, please fill it");
    +	PG_RETURN_VOID();
    +}
    +
    +static FmgrInfo *get_orc_function(char *formatter_name, char *function_name)
    --- End diff --
    
    Remove the functions other than scan and insert.


---

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

Posted by huor <gi...@git.apache.org>.
Github user huor commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1394#discussion_r212932024
  
    --- Diff: contrib/orc/orc_install.sql ---
    @@ -0,0 +1,85 @@
    +-- --------------------------------------------------------------------
    +--
    +-- orc_install.sql
    +--
    +-- Support ORC format in pluggable storage framework
    +--
    +-- --------------------------------------------------------------------
    +
    +SET allow_system_table_mods=ddl;
    +  
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_interfaces() RETURNS void
    +AS '$libdir/orc.so', 'orc_validate_interfaces'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_options() RETURNS void
    +AS '$libdir/orc.so', 'orc_validate_options'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_encodings() RETURNS void
    +AS '$libdir/orc.so', 'orc_validate_encodings'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_validate_datatypes() RETURNS void
    +AS '$libdir/orc.so', 'orc_validate_datatypes'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_beginscan() RETURNS bytea
    +AS '$libdir/orc.so', 'orc_beginscan'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_getnext_init() RETURNS bytea
    +AS '$libdir/orc.so', 'orc_getnext_init'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_getnext() RETURNS bytea
    +AS '$libdir/orc.so', 'orc_getnext'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_rescan() RETURNS void
    +AS '$libdir/orc.so', 'orc_rescan'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_endscan() RETURNS void
    +AS '$libdir/orc.so', 'orc_endscan'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_stopscan() RETURNS void
    +AS '$libdir/orc.so', 'orc_stopscan'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_insert_init() RETURNS bytea
    +AS '$libdir/orc.so', 'orc_insert_init'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_insert() RETURNS bytea
    +AS '$libdir/orc.so', 'orc_insert'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION pg_catalog.orc_insert_finish() RETURNS void
    +AS '$libdir/orc.so', 'orc_insert_finish'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION hdfs_validate() RETURNS void
    +AS '$libdir/exthdfs.so', 'hdfsprotocol_validate'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION hdfs_blocklocation() RETURNS void
    +AS '$libdir/exthdfs.so', 'hdfsprotocol_blocklocation'
    +LANGUAGE C STABLE;
    +
    +CREATE OR REPLACE FUNCTION csv_in() RETURNS record
    --- End diff --
    
    Remove the text/csv functions.


---

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

Posted by radarwave <gi...@git.apache.org>.
Github user radarwave commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1394#discussion_r212928867
  
    --- Diff: contrib/orc/README ---
    @@ -0,0 +1,84 @@
    +Fresh installation from 3.0.0.0 source code
    +-------------------------------------------
    +1. Compile ORC format in pluggable storage framework
    +   $ ./configure --with-orc; make -j8; make install
    +
    +2. Configure and initialize cluster
    +   $ hawq init cluster -a
    +
    +
    +
    +Fresh installation from 3.0.0.0 rpm package
    +-------------------------------------------
    +1. Setup 3.0.0.0 yum repository
    +   $ cd /etc/yum.repos.d
    +   $ sudo wget  http://yum.oushu.io/oushurepo/yumrepo/release/oushu-database/centos7/3.0.0.0/oushu-database.repo
    --- End diff --
    
    Is this repo required? Is there a more general repo URL?


---

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

Posted by huor <gi...@git.apache.org>.
Github user huor commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1394#discussion_r212931811
  
    --- Diff: src/include/utils/hawq_type_mapping.h ---
    @@ -0,0 +1,162 @@
    +/*-------------------------------------------------------------------------
    --- End diff --
    
    Remove type mapping


---

[GitHub] incubator-hawq issue #1394: HAWQ-1629. Add ORC format using pluggable storag...

Posted by radarwave <gi...@git.apache.org>.
Github user radarwave commented on the issue:

    https://github.com/apache/incubator-hawq/pull/1394
  
    Please check licenses and make sure you passed RAT checks.


---

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

Posted by oushu1wangziming1 <gi...@git.apache.org>.
Github user oushu1wangziming1 closed the pull request at:

    https://github.com/apache/incubator-hawq/pull/1394


---

[GitHub] incubator-hawq issue #1394: HAWQ-1629. Add ORC format using pluggable storag...

Posted by huor <gi...@git.apache.org>.
Github user huor commented on the issue:

    https://github.com/apache/incubator-hawq/pull/1394
  
    +1


---

[GitHub] incubator-hawq issue #1394: HAWQ-1629. Add ORC format using pluggable storag...

Posted by oushu1wangziming1 <gi...@git.apache.org>.
Github user oushu1wangziming1 commented on the issue:

    https://github.com/apache/incubator-hawq/pull/1394
  
    OK, I have added the licenses in orc.c.


---

[GitHub] incubator-hawq pull request #1394: HAWQ-1629. Add ORC format using pluggable...

Posted by radarwave <gi...@git.apache.org>.
Github user radarwave commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1394#discussion_r212928645
  
    --- Diff: contrib/orc/README ---
    @@ -0,0 +1,84 @@
    +Fresh installation from 3.0.0.0 source code
    --- End diff --
    
    The version is confusing.


---

[GitHub] incubator-hawq issue #1394: HAWQ-1629. Add ORC format using pluggable storag...

Posted by radarwave <gi...@git.apache.org>.
Github user radarwave commented on the issue:

    https://github.com/apache/incubator-hawq/pull/1394
  
    +1


---