You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2015/11/05 03:54:53 UTC
[07/17] incubator-hawq git commit: HAWQ-100. Code Cleanup:
gpmapreduce.
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/badref.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/badref.yml.in b/src/bin/gpmapreduce/test/input/badref.yml.in
deleted file mode 100644
index 79d2815..0000000
--- a/src/bin/gpmapreduce/test/input/badref.yml.in
+++ /dev/null
@@ -1,155 +0,0 @@
-#
-# This test has YAML that conforms to the Greenplum MapReduce schema,
-# but the object names don't hook up correctly
-#
-%YAML 1.1
-
-# Bad INPUTS
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: input_bad_hostname
- FILE: invalidhost:/path/to/file
-EXECUTE:
- - RUN:
- SOURCE: input_bad_hostname
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: input_bad_hostname
- GPFDIST: invalidhost:/badfile
-EXECUTE:
- - RUN:
- SOURCE: input_bad_hostname
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: input_bad_file
- FILE: @hostname@:/dev/null/badfile
-EXECUTE:
- - RUN:
- SOURCE: input_bad_file
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: input_bad_file
- FILE: @hostname@:/badfile
-EXECUTE:
- - RUN:
- SOURCE: input_bad_file
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: input_bad_exec
- EXEC: '/dev/null'
-EXECUTE:
- - RUN:
- SOURCE: input_bad_exec
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: input_bad_table
- TABLE: invalid_table
-EXECUTE:
- - RUN:
- SOURCE: input_bad_table
----
-VERSION: 1.0.0.1
-DEFINE:
- - MAP:
- NAME: map_not_input
- LANGUAGE: perl
- FUNCTION: return [$@]
-EXECUTE:
- - RUN:
- SOURCE: map_not_input
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: good_input
- TABLE: gp_configuration
- - MAP:
- NAME: map_not_input
- LANGUAGE: perl
- FUNCTION: return [$@]
-EXECUTE:
- - RUN:
- SOURCE: good_input
- TARGET: map_not_output
----
-
-VERSION: 1.0.0.1
-DEFINE:
- - REDUCE:
- NAME: badref_reduce
- INITIALIZE: '0'
- TRANSITION: invalid_transition
----
-VERSION: 1.0.0.1
-DEFINE:
- - REDUCE:
- NAME: badref_reduce
- INITIALIZE: '0'
- TRANSITION: int4_sum
- CONSOLIDATE: invalid_consolidate
----
-VERSION: 1.0.0.1
-DEFINE:
- - REDUCE:
- NAME: badref_reduce
- INITIALIZE: '0'
- TRANSITION: int4_sum
- FINALIZE: invalid_finalize
----
-VERSION: 1.0.0.1
-DEFINE:
- - REDUCE:
- NAME: badref_reduce
- INITIALIZE: '0'
- TRANSITION: generate_series
----
-VERSION: 1.0.0.1
-DEFINE:
- - TASK:
- NAME: badref_task
- SOURCE: invalid_input
- MAP: invalid_map
- REDUCE: invalid_reduce
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: good_input
- TABLE: gp_configuration
- - TASK:
- NAME: badref_task
- SOURCE: good_input
- MAP: invalid_map
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: good_input
- TABLE: gp_configuration
-EXECUTE:
- - RUN:
- SOURCE: invalid_input
- REDUCE: invalid_reduce
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: good_input
- TABLE: gp_configuration
-EXECUTE:
- - RUN:
- SOURCE: good_input
- TARGET: invalid_output
-...
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/builtin_1.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/builtin_1.yml.in b/src/bin/gpmapreduce/test/input/builtin_1.yml.in
deleted file mode 100644
index 41888a3..0000000
--- a/src/bin/gpmapreduce/test/input/builtin_1.yml.in
+++ /dev/null
@@ -1,156 +0,0 @@
-%YAML 1.1
-# ============================================================================
-# Test support of builtin functions
-# ============================================================================
-
-
-# ==========================================================================
-# Test 1a: Parameters specified in YAML
-#
-# Note:
-# - wordsplit_1 was not defined with named parameters
-# - wordsplit_2 was defined with a parameter named "value"
-# ==========================================================================
----
-VERSION: 1.0.0.2
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: smalldoc
- FILE: @hostname@:@abs_srcdir@/data/smalldoc.txt
- COLUMNS: [value text]
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/builtin_1.1
- MODE: replace
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_2
- FILE: @abs_srcdir@/output/builtin_1.2
- MODE: replace
- DELIMITER: '|'
-
- - MAP:
- NAME: wordsplit_1
- PARAMETERS: [value text]
-
- - MAP:
- NAME: wordsplit_2
- PARAMETERS: [value text]
-
-
-EXECUTE:
- - RUN:
- SOURCE: smalldoc
- MAP: wordsplit_1
- REDUCE: sum
- TARGET: out_1
-
- - RUN:
- SOURCE: smalldoc
- MAP: wordsplit_2
- REDUCE: sum
- TARGET: out_2
-
-
-# ==========================================================================
-# Test 1b: Parameters specified in YAML, but different names than specified
-#
-# Note:
-# - wordsplit_1 was not defined with named parameters
-# - wordsplit_2 was defined with a parameter named "value"
-# ==========================================================================
----
-VERSION: 1.0.0.2
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: smalldoc
- FILE: @hostname@:@abs_srcdir@/data/smalldoc.txt
- COLUMNS: [line text]
-
- - OUTPUT:
- NAME: out_3
- FILE: @abs_srcdir@/output/builtin_1.3
- MODE: replace
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_4
- FILE: @abs_srcdir@/output/builtin_1.4
- MODE: replace
- DELIMITER: '|'
-
- - MAP:
- NAME: wordsplit_1
- PARAMETERS: [line text]
-
- - MAP:
- NAME: wordsplit_2
- PARAMETERS: [line text]
-
-
-EXECUTE:
- - RUN:
- SOURCE: smalldoc
- MAP: wordsplit_1
- REDUCE: sum
- TARGET: out_3
-
- - RUN:
- SOURCE: smalldoc
- MAP: wordsplit_2
- REDUCE: sum
- TARGET: out_4
-
-# ==========================================================================
-# Test 1c: Using default parameters
-#
-# Note:
-# - wordsplit_1 was not defined with named parameters
-# - wordsplit_2 was defined with a parameter named "value"
-# ==========================================================================
----
-VERSION: 1.0.0.2
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: smalldoc
- FILE: @hostname@:@abs_srcdir@/data/smalldoc.txt
- COLUMNS: [value text]
-
- - OUTPUT:
- NAME: out_5
- FILE: @abs_srcdir@/output/builtin_1.5
- MODE: replace
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_6
- FILE: @abs_srcdir@/output/builtin_1.6
- MODE: replace
- DELIMITER: '|'
-
- - MAP:
- NAME: wordsplit_1
-
- - MAP:
- NAME: wordsplit_2
-
-EXECUTE:
- - RUN:
- SOURCE: smalldoc
- MAP: wordsplit_1
- REDUCE: sum
- TARGET: out_5
-
- - RUN:
- SOURCE: smalldoc
- MAP: wordsplit_2
- REDUCE: sum
- TARGET: out_6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/c_function.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/c_function.yml.in b/src/bin/gpmapreduce/test/input/c_function.yml.in
deleted file mode 100644
index 6eb1ef4..0000000
--- a/src/bin/gpmapreduce/test/input/c_function.yml.in
+++ /dev/null
@@ -1,182 +0,0 @@
-%YAML 1.1
----
-
-# ============================================================================
-# Support of "C" language functions was enabled in the 1.0.0.2 version of
-# the Greenplum YAML specification by adding the new "LIBRARY" attribute
-# to MAP/TRANSITION/CONSOLIDATE/FINALIZE specifications.
-# ============================================================================
-VERSION: 1.0.0.2
-USER: @db_user@
-
-DEFINE:
-
- # ==========================================================================
- # This is the input used by all our examples
- # ==========================================================================
- - INPUT:
- NAME: smalldoc
- FILE: @hostname@:@abs_srcdir@/data/smalldoc.txt
- COLUMNS: [line text]
-
- # ==========================================================================
- # For comparison purposes we also show how to do the same thing
- # 1) Using user defined C functions
- # 2) Using the builtin "SUM" reducer
- # 3) Using builtin functions to define a custom reducer
- # 4) Using a straight callout to SQL
- # ==========================================================================
-
- # ==========================================================================
- # 1) Using user defined C functions
- #
- # See the code in lib/gpmrdemo.c for details
- #
- # Note: There is an assumption that the gpmrdemo.so has already been installed
- # across the cluster in $GPHOME/lib/postgresql.
- # ==========================================================================
- - MAP:
- NAME: wordsplit
- PARAMETERS: [line text]
- RETURNS: [key text, value int4]
- LANGUAGE: C
- LIBRARY: $libdir/gpmrdemo.so
- FUNCTION: wordsplit
-
- - TRANSITION:
- NAME: int4_accum
- PARAMETERS: [state int8, value int4]
- RETURNS: [state int8]
- LANGUAGE: C
- LIBRARY: $libdir/gpmrdemo.so
- FUNCTION: int4_accum
-
- - CONSOLIDATE:
- NAME: int8_add
- PARAMETERS: [value1 int8, value2 int8]
- RETURNS: [value int8]
- LANGUAGE: C
- LIBRARY: $libdir/gpmrdemo.so
- FUNCTION: int8_add
-
- - REDUCE:
- NAME: c_sum
- TRANSITION: int4_accum
- CONSOLIDATE: int8_add
-
- # This task isn't strictly necessary, we could have simply defined
- # the REDUCE in the RUN block below.
- - TASK:
- NAME: sum_mr_task
- SOURCE: smalldoc
- MAP: wordsplit
- REDUCE: c_sum
-
-
- # ==========================================================================
- # 2) SUM as a builtin reducer
- #
- # Note: the use of the builtin SUM reducer
- # ==========================================================================
- - TASK:
- NAME: sum_builtin_task
- SOURCE: smalldoc
- MAP: wordsplit
- REDUCE: SUM
-
- # ==========================================================================
- # 3) Using builtin functions to define a custom reducer
- #
- # Note: the transition function and consolidate function are not defined
- # anywhere in the yaml, they are pre-existing functions within the database.
- # This only works because the functions "int4_sum" and "int8mi" are uniquely
- # defined in the database (in contrast to a function like "convert" which
- # has multiple overloaded arguments). It is also important to ensure that
- # the output type of the transition function is the same as the input types
- # of the consolidate function.
- #
- # in this case int4_sum(a int8, b int4) => c int8
- # 'a': is the input state, it must be the same as the return type 'c'
- # 'b': is the input value, the input we are passing into the function
- # (the value column) should be castable to this datatype.
- #
- # ... int8mi(x int8, y int8) => z int8
- # 'x' and 'y' are input states produced by the transition function.
- # 'z' is the return type of the function.
- # 'x', 'y', and 'z' must all be the same datatype as 'c'
- # ==========================================================================
- - REDUCE:
- NAME: sum_builtin2
- TRANSITION: int4_sum
- CONSOLIDATE: int8mi
-
- - TASK:
- NAME: sum_bulitin2_task
- SOURCE: smalldoc
- MAP: wordsplit
- REDUCE: sum_builtin2
-
- # ==========================================================================
- # 4) A direct callout to SQL
- #
- # Note: we can use either a TASK or an INPUT as a SOURCE
- # for execution.
- # ==========================================================================
- - INPUT:
- NAME: sum_sql
- QUERY: |
- SELECT key, sum(value)
- FROM (select regexp_split_to_table(line, ' ') as key, 1 as value from smalldoc) m
- WHERE key != ''
- GROUP BY key;
-
- # ==========================================================================
- # Define the outputs for all the examples above.
- # ==========================================================================
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/c_function.1
- MODE: replace
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_2
- FILE: @abs_srcdir@/output/c_function.2
- MODE: replace
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_3
- FILE: @abs_srcdir@/output/c_function.3
- MODE: replace
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_4
- FILE: @abs_srcdir@/output/c_function.4
- MODE: replace
- DELIMITER: '|'
-
-
-
-# ============================================================================
-# The execute block is what is responsible for actually calling
-# each of the above examples and associating them with specific
-# outputs.
-# ============================================================================
-EXECUTE:
- - RUN:
- SOURCE: sum_mr_task
- TARGET: out_1
-
- - RUN:
- SOURCE: sum_builtin_task
- TARGET: out_2
-
- - RUN:
- SOURCE: sum_builtin_task
- TARGET: out_3
-
- - RUN:
- SOURCE: sum_sql
- TARGET: out_4
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/columns.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/columns.yml.in b/src/bin/gpmapreduce/test/input/columns.yml.in
deleted file mode 100644
index c52746c..0000000
--- a/src/bin/gpmapreduce/test/input/columns.yml.in
+++ /dev/null
@@ -1,76 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: weblog
- FILE:
- - @hostname@:@abs_srcdir@/data/access_log
- - @hostname@:@abs_srcdir@/data/access_log2
- COLUMNS:
- - ip text
- - rfc text
- - uname text
- - date text
- - timezone text
- - method text
- - url text
- - protocol text
- - status integer
- - bytes integer
- DELIMITER: ' '
-
- - MAP:
- NAME: myMap
- LANGUAGE: perl
- PARAMETERS:
- - key text
- - ip text
- - url text
- RETURNS:
- - ip text
- - url text
- - value integer
- FUNCTION: |
- my ($key, $ip, $url) = @_;
- return [] unless ($url =~ m/$key/);
- return [{"ip" => $ip, "url" => $url, "value" => 1}]
-
-# myReduce is exactly the same as SUM except for the grouping keys and
-# the non-null initialization.
- - REDUCE:
- NAME: myReduce
- INITIALIZE: '0'
- TRANSITION: int4_sum
- CONSOLIDATE: int8pl
- KEYS: [ip]
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/columns.1
- DELIMITER: '|'
- MODE: REPLACE
-
- - OUTPUT:
- NAME: out_2
- FILE: @abs_srcdir@/output/columns.1
- DELIMITER: '|'
- MODE: APPEND
-
-EXECUTE:
-
-# built in SUM groups by all columns not "value"
- - RUN:
- SOURCE: weblog
- MAP: myMap
- REDUCE: SUM
- TARGET: out_1
-
-# myReduce is defined with "keys" specified as just the "ip" column
- - RUN:
- SOURCE: weblog
- MAP: myMap
- REDUCE: myReduce
- TARGET: out_2
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/graph.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/graph.yml.in b/src/bin/gpmapreduce/test/input/graph.yml.in
deleted file mode 100644
index fc607bd..0000000
--- a/src/bin/gpmapreduce/test/input/graph.yml.in
+++ /dev/null
@@ -1,99 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
-
-# A sample of www link data
-# 137,941 distinct source pages
-# 325,729 distinct linked pages
-# 1,497,135 total links
- - INPUT:
- NAME: network
- FILE: @hostname@:@abs_srcdir@/data/www.small.dat
- COLUMNS:
- - key integer
- - value integer
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/graph.1
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_2
- FILE: @abs_srcdir@/output/graph.2
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_3
- FILE: @abs_srcdir@/output/graph.3
- DELIMITER: '|'
-
-# One thing about the Greenplum MapReduce is that it allows you to
-# express calculations in whatever method is easiest. Here we use
-# SQL to transform the above input from key, value pairs to sparse
-# matrix representation.
-# - INPUT:
-# NAME: sparse_network
-# QUERY: |
-# SELECT key, 0.85 as pagerank, array_accum(value) as value
-# FROM network
-# GROUP BY key
-
-# Reverse()
-# Takes a (key, value) tuple and produces a new tuple of (value, key)
-# taking the input graph and producing the reverse graph
-# => It would be faster to have simply reversed the names in the input
- - MAP:
- NAME: reverse
- LANGUAGE: python
- PARAMETERS:
- - key integer
- - value integer
- RETURNS:
- - key integer
- - value integer
- MODE: SINGLE
- FUNCTION: |
- return {'key': value, 'value': key}
-
-# pcount()
-# Implements a count aggregator in python
-# => native sql count is about 12x faster
- - TRANSITION:
- NAME: pcount
- LANGUAGE: python
- PARAMETERS:
- - state integer
- - value integer
- RETURNS:
- - state integer
- FUNCTION: return state + 1
-
- - REDUCE:
- NAME: traverse
- INITIALIZE: 0
- TRANSITION: pcount
-
-
-EXECUTE:
-
-# This counts the number of links coming FROM a page
- - RUN:
- SOURCE: network
- REDUCE: traverse
- TARGET: out_1
-
-# This counts the number of links going TO a page.
- - RUN:
- SOURCE: network
- MAP: reverse
- REDUCE: traverse
- TARGET: out_2
-
- - RUN:
- SOURCE: network
- TARGET: out_3
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/grep.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/grep.yml.in b/src/bin/gpmapreduce/test/input/grep.yml.in
deleted file mode 100644
index d6fbbc7..0000000
--- a/src/bin/gpmapreduce/test/input/grep.yml.in
+++ /dev/null
@@ -1,37 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: inDirectory
- FILE:
- - @hostname@:@abs_srcdir@/data/access_log
- - @hostname@:@abs_srcdir@/data/access_log2
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/grep.1
- DELIMITER: '|'
-
- - MAP:
- NAME: grep_map
- FUNCTION: |
- return [{"key" => $_[0], "value" => $_[1]}] if ($_[1] =~/$_[0]/);
- return [];
- LANGUAGE: perl
- OPTIMIZE: STRICT IMMUTABLE
-
- - TASK:
- NAME: grep_task
- SOURCE: inDirectory
- MAP: grep_map
- REDUCE: IDENTITY
-
-EXECUTE:
- - RUN:
- SOURCE: grep_task
- TARGET: out_1
-
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/grep_raw.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/grep_raw.yml.in b/src/bin/gpmapreduce/test/input/grep_raw.yml.in
deleted file mode 100644
index f8b98cb..0000000
--- a/src/bin/gpmapreduce/test/input/grep_raw.yml.in
+++ /dev/null
@@ -1,24 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: raw_in
- FILE: @hostname@:@abs_srcdir@/data/raw.txt
-
- - MAP:
- NAME: grep_map
- FUNCTION: |
- return [{"key" => $_[0], "value" => $_[1]}] if ($_[1] =~/$_[0]/);
- return [];
- LANGUAGE: perl
- OPTIMIZE: STRICT IMMUTABLE
-
-EXECUTE:
- - RUN:
- SOURCE: raw_in
- MAP: grep_map
- TARGET: STDOUT
-
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/input.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/input.yml.in b/src/bin/gpmapreduce/test/input/input.yml.in
deleted file mode 100644
index 3d3cefd..0000000
--- a/src/bin/gpmapreduce/test/input/input.yml.in
+++ /dev/null
@@ -1,74 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-
-DEFINE:
- - INPUT:
- NAME: network1
- FILE: @hostname@:@abs_srcdir@/data/www.small.dat
- COLUMNS: [key text, value text]
- DELIMITER: '|'
-
- - INPUT:
- NAME: network2
- QUERY: SELECT key, value FROM network1 GROUP BY key, value
-
- - INPUT:
- NAME: network3
- TABLE: network2
-
- - INPUT:
- NAME: network4
- EXEC: 'cat @abs_srcdir@/data/www.small.dat'
- COLUMNS: [key text, value text]
- DELIMITER: '|'
-
- - Input:
- NAME: network5
- TABLE: input.qualified
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/input.1
-
- - OUTPUT:
- NAME: out_2
- FILE: @abs_srcdir@/output/input.2
- DELIMITER: '|'
-
-
- - OUTPUT:
- NAME: out_3
- FILE: @abs_srcdir@/output/input.3
- DELIMITER: ':'
-
- - OUTPUT:
- NAME: out_4
- FILE: @abs_srcdir@/output/input.4
- DELIMITER: ','
-
- - OUTPUT:
- NAME: out_5
- TABLE: output.qualified
-
-EXECUTE:
-
- - RUN:
- SOURCE: network1
- TARGET: out_1
-
- - RUN:
- SOURCE: network2
- TARGET: out_2
-
- - RUN:
- SOURCE: network3
- TARGET: out_3
-
- - RUN:
- SOURCE: network4
- TARGET: out_4
-
- - RUN:
- SOURCE: network5
- TARGET: out_5
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/join.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/join.yml.in b/src/bin/gpmapreduce/test/input/join.yml.in
deleted file mode 100644
index 2354443..0000000
--- a/src/bin/gpmapreduce/test/input/join.yml.in
+++ /dev/null
@@ -1,94 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-DATABASE: @db_user@
-USER: @db_user@
-
-DEFINE:
-
-# 3) SQL join
-# (Define this first to prove non-yaml dependency checks work)
- - INPUT:
- NAME: reverse_access_count
- QUERY: |
- SELECT a.key as akey, b.key as bkey, sum(b.value::integer) as value
- FROM reverse_network a, access_count b
- WHERE a.key != b.key
- GROUP BY a.key, b.key
-
-# 1) Access log
- - INPUT:
- NAME: weblog
- FILE: @hostname@:@abs_srcdir@/data/access_log
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/join.1
- DELIMITER: '|'
-
- - MAP:
- NAME: myMap
- LANGUAGE: perl
- PARAMETERS:
- - value text
- RETURNS:
- - key text
- - value integer
- FUNCTION: |
- return [] unless ($_[0] =~ m/GET ([^ ?,]+)/);
- return [{"key" => $1, "value" => 1}]
-
- - TRANSITION:
- NAME: myTransition
- LANGUAGE: perl
- PARAMETERS:
- - state integer
- - value integer
- RETURNS:
- - value integer
- FUNCTION: return $_[0] + $_[1];
-
- - REDUCE:
- NAME: myReduce
- INITIALIZE: '0'
- TRANSITION: myTransition
- CONSOLIDATE: myTransition
-
- - TASK:
- NAME: access_count
- SOURCE: weblog
- MAP: myMap
- REDUCE: myReduce
-
-# 2) Network graph
- - INPUT:
- NAME: network
- FILE: @hostname@:@abs_srcdir@/data/www.small.dat
- COLUMNS:
- - key integer
- - value integer
- DELIMITER: '|'
-
- - MAP:
- NAME: reverse
- LANGUAGE: python
- PARAMETERS:
- - key integer
- - value integer
- RETURNS:
- - key integer
- - value integer
- FUNCTION: |
- return {'key': value, 'value': key}
- MODE: SINGLE
-
- - TASK:
- NAME: reverse_network
- SOURCE: network
- MAP: reverse
-
-EXECUTE:
- - RUN:
- SOURCE: reverse_access_count
- TARGET: out_1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/member_kw.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/member_kw.yml.in b/src/bin/gpmapreduce/test/input/member_kw.yml.in
deleted file mode 100755
index 9607de9..0000000
--- a/src/bin/gpmapreduce/test/input/member_kw.yml.in
+++ /dev/null
@@ -1,195 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-DATABASE: @db_user@
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: mp
- TABLE: member_summary
-
- - INPUT:
- NAME: kw
- TABLE: keywords
-
- # Original example used nltk to do word stemming and tokenization
- # this version for testing simply does a brain dead space delimited
- # tokenization and does no stemming.
- #
- # The reason for this simplification is that we do not have nltk
- # installed on all machines.
- - MAP:
- NAME: mp_map
- LANGUAGE: python
- FUNCTION: |
- i = 0
- terms = {}
- for token in text.lower().split():
- i = i + 1
- stem = token
- if stem in terms:
- terms[stem].append(i)
- else:
- terms[stem] = [i]
- for term in terms:
- yield([member_id, term, terms[term]])
- OPTIMIZE: STRICT IMMUTABLE
- MODE: MULTI
- PARAMETERS:
- - member_id integer
- - text text
- RETURNS:
- - member_id integer
- - term text
- - positions text
-
- - MAP:
- NAME: kw_map
- LANGUAGE: python
- FUNCTION: |
- i = 0
- terms = {}
- for token in keyword.lower().split():
- i = i + 1
- stem = token
- if stem in terms:
- terms[stem].append(i)
- else:
- terms[stem] = [i]
- for term in terms:
- yield([keyword_id, i, term, terms[term]])
- OPTIMIZE: STRICT IMMUTABLE
- MODE: MULTI
- PARAMETERS:
- - keyword_id integer
- - keyword text
- RETURNS:
- - keyword_id integer
- - nterms integer
- - term text
- - positions text
-
- - TASK:
- NAME: mp_prep
- SOURCE: mp
- MAP: mp_map
-
- - TASK:
- NAME: kw_prep
- SOURCE: kw
- MAP: kw_map
-
- - INPUT:
- NAME: term_join
- QUERY: |
- SELECT mp.member_id, kw.keyword_id, mp.term, kw.nterms,
- mp.positions as mp_positions,
- kw.positions as kw_positions
- FROM mp_prep mp INNER JOIN kw_prep kw ON (mp.term = kw.term)
-
- - REDUCE:
- NAME: term_reducer
- TRANSITION: term_transition
- FINALIZE: term_finalizer
-
- - TRANSITION:
- NAME: term_transition
- LANGUAGE: python
- PARAMETERS:
- - state text
- - term text
- - nterms integer
- - mp_positions text
- - kw_positions text
- FUNCTION: |
- # STATE initialized to: ''
- # STATE is: 'member positions:member positions: ...'
- # STATE can be '1,3,2:4:'
- if state:
- kw_split = state.split(':')
- else:
- kw_split = []
- for i in range(0,nterms):
- kw_split.append('')
-
- # add mp_positions for the current term
- for kw_p in kw_positions[1:-1].split(','):
- kw_split[int(kw_p)-1] = mp_positions
-
- # reconstruct the delimited state
- outstate = kw_split[0]
- for s in kw_split[1:]:
- outstate = outstate + ':' + s
- return outstate
-
- - FINALIZE:
- NAME: term_finalizer
- LANGUAGE: python
- MODE: SINGLE
- RETURNS:
- - count integer
- FUNCTION: |
- if not state:
- return 0
- kw_split = state.split(':')
-
- # We adjust each member position list based on
- # the offset of the term in the keyword and then
- # intersect all resulting lists
- previous = None
- for i in range(0,len(kw_split)):
- isplit = kw_split[i][1:-1].split(',')
- if any(map(lambda(x): x == '', isplit)):
- return 0
- adjusted = set(map(lambda(x): int(x)-i, isplit))
- if (previous):
- previous = adjusted.intersection(previous)
- else:
- previous = adjusted
-
- # return the final count
- if previous:
- return len(previous)
- return 0
-
- - TASK:
- NAME: term_match
- SOURCE: term_join
- REDUCE: term_reducer
-
- - INPUT:
- NAME: final_join
- QUERY: |
- SELECT ms.*, kw.*, tm.count
- FROM member_summary ms, keywords kw, term_match tm
- WHERE ms.member_id = tm.member_id
- AND kw.keyword_id = tm.keyword_id
- AND tm.count > 0
-
- - OUTPUT:
- NAME: final_output_replace
- TABLE: member_keywords_1
- MODE: REPLACE
-
- - OUTPUT:
- NAME: final_output_append
- TABLE: member_keywords_2
- MODE: APPEND
-
-EXECUTE:
- - RUN:
- SOURCE: final_join
- TARGET: final_output_replace
-
- - RUN:
- SOURCE: final_join
- TARGET: final_output_replace
-
- - RUN:
- SOURCE: final_join
- TARGET: final_output_append
-
- - RUN:
- SOURCE: final_join
- TARGET: final_output_append
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/mode.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/mode.yml.in b/src/bin/gpmapreduce/test/input/mode.yml.in
deleted file mode 100644
index a4fae81..0000000
--- a/src/bin/gpmapreduce/test/input/mode.yml.in
+++ /dev/null
@@ -1,72 +0,0 @@
---- # in perl
-VERSION: 1.0.0.1
-
-DEFINE:
- - INPUT:
- NAME: key_value_input
- QUERY: |
- SELECT 'k1' as key, 'v1' as value
- UNION ALL
- SELECT 'k2' as key, 'v2' as value;
-
- - MAP:
- NAME: perl_single
- MODE: SINGLE
- PARAMETERS: [key text, value text]
- RETURNS: [key text, value text]
- LANGUAGE: perl
- FUNCTION: |
- my ($key, $value) = @_;
- return {'key' => $key, 'value' => $value}
-
- - MAP:
- NAME: perl_multi
- MODE: MULTI
- PARAMETERS: [key text, value text]
- RETURNS: [key text, value text]
- LANGUAGE: perl
- FUNCTION: |
- my ($key, $value) = @_;
- for my $i (0..10) {
- return_next {'key' => $key, 'value' => $value}
- }
- return undef
-
- - MAP:
- NAME: python_single
- MODE: SINGLE
- PARAMETERS: [key text, value text]
- RETURNS: [key text, value text]
- LANGUAGE: python
- FUNCTION: |
- return {'key': key, 'value': value}
-
- - MAP:
- NAME: python_multi
- MODE: MULTI
- PARAMETERS: [key text, value text]
- RETURNS: [key text, value text]
- LANGUAGE: python
- FUNCTION: |
- try:
- for i in range(0,10):
- yield {'key': key, 'value': value}
- except Exception, e:
- plpy.warning(str(e))
-
-EXECUTE:
- - RUN:
- SOURCE: key_value_input
- MAP: perl_single
-
- - RUN:
- SOURCE: key_value_input
- MAP: perl_multi
-
- - RUN:
- SOURCE: key_value_input
- MAP: python_single
-
- - RUN:
- SOURCE: key_value_input
- MAP: python_multi
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/network.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/network.yml.in b/src/bin/gpmapreduce/test/input/network.yml.in
deleted file mode 100644
index bd17f48..0000000
--- a/src/bin/gpmapreduce/test/input/network.yml.in
+++ /dev/null
@@ -1,42 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-DATABASE: @db_user@
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: email
- FILE:
- - @hostname@:@abs_srcdir@/data/email/mailfiles
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/network.1
- DELIMITER: '|'
-
- - MAP:
- NAME: msg_from_to
- FUNCTION: |
- from email import message_from_file
- msg = message_from_file(open(value))
- if 'From' in msg and 'To' in msg:
- me = msg['From'].strip()
- yall = msg['To'].split(',')
- for you in yall:
- yield([me, you.strip(), 1])
- LANGUAGE: python
- OPTIMIZE: STRICT IMMUTABLE
- MODE: MULTI
- PARAMETERS: value text
- RETURNS:
- - source text
- - target text
- - value integer
-
-EXECUTE:
- - RUN:
- SOURCE: email
- MAP: msg_from_to
- REDUCE: SUM
- TARGET: out_1
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/ordering.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/ordering.yml.in b/src/bin/gpmapreduce/test/input/ordering.yml.in
deleted file mode 100644
index b5a6609..0000000
--- a/src/bin/gpmapreduce/test/input/ordering.yml.in
+++ /dev/null
@@ -1,60 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.3
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: weblog
- FILE:
- - @hostname@:@abs_srcdir@/data/access_log
- - @hostname@:@abs_srcdir@/data/access_log2
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/ordering.1
- DELIMITER: '|'
-
- - OUTPUT:
- NAME: out_2
- FILE: @abs_srcdir@/output/ordering.2
- DELIMITER: '|'
-
- - MAP:
- NAME: myMap
- LANGUAGE: perl
- FUNCTION: |
- return [] unless ($_[1] =~ m/([0123456789.]+).*GET ([^ ?,]+)/);
- return [{"key" => $1, "value" => $2}]
-
- - TRANSITION:
- NAME: myTransition
- LANGUAGE: perl
- FUNCTION: |
- return $_[1] unless $_[0];
- return "$_[0], $_[1]";
-
- - REDUCE:
- NAME: myReduce
- INITIALIZE: ''
- TRANSITION: myTransition
- ORDERING: value
-
- - REDUCE:
- NAME: myReduce_2
- INITIALIZE: ''
- TRANSITION: myTransition
- ORDERING: value desc
-
-EXECUTE:
- - RUN:
- SOURCE: weblog
- MAP: myMap
- REDUCE: myReduce
- TARGET: out_1
-
- - RUN:
- SOURCE: weblog
- MAP: myMap
- REDUCE: myReduce_2
- TARGET: out_2
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/oreilly.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/oreilly.yml.in b/src/bin/gpmapreduce/test/input/oreilly.yml.in
deleted file mode 100644
index 691feb2..0000000
--- a/src/bin/gpmapreduce/test/input/oreilly.yml.in
+++ /dev/null
@@ -1,229 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-
-EXECUTE:
- - RUN:
- SOURCE: ProductionJobs
- MAP: extractFields
- REDUCE: Uniquify
- TARGET: out_1
-
-DEFINE:
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/oreilly.1
- DELIMITER: '|'
-
- - REDUCE:
- NAME: Uniquify
- TRANSITION: Uniquify_transition
- FINALIZE: Uniquify_finalize
-
- - TRANSITION:
- NAME: Uniquify_transition
- PARAMETERS:
- - state text
- - id integer
- - date_posted date
- - date_posted_timestamp integer
- - first_extract_date timestamp
- - last_extract_date timestamp
- - inter_source_hash text
- - gap integer
- LANGUAGE: python
-
- # The transition function just accumulates a list of delimited records
- FUNCTION: |
- import pickle;
-
- # newrecord = (id, date_posted, date_posted_timestamp, refind_key,
- newrecord = (id, date_posted, date_posted_timestamp,
- first_extract_date, last_extract_date, inter_source_hash)
-
- # Add the new record to the existing state, if any
- if state == None:
- oldlist = [gap, newrecord]
- else:
- oldlist = pickle.loads(state)
- oldlist.append(newrecord)
-
- # repickle and return
- return pickle.dumps(oldlist)
-
- - FINALIZE:
- NAME: Uniquify_finalize
- RETURNS:
- - id integer
- - date_posted date
- - date_posted_timestamp integer
- - first_extract_date timestamp
- - last_extract_date timestamp
- - inter_source_hash text
- - refind_index integer
-
- MODE: MULTI
- LANGUAGE: python
- FUNCTION: |
- import pickle
- import datetime
-
- def date_conv(date, fmt):
- return datetime.datetime.strptime(date, fmt)
- date_fmt = '%Y-%m-%d'
-
- # Get the accumulated data for this refind_key
- mydata = pickle.loads(state)
-
- # The first value in the list is the 'gap' as it was
- # passed to the transition function.
- gap = mydata.pop(0)
- if gap == None:
- gap = 60
-
- # derive the sequence
- mydata.sort(key=lambda(x): x[1]) # date_posted is 2nd element in each tuple
-
- # Determine which ones are 'new' values
- seq = 0
- prev = mydata[0]
- prev_date = date_conv(prev[1], date_fmt)
- for t in mydata[1:]:
- date_gap = date_conv(t[1], date_fmt) - prev_date;
- if (date_gap.days > gap):
- yield(prev[0], prev[1], prev[2], prev[3], prev[4], prev[5], seq)
- seq += 1
- prev = t
- yield(prev[0], prev[1], prev[2], prev[3], prev[4], prev[5], seq)
-
-
- - MAP:
- NAME: extractFields
- PARAMETERS:
- - id integer
- - date_posted date
- - date_posted_timestamp integer
- - refind_key text
- - first_extract_date timestamp
- - last_extract_date timestamp
- - inter_source_hash text
- RETURNS:
- - id integer
- - date_posted date
- - date_posted_timestamp integer
- - refind_key text
- - first_extract_date timestamp
- - last_extract_date timestamp
- - inter_source_hash text
- MODE: MULTI
- LANGUAGE: python
- FUNCTION: |
- if date_posted:
- yield [id, date_posted, date_posted_timestamp, refind_key,
- first_extract_date, last_extract_date, inter_source_hash]
-
- - INPUT:
- NAME: ProductionJobs
- FILE: @hostname@:@abs_srcdir@/data/ProductionJobs.txt
- FORMAT: CSV
- ERROR_LIMIT: 1000
- NULL: '\\N'
- QUOTE: '"'
- COLUMNS:
- - id integer
- - dummy1 text
- - dummy2 text
- - dummy3 text
- - dummy4 text
- - dummy5 text
- - dummy6 text
- - dummy7 text
- - dummy8 text
- - dummy9 text
- - dummy10 text
- - dummy11 text
- - dummy12 text
- - dummy13 text
- - dummy14 text
- - dummy15 text
- - dummy16 text
- - dummy17 text
- - dummy18 text
- - dummy19 text
- - dummy20 text
- - dummy21 text
- - dummy22 text
- - dummy23 text
- - dummy24 text
- - dummy25 text
- - dummy26 text
- - dummy27 text
- - dummy28 text
- - dummy29 text
- - dummy30 text
- - dummy31 text
- - dummy32 text
- - dummy33 text
- - dummy34 text
- - dummy35 text
- - dummy36 text
- - dummy37 text
- - dummy38 text
- - date_posted date
- - dummy40 text
- - date_posted_timestamp integer
- - dummy42 text
- - dummy43 text
- - dummy44 text
- - dummy45 text
- - dummy46 text
- - dummy47 text
- - dummy48 text
- - dummy49 text
- - dummy50 text
- - dummy51 text
- - dummy52 text
- - dummy53 text
- - dummy54 text
- - dummy55 text
- - dummy56 text
- - dummy57 text
- - dummy58 text
- - dummy59 text
- - dummy60 text
- - dummy61 text
- - dummy62 text
- - dummy63 text
- - dummy64 text
- - dummy65 text
- - dummy66 text
- - dummy67 text
- - dummy68 text
- - dummy69 text
- - dummy70 text
- - dummy71 text
- - dummy72 text
- - dummy73 text
- - refind_key text
- - first_extract_date timestamp
- - last_extract_date timestamp
- - dummy77 text
- - dummy78 text
- - dummy79 text
- - dummy80 text
- - dummy81 text
- - dummy82 text
- - dummy83 text
- - dummy84 text
- - dummy85 text
- - dummy86 text
- - dummy87 text
- - dummy88 text
- - dummy89 text
- - dummy90 text
- - dummy91 text
- - inter_source_hash text
- - dummy93 text
-
-
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/overwrite_retval_multiple.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/overwrite_retval_multiple.yml.in b/src/bin/gpmapreduce/test/input/overwrite_retval_multiple.yml.in
deleted file mode 100644
index acc692a..0000000
--- a/src/bin/gpmapreduce/test/input/overwrite_retval_multiple.yml.in
+++ /dev/null
@@ -1,47 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: data
- QUERY: "select * from simple"
- COLUMNS: [m int, n int]
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/overwrite_retval_multiple.1
- DELIMITER: '|'
-
- - FINALIZE:
- NAME: retcomposite
- PARAMETERS:
- - state int
- RETURNS:
- - f1_ integer
- - f2_ integer
- - f3_ integer
-
- - TRANSITION:
- NAME: tran
- PARAMETERS:
- - m int
- - n int
- RETURNS:
- - state int
-
- - REDUCE:
- NAME: reducer
- TRANSITION: tran
- FINALIZE: retcomposite
-
- - TASK:
- NAME: worker
- SOURCE: data
- REDUCE: reducer
-
-EXECUTE:
- - RUN:
- SOURCE: worker
- TARGET: out_1
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/overwrite_retval_single.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/overwrite_retval_single.yml.in b/src/bin/gpmapreduce/test/input/overwrite_retval_single.yml.in
deleted file mode 100644
index fb506fd..0000000
--- a/src/bin/gpmapreduce/test/input/overwrite_retval_single.yml.in
+++ /dev/null
@@ -1,47 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: data
- QUERY: "select * from simple"
- COLUMNS: [m int, n int]
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/overwrite_retval_single.1
- DELIMITER: '|'
-
- - FINALIZE:
- NAME: final
- FUNCTION: final
- PARAMETERS:
- - arg1 int
- RETURNS:
- - retval int
-
- - TRANSITION:
- NAME: tran
- PARAMETERS:
- - m int
- - n int
- RETURNS:
- - state int
-
-
- - REDUCE:
- NAME: reducer
- TRANSITION: tran
- FINALIZE: final
-
- - TASK:
- NAME: worker
- SOURCE: data
- REDUCE: reducer
-
-EXECUTE:
- - RUN:
- SOURCE: worker
- TARGET: out_1
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/overwrite_retval_table.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/overwrite_retval_table.yml.in b/src/bin/gpmapreduce/test/input/overwrite_retval_table.yml.in
deleted file mode 100644
index f06f711..0000000
--- a/src/bin/gpmapreduce/test/input/overwrite_retval_table.yml.in
+++ /dev/null
@@ -1,47 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: data
- QUERY: "select * from simple"
- COLUMNS: [m int, n int]
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/overwrite_retval_table.1
- DELIMITER: '|'
-
- - FINALIZE:
- NAME: retcomposite
- PARAMETERS:
- - state int
- RETURNS:
- - f1_ integer
- - f2_ integer
- - f3_ integer
-
- - TRANSITION:
- NAME: tran
- PARAMETERS:
- - m int
- - n int
- RETURNS:
- - state int
-
- - REDUCE:
- NAME: reducer
- TRANSITION: tran
- FINALIZE: retcomposite
-
- - TASK:
- NAME: worker
- SOURCE: data
- REDUCE: reducer
-
-EXECUTE:
- - RUN:
- SOURCE: worker
- TARGET: out_1
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/pagerank.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/pagerank.yml.in b/src/bin/gpmapreduce/test/input/pagerank.yml.in
deleted file mode 100644
index 695e06f..0000000
--- a/src/bin/gpmapreduce/test/input/pagerank.yml.in
+++ /dev/null
@@ -1,41 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: network
- FILE: @hostname@:@abs_srcdir@/data/www.small.dat
- COLUMNS: [key text, value text]
- DELIMITER: '|'
-
- - INPUT:
- NAME: network_graph
- QUERY: |
- SELECT key, value, count(*) over (partition by key) as outfactor
- FROM network
-
- - INPUT:
- NAME: network_rank
- QUERY: |
- SELECT key, .85 as value
- FROM network
- GROUP BY key
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/pagerank.1
- DELIMITER: '|'
-
- - TRANSITION:
- NAME: pagerank_transition
- PARAMETERS: [state float, rank float, outfactor integer]
- LANGUAGE: plperl
- FUNCTION: |
- return .85*$rank/$outfactor
-
-EXECUTE:
- - RUN:
- SOURCE: network_rank
- TARGET: out_1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/sort.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/sort.yml.in b/src/bin/gpmapreduce/test/input/sort.yml.in
deleted file mode 100644
index a5c49a2..0000000
--- a/src/bin/gpmapreduce/test/input/sort.yml.in
+++ /dev/null
@@ -1,19 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-
-DEFINE:
- - INPUT:
- NAME: access_log
- FILE:
- - @hostname@:@abs_srcdir@/data/access_log
- - @hostname@:@abs_srcdir@/data/access_log2
-
- - OUTPUT:
- NAME: sort_out
- FILE: @abs_srcdir@/output/sort.1
-
-EXECUTE:
- - RUN:
- SOURCE: access_log
- TARGET: sort_out
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/unload.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/unload.yml.in b/src/bin/gpmapreduce/test/input/unload.yml.in
deleted file mode 100644
index 9260b25..0000000
--- a/src/bin/gpmapreduce/test/input/unload.yml.in
+++ /dev/null
@@ -1,45 +0,0 @@
----
-VERSION: 1.0.0.1
-DEFINE:
- - INPUT:
- NAME: unload_input
- QUERY: |
- SELECT gp_execution_segment() as id, textin(record_out(row)) as tuple
- FROM unload_test row;
-
- - MAP:
- NAME: unload_map
- LANGUAGE: perlu
- PARAMETERS:
- - id integer
- - tuple text
- RETURNS:
- - value integer
- MODE: single
- FUNCTION: |
- # It is slightly faster to refer to parameters directly
- # my ($id, $tuple) = @_
-
- # strip parens from tuple, add newline
- $tuple = substr($_[1], 1, -1) . "\n";
-
- # Get the filehandle
- my $file = $_SHARED{'file'};
- if (not defined $file) {
- my $filename = "@abs_srcdir@/output/unload_$_[0]";
- open($file, ">$filename") or die "Unable to open file: $filename ($!)";
- $_SHARED{'file'} = $file;
- }
-
- # Write the tuple out to the file
- print $file $tuple;
-
- # print happens to return 1 on success.
- # we can eke out a little extra performance by using that as the return value
- # return 1;
-
-EXECUTE:
- - RUN:
- SOURCE: unload_input
- MAP: unload_map
- REDUCE: COUNT
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/wordcount.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/wordcount.yml.in b/src/bin/gpmapreduce/test/input/wordcount.yml.in
deleted file mode 100644
index dc64a3a..0000000
--- a/src/bin/gpmapreduce/test/input/wordcount.yml.in
+++ /dev/null
@@ -1,99 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-DATABASE: @db_user@
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: email
- FILE:
- - @hostname@:@abs_srcdir@/data/email/mailfiles
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/wordcount.1
- DELIMITER: '|'
-
- # The original example for this used nltk for word stemming and
- # tokenization, but it was simplified for testing to simply do
- # whitespace tokenization and no stemming.
- #
- # The reason for the simplification is that not all test machines
- # have nltk installed.
- - MAP:
- NAME: wordsplit_python
- FUNCTION: |
- from email import message_from_file
- text = message_from_file(open(value)).get_payload()
-
- # Old nltk version
- #import nltk
- #tokenizer = nltk.WordTokenizer()
- #stemmer = nltk.PorterStemmer()
- #stopwords = nltk.corpus.stopwords.words()
- #tokens = tokenizer.tokenize(text)
- #stems = map(lambda(x): stemmer.stem(x.lower()), tokens)
-
-
- # New version
- # Results are obviously not the same as the above since the stemming
- # in particular is pretty dumb, but it gets the right general idea
- # across
- from string import maketrans
- stopwords = ["at", "aft", "ar", "is", "be", "it",
- "to", "for", "from", "few", "ha", "last", "lik",
- "now", "no", "not", "on", "ov", "se", "up", "who",
- "which", "within", "what", "if", "into", "out",
- "som", "soon", "they", "that", "you", "with", "of", "will",
- "her", "him",
- "or", "in", "thes", "ther", "all", "any", "th", "away",
- "could", "each", "end", "should", "help", "next"]
- trailing = ["ation", "ments",
- "ated", "able", "ment",
- "ent", "ers", "ing", "ful", "ion",
- "al", "ed", "er", "es", "ly",
- "e", "s"]
- tokens = ''.join(map(lambda(x): x.isalnum() and x or ' ', text.lower())).split()
- def remove_trailing(x):
- for ending in trailing:
- if x.endswith(ending):
- return x[0:-len(ending)]
- return x
- stems = map(remove_trailing, tokens)
- stems = filter(lambda(x): len(x) > 1, stems)
-
-
- # filter stopwords and return results
- words = filter(lambda(x): x not in stopwords and not x.isdigit(), stems)
-
- return map(lambda(x): [x, 1], words)
- LANGUAGE: python
- OPTIMIZE: STRICT IMMUTABLE
- PARAMETERS: value text
- RETURNS:
- - key text
- - value integer
-
- - MAP:
- NAME: reverse_python
- FUNCTION: |
- return (value, key)
- LANGUAGE: python
- OPTIMIZE: STRICT IMMUTABLE
- MODE: SINGLE
- RETURNS:
- - key integer
- - value text
-
- - TASK:
- NAME: email_histogram
- SOURCE: email
- MAP: wordsplit_python
- REDUCE: SUM
-
-EXECUTE:
- - RUN:
- SOURCE: email_histogram
- MAP: reverse_python
- TARGET: out_1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/input/xml.yml.in
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/input/xml.yml.in b/src/bin/gpmapreduce/test/input/xml.yml.in
deleted file mode 100644
index 7d6edde..0000000
--- a/src/bin/gpmapreduce/test/input/xml.yml.in
+++ /dev/null
@@ -1,47 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-DATABASE: @db_user@
-USER: @db_user@
-
-DEFINE:
- - INPUT:
- NAME: xml
- FILE:
- - @hostname@:@abs_srcdir@/data/xml.dat
- DELIMITER: '|'
- COLUMNS:
- - key integer
- - value text
-
- - OUTPUT:
- NAME: out_1
- FILE: @abs_srcdir@/output/xml.1
- DELIMITER: '|'
-
- - MAP:
- NAME: xml_extract
- FUNCTION: |
- import re
- myre = re.compile('([0-9]*)</item>')
- vlist = filter(lambda(x): myre.search(x), value.split('<item>'))
- rval = []
- for pos in range(0,len(vlist)):
- rval.append([key, myre.search(vlist[pos]).group(1), pos])
- return rval
- LANGUAGE: python
- OPTIMIZE: STRICT IMMUTABLE
- MODE: MULTI
- PARAMETERS:
- - key integer
- - value text
- RETURNS:
- - key integer
- - value integer
- - pos integer
-
-EXECUTE:
- - RUN:
- SOURCE: xml
- MAP: xml_extract
- TARGET: out_1
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/lib/Makefile
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/lib/Makefile b/src/bin/gpmapreduce/test/lib/Makefile
deleted file mode 100644
index 9cee8d2..0000000
--- a/src/bin/gpmapreduce/test/lib/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES = gpmrdemo
-
-PG_CONFIG = pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
-
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/lib/gpmrdemo.c
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/lib/gpmrdemo.c b/src/bin/gpmapreduce/test/lib/gpmrdemo.c
deleted file mode 100644
index dd2738a..0000000
--- a/src/bin/gpmapreduce/test/lib/gpmrdemo.c
+++ /dev/null
@@ -1,348 +0,0 @@
-#include "postgres.h"
-#include "fmgr.h"
-#include "funcapi.h"
-#include "catalog/pg_type.h" /* oids of known types */
-#include "utils/builtins.h" /* Builtin functions, including lower() */
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <strings.h>
-
-/* Do the module magic dance */
-#ifdef PG_MODULE_MAGIC
-PG_MODULE_MAGIC;
-#endif
-PG_FUNCTION_INFO_V1(wordsplit);
-PG_FUNCTION_INFO_V1(int4_accum);
-PG_FUNCTION_INFO_V1(int8_add);
-PG_FUNCTION_INFO_V1(tran);
-PG_FUNCTION_INFO_V1(final);
-PG_FUNCTION_INFO_V1(retcomposite);
-
-
-/* Declare the functions as module exports */
-
-int tran(PG_FUNCTION_ARGS);
-int final(PG_FUNCTION_ARGS);
-Datum retcomposite(PG_FUNCTION_ARGS);
-Datum wordsplit(PG_FUNCTION_ARGS);
-Datum int4_accum(PG_FUNCTION_ARGS);
-Datum int8_add(PG_FUNCTION_ARGS);
-
-const char* wordsplit_str = "gpmrdemo:wordsplit()";
-const char* accum_str = "gpmrdemo:int4_accum()";
-const char* add_str = "gpmrdemo:int8_add()";
-
-
-/*
- * In this case the state structure is so simple that this is unnecessary,
- * but frequently a more complex user state is needed; this demonstrates
- * how that may be accomplished.
- */
-typedef struct {
- char *string;
-} wordsplit_state;
-
-/* Define the functions */
-Datum
-wordsplit(PG_FUNCTION_ARGS)
-{
- FuncCallContext *funcctx;
- TupleDesc tupdesc;
- wordsplit_state *myState;
- char *word;
- HeapTuple res;
- Datum result;
- Datum values[2];
- bool nulls[2] = {false, false};
-
- /* stuff done only on the first call of the function */
- if (SRF_IS_FIRSTCALL())
- {
- MemoryContext oldcontext;
-
- /* create a function context for cross-call persistence */
- funcctx = SRF_FIRSTCALL_INIT();
-
- /* switch to memory context appropriate for multiple function calls */
- oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
-
- /* Build a tuple descriptor for our result type */
- if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("function returning record called in context "
- "that cannot accept type record")));
-
- /* Bless the tuple descriptor for later use */
- funcctx->tuple_desc = BlessTupleDesc(tupdesc);
-
- /* Allocate a cross call user state */
- funcctx->user_fctx = palloc0(sizeof(wordsplit_state));
- myState = (wordsplit_state*) funcctx->user_fctx;
-
- /* Extract needed information from input parameters */
- if (PG_ARGISNULL(0))
- {
- myState->string = NULL;
- }
- else
- {
- /* Fetch the input parameter */
- Datum d = PG_GETARG_DATUM(0);
-
- /* Use a builtin function definition to convert it to lower case */
- d = DirectFunctionCall1(lower, d);
-
- /* Convert the datum into a cstring for our use */
- myState->string = TextDatumGetCString(d);
- }
-
- /* Restore the per-call memory context */
- MemoryContextSwitchTo(oldcontext);
- }
-
- /* stuff done on every call of the function */
- funcctx = SRF_PERCALL_SETUP();
- myState = (wordsplit_state*) funcctx->user_fctx;
-
-
- /* Handle the case of null input */
- if (!myState->string)
- {
- SRF_RETURN_DONE(funcctx);
- }
-
- /*
- * Begin messy string handling in "C", this would be so much nicer
- * in any more advanced language...
- */
-
- /* Scan forward to the next word character */
- while (myState->string[0] < 'a' || myState->string[0] > 'z')
- {
- /* If we hit the end then we are done */
- if (myState->string[0] == '\0')
- SRF_RETURN_DONE(funcctx);
- myState->string++;
- }
-
- /* Find the next word */
- word = myState->string;
-
- /* Scan forward until the end of the word */
- while (myState->string[0] >= 'a' && myState->string[0] <= 'z')
- myState->string++;
-
- /*
- * If we stopped on a non-letter character rather than the end of the
- * string, terminate the word there and step past that character.
- */
- if (myState->string[0] != '\0')
- {
- myState->string[0] = '\0';
- myState->string++;
- }
-
- /*
- * We have our word, and now need to construct the return tuple of:
- * (word, 1)
- */
- values[0] = CStringGetTextDatum(word);
- values[1] = Int32GetDatum(1);
-
- /* Construct the Tuple */
- res = heap_form_tuple(funcctx->tuple_desc, values, nulls);
-
- /* Convert the Tuple into a Datum */
- result = HeapTupleGetDatum(res);
-
- /* Return the tuple */
- SRF_RETURN_NEXT(funcctx, result);
-}
-
-Datum
-int4_accum(PG_FUNCTION_ARGS)
-{
- int64 state;
- int32 value;
-
- /*
- * GUARD against an incorrectly defined SQL function by verifying
- * that the parameters are the types we are expecting:
- * int4_accum(int64, int32) => int64
- */
- if (PG_NARGS() != 2)
- {
- elog(ERROR, "%s defined with %d arguments, expected 2",
- accum_str, PG_NARGS() );
- }
- if (get_fn_expr_argtype(fcinfo->flinfo, 0) != INT8OID ||
- get_fn_expr_argtype(fcinfo->flinfo, 1) != INT4OID)
- {
- elog(ERROR, "%s defined with invalid types, expected (int8, int4)",
- accum_str );
- }
- if (get_fn_expr_rettype(fcinfo->flinfo) != INT8OID)
- {
- elog(ERROR, "%s defined with invalid return type, expected int8",
- accum_str );
- }
-
- /*
- * GUARD against NULL input:
- * - IF both are null return NULL
- * - otherwise treat NULL as a zero value
- */
- if (PG_ARGISNULL(0) && PG_ARGISNULL(1))
- PG_RETURN_NULL();
- state = PG_ARGISNULL(0) ? 0 : PG_GETARG_INT64(0);
- value = PG_ARGISNULL(1) ? 0 : PG_GETARG_INT32(1);
-
- /* Do the math and return the result */
- PG_RETURN_INT64(state + value);
-}
-
-
-
-Datum
-int8_add(PG_FUNCTION_ARGS)
-{
- int64 state1;
- int64 state2;
-
- /*
- * GUARD against an incorrectly defined SQL function by verifying
- * that the parameters are the types we are expecting:
- * int8_add(int64, int64) => int64
- */
- if (PG_NARGS() != 2)
- {
- elog(ERROR, "%s defined with %d arguments, expected 2",
- add_str, PG_NARGS() );
- }
- if (get_fn_expr_argtype(fcinfo->flinfo, 0) != INT8OID ||
- get_fn_expr_argtype(fcinfo->flinfo, 1) != INT8OID)
- {
- elog(ERROR, "%s defined with invalid types, expected (int8, int8)",
- add_str );
- }
- if (get_fn_expr_rettype(fcinfo->flinfo) != INT8OID)
- {
- elog(ERROR, "%s defined with invalid return type, expected int8",
- add_str );
- }
-
- /*
- * GUARD against NULL input:
- * - IF both are null return NULL
- * - otherwise treat NULL as a zero value
- */
- if (PG_ARGISNULL(0) && PG_ARGISNULL(1))
- PG_RETURN_NULL();
- state1 = PG_ARGISNULL(0) ? 0 : PG_GETARG_INT64(0);
- state2 = PG_ARGISNULL(1) ? 0 : PG_GETARG_INT64(1);
-
- /* Do the math and return the result */
- PG_RETURN_INT64(state1 + state2);
-}
-
-int tran(PG_FUNCTION_ARGS)
-{
- int state = PG_GETARG_INT32(0);
- int arg2 = PG_GETARG_INT32(1);
-
- if (state > 0)
- {
- arg2 = state + arg2;
- }
- return arg2;
-}
-
-int final(PG_FUNCTION_ARGS)
-{
- int a = PG_GETARG_INT32(0);
-
- PG_RETURN_INT32(a);
-}
-
-Datum retcomposite(PG_FUNCTION_ARGS)
-{
- FuncCallContext *funcctx;
- int call_cntr;
- int max_calls;
- TupleDesc tupdesc;
- AttInMetadata *attinmeta;
-
- /* stuff done only on the first call of the function */
- if (SRF_IS_FIRSTCALL())
- {
- MemoryContext oldcontext;
-
- /* create a function context for cross-call persistence */
- funcctx = SRF_FIRSTCALL_INIT();
-
- /* switch to memory context appropriate for multiple function calls */
- oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
-
- /* total number of tuples to be returned */
- //funcctx->max_calls = PG_GETARG_UINT32(0);
- funcctx->max_calls = 1;
-
- /* Build a tuple descriptor for our result type */
- if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("function returning record called in context "
- "that cannot accept type record")));
-
- /*
- * generate attribute metadata needed later to produce tuples from raw
- * C strings
- */
- attinmeta = TupleDescGetAttInMetadata(tupdesc);
- funcctx->attinmeta = attinmeta;
-
- MemoryContextSwitchTo(oldcontext);
- }
-
- /* stuff done on every call of the function */
- funcctx = SRF_PERCALL_SETUP();
-
- call_cntr = funcctx->call_cntr;
- max_calls = 1;
- attinmeta = funcctx->attinmeta;
-
- if (call_cntr < max_calls) /* do when there is more left to send */
- {
- char **values;
- HeapTuple tuple;
- Datum result;
-
- /*
- * Prepare a values array for building the returned tuple.
- * This should be an array of C strings which will
- * be processed later by the type input functions.
- */
- values = (char **) palloc(3 * sizeof(char *));
- values[0] = (char *) palloc(16 * sizeof(char));
- values[1] = (char *) palloc(16 * sizeof(char));
- values[2] = (char *) palloc(16 * sizeof(char));
-
- snprintf(values[0], 16, "%d", 1* PG_GETARG_INT32(0));
- snprintf(values[1], 16, "%d", 2* PG_GETARG_INT32(0));
- snprintf(values[2], 16, "%d", 3* PG_GETARG_INT32(0));
-
- /* build a tuple */
- tuple = BuildTupleFromCStrings(attinmeta, values);
-
- /* make the tuple into a datum */
- result = HeapTupleGetDatum(tuple);
-
- SRF_RETURN_NEXT(funcctx, result);
- }
- else /* do when there is no more left */
- {
- SRF_RETURN_DONE(funcctx);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/output/badplperl.err
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/output/badplperl.err b/src/bin/gpmapreduce/test/output/badplperl.err
deleted file mode 100644
index d72f56b..0000000
--- a/src/bin/gpmapreduce/test/output/badplperl.err
+++ /dev/null
@@ -1,10 +0,0 @@
--- start_matchsubs
--- m/mapreduce_\d+_run/
--- s/mapreduce_\d+/mapreduce_DUMMY/
--- m/\(\w+.\w+:[0-9]+\)$/
--- s/\(\w+.\w+:[0-9]+\)$/\(file:line\)/
--- end_matchsubs
-ERROR: creation of Perl function failed
-DETAIL: syntax error at line 23, near "}GABLECK"
-syntax error at line 26, near "; }"
-Error: Object creation Failure
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/output/badplperl.out
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/output/badplperl.out b/src/bin/gpmapreduce/test/output/badplperl.out
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/builtin_1_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/builtin_1_init.sql b/src/bin/gpmapreduce/test/sql/builtin_1_init.sql
deleted file mode 100644
index a49285a..0000000
--- a/src/bin/gpmapreduce/test/sql/builtin_1_init.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-CREATE TYPE wordsplit_out AS (key text, value int4);
-
-CREATE FUNCTION wordsplit_1(text) returns setof wordsplit_out as '$libdir/gpmrdemo', 'wordsplit' language C;
-CREATE FUNCTION wordsplit_2(IN value text, OUT key text, OUT value int4) returns setof record as '$libdir/gpmrdemo', 'wordsplit' language C;
-
-CREATE FUNCTION myadd(int8, int4) returns int8 as '$libdir/gpmrdemo', 'int4_accum' language C;
-CREATE FUNCTION mysum(int8, int8) returns int8 as '$libdir/gpmrdemo', 'int8_add' language C;
-
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/input_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/input_done.sql b/src/bin/gpmapreduce/test/sql/input_done.sql
deleted file mode 100644
index c227896..0000000
--- a/src/bin/gpmapreduce/test/sql/input_done.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-select * from output.qualified order by a;
-
-drop table input.qualified;
-drop table output.qualified;
-drop schema input cascade;
-drop schema output cascade;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/input_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/input_init.sql b/src/bin/gpmapreduce/test/sql/input_init.sql
deleted file mode 100644
index 6c7bf64..0000000
--- a/src/bin/gpmapreduce/test/sql/input_init.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-create schema input;
-create schema output;
-create table input.qualified(a int, b text) distributed by (a);
-insert into input.qualified values(1, 'one');
-insert into input.qualified values(2, 'two');
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/kmeans_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/kmeans_done.sql b/src/bin/gpmapreduce/test/sql/kmeans_done.sql
deleted file mode 100644
index 8a183ea..0000000
--- a/src/bin/gpmapreduce/test/sql/kmeans_done.sql
+++ /dev/null
@@ -1,3 +0,0 @@
-select * from termfreqs order by key;
-
-drop table if exists termfreqs;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/member_kw_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/member_kw_done.sql b/src/bin/gpmapreduce/test/sql/member_kw_done.sql
deleted file mode 100644
index 62ca173..0000000
--- a/src/bin/gpmapreduce/test/sql/member_kw_done.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-select * from member_keywords_1;
-
-select * from member_keywords_2;
-
-drop table if exists member_keywords_1;
-drop table if exists member_keywords_2;
-drop table if exists member_summary;
-drop table if exists keywords;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/member_kw_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/member_kw_init.sql b/src/bin/gpmapreduce/test/sql/member_kw_init.sql
deleted file mode 100644
index a00de43..0000000
--- a/src/bin/gpmapreduce/test/sql/member_kw_init.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-create table member_summary (member_id int, text text) distributed by (member_id);
-create table keywords (keyword_id int, keyword text) distributed by (keyword_id);
-
-insert into member_summary values
- (1, 'Interested in software development and software profiling');
-
-insert into keywords values (100, 'software engineer');
-
-insert into keywords values (101, 'software development');
-
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_done.sql b/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_done.sql
deleted file mode 100644
index 1574b99..0000000
--- a/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_done.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-drop table simple;
-
-drop function tran(value int, value int);
-
-drop function retcomposite(IN integer, OUT integer, OUT integer, OUT integer);
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_init.sql b/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_init.sql
deleted file mode 100644
index 39416a8..0000000
--- a/src/bin/gpmapreduce/test/sql/overwrite_retval_multiple_init.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-create table simple(m int,n int) distributed randomly;
-insert into simple values (1,10);
-insert into simple values (2,20);
-insert into simple values (2,21);
-insert into simple values (2,22);
-insert into simple values (3,30);
-insert into simple values (4,40);
-insert into simple values (5,50);
-insert into simple values (5,50);
-insert into simple values (10,100);
-insert into simple values (2,21);
-
-create or replace function tran (value int, value int) returns int language 'C' as '$libdir/gpmrdemo', 'tran';
-
-CREATE OR REPLACE FUNCTION retcomposite(IN integer,
-OUT f1 integer, OUT integer, OUT f3 integer)
-RETURNS SETOF record
-AS '$libdir/gpmrdemo', 'retcomposite'
-LANGUAGE C IMMUTABLE STRICT;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/overwrite_retval_single_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/overwrite_retval_single_done.sql b/src/bin/gpmapreduce/test/sql/overwrite_retval_single_done.sql
deleted file mode 100644
index 64d21bb..0000000
--- a/src/bin/gpmapreduce/test/sql/overwrite_retval_single_done.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-drop table simple;
-
-drop function tran(value int, value int);
-
-drop function final(IN integer, OUT integer);
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/overwrite_retval_single_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/overwrite_retval_single_init.sql b/src/bin/gpmapreduce/test/sql/overwrite_retval_single_init.sql
deleted file mode 100644
index ef37c76..0000000
--- a/src/bin/gpmapreduce/test/sql/overwrite_retval_single_init.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-create table simple(m int,n int) distributed randomly;
-insert into simple values (1,10);
-insert into simple values (2,20);
-insert into simple values (2,21);
-insert into simple values (2,22);
-insert into simple values (3,30);
-insert into simple values (4,40);
-insert into simple values (5,50);
-insert into simple values (5,50);
-insert into simple values (10,100);
-insert into simple values (2,21);
-
-create function tran (value int, value int) returns int language 'C' as '$libdir/gpmrdemo', 'tran';
-
-CREATE OR REPLACE FUNCTION final(IN integer,
-OUT integer)
-RETURNS integer
-AS '$libdir/gpmrdemo', 'final'
-LANGUAGE C IMMUTABLE STRICT;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/overwrite_retval_table_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/overwrite_retval_table_done.sql b/src/bin/gpmapreduce/test/sql/overwrite_retval_table_done.sql
deleted file mode 100644
index a01811f..0000000
--- a/src/bin/gpmapreduce/test/sql/overwrite_retval_table_done.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-drop table simple;
-
-drop function tran(value int, value int, out integer);
-
-drop function retcomposite(IN integer);
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/overwrite_retval_table_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/overwrite_retval_table_init.sql b/src/bin/gpmapreduce/test/sql/overwrite_retval_table_init.sql
deleted file mode 100644
index 01c3c2d..0000000
--- a/src/bin/gpmapreduce/test/sql/overwrite_retval_table_init.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-create table simple(m int,n int) distributed randomly;
-insert into simple values (1,10);
-insert into simple values (2,20);
-insert into simple values (2,21);
-insert into simple values (2,22);
-insert into simple values (3,30);
-insert into simple values (4,40);
-insert into simple values (5,50);
-insert into simple values (5,50);
-insert into simple values (10,100);
-insert into simple values (2,21);
-
-create or replace function tran (value int, value int, out ret integer) returns integer language 'C' as '$libdir/gpmrdemo', 'tran';
-
-CREATE OR REPLACE FUNCTION retcomposite(IN integer)
-RETURNS TABLE(x integer, y integer, z integer)
-AS '$libdir/gpmrdemo', 'retcomposite'
-LANGUAGE C IMMUTABLE STRICT;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/unload_done.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/unload_done.sql b/src/bin/gpmapreduce/test/sql/unload_done.sql
deleted file mode 100644
index 71fa14a..0000000
--- a/src/bin/gpmapreduce/test/sql/unload_done.sql
+++ /dev/null
@@ -1 +0,0 @@
-drop table if exists unload_test;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/sql/unload_init.sql
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/sql/unload_init.sql b/src/bin/gpmapreduce/test/sql/unload_init.sql
deleted file mode 100644
index 5add471..0000000
--- a/src/bin/gpmapreduce/test/sql/unload_init.sql
+++ /dev/null
@@ -1,11 +0,0 @@
-create table unload_test (f1 int, f2 text , f3 text) distributed by (f1);
-
-copy unload_test from stdin;
-1 one uno
-2 two dos
-3 three treis
-4 four cuatro
-5 \N \N
-6 null null
-7 text with space "text" with 'quotes'
-\.
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4e392375/src/bin/gpmapreduce/test/yml/kmeans.yml
----------------------------------------------------------------------
diff --git a/src/bin/gpmapreduce/test/yml/kmeans.yml b/src/bin/gpmapreduce/test/yml/kmeans.yml
deleted file mode 100644
index b915c57..0000000
--- a/src/bin/gpmapreduce/test/yml/kmeans.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-%YAML 1.1
----
-VERSION: 1.0.0.1
-
-DEFINE:
- - INPUT:
- NAME: blogdata
- FILE: maple:/Users/cwelton/dev/cdb2/main/src/bin/gpmapreduce/test/data/blognormal.txt
- DELIMITER: ';'
- COLUMNS:
- - blog text
- - term text
- - value float
-
- - MAP:
- NAME: termvalue
- PARAMETERS: [blog,term,value]
- LANGUAGE: python
- RETURNS: [key text,value float]
- MODE: SINGLE
- FUNCTION: |
- return {'key': term, 'value': value}
-
- - REDUCE:
- NAME: min_and_max
- TRANSITION: mm_trans
- FINALIZE: unpack_mm
-
- - TRANSITION:
- NAME: mm_trans
- LANGUAGE: python
- FUNCTION: |
- if state==None:
- min=max=0
- else:
- [min,max] = state.split(',')
- if value < min:
- min = value
- if value > max:
- max = value
- return str(min)+","+str(max)
-
- - FINALIZE:
- NAME: unpack_mm
- LANGUAGE: python
- RETURNS: [min, max]
- MODE: SINGLE
- FUNCTION: |
- a = state.split(',')
- return {'min':a[0], 'max':a[1]}
-
- - OUTPUT:
- NAME: termfreqs_table
- TABLE: termfreqs
- MODE: REPLACE
-
-EXECUTE:
- - RUN:
- SOURCE: blogdata
- MAP: termvalue
- REDUCE: min_and_max
- TARGET: termfreqs_table
\ No newline at end of file