Posted to commits@madlib.apache.org by ri...@apache.org on 2015/12/30 23:48:34 UTC

incubator-madlib git commit: Covariance: Add covariance matrix function (Pearson)

Repository: incubator-madlib
Updated Branches:
  refs/heads/master 33ed578b9 -> ba5fc1ead


Covariance: Add covariance matrix function (Pearson)

JIRA: MADLIB-941

Added a new function, covariance(), which returns the covariance matrix. The
implementation reuses the Pearson correlation machinery but skips the
scaling performed at the final step.
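
For reference, the Pearson correlation matrix is just the covariance matrix
rescaled by the per-column standard deviations, so dropping that final
rescaling yields the covariance directly. A minimal NumPy sketch of the
relationship (an illustration only, not the MADlib code path):

    import numpy as np

    x = np.array([[1.0, 2.0], [2.0, 4.5], [3.0, 5.5]])    # rows are samples
    centered = x - x.mean(axis=0)
    cov = centered.T @ centered / x.shape[0]               # population covariance
    std = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std, std)                        # Pearson correlation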


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/ba5fc1ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/ba5fc1ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/ba5fc1ea

Branch: refs/heads/master
Commit: ba5fc1ead162f71a37badcc772f410dbcd4322b2
Parents: 33ed578
Author: Rahul Iyer <ri...@pivotal.io>
Authored: Wed Dec 30 14:46:22 2015 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Wed Dec 30 14:46:22 2015 -0800

----------------------------------------------------------------------
 src/modules/stats/correlation.cpp               |   6 +-
 .../postgres/modules/stats/correlation.py_in    | 271 +++++++++----------
 .../postgres/modules/stats/correlation.sql_in   | 117 ++++++++
 .../modules/utilities/validate_args.py_in       |  31 ++-
 4 files changed, 269 insertions(+), 156 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ba5fc1ea/src/modules/stats/correlation.cpp
----------------------------------------------------------------------
diff --git a/src/modules/stats/correlation.cpp b/src/modules/stats/correlation.cpp
index b0fd858..e300829 100644
--- a/src/modules/stats/correlation.cpp
+++ b/src/modules/stats/correlation.cpp
@@ -20,7 +20,7 @@ using namespace dbal::eigen_integration;
 
 AnyType
 correlation_transition::run(AnyType& args) {
-    // args[2]
+    // args[2] is the mean of the feature vector
     if (args[2].isNull()) {
         throw std::runtime_error("Correlation: Mean vector is NULL.");
     }
@@ -31,7 +31,7 @@ correlation_transition::run(AnyType& args) {
     } catch (const ArrayWithNullException &e) {
         throw std::runtime_error("Correlation: Mean vector contains NULL.");
     }
-    // args[0]
+    // args[0] is the covariance matrix
     MutableNativeMatrix state;
     if (args[0].isNull()) {
         state.rebind(this->allocateArray<double>(mean.size(), mean.size()),
@@ -39,7 +39,7 @@ correlation_transition::run(AnyType& args) {
     } else {
         state.rebind(args[0].getAs<MutableArrayHandle<double> >());
     }
-    // args[1]
+    // args[1] is the current data vector
     if (args[1].isNull()) { return state; }
     MappedColumnVector x;
     try {

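In plain terms, the transition function above (only its argument handling is
shown in this hunk) accumulates each row's centered outer product,
(x - mean)(x - mean)^T, into a running NxN state, and the merge function adds
partial states from different segments. A hedged Python sketch of the
equivalent logic (names and types are illustrative, not the C++ API):

    import numpy as np

    def transition(state, x, mean):
        """Add one row's contribution (x - mean)(x - mean)^T to the state."""
        if x is None:                       # NULL row: state is unchanged
            return state
        d = np.asarray(x, dtype=float) - np.asarray(mean, dtype=float)
        if state is None:                   # first row: allocate the NxN state
            state = np.zeros((d.size, d.size))
        return state + np.outer(d, d)

    def merge(state_a, state_b):
        """Partial states from different segments simply add."""
        if state_a is None:
            return state_b
        if state_b is None:
            return state_a
        return state_a + state_b
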
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ba5fc1ea/src/ports/postgres/modules/stats/correlation.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/correlation.py_in b/src/ports/postgres/modules/stats/correlation.py_in
index 6ad07ce..6bc1b5a 100644
--- a/src/ports/postgres/modules/stats/correlation.py_in
+++ b/src/ports/postgres/modules/stats/correlation.py_in
@@ -8,9 +8,13 @@
 import plpy
 from time import time
 from utilities.utilities import unique_string, add_postfix
+from utilities.validate_args import get_cols_and_types
+from utilities.control import MinWarning
+
 
 def correlation(schema_madlib, source_table, output_table,
-                target_cols, verbose, **kwargs):
+                target_cols, get_cov=False, verbose=False,
+                **kwargs):
     """
     Populates an output table with the coefficients of correlation between
     the columns in a source table
@@ -19,6 +23,8 @@ def correlation(schema_madlib, source_table, output_table,
         @param source_table   Name of input table
         @param output_table   Name of output table
         @param target_cols    Name of specific columns targeted for correlation
+        @param get_cov        If True, return the covariance matrix;
+                                otherwise return the correlation matrix
         @param verbose        Flag to determine whether to output debug info
 
     Returns:
@@ -34,11 +40,11 @@ def correlation(schema_madlib, source_table, output_table,
         # column names
         _existing_target_cols = list(set(column for column in _target_cols
                                          if column in _numeric_column_names))
-        _nonexisting_target_cols = list(set(_target_cols)
-                                        - set(_existing_target_cols)
-                                        - set(_nonnumeric_column_names))
-        _nonnumeric_target_cols = list(set(_target_cols)
-                                       & set(_nonnumeric_column_names))
+        _nonexisting_target_cols = list(set(_target_cols) -
+                                        set(_existing_target_cols) -
+                                        set(_nonnumeric_column_names))
+        _nonnumeric_target_cols = list(set(_target_cols) &
+                                       set(_nonnumeric_column_names))
     else:
         # if target_cols not provided then all numeric columns are to be targeted
         _existing_target_cols = _numeric_column_names[:]
@@ -64,7 +70,7 @@ def correlation(schema_madlib, source_table, output_table,
     # ---- Output message ----
 
     return _populate_output_table(schema_madlib, source_table, output_table,
-                                  _existing_target_cols, verbose)
+                                  _existing_target_cols, get_cov, verbose)
 
 
 # -----------------------------------------------------------------------
@@ -102,7 +108,7 @@ def _validate_corr_arg(source_table, output_table):
 
 
 # -----------------------------------------------------------------------
-# Get all column names in source table
+# Get all numeric column names in source table
 # -----------------------------------------------------------------------
 def _get_numeric_columns(source_table):
     """
@@ -114,42 +120,18 @@ def _get_numeric_columns(source_table):
     Returns:
         List of column names in table
     """
-    # determine the exact table_schema and table_name
-    # in case that source_table only contains table_name
-    row = plpy.execute("""
-                        SELECT
-                            quote_ident(nspname) AS table_schema,
-                            quote_ident(relname) AS table_name
-                        FROM
-                            pg_class AS c,
-                            pg_namespace AS nsp
-                        WHERE
-                            c.oid = '{source_table}'::regclass::oid AND
-                            c.relnamespace = nsp.oid
-                        """.format(source_table=source_table))
-    table_schema = row[0]['table_schema']
-    table_name = row[0]['table_name']
 
     # retrieve the numeric columns
     numeric_types = ('smallint', 'integer', 'bigint',
                      'real', 'numeric', 'double precision')
-    all_columns = plpy.execute("""
-                                SELECT quote_ident(column_name) as column_name,
-                                       data_type
-                                FROM
-                                    information_schema.columns
-                                WHERE
-                                    quote_ident(table_schema) = '{table_schema}' AND
-                                    quote_ident(table_name) = '{table_name}'
-                                ORDER BY ordinal_position
-                                """.format(table_schema=table_schema,
-                                           table_name=table_name))
-    all_col_names = set(column['column_name'] for column in all_columns)
-    num_col_names = [column['column_name'] for column in all_columns
-                     if column['data_type'] in numeric_types]
-    nonnum_col_names = list(all_col_names - set(num_col_names))
-
-    return (num_col_names, nonnum_col_names)
+    all_col_types = get_cols_and_types(source_table)
+    all_col_names = set(all_col_types.keys())
+
+    numeric_col_names = [c for c in all_col_names
+                         if all_col_types[c] in numeric_types]
+    nonnum_col_names = list(all_col_names - set(numeric_col_names))
+
+    return (numeric_col_names, nonnum_col_names)
 
 
 # -----------------------------------------------------------------------
@@ -170,7 +152,7 @@ def _analyze_target_cols(target_cols):
 # Create and populate output table
 # -----------------------------------------------------------------------
 def _populate_output_table(schema_madlib, source_table, output_table,
-                           col_names, verbose):
+                           col_names, get_cov=True, verbose=False):
     """
     Creates a relation with the appropriate number of columns given a list of
     column names and populates with the correlation coefficients. If the table
@@ -180,122 +162,126 @@ def _populate_output_table(schema_madlib, source_table, output_table,
         @param schema_madlib  Schema of MADlib
         @param source_table   Name of source table
         @param output_table   Name of output table
-        @param _target_cols   Name of all columns to place in output table
+        @param col_names      Name of all columns to place in output table
+        @param get_cov        If True, return the covariance matrix;
+                                otherwise return the correlation matrix
 
     Returns:
         Tuple (output table name, number of columns, time for computation)
     """
-    old_msg_level = plpy.execute("""
-        SELECT setting FROM pg_settings
-        WHERE name='client_min_messages'
-        """)[0]['setting']
-    if verbose:
-        plpy.execute("SET client_min_messages TO info")
-    else:
-        plpy.execute("SET client_min_messages TO error")
-
-    start = time()
-    col_len = len(col_names)
-    col_names_str = ",".join(col_names)
-    temp_table = unique_string()
-
-    # actual computation
-    plpy.execute("""
-        CREATE TEMP TABLE {temp_table} AS
-        SELECT
-            tot_cnt,
-            count(*) AS non_null_cnt,
-            mean,
-            {schema_madlib}.correlation_agg(x, mean) as cor_mat
-        FROM
-        (
-            SELECT ARRAY[{col_names_str}]::float8[] AS x
-            FROM {source_table}
-        ) src1,
-        (
+    with MinWarning("info" if verbose else "error"):
+        start = time()
+        col_len = len(col_names)
+        col_names_str = ",".join(col_names)
+        temp_table = unique_string()
+        if get_cov:
+            agg_str = """
+                (CASE WHEN count(*) > 0
+                      THEN {0}.array_scalar_mult({0}.covariance_agg(x, mean),
+                                                 1.0 / count(*)::double precision)
+                      ELSE NULL
+                END) """.format(schema_madlib)
+        else:
+            agg_str = "{0}.correlation_agg(x, mean)".format(schema_madlib)
+
+        # actual computation
+        plpy.execute("""
+            CREATE TEMP TABLE {temp_table} AS
             SELECT
-                count(*) AS tot_cnt,
-                {schema_madlib}.avg(x) AS mean
+                tot_cnt,
+                count(*) AS non_null_cnt,
+                mean,
+                {agg_str} as cor_mat
             FROM
             (
                 SELECT ARRAY[{col_names_str}]::float8[] AS x
                 FROM {source_table}
-            ) src2
-        ) subq
-        WHERE NOT {schema_madlib}.array_contains_null(x)
-        GROUP BY tot_cnt, mean
-        """.format(**locals()))
-
-    # create summary table
-    summary_table = add_postfix(output_table, "_summary")
-    q_summary = """
-        CREATE TABLE {summary_table} AS
-        SELECT
-            'correlation'::varchar      AS method,
-            '{source_table}'::varchar   AS source,
-            '{output_table}'::varchar   AS output_table,
-            '{col_names_str}'::varchar  AS column_names,
-            mean                        AS mean_vector,
-            non_null_cnt                AS total_rows_processed,
-            tot_cnt - non_null_cnt      AS total_rows_skipped
-        FROM {temp_table}
-        """.format(**locals())
-
-    plpy.execute(q_summary)
-
-    # create output table
-    as_list = "deconstructed(column_position integer"
-    for k, c in enumerate(col_names):
-        if k % 10 == 0:
-            as_list += "\n                "
-        as_list += ", {c} float8".format(c=c)
-    as_list += ")"
-
-    output_plan = plpy.prepare("""
-        CREATE TABLE {output_table} AS
-        SELECT
-            *
-        FROM
-        (
+            ) src1,
+            (
+                SELECT
+                    count(*) AS tot_cnt,
+                    {schema_madlib}.avg(x) AS mean
+                FROM
+                (
+                    SELECT ARRAY[{col_names_str}]::float8[] AS x
+                    FROM {source_table}
+                ) src2
+            ) subq
+            WHERE NOT {schema_madlib}.array_contains_null(x)
+            GROUP BY tot_cnt, mean
+            """.format(**locals()))
+
+        # create summary table
+        summary_table = add_postfix(output_table, "_summary")
+        q_summary = """
+            CREATE TABLE {summary_table} AS
             SELECT
-                generate_series(1, {num_cols}) AS column_position,
-                unnest($1) AS variable
-        ) variable_subq
-        JOIN
-        (
+                'correlation'::varchar      AS method,
+                '{source_table}'::varchar   AS source,
+                '{output_table}'::varchar   AS output_table,
+                '{col_names_str}'::varchar  AS column_names,
+                mean                        AS mean_vector,
+                non_null_cnt                AS total_rows_processed,
+                tot_cnt - non_null_cnt      AS total_rows_skipped
+            FROM {temp_table}
+            """.format(**locals())
+
+        plpy.execute(q_summary)
+
+        # create output table
+        as_list = "deconstructed(column_position integer"
+        for k, c in enumerate(col_names):
+            if k % 10 == 0:
+                as_list += "\n                "
+            as_list += ", {c} float8".format(c=c)
+        as_list += ")"
+
+        output_plan = plpy.prepare("""
+            CREATE TABLE {output_table} AS
             SELECT
                 *
             FROM
-                {schema_madlib}.__deconstruct_lower_triangle(
-                    (SELECT cor_mat FROM {temp_table})
-                )
-                AS {as_list}
-        ) matrix_subq
-        USING (column_position)
-        """.format(num_cols=len(col_names), **locals()), ["varchar[]"])
+            (
+                SELECT
+                    generate_series(1, {num_cols}) AS column_position,
+                    unnest($1) AS variable
+            ) variable_subq
+            JOIN
+            (
+                SELECT
+                    *
+                FROM
+                    {schema_madlib}.__deconstruct_lower_triangle(
+                        (SELECT cor_mat FROM {temp_table})
+                    )
+                    AS {as_list}
+            ) matrix_subq
+            USING (column_position)
+            """.format(num_cols=len(col_names), **locals()), ["varchar[]"])
 
-    plpy.execute(output_plan, [col_names])
+        plpy.execute(output_plan, [col_names])
 
-    # clean up and return
-    plpy.execute("DROP TABLE {temp_table}".format(**locals()))
-    plpy.execute("SET client_min_messages TO " + old_msg_level)
-    end = time()
-    return (output_table, len(col_names), end - start)
+        # clean up and return
+        plpy.execute("DROP TABLE {temp_table}".format(**locals()))
+        end = time()
+        return (output_table, len(col_names), end - start)
 
 
 # -----------------------------------------------------------------------
 # Help messages
 # -----------------------------------------------------------------------
-def correlation_help_message(schema_madlib, message, **kwargs):
+def correlation_help_message(schema_madlib, message, cov=False, **kwargs):
     """
     Given a help string, provide usage information
     """
+    func = "covariance" if cov else "correlation"
+
     if message is not None and \
             message.lower() in ("usage", "help", "?"):
         return """
 Usage:
 -----------------------------------------------------------------------
-SELECT {schema_madlib}.correlation
+SELECT {schema_madlib}.{func}
 (
     source_table TEXT,   -- Source table name (Required)
     output_table TEXT,   -- Output table name (Required)
@@ -310,14 +296,14 @@ The columns of the table are described as follows:
 
     - column_position   : Position of the variable in the 'source_table'.
     - variable          : Provides the row-header for each variable
-    - Rest of the table is the NxN correlation matrix for all numeric columns
+    - Rest of the table is the NxN {func} matrix for all numeric columns
     in 'source_table'.
 
The output table is arranged as a lower-triangular matrix with the upper
triangle set to NULL and the diagonal elements set to 1.0. To obtain the
result from the output_table in this matrix format, be sure to order the
elements using the 'column_position' column.
-        """.format(schema_madlib=schema_madlib)
+        """.format(schema_madlib=schema_madlib, func=func)
     elif message is not None and message.lower() in ('example', 'examples'):
         return """
 DROP TABLE IF EXISTS example_data;
@@ -362,13 +348,13 @@ VALUES(NULL, 100, 100, 'true', NULL);
 INSERT INTO example_data(outlook, temperature, humidity, windy, class)
 VALUES(NULL, 110, 100, 'true', NULL);
 
-SELECT madlib.correlation('example_data', 'example_data_output');
-SELECT madlib.correlation('example_data', 'example_data_output', '*');
-SELECT madlib.correlation('example_data', 'example_data_output', 'temperature, humidity');
+SELECT madlib.{func}('example_data', 'example_data_output');
+SELECT madlib.{func}('example_data', 'example_data_output', '*');
+SELECT madlib.{func}('example_data', 'example_data_output', 'temperature, humidity');
 
-To get the correlation matrix from output table:
+-- To get the {func} matrix from the output table:
 SELECT * from example_data_output order by column_position;
-         """
+         """.format(func=func)
     else:
         return """
 A correlation function is the degree and direction of association of
@@ -378,9 +364,14 @@ from the other. The coefficient of correlation varies from -1 to 1:
 perfectly anti-correlated.
 -------
 For an overview on usage, run:
-SELECT {schema_madlib}.correlation('usage');
+    SELECT {schema_madlib}.correlation('usage');
+
+To obtain the covariance values instead of correlation:
+    SELECT {schema_madlib}.covariance('usage');
 -------
-For an example, run:
-SELECT {schema_madlib}.correlation('example')
-            """.format(schema_madlib=schema_madlib)
+For examples:
+    SELECT {schema_madlib}.correlation('example');
+OR
+    SELECT {schema_madlib}.covariance('example');
 
+            """.format(schema_madlib=schema_madlib)

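A note on the scaling above: covariance_agg accumulates the unnormalized sum
S = sum over non-NULL rows of (x - mean)(x - mean)^T, and the driver then
scales it by 1.0/count(*) (assuming array_scalar_mult is an element-wise
scale, as its name suggests), i.e. the reported matrix is the population
covariance (divide by N, not N - 1). A quick NumPy check of that convention,
independent of MADlib:

    import numpy as np

    x = np.array([[96.0, 85.0], [64.0, 65.0], [70.0, 80.0]])
    centered = x - x.mean(axis=0)
    state = centered.T @ centered           # what the aggregate accumulates
    cov = state / x.shape[0]                # scaling done by the Python driver
    assert np.allclose(cov, np.cov(x, rowvar=False, bias=True))
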
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ba5fc1ea/src/ports/postgres/modules/stats/correlation.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/correlation.sql_in b/src/ports/postgres/modules/stats/correlation.sql_in
index 4f2c255..36cb133 100644
--- a/src/ports/postgres/modules/stats/correlation.sql_in
+++ b/src/ports/postgres/modules/stats/correlation.sql_in
@@ -50,6 +50,16 @@ correlation( source_table,
            )
 </pre>
 
+The covariance function, with a similar syntax,
+can be used to compute the covariance between features.
+<pre class="syntax">
+covariance( source_table,
+             output_table,
+             target_cols,
+             verbose
+           )
+</pre>
+
 <dl class="arglist">
 
 <dt>source_table</dt>
@@ -176,6 +186,26 @@ Result:
 (2 rows)
 </pre>
 
+-# Compute the covariance of features in the data set.
+<pre class="example">
+SELECT madlib.covariance( 'example_data',
+                          'cov_output'
+                         );
+</pre>
+
+-# View the covariance matrix.
+<pre class="example">
+SELECT * FROM cov_output ORDER BY column_position;
+</pre>
+Result:
+<pre class="result">
+ column_position |  variable   |    temperature    | humidity
+-----------------+-------------+-------------------+----------
+               1 | temperature |      146.25       |
+               2 | humidity    |      82.125       | 121.1875
+(2 rows)
+</pre>
+
 @par Notes
 The current implementation ignores any row that contains a NULL entirely; such
 a row does not contribute to the final result.
@@ -230,6 +260,21 @@ CREATE AGGREGATE MADLIB_SCHEMA.correlation_agg(
     -- use NULL as the initial value
 );
 
+DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.covariance_agg(
+    double precision[], double precision[]);
+CREATE AGGREGATE MADLIB_SCHEMA.covariance_agg(
+    /* x */     double precision[],
+    /* mean */  double precision[]
+) (
+    SType = double precision[],
+    m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.correlation_merge,')
+    SFunc = MADLIB_SCHEMA.correlation_transition
+    -- use NULL as the initial value
+    -- return the last transition or merge state as the final state
+    -- this aggregate does not divide by the number of samples
+    --    (i.e. the state is the sum of (x - mean)(x - mean)^T, not its expectation)
+);
+
 -----------------------------------------------------------------------
 
 DROP TYPE IF EXISTS MADLIB_SCHEMA.correlation_result CASCADE;
@@ -309,3 +354,75 @@ RETURNS text AS $$
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
 -------------------------------------------------------------------------
+
+-----------------------------------------------------------------------
+-- Main function for covariance
+-----------------------------------------------------------------------
+/* @brief Compute a covariance matrix for a table with optional target columns specified
+
+   @param source_table Name of source relation containing the data
+   @param output_table Name of output table to store the covariance matrix
+   @param target_cols  String with a comma-separated list of columns for which covariance is desired
+   @param verbose      Flag to determine verbosity
+
+   @usage
+   <pre> SELECT MADLIB_SCHEMA.covariance (
+         '<em>source_table</em>', '<em>output_table</em>',
+         '<em>target_cols</em>'
+     );
+     SELECT * FROM '<em>output_table</em>' ORDER BY '<em>column_position</em>';
+   </pre>
+*/
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
+    source_table varchar, --  input table name
+    output_table varchar, -- output table name
+    target_cols  varchar, -- comma separated list of output cols (default = '*')
+    verbose      boolean  -- flag to determine verbosity
+) RETURNS MADLIB_SCHEMA.correlation_result AS $$
+    PythonFunctionBodyOnly(`stats', `correlation')
+    return correlation.correlation(schema_madlib, source_table, output_table,
+                                   target_cols, True, verbose)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+
+-----------------------------------------------------------------------
+-- Overloaded functions
+-----------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
+    source_table varchar, --  input table name
+    output_table varchar, -- output table name
+    target_cols  varchar  -- comma separated list of output cols (default = '*')
+)
+RETURNS MADLIB_SCHEMA.correlation_result AS $$
+    select MADLIB_SCHEMA.covariance($1, $2, $3, FALSE)
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+
+-----------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
+    source_table varchar, --  input table name
+    output_table varchar  -- output table name
+) RETURNS MADLIB_SCHEMA.correlation_result AS $$
+    select MADLIB_SCHEMA.covariance($1, $2, NULL)
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+
+-----------------------------------------------------------------------
+-- Help functions
+-----------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
+    input_message       text
+) RETURNS TEXT AS $$
+    PythonFunctionBodyOnly(`stats', `correlation')
+    return correlation.correlation_help_message(schema_madlib, input_message, cov=True)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
+
+-----------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance()
+RETURNS text AS $$
+    PythonFunctionBodyOnly(`stats', `correlation')
+    return correlation.correlation_help_message(schema_madlib, None, cov=True)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
+-------------------------------------------------------------------------

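As the Notes above say, a row containing a NULL is skipped entirely and does
not contribute to the result (listwise deletion). A small illustration of that
behaviour outside MADlib (for simplicity the mean here is also computed over
the complete rows only):

    import numpy as np

    rows = [[96.0, 85.0], [None, 65.0], [70.0, 80.0]]
    complete = np.array([r for r in rows if None not in r], dtype=float)
    mean = complete.mean(axis=0)
    cov = (complete - mean).T @ (complete - mean) / len(complete)
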
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/ba5fc1ea/src/ports/postgres/modules/utilities/validate_args.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index 68bc862..fc4d142 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -282,23 +282,28 @@ def get_cols_and_types(tbl):
     if tbl is None or tbl.lower() == 'null':
         plpy.error('Input error: Table name (NULL) is invalid')
 
-    names = tbl.split(".")
+    # determine the exact table_schema and table_name
+    # in case source_table only contains the table_name
+    row = plpy.execute("""
+                        SELECT
+                            quote_ident(nspname) AS table_schema,
+                            quote_ident(relname) AS table_name
+                        FROM
+                            pg_class AS c,
+                            pg_namespace AS nsp
+                        WHERE
+                            c.oid = '{source_table}'::regclass::oid AND
+                            c.relnamespace = nsp.oid
+                        """.format(source_table=tbl))
+    schema = row[0]['table_schema']
+    table = row[0]['table_name']
 
-    if not names or len(names) > 2:
-        raise TypeError("Input error: Invalid table name - {0}!".format(tbl))
-    elif len(names) == 1:
-        table = _unquote_name(names[0])
-        schema = get_first_schema(table)
-    elif len(names) == 2:
-        schema = _unquote_name(names[0])
-        table = _unquote_name(names[1])
     sql_string = """SELECT array_agg(quote_ident(column_name)::varchar) AS cols,
                            array_agg(data_type::varchar) AS types
                     FROM information_schema.columns
-                    WHERE table_name = '{table_name}'
-                    AND table_schema = '{schema_name}'
-                """.format(table_name=table,
-                           schema_name=schema)
+                    WHERE table_name = '{table}'
+                    AND table_schema = '{schema}'
+                """.format(table=table, schema=schema)
     result = plpy.execute(sql_string)[0]
     col_names = _string_to_array(result['cols'])
     col_types = _string_to_array(result['types'])