You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hawq.apache.org by "Ruilong Huo (JIRA)" <ji...@apache.org> on 2016/02/16 09:01:18 UTC

[jira] [Updated] (HAWQ-412) Allocate query resource for multiple EXECUTIONs in prepared statement

     [ https://issues.apache.org/jira/browse/HAWQ-412?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ruilong Huo updated HAWQ-412:
-----------------------------
    Description: 
There are several scenarios that prepared statement can be used:
1) Explicit prepared statement, either in SQL statement or in JDBC/ODBC
2) Implicit prepared statement, i.e. (table) functions, UDFs

In prepared statement, the query is planned once, while the plan is executed multiple times. We need to allocate resource for the multiple executions of the plan. Otherwise, it may error out or even core dump.

Here are several cases that hit this problem:
1) Explicit prepared statement in (table) function and UDF
{noformat}
CREATE OR REPLACE FUNCTION f()
RETURNS VOID AS $$
    plpy.execute("DROP TABLE IF EXISTS t")
    plpy.execute("CREATE TABLE t(key TEXT, value DOUBLE PRECISION)")
    sql_plan = plpy.prepare("""
        INSERT INTO test
        SELECT 
            unnest($1) as key, 
            unnest($2) as value
        """, ['text[]', 'double precision[]'])
    plpy.execute(sql_plan,  [['a'], [1]])
    plpy.execute(sql_plan,  [['a'], [1]])
$$ LANGUAGE plpythonu VOLATILE;

SELECT f();
Error:
ERROR: plpy.SPIError: could not serialize unrecognized node type: 45758608 (plpython.c:4651) CONTEXT: Traceback (most recent call last): PL/Python function "f", line 13, in <module> [['a'], [1]]) PL/Python function "f"
{noformat}

2) Implicit prepared statement in (table) function and UDF
{noformat}
CREATE TABLE users (
	fname text not null,
	lname text not null,
	username text,
	userid serial
	-- , PRIMARY KEY(lname, fname) 
	) DISTRIBUTED BY (userid);

INSERT INTO users (fname, lname, username) VALUES ('jane', 'doe', 'j_doe');
INSERT INTO users (fname, lname, username) VALUES ('john', 'doe', 'johnd');
INSERT INTO users (fname, lname, username) VALUES ('willem', 'doe', 'w_doe');
INSERT INTO users (fname, lname, username) VALUES ('rick', 'smith', 'slash');

CREATE FUNCTION spi_prepared_plan_test_one(a text)
RETURNS text
AS
'
if not SD.has_key("myplan"):
    q = "SELECT count(*) FROM users WHERE lname = $1"
    SD["myplan"] = plpy.prepare(q, [ "text" ])
try:
    rv = plpy.execute(SD["myplan"], [a])
    return "there are " + str(rv[0]["count"]) + " " + str(a) + "s"
except Exception, ex:
    plpy.error(str(ex))
return None
'
LANGUAGE plpythonu;


select spi_prepared_plan_test_one('doe');
select spi_prepared_plan_test_one('doe');
select spi_prepared_plan_test_one('smith');
{noformat}

3) Combined explicit and implicit prepared statement
{noformat}
PREPARE fooplan (int, text) AS
    SELECT spi_prepared_plan_test_one($2);
EXECUTE fooplan(1, 'doe');
EXECUTE fooplan(2, 'doe');
EXECUTE fooplan(3, 'doe');
EXECUTE fooplan(4, 'smith');
EXECUTE fooplan(5, 'smith');
EXECUTE fooplan(6, 'smith');
EXECUTE fooplan(7, 'smith3');
EXECUTE fooplan(8, 'smith3');
{noformat}

  was:
There are several scenarios that prepared statement can be used:
1. Explicit prepared statement, either in SQL statement or in JDBC/ODBC
2. Implicit prepared statement, i.e. (table) functions, UDFs

In prepared statement, the query is planned once, while the plan is executed multiple times. We need to allocate resource for the multiple executions of the plan. Otherwise, it may error out or even core dump.

Here are several cases that hit this problem:
1. Explicit prepared statement in (table) function and UDF
{noformat}
CREATE OR REPLACE FUNCTION f()
RETURNS VOID AS $$
    plpy.execute("DROP TABLE IF EXISTS t")
    plpy.execute("CREATE TABLE t(key TEXT, value DOUBLE PRECISION)")
    sql_plan = plpy.prepare("""
        INSERT INTO test
        SELECT 
            unnest($1) as key, 
            unnest($2) as value
        """, ['text[]', 'double precision[]'])
    plpy.execute(sql_plan,  [['a'], [1]])
    plpy.execute(sql_plan,  [['a'], [1]])
$$ LANGUAGE plpythonu VOLATILE;

SELECT f();
Error:
ERROR: plpy.SPIError: could not serialize unrecognized node type: 45758608 (plpython.c:4651) CONTEXT: Traceback (most recent call last): PL/Python function "f", line 13, in <module> [['a'], [1]]) PL/Python function "f"
{noformat}

2. Implicit prepared statement in (table) function and UDF
{noformat}
CREATE TABLE users (
	fname text not null,
	lname text not null,
	username text,
	userid serial
	-- , PRIMARY KEY(lname, fname) 
	) DISTRIBUTED BY (userid);

INSERT INTO users (fname, lname, username) VALUES ('jane', 'doe', 'j_doe');
INSERT INTO users (fname, lname, username) VALUES ('john', 'doe', 'johnd');
INSERT INTO users (fname, lname, username) VALUES ('willem', 'doe', 'w_doe');
INSERT INTO users (fname, lname, username) VALUES ('rick', 'smith', 'slash');

CREATE FUNCTION spi_prepared_plan_test_one(a text)
RETURNS text
AS
'
if not SD.has_key("myplan"):
    q = "SELECT count(*) FROM users WHERE lname = $1"
    SD["myplan"] = plpy.prepare(q, [ "text" ])
try:
    rv = plpy.execute(SD["myplan"], [a])
    return "there are " + str(rv[0]["count"]) + " " + str(a) + "s"
except Exception, ex:
    plpy.error(str(ex))
return None
'
LANGUAGE plpythonu;


select spi_prepared_plan_test_one('doe');
select spi_prepared_plan_test_one('doe');
select spi_prepared_plan_test_one('smith');
{noformat}

3. Combined explicit and implicit prepared statement
{noformat}
PREPARE fooplan (int, text) AS
    SELECT spi_prepared_plan_test_one($2);
EXECUTE fooplan(1, 'doe');
EXECUTE fooplan(2, 'doe');
EXECUTE fooplan(3, 'doe');
EXECUTE fooplan(4, 'smith');
EXECUTE fooplan(5, 'smith');
EXECUTE fooplan(6, 'smith');
EXECUTE fooplan(7, 'smith3');
EXECUTE fooplan(8, 'smith3');
{noformat}


> Allocate query resource for multiple EXECUTIONs in prepared statement
> ---------------------------------------------------------------------
>
>                 Key: HAWQ-412
>                 URL: https://issues.apache.org/jira/browse/HAWQ-412
>             Project: Apache HAWQ
>          Issue Type: Bug
>          Components: Core, Query Execution
>    Affects Versions: 2.0.0-beta-incubating
>            Reporter: Ruilong Huo
>            Assignee: George Caragea
>             Fix For: 2.0.0
>
>
> There are several scenarios that prepared statement can be used:
> 1) Explicit prepared statement, either in SQL statement or in JDBC/ODBC
> 2) Implicit prepared statement, i.e. (table) functions, UDFs
> In prepared statement, the query is planned once, while the plan is executed multiple times. We need to allocate resource for the multiple executions of the plan. Otherwise, it may error out or even core dump.
> Here are several cases that hit this problem:
> 1) Explicit prepared statement in (table) function and UDF
> {noformat}
> CREATE OR REPLACE FUNCTION f()
> RETURNS VOID AS $$
>     plpy.execute("DROP TABLE IF EXISTS t")
>     plpy.execute("CREATE TABLE t(key TEXT, value DOUBLE PRECISION)")
>     sql_plan = plpy.prepare("""
>         INSERT INTO test
>         SELECT 
>             unnest($1) as key, 
>             unnest($2) as value
>         """, ['text[]', 'double precision[]'])
>     plpy.execute(sql_plan,  [['a'], [1]])
>     plpy.execute(sql_plan,  [['a'], [1]])
> $$ LANGUAGE plpythonu VOLATILE;
> SELECT f();
> Error:
> ERROR: plpy.SPIError: could not serialize unrecognized node type: 45758608 (plpython.c:4651) CONTEXT: Traceback (most recent call last): PL/Python function "f", line 13, in <module> [['a'], [1]]) PL/Python function "f"
> {noformat}
> 2) Implicit prepared statement in (table) function and UDF
> {noformat}
> CREATE TABLE users (
> 	fname text not null,
> 	lname text not null,
> 	username text,
> 	userid serial
> 	-- , PRIMARY KEY(lname, fname) 
> 	) DISTRIBUTED BY (userid);
> INSERT INTO users (fname, lname, username) VALUES ('jane', 'doe', 'j_doe');
> INSERT INTO users (fname, lname, username) VALUES ('john', 'doe', 'johnd');
> INSERT INTO users (fname, lname, username) VALUES ('willem', 'doe', 'w_doe');
> INSERT INTO users (fname, lname, username) VALUES ('rick', 'smith', 'slash');
> CREATE FUNCTION spi_prepared_plan_test_one(a text)
> RETURNS text
> AS
> '
> if not SD.has_key("myplan"):
>     q = "SELECT count(*) FROM users WHERE lname = $1"
>     SD["myplan"] = plpy.prepare(q, [ "text" ])
> try:
>     rv = plpy.execute(SD["myplan"], [a])
>     return "there are " + str(rv[0]["count"]) + " " + str(a) + "s"
> except Exception, ex:
>     plpy.error(str(ex))
> return None
> '
> LANGUAGE plpythonu;
> select spi_prepared_plan_test_one('doe');
> select spi_prepared_plan_test_one('doe');
> select spi_prepared_plan_test_one('smith');
> {noformat}
> 3) Combined explicit and implicit prepared statement
> {noformat}
> PREPARE fooplan (int, text) AS
>     SELECT spi_prepared_plan_test_one($2);
> EXECUTE fooplan(1, 'doe');
> EXECUTE fooplan(2, 'doe');
> EXECUTE fooplan(3, 'doe');
> EXECUTE fooplan(4, 'smith');
> EXECUTE fooplan(5, 'smith');
> EXECUTE fooplan(6, 'smith');
> EXECUTE fooplan(7, 'smith3');
> EXECUTE fooplan(8, 'smith3');
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)