You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@age.apache.org by jo...@apache.org on 2022/07/15 07:21:02 UTC
[age] branch master updated: Fix Bug in integer serialization for integers in GIN
This is an automated email from the ASF dual-hosted git repository.
joshinnis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/age.git
The following commit(s) were added to refs/heads/master by this push:
new 392a937 Fix Bug in integer serialization for integers in GIN
392a937 is described below
commit 392a937bdfa7c7a869798d7dca93f250c94d42e9
Author: Josh Innis <Jo...@gmail.com>
AuthorDate: Fri Jul 15 16:19:47 2022 +0900
Fix Bug in integer serialization for integers in GIN
Fixed a bug where integers were not being serialized
correctly when stored in GIN indices.
Added more regression tests in light of the found issue
---
regress/expected/index.out | 87 +++++++++++++++++++++++--------
regress/sql/index.sql | 59 ++++++++++++++-------
src/backend/utils/adt/agtype_gin.c | 102 ++++++++++++++++++++-----------------
3 files changed, 163 insertions(+), 85 deletions(-)
diff --git a/regress/expected/index.out b/regress/expected/index.out
index 87c316e..a27ca5b 100644
--- a/regress/expected/index.out
+++ b/regress/expected/index.out
@@ -4,6 +4,7 @@ SET search_path TO ag_catalog;
SET enable_mergejoin = ON;
SET enable_hashjoin = ON;
SET enable_nestloop = ON;
+SET enable_seqscan = false;
SELECT create_graph('cypher_index');
NOTICE: graph "cypher_index" has been created
create_graph
@@ -227,19 +228,19 @@ SELECT * FROM cypher('cypher_index', $$ MATCH(n) DETACH DELETE n $$) AS (a agtyp
* Section 2: Graphid Indices to Improve Join Performance
*/
SELECT * FROM cypher('cypher_index', $$
- CREATE (us:Country {name: "United States"}),
- (ca:Country {name: "Canada"}),
- (mx:Country {name: "Mexico"}),
- (us)<-[:has_city]-(:City {name:"New York", country_code:"US"}),
- (us)<-[:has_city]-(:City {name:"San Fransisco", country_code:"US"}),
- (us)<-[:has_city]-(:City {name:"Los Angeles", country_code:"US"}),
- (us)<-[:has_city]-(:City {name:"Seattle", country_code:"US"}),
- (ca)<-[:has_city]-(:City {name:"Vancouver", country_code:"CA"}),
- (ca)<-[:has_city]-(:City {name:"Toroto", country_code:"CA"}),
- (ca)<-[:has_city]-(:City {name:"Montreal", country_code:"CA"}),
- (mx)<-[:has_city]-(:City {name:"Mexico City", country_code:"MX"}),
- (mx)<-[:has_city]-(:City {name:"Monterrey", country_code:"MX"}),
- (mx)<-[:has_city]-(:City {name:"Tijuana", country_code:"MX"})
+ CREATE (us:Country {name: "United States", country_code: "US", life_expectancy: 78.79, gdp: 20.94::numeric}),
+ (ca:Country {name: "Canada", country_code: "CA", life_expectancy: 82.05, gdp: 1.643::numeric}),
+ (mx:Country {name: "Mexico", country_code: "MX", life_expectancy: 75.05, gdp: 1.076::numeric}),
+ (us)<-[:has_city]-(:City {city_id: 1, name:"New York", west_coast: false, country_code:"US"}),
+ (us)<-[:has_city]-(:City {city_id: 2, name:"San Fransisco", west_coast: true, country_code:"US"}),
+ (us)<-[:has_city]-(:City {city_id: 3, name:"Los Angeles", west_coast: true, country_code:"US"}),
+ (us)<-[:has_city]-(:City {city_id: 4, name:"Seattle", west_coast: true, country_code:"US"}),
+ (ca)<-[:has_city]-(:City {city_id: 5, name:"Vancouver", west_coast: true, country_code:"CA"}),
+ (ca)<-[:has_city]-(:City {city_id: 6, name:"Toroto", west_coast: false, country_code:"CA"}),
+ (ca)<-[:has_city]-(:City {city_id: 7, name:"Montreal", west_coast: false, country_code:"CA"}),
+ (mx)<-[:has_city]-(:City {city_id: 8, name:"Mexico City", west_coast: false, country_code:"MX"}),
+ (mx)<-[:has_city]-(:City {city_id: 9, name:"Monterrey", west_coast: false, country_code:"MX"}),
+ (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"})
$$) as (n agtype);
n
---
@@ -299,19 +300,63 @@ SET enable_nestloop = ON;
--
-- Section 3: Agtype GIN Indices to Improve WHERE clause Performance
--
-CREATE INDEX load_city_gid_idx
+CREATE INDEX load_city_gin_idx
ON cypher_index."City" USING gin (properties);
-SELECT COUNT(*) FROM cypher('cypher_index', $$
- MATCH (c:City {country_code: "AD"})
+CREATE INDEX load_country_gin_idx
+ON cypher_index."Country" USING gin (properties);
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:City {city_id: 1})
RETURN c
$$) as (n agtype);
- count
--------
- 0
+ n
+------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1970324836974593, "label": "City", "properties": {"name": "New York", "city_id": 1, "west_coast": false, "country_code": "US"}}::vertex
+(1 row)
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (:Country {country_code: "US"})<-[]-(city:City)
+ RETURN city
+$$) as (n agtype);
+ n
+----------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1970324836974593, "label": "City", "properties": {"name": "New York", "city_id": 1, "west_coast": false, "country_code": "US"}}::vertex
+ {"id": 1970324836974594, "label": "City", "properties": {"name": "San Fransisco", "city_id": 2, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974595, "label": "City", "properties": {"name": "Los Angeles", "city_id": 3, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974596, "label": "City", "properties": {"name": "Seattle", "city_id": 4, "west_coast": true, "country_code": "US"}}::vertex
+(4 rows)
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:City {west_coast: true})
+ RETURN c
+$$) as (n agtype);
+ n
+----------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1970324836974594, "label": "City", "properties": {"name": "San Fransisco", "city_id": 2, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974595, "label": "City", "properties": {"name": "Los Angeles", "city_id": 3, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974596, "label": "City", "properties": {"name": "Seattle", "city_id": 4, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex
+(4 rows)
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:Country {life_expectancy: 82.05})
+ RETURN c
+$$) as (n agtype);
+ n
+---------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1407374883553282, "label": "Country", "properties": {"gdp": 1.643::numeric, "name": "Canada", "country_code": "CA", "life_expectancy": 82.05}}::vertex
+(1 row)
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:Country {gdp: 20.94::numeric})
+ RETURN c
+$$) as (n agtype);
+ n
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1407374883553281, "label": "Country", "properties": {"gdp": 20.94::numeric, "name": "United States", "country_code": "US", "life_expectancy": 78.79}}::vertex
(1 row)
-DROP INDEX load_city_gid_idx;
-ERROR: index "load_city_gid_idx" does not exist
+DROP INDEX cypher_index.load_city_gin_idx;
+DROP INDEX cypher_index.load_country_gin_idx;
--
-- Section 4: Index use with WHERE clause
--
diff --git a/regress/sql/index.sql b/regress/sql/index.sql
index 0495db0..9e1cd04 100644
--- a/regress/sql/index.sql
+++ b/regress/sql/index.sql
@@ -6,6 +6,7 @@ SET search_path TO ag_catalog;
SET enable_mergejoin = ON;
SET enable_hashjoin = ON;
SET enable_nestloop = ON;
+SET enable_seqscan = false;
SELECT create_graph('cypher_index');
@@ -131,19 +132,19 @@ SELECT * FROM cypher('cypher_index', $$ MATCH(n) DETACH DELETE n $$) AS (a agtyp
* Section 2: Graphid Indices to Improve Join Performance
*/
SELECT * FROM cypher('cypher_index', $$
- CREATE (us:Country {name: "United States"}),
- (ca:Country {name: "Canada"}),
- (mx:Country {name: "Mexico"}),
- (us)<-[:has_city]-(:City {name:"New York", country_code:"US"}),
- (us)<-[:has_city]-(:City {name:"San Fransisco", country_code:"US"}),
- (us)<-[:has_city]-(:City {name:"Los Angeles", country_code:"US"}),
- (us)<-[:has_city]-(:City {name:"Seattle", country_code:"US"}),
- (ca)<-[:has_city]-(:City {name:"Vancouver", country_code:"CA"}),
- (ca)<-[:has_city]-(:City {name:"Toroto", country_code:"CA"}),
- (ca)<-[:has_city]-(:City {name:"Montreal", country_code:"CA"}),
- (mx)<-[:has_city]-(:City {name:"Mexico City", country_code:"MX"}),
- (mx)<-[:has_city]-(:City {name:"Monterrey", country_code:"MX"}),
- (mx)<-[:has_city]-(:City {name:"Tijuana", country_code:"MX"})
+ CREATE (us:Country {name: "United States", country_code: "US", life_expectancy: 78.79, gdp: 20.94::numeric}),
+ (ca:Country {name: "Canada", country_code: "CA", life_expectancy: 82.05, gdp: 1.643::numeric}),
+ (mx:Country {name: "Mexico", country_code: "MX", life_expectancy: 75.05, gdp: 1.076::numeric}),
+ (us)<-[:has_city]-(:City {city_id: 1, name:"New York", west_coast: false, country_code:"US"}),
+ (us)<-[:has_city]-(:City {city_id: 2, name:"San Fransisco", west_coast: true, country_code:"US"}),
+ (us)<-[:has_city]-(:City {city_id: 3, name:"Los Angeles", west_coast: true, country_code:"US"}),
+ (us)<-[:has_city]-(:City {city_id: 4, name:"Seattle", west_coast: true, country_code:"US"}),
+ (ca)<-[:has_city]-(:City {city_id: 5, name:"Vancouver", west_coast: true, country_code:"CA"}),
+ (ca)<-[:has_city]-(:City {city_id: 6, name:"Toroto", west_coast: false, country_code:"CA"}),
+ (ca)<-[:has_city]-(:City {city_id: 7, name:"Montreal", west_coast: false, country_code:"CA"}),
+ (mx)<-[:has_city]-(:City {city_id: 8, name:"Mexico City", west_coast: false, country_code:"MX"}),
+ (mx)<-[:has_city]-(:City {city_id: 9, name:"Monterrey", west_coast: false, country_code:"MX"}),
+ (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"})
$$) as (n agtype);
ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id);
@@ -201,16 +202,40 @@ SET enable_nestloop = ON;
--
-- Section 3: Agtype GIN Indices to Improve WHERE clause Performance
--
-CREATE INDEX load_city_gid_idx
+CREATE INDEX load_city_gin_idx
ON cypher_index."City" USING gin (properties);
-SELECT COUNT(*) FROM cypher('cypher_index', $$
- MATCH (c:City {country_code: "AD"})
+CREATE INDEX load_country_gin_idx
+ON cypher_index."Country" USING gin (properties);
+
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:City {city_id: 1})
+ RETURN c
+$$) as (n agtype);
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (:Country {country_code: "US"})<-[]-(city:City)
+ RETURN city
+$$) as (n agtype);
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:City {west_coast: true})
RETURN c
$$) as (n agtype);
-DROP INDEX load_city_gid_idx;
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:Country {life_expectancy: 82.05})
+ RETURN c
+$$) as (n agtype);
+
+SELECT * FROM cypher('cypher_index', $$
+ MATCH (c:Country {gdp: 20.94::numeric})
+ RETURN c
+$$) as (n agtype);
+DROP INDEX cypher_index.load_city_gin_idx;
+DROP INDEX cypher_index.load_country_gin_idx;
--
-- Section 4: Index use with WHERE clause
--
diff --git a/src/backend/utils/adt/agtype_gin.c b/src/backend/utils/adt/agtype_gin.c
index fb28a51..669935c 100644
--- a/src/backend/utils/adt/agtype_gin.c
+++ b/src/backend/utils/adt/agtype_gin.c
@@ -500,53 +500,61 @@ static Datum make_scalar_key(const agtype_value *scalarVal, bool is_key)
char buf[MAXINT8LEN + 1];
switch (scalarVal->type)
{
- case AGTV_NULL:
- Assert(!is_key);
- item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
- break;
- case AGTV_INTEGER:
- Assert(!is_key);
- pg_lltoa(scalarVal->val.int_value, buf);
- item = make_text_key(AGT_GIN_FLAG_NUM, buf, MAXINT8LEN + 1);
- break;
- case AGTV_FLOAT:
- Assert(!is_key);
- cstr = float8out_internal(scalarVal->val.float_value);
- item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
- break;
- case AGTV_BOOL:
- Assert(!is_key);
- item = make_text_key(AGT_GIN_FLAG_BOOL,
- scalarVal->val.boolean ? "t" : "f", 1);
- break;
- case AGTV_NUMERIC:
- Assert(!is_key);
- /*
- * A normalized textual representation, free of trailing zeroes,
- * is required so that numerically equal values will produce equal
- * strings.
- *
- * It isn't ideal that numerics are stored in a relatively bulky
- * textual format. However, it's a notationally convenient way of
- * storing a "union" type in the GIN B-Tree, and indexing Jsonb
- * strings takes precedence.
- */
- cstr = numeric_normalize(scalarVal->val.numeric);
- item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
- pfree(cstr);
- break;
- case AGTV_STRING:
- item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
- scalarVal->val.string.val,
- scalarVal->val.string.len);
- break;
- case AGTV_VERTEX:
- case AGTV_EDGE:
- case AGTV_PATH:
- elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
- default:
- elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
- break;
+ case AGTV_NULL:
+ Assert(!is_key);
+ item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
+ break;
+ case AGTV_INTEGER:
+ {
+ char *result;
+
+ Assert(!is_key);
+
+ pg_lltoa(scalarVal->val.int_value, buf);
+
+ result = pstrdup(buf);
+ item = make_text_key(AGT_GIN_FLAG_NUM, result, strlen(result));
+ break;
+ }
+ case AGTV_FLOAT:
+ Assert(!is_key);
+ cstr = float8out_internal(scalarVal->val.float_value);
+ item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
+ break;
+ case AGTV_BOOL:
+ Assert(!is_key);
+ item = make_text_key(AGT_GIN_FLAG_BOOL,
+ scalarVal->val.boolean ? "t" : "f", 1);
+ break;
+ case AGTV_NUMERIC:
+ Assert(!is_key);
+ /*
+ * A normalized textual representation, free of trailing zeroes,
+ * is required so that numerically equal values will produce equal
+ * strings.
+ *
+ * It isn't ideal that numerics are stored in a relatively bulky
+ * textual format. However, it's a notationally convenient way of
+ * storing a "union" type in the GIN B-Tree, and indexing Jsonb
+ * strings takes precedence.
+ */
+ cstr = numeric_normalize(scalarVal->val.numeric);
+ item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
+ pfree(cstr);
+ break;
+ case AGTV_STRING:
+ item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
+ scalarVal->val.string.val,
+ scalarVal->val.string.len);
+ break;
+ case AGTV_VERTEX:
+ case AGTV_EDGE:
+ case AGTV_PATH:
+ elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
+ break;
+ default:
+ elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
+ break;
}
return item;