You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@age.apache.org by jo...@apache.org on 2022/07/15 07:21:02 UTC

[age] branch master updated: Fix Bug in integer serialization for integers in GIN

This is an automated email from the ASF dual-hosted git repository.

joshinnis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/age.git


The following commit(s) were added to refs/heads/master by this push:
     new 392a937  Fix Bug in integer serialization for integers in GIN
392a937 is described below

commit 392a937bdfa7c7a869798d7dca93f250c94d42e9
Author: Josh Innis <Jo...@gmail.com>
AuthorDate: Fri Jul 15 16:19:47 2022 +0900

    Fix Bug in integer serialization for integers in GIN
    
    Fixed a bug where integers were not being serialized
    correctly when stored in GIN indices.
    
    Added more regression tests in light of the found issue
---
 regress/expected/index.out         |  87 +++++++++++++++++++++++--------
 regress/sql/index.sql              |  59 ++++++++++++++-------
 src/backend/utils/adt/agtype_gin.c | 102 ++++++++++++++++++++-----------------
 3 files changed, 163 insertions(+), 85 deletions(-)

diff --git a/regress/expected/index.out b/regress/expected/index.out
index 87c316e..a27ca5b 100644
--- a/regress/expected/index.out
+++ b/regress/expected/index.out
@@ -4,6 +4,7 @@ SET search_path TO ag_catalog;
 SET enable_mergejoin = ON;
 SET enable_hashjoin = ON;
 SET enable_nestloop = ON;
+SET enable_seqscan = false;
 SELECT create_graph('cypher_index');
 NOTICE:  graph "cypher_index" has been created
  create_graph 
@@ -227,19 +228,19 @@ SELECT * FROM cypher('cypher_index', $$ MATCH(n) DETACH DELETE n $$) AS (a agtyp
  * Section 2: Graphid Indices to Improve Join Performance
  */
 SELECT * FROM cypher('cypher_index', $$
-    CREATE (us:Country {name: "United States"}),
-        (ca:Country {name: "Canada"}),
-        (mx:Country {name: "Mexico"}),
-        (us)<-[:has_city]-(:City {name:"New York", country_code:"US"}),
-        (us)<-[:has_city]-(:City {name:"San Fransisco", country_code:"US"}),
-        (us)<-[:has_city]-(:City {name:"Los Angeles", country_code:"US"}),
-        (us)<-[:has_city]-(:City {name:"Seattle", country_code:"US"}),
-        (ca)<-[:has_city]-(:City {name:"Vancouver", country_code:"CA"}),
-        (ca)<-[:has_city]-(:City {name:"Toroto", country_code:"CA"}),
-        (ca)<-[:has_city]-(:City {name:"Montreal", country_code:"CA"}),
-        (mx)<-[:has_city]-(:City {name:"Mexico City", country_code:"MX"}),
-        (mx)<-[:has_city]-(:City {name:"Monterrey", country_code:"MX"}),
-        (mx)<-[:has_city]-(:City {name:"Tijuana", country_code:"MX"})
+    CREATE (us:Country {name: "United States", country_code: "US", life_expectancy: 78.79, gdp: 20.94::numeric}),
+        (ca:Country {name: "Canada", country_code: "CA", life_expectancy: 82.05, gdp: 1.643::numeric}),
+        (mx:Country {name: "Mexico", country_code: "MX", life_expectancy: 75.05, gdp: 1.076::numeric}),
+        (us)<-[:has_city]-(:City {city_id: 1, name:"New York", west_coast: false, country_code:"US"}),
+        (us)<-[:has_city]-(:City {city_id: 2, name:"San Fransisco", west_coast: true, country_code:"US"}),
+        (us)<-[:has_city]-(:City {city_id: 3, name:"Los Angeles", west_coast: true, country_code:"US"}),
+        (us)<-[:has_city]-(:City {city_id: 4, name:"Seattle", west_coast: true, country_code:"US"}),
+        (ca)<-[:has_city]-(:City {city_id: 5, name:"Vancouver", west_coast: true, country_code:"CA"}),
+        (ca)<-[:has_city]-(:City {city_id: 6, name:"Toroto", west_coast: false, country_code:"CA"}),
+        (ca)<-[:has_city]-(:City {city_id: 7, name:"Montreal", west_coast: false, country_code:"CA"}),
+        (mx)<-[:has_city]-(:City {city_id: 8, name:"Mexico City", west_coast: false, country_code:"MX"}),
+        (mx)<-[:has_city]-(:City {city_id: 9, name:"Monterrey", west_coast: false, country_code:"MX"}),
+        (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"})
 $$) as (n agtype);
  n 
 ---
@@ -299,19 +300,63 @@ SET enable_nestloop = ON;
 --
 -- Section 3: Agtype GIN Indices to Improve WHERE clause Performance
 --
-CREATE INDEX load_city_gid_idx
+CREATE INDEX load_city_gin_idx
 ON cypher_index."City" USING gin (properties);
-SELECT COUNT(*) FROM cypher('cypher_index', $$
-    MATCH (c:City {country_code: "AD"})
+CREATE INDEX load_country_gin_idx
+ON cypher_index."Country" USING gin (properties);
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:City {city_id: 1})
     RETURN c
 $$) as (n agtype);
- count 
--------
-     0
+                                                                       n                                                                        
+------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1970324836974593, "label": "City", "properties": {"name": "New York", "city_id": 1, "west_coast": false, "country_code": "US"}}::vertex
+(1 row)
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (:Country {country_code: "US"})<-[]-(city:City)
+    RETURN city
+$$) as (n agtype);
+                                                                         n                                                                          
+----------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1970324836974593, "label": "City", "properties": {"name": "New York", "city_id": 1, "west_coast": false, "country_code": "US"}}::vertex
+ {"id": 1970324836974594, "label": "City", "properties": {"name": "San Fransisco", "city_id": 2, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974595, "label": "City", "properties": {"name": "Los Angeles", "city_id": 3, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974596, "label": "City", "properties": {"name": "Seattle", "city_id": 4, "west_coast": true, "country_code": "US"}}::vertex
+(4 rows)
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:City {west_coast: true})
+    RETURN c
+$$) as (n agtype);
+                                                                         n                                                                          
+----------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1970324836974594, "label": "City", "properties": {"name": "San Fransisco", "city_id": 2, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974595, "label": "City", "properties": {"name": "Los Angeles", "city_id": 3, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974596, "label": "City", "properties": {"name": "Seattle", "city_id": 4, "west_coast": true, "country_code": "US"}}::vertex
+ {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex
+(4 rows)
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:Country {life_expectancy: 82.05})
+    RETURN c
+$$) as (n agtype);
+                                                                               n                                                                               
+---------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1407374883553282, "label": "Country", "properties": {"gdp": 1.643::numeric, "name": "Canada", "country_code": "CA", "life_expectancy": 82.05}}::vertex
+(1 row)
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:Country {gdp: 20.94::numeric})
+    RETURN c
+$$) as (n agtype);
+                                                                                  n                                                                                   
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"id": 1407374883553281, "label": "Country", "properties": {"gdp": 20.94::numeric, "name": "United States", "country_code": "US", "life_expectancy": 78.79}}::vertex
 (1 row)
 
-DROP INDEX load_city_gid_idx;
-ERROR:  index "load_city_gid_idx" does not exist
+DROP INDEX cypher_index.load_city_gin_idx;
+DROP INDEX cypher_index.load_country_gin_idx;
 --
 -- Section 4: Index use with WHERE clause
 --
diff --git a/regress/sql/index.sql b/regress/sql/index.sql
index 0495db0..9e1cd04 100644
--- a/regress/sql/index.sql
+++ b/regress/sql/index.sql
@@ -6,6 +6,7 @@ SET search_path TO ag_catalog;
 SET enable_mergejoin = ON;
 SET enable_hashjoin = ON;
 SET enable_nestloop = ON;
+SET enable_seqscan = false;
 
 SELECT create_graph('cypher_index');
 
@@ -131,19 +132,19 @@ SELECT * FROM cypher('cypher_index', $$ MATCH(n) DETACH DELETE n $$) AS (a agtyp
  * Section 2: Graphid Indices to Improve Join Performance
  */
 SELECT * FROM cypher('cypher_index', $$
-    CREATE (us:Country {name: "United States"}),
-        (ca:Country {name: "Canada"}),
-        (mx:Country {name: "Mexico"}),
-        (us)<-[:has_city]-(:City {name:"New York", country_code:"US"}),
-        (us)<-[:has_city]-(:City {name:"San Fransisco", country_code:"US"}),
-        (us)<-[:has_city]-(:City {name:"Los Angeles", country_code:"US"}),
-        (us)<-[:has_city]-(:City {name:"Seattle", country_code:"US"}),
-        (ca)<-[:has_city]-(:City {name:"Vancouver", country_code:"CA"}),
-        (ca)<-[:has_city]-(:City {name:"Toroto", country_code:"CA"}),
-        (ca)<-[:has_city]-(:City {name:"Montreal", country_code:"CA"}),
-        (mx)<-[:has_city]-(:City {name:"Mexico City", country_code:"MX"}),
-        (mx)<-[:has_city]-(:City {name:"Monterrey", country_code:"MX"}),
-        (mx)<-[:has_city]-(:City {name:"Tijuana", country_code:"MX"})
+    CREATE (us:Country {name: "United States", country_code: "US", life_expectancy: 78.79, gdp: 20.94::numeric}),
+        (ca:Country {name: "Canada", country_code: "CA", life_expectancy: 82.05, gdp: 1.643::numeric}),
+        (mx:Country {name: "Mexico", country_code: "MX", life_expectancy: 75.05, gdp: 1.076::numeric}),
+        (us)<-[:has_city]-(:City {city_id: 1, name:"New York", west_coast: false, country_code:"US"}),
+        (us)<-[:has_city]-(:City {city_id: 2, name:"San Fransisco", west_coast: true, country_code:"US"}),
+        (us)<-[:has_city]-(:City {city_id: 3, name:"Los Angeles", west_coast: true, country_code:"US"}),
+        (us)<-[:has_city]-(:City {city_id: 4, name:"Seattle", west_coast: true, country_code:"US"}),
+        (ca)<-[:has_city]-(:City {city_id: 5, name:"Vancouver", west_coast: true, country_code:"CA"}),
+        (ca)<-[:has_city]-(:City {city_id: 6, name:"Toroto", west_coast: false, country_code:"CA"}),
+        (ca)<-[:has_city]-(:City {city_id: 7, name:"Montreal", west_coast: false, country_code:"CA"}),
+        (mx)<-[:has_city]-(:City {city_id: 8, name:"Mexico City", west_coast: false, country_code:"MX"}),
+        (mx)<-[:has_city]-(:City {city_id: 9, name:"Monterrey", west_coast: false, country_code:"MX"}),
+        (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"})
 $$) as (n agtype);
 
 ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id);
@@ -201,16 +202,40 @@ SET enable_nestloop = ON;
 --
 -- Section 3: Agtype GIN Indices to Improve WHERE clause Performance
 --
-CREATE INDEX load_city_gid_idx
+CREATE INDEX load_city_gin_idx
 ON cypher_index."City" USING gin (properties);
 
-SELECT COUNT(*) FROM cypher('cypher_index', $$
-    MATCH (c:City {country_code: "AD"})
+CREATE INDEX load_country_gin_idx
+ON cypher_index."Country" USING gin (properties);
+
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:City {city_id: 1})
+    RETURN c
+$$) as (n agtype);
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (:Country {country_code: "US"})<-[]-(city:City)
+    RETURN city
+$$) as (n agtype);
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:City {west_coast: true})
     RETURN c
 $$) as (n agtype);
 
-DROP INDEX load_city_gid_idx;
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:Country {life_expectancy: 82.05})
+    RETURN c
+$$) as (n agtype);
+
+SELECT * FROM cypher('cypher_index', $$
+    MATCH (c:Country {gdp: 20.94::numeric})
+    RETURN c
+$$) as (n agtype);
 
+DROP INDEX cypher_index.load_city_gin_idx;
+DROP INDEX cypher_index.load_country_gin_idx;
 --
 -- Section 4: Index use with WHERE clause
 --
diff --git a/src/backend/utils/adt/agtype_gin.c b/src/backend/utils/adt/agtype_gin.c
index fb28a51..669935c 100644
--- a/src/backend/utils/adt/agtype_gin.c
+++ b/src/backend/utils/adt/agtype_gin.c
@@ -500,53 +500,61 @@ static Datum make_scalar_key(const agtype_value *scalarVal, bool is_key)
     char buf[MAXINT8LEN + 1];
     switch (scalarVal->type)
     {
-        case AGTV_NULL:
-            Assert(!is_key);
-            item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
-            break;
-        case AGTV_INTEGER:
-            Assert(!is_key);
-            pg_lltoa(scalarVal->val.int_value, buf);
-            item = make_text_key(AGT_GIN_FLAG_NUM, buf, MAXINT8LEN + 1);
-            break;
-        case AGTV_FLOAT:
-            Assert(!is_key);
-            cstr = float8out_internal(scalarVal->val.float_value);
-            item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
-            break;
-        case AGTV_BOOL:
-            Assert(!is_key);
-            item = make_text_key(AGT_GIN_FLAG_BOOL,
-                                 scalarVal->val.boolean ? "t" : "f", 1);
-            break;
-        case AGTV_NUMERIC:
-            Assert(!is_key);
-            /*
-             * A normalized textual representation, free of trailing zeroes,
-             * is required so that numerically equal values will produce equal
-             * strings.
-             *
-             * It isn't ideal that numerics are stored in a relatively bulky
-             * textual format.  However, it's a notationally convenient way of
-             * storing a "union" type in the GIN B-Tree, and indexing Jsonb
-             * strings takes precedence.
-             */
-            cstr = numeric_normalize(scalarVal->val.numeric);
-            item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
-            pfree(cstr);
-            break;
-        case AGTV_STRING:
-            item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
-                                 scalarVal->val.string.val,
-                                 scalarVal->val.string.len);
-            break;
-        case AGTV_VERTEX:
-        case AGTV_EDGE:
-        case AGTV_PATH:
-            elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
-        default:
-            elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
-            break;
+    case AGTV_NULL:
+        Assert(!is_key);
+        item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
+        break;
+    case AGTV_INTEGER:
+    {
+        char *result;
+
+        Assert(!is_key);
+
+        pg_lltoa(scalarVal->val.int_value, buf);
+
+        result = pstrdup(buf);
+        item = make_text_key(AGT_GIN_FLAG_NUM, result, strlen(result));
+        break;
+    }
+    case AGTV_FLOAT:
+        Assert(!is_key);
+        cstr = float8out_internal(scalarVal->val.float_value);
+        item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
+        break;
+    case AGTV_BOOL:
+        Assert(!is_key);
+        item = make_text_key(AGT_GIN_FLAG_BOOL,
+                             scalarVal->val.boolean ? "t" : "f", 1);
+        break;
+    case AGTV_NUMERIC:
+        Assert(!is_key);
+        /*
+         * A normalized textual representation, free of trailing zeroes,
+         * is required so that numerically equal values will produce equal
+         * strings.
+         *
+         * It isn't ideal that numerics are stored in a relatively bulky
+         * textual format.  However, it's a notationally convenient way of
+         * storing a "union" type in the GIN B-Tree, and indexing Jsonb
+         * strings takes precedence.
+         */
+        cstr = numeric_normalize(scalarVal->val.numeric);
+        item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
+        pfree(cstr);
+        break;
+    case AGTV_STRING:
+        item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
+                             scalarVal->val.string.val,
+                             scalarVal->val.string.len);
+        break;
+    case AGTV_VERTEX:
+    case AGTV_EDGE:
+    case AGTV_PATH:
+        elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
+        break;
+    default:
+        elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
+        break;
     }
 
     return item;