You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Richard Tia (Jira)" <ji...@apache.org> on 2022/07/05 15:55:00 UTC

[jira] [Created] (ARROW-16980) [Python] Results of running a substrait plan against a tpch data table written into parquet are all null

Richard Tia created ARROW-16980:
-----------------------------------

             Summary: [Python] Results of running a substrait plan against a tpch data table written into parquet are all null
                 Key: ARROW-16980
                 URL: https://issues.apache.org/jira/browse/ARROW-16980
             Project: Apache Arrow
          Issue Type: Bug
          Components: Python
            Reporter: Richard Tia
         Attachments: lineitem.json

SQL
{code:java}
SELECT l_returnflag, l_linestatus FROM lineitem{code}
 

Substrait plan type info for l_returnflag:
{code:java}
{
"fixedChar": {
"length": 1,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}{code}
fixedChar is an extension type.

 

Error:
{code:java}
pyarrow/table.pxi:1223: in pyarrow.lib.ChunkedArray.chunks.__get__
    ???
pyarrow/table.pxi:1241: in iterchunks
    ???
pyarrow/table.pxi:1185: in pyarrow.lib.ChunkedArray.chunk
    ???
pyarrow/public-api.pxi:200: in pyarrow.lib.pyarrow_wrap_array
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
>   ???
E   AttributeError: 'pyarrow.lib.BaseExtensionType' object has no attribute '__arrow_ext_class__'

{code}
 

Reproduction Steps:
{code:java}
import os

import pyarrow as pa
import pyarrow.substrait as substrait

from pyarrow import json as pyarrow_json
from pyarrow.lib import tobytes


substrait_query = <code block below>

json_file_path = os.path.join(<path>, 'lineitem.json')
arrow_data_path_ipc = os.path.join(<path>, 'substrait_data.arrow')
substrait_query = tobytes(substrait_query.replace("FILENAME_PLACEHOLDER", arrow_data_path_ipc))


# Save lineitem.json into IPC arrow binary file
table = pyarrow_json.read_json(json_file_path)

with pa.ipc.RecordBatchFileWriter(arrow_data_path_ipc, schema=table.schema) as writer:
    writer.write_table(table)


# Run the substrait query plan
buf = pa._substrait._parse_json_plan(substrait_query)
reader = substrait.run_query(buf)
result = reader.read_all()

print(result.columns[0].chunks)


{code}
lineitem.json is attached.

Substrait query plan:
{code:java}
"""
{
  "extensionUris": [],
  "extensions": [],
  "relations": [{
    "root": {
      "input": {
        "project": {
          "common": {
          },
          "input": {
            "read": {
              "common": {
                "direct": {
                }
              },
              "baseSchema": {
                "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"],
                "struct": {
                  "types": [{
                    "i64": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "i64": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "i64": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "i32": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "decimal": {
                      "scale": 0,
                      "precision": 19,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 1,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 1,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "date": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "date": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "date": {
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 25,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "fixedChar": {
                      "length": 10,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }, {
                    "varchar": {
                      "length": 44,
                      "typeVariationReference": 0,
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  }],
                  "typeVariationReference": 0,
                  "nullability": "NULLABILITY_REQUIRED"
                }
              },
             "local_files": {
                 "items": [
                 {
                     "uri_file": "file://FILENAME_PLACEHOLDER"
                 }
                 ]
             }
            }
          },
          "expressions": [{
            "selection": {
              "directReference": {
                "structField": {
                  "field": 8
                }
              },
              "rootReference": {
              }
            }
          }, {
            "selection": {
              "directReference": {
                "structField": {
                  "field": 9
                }
              },
              "rootReference": {
              }
            }
          }]
        }
      },
      "names": ["L_RETURNFLAG", "L_LINESTATUS"]
    }
  }],
  "expectedTypeUrls": []
}
"""{code}
 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)