You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Richard Tia (Jira)" <ji...@apache.org> on 2022/07/05 15:55:00 UTC
[jira] [Created] (ARROW-16980) [Python] Results of running a substrait plan against a tpch data table written into parquet are all null
Richard Tia created ARROW-16980:
-----------------------------------
Summary: [Python] Results of running a substrait plan against a tpch data table written into parquet are all null
Key: ARROW-16980
URL: https://issues.apache.org/jira/browse/ARROW-16980
Project: Apache Arrow
Issue Type: Bug
Components: Python
Reporter: Richard Tia
Attachments: lineitem.json
SQL query:
{code:java}
SELECT l_returnflag, l_linestatus FROM lineitem{code}
Substrait plan type information for the l_returnflag column:
{code:java}
{
"fixedChar": {
"length": 1,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}{code}
fixedChar is an extension type.
Error:
{code:java}
pyarrow/table.pxi:1223: in pyarrow.lib.ChunkedArray.chunks.__get__
???
pyarrow/table.pxi:1241: in iterchunks
???
pyarrow/table.pxi:1185: in pyarrow.lib.ChunkedArray.chunk
???
pyarrow/public-api.pxi:200: in pyarrow.lib.pyarrow_wrap_array
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E AttributeError: 'pyarrow.lib.BaseExtensionType' object has no attribute '__arrow_ext_class__'
{code}
Reproduction Steps:
{code:java}
import pyarrow as pa
import pyarrow.substrait as substrait
from pyarrow import json as pyarrow_json
from pyarrow.lib import tobytes
substrait_query = <code block below>
json_file_path = os.path.join(<path>, 'lineitem.json')
arrow_data_path_ipc = os.path.join(<path>, 'substrait_data.arrow')
substrait_query = tobytes(substrait_query.replace("FILENAME_PLACEHOLDER", arrow_data_path_ipc))
# Save lineitem.json into IPC arrow binary file
table = pyarrow_json.read_json(json_file_path)
with pa.ipc.RecordBatchFileWriter(filepath, schema=table.schema, arrow_data_path_ipc) as writer:
writer.write_table(table)
# Run the substrait query plan
buf = pa._substrait._parse_json_plan(substrait_query)
reader = substrait.run_query(buf)
result = reader.read_all()
print(result.columns[0].chunks)
{code}
The lineitem.json data file is attached to this issue.
Substrait query plan (the value assigned to substrait_query above):
{code:java}
"""
{
"extensionUris": [],
"extensions": [],
"relations": [{
"root": {
"input": {
"project": {
"common": {
},
"input": {
"read": {
"common": {
"direct": {
}
},
"baseSchema": {
"names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"],
"struct": {
"types": [{
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"i32": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"decimal": {
"scale": 0,
"precision": 19,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"decimal": {
"scale": 0,
"precision": 19,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"decimal": {
"scale": 0,
"precision": 19,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"decimal": {
"scale": 0,
"precision": 19,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"fixedChar": {
"length": 1,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"fixedChar": {
"length": 1,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"date": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"date": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"date": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"fixedChar": {
"length": 25,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"fixedChar": {
"length": 10,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"varchar": {
"length": 44,
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}],
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"local_files": {
"items": [
{
"uri_file": "file://FILENAME_PLACEHOLDER"
}
]
}
}
},
"expressions": [{
"selection": {
"directReference": {
"structField": {
"field": 8
}
},
"rootReference": {
}
}
}, {
"selection": {
"directReference": {
"structField": {
"field": 9
}
},
"rootReference": {
}
}
}]
}
},
"names": ["L_RETURNFLAG", "L_LINESTATUS"]
}
}],
"expectedTypeUrls": []
}
"""{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)