You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@arrow.apache.org by "Michael Spector (Jira)" <ji...@apache.org> on 2020/11/11 07:48:00 UTC

[jira] [Created] (ARROW-10553) Panic when reading Parquet file produced with parquet-cpp

Michael Spector created ARROW-10553:
---------------------------------------

             Summary: Panic when reading Parquet file produced with parquet-cpp
                 Key: ARROW-10553
                 URL: https://issues.apache.org/jira/browse/ARROW-10553
             Project: Apache Arrow
          Issue Type: Bug
          Components: Rust
    Affects Versions: 2.0.0
         Environment: Windows 10 x86_64
Cargo nightly
            Reporter: Michael Spector


See the attached Parquet file, which was created with parquet-cpp.

The file metadata is:

 

{color:#dcdfe4}creator: parquet-cpp version 1.5.1-SNAPSHOT

file schema: schema
--------------------------------------------------------------------------------
__sys_isSystemRelocated: OPTIONAL INT64 R:0 D:1
__sys_schemaId: OPTIONAL INT64 R:0 D:1
__sys_invOffsetLSID: OPTIONAL INT64 R:0 D:1
__sys_invOffsetGroupIdx: OPTIONAL INT64 R:0 D:1
__sys_invOffsetRecordIdx: OPTIONAL INT64 R:0 D:1
_rid: OPTIONAL BINARY L:STRING R:0 D:1
__sys_sequenceNumber: OPTIONAL INT64 R:0 D:1
__sys_recordIndex: OPTIONAL INT64 R:0 D:1
__sys_isTombstone: OPTIONAL INT64 R:0 D:1
_ts: OPTIONAL INT64 R:0 D:1
partitionKey: OPTIONAL BINARY L:STRING R:0 D:1
entityType: OPTIONAL BINARY L:STRING R:0 D:1
ttl: OPTIONAL INT64 R:0 D:1
tripId: OPTIONAL INT32 R:0 D:1
vin: OPTIONAL BINARY L:STRING R:0 D:1
state: OPTIONAL BINARY L:STRING R:0 D:1
region: OPTIONAL INT32 R:0 D:1
outsideTemperature: OPTIONAL INT64 R:0 D:1
engineTemperature: OPTIONAL INT64 R:0 D:1
speed: OPTIONAL INT64 R:0 D:1
fuel: OPTIONAL INT64 R:0 D:1
fuelRate: OPTIONAL DOUBLE R:0 D:1
engineoil: OPTIONAL INT64 R:0 D:1
tirepressure: OPTIONAL INT64 R:0 D:1
odometer: OPTIONAL DOUBLE R:0 D:1
accelerator_pedal_position: OPTIONAL INT64 R:0 D:1
parking_brake_status: OPTIONAL BOOLEAN R:0 D:1
brake_pedal_status: OPTIONAL BOOLEAN R:0 D:1
headlamp_status: OPTIONAL BOOLEAN R:0 D:1
transmission_gear_position: OPTIONAL INT64 R:0 D:1
ignition_status: OPTIONAL BOOLEAN R:0 D:1
windshield_wiper_status: OPTIONAL BOOLEAN R:0 D:1
abs: OPTIONAL BOOLEAN R:0 D:1
refrigerationUnitKw: OPTIONAL DOUBLE R:0 D:1
refrigerationUnitTemp: OPTIONAL DOUBLE R:0 D:1
timestamp: OPTIONAL BINARY L:STRING R:0 D:1
id: OPTIONAL BINARY L:STRING R:0 D:1
_etag: OPTIONAL BINARY L:STRING R:0 D:1
__sys_value: OPTIONAL BINARY L:STRING R:0 D:1

row group 1: RC:27150 TS:2481123 OFFSET:4
--------------------------------------------------------------------------------
__sys_isSystemRelocated: INT64 SNAPPY DO:4 FPO:28 SZ:102/98/0.96 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 0, num_nulls: 0]
__sys_schemaId: INT64 SNAPPY DO:205 FPO:220 SZ:51/48/0.94 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[num_nulls: 27150, min/max not defined]
__sys_invOffsetLSID: INT64 SNAPPY DO:308 FPO:323 SZ:51/48/0.94 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[num_nulls: 27150, min/max not defined]
__sys_invOffsetGroupIdx: INT64 SNAPPY DO:416 FPO:431 SZ:51/48/0.94 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[num_nulls: 27150, min/max not defined]
__sys_invOffsetRecordIdx: INT64 SNAPPY DO:528 FPO:543 SZ:51/48/0.94 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[num_nulls: 27150, min/max not defined]
_rid: BINARY SNAPPY DO:641 FPO:137000 SZ:187417/811272/4.33 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: o9dcAMA1y14+AAAAAAAABA==, max: o9dcAMA1y17zaQAAAAAABA==, num_nulls: 0]
__sys_sequenceNumber: INT64 SNAPPY DO:188156 FPO:296856 SZ:159746/268260/1.68 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 3, max: 27152, num_nulls: 0]
__sys_recordIndex: INT64 SNAPPY DO:348005 FPO:456699 SZ:159740/268260/1.68 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 27149, num_nulls: 0]
__sys_isTombstone: INT64 SNAPPY DO:507845 FPO:507860 SZ:51/48/0.94 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[num_nulls: 27150, min/max not defined]
_ts: INT64 SNAPPY DO:507954 FPO:510167 SZ:3974/6137/1.54 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 1597365315, max: 1597365859, num_nulls: 0]
partitionKey: BINARY SNAPPY DO:512012 FPO:512256 SZ:13967/14026/1.00 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0A4SMSAGR5CA4LAY6-2020-08, max: YKO1Q8RX7Z20BVBG0-2020-08, num_nulls: 0]
entityType: BINARY SNAPPY DO:526088 FPO:526124 SZ:110/106/0.96 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: VehicleTelemetry, max: VehicleTelemetry, num_nulls: 0]
ttl: INT64 SNAPPY DO:526285 FPO:526309 SZ:102/98/0.96 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 5184000, max: 5184000, num_nulls: 0]
tripId: INT32 SNAPPY DO:526471 FPO:526491 SZ:56/52/0.93 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[no stats for this column]
vin: BINARY SNAPPY DO:526568 FPO:526787 SZ:13926/13930/1.00 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0A4SMSAGR5CA4LAY6, max: YKO1Q8RX7Z20BVBG0, num_nulls: 0]
state: BINARY SNAPPY DO:540578 FPO:540647 SZ:13746/13748/1.00 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: AL, max: WI, num_nulls: 0]
region: INT32 SNAPPY DO:554380 FPO:554400 SZ:56/52/0.93 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[no stats for this column]
outsideTemperature: INT64 SNAPPY DO:554477 FPO:554544 SZ:13776/13801/1.00 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 2, max: 100, num_nulls: 0]
engineTemperature: INT64 SNAPPY DO:568354 FPO:570383 SZ:32711/34701/1.06 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 500, num_nulls: 0]
speed: INT64 SNAPPY DO:601165 FPO:601597 SZ:24326/24713/1.02 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 100, num_nulls: 0]
fuel: INT64 SNAPPY DO:625579 FPO:625766 SZ:20687/20830/1.01 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 39, num_nulls: 0]
fuelRate: DOUBLE SNAPPY DO:646353 FPO:648774 SZ:36497/38895/1.07 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 8.0, max: 14.0, num_nulls: 0]
engineoil: INT64 SNAPPY DO:682941 FPO:683172 SZ:20731/20918/1.01 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 50, num_nulls: 0]
tirepressure: INT64 SNAPPY DO:703764 FPO:703995 SZ:20731/20918/1.01 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 50, num_nulls: 0]
odometer: DOUBLE SNAPPY DO:724590 FPO:762499 SZ:85561/121114/1.42 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 36306.0, max: 209363.94, num_nulls: 0]
accelerator_pedal_position: INT64 SNAPPY DO:810242 FPO:810670 SZ:24322/24705/1.02 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 0, max: 99, num_nulls: 0]
parking_brake_status: BOOLEAN SNAPPY DO:834673 FPO:834673 SZ:3444/3439/1.00 VC:27150 ENC:PLAIN,RLE ST:[min: false, max: true, num_nulls: 0]
brake_pedal_status: BOOLEAN SNAPPY DO:838189 FPO:838189 SZ:3444/3439/1.00 VC:27150 ENC:PLAIN,RLE ST:[min: false, max: true, num_nulls: 0]
headlamp_status: BOOLEAN SNAPPY DO:841703 FPO:841703 SZ:3444/3439/1.00 VC:27150 ENC:PLAIN,RLE ST:[min: false, max: true, num_nulls: 0]
transmission_gear_position: INT64 SNAPPY DO:845214 FPO:845268 SZ:10371/10382/1.00 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 1, max: 7, num_nulls: 0]
ignition_status: BOOLEAN SNAPPY DO:855694 FPO:855694 SZ:3444/3439/1.00 VC:27150 ENC:PLAIN,RLE ST:[min: false, max: true, num_nulls: 0]
windshield_wiper_status: BOOLEAN SNAPPY DO:859205 FPO:859205 SZ:3444/3439/1.00 VC:27150 ENC:PLAIN,RLE ST:[min: false, max: true, num_nulls: 0]
abs: BOOLEAN SNAPPY DO:862724 FPO:862724 SZ:3444/3439/1.00 VC:27150 ENC:PLAIN,RLE ST:[min: false, max: true, num_nulls: 0]
refrigerationUnitKw: DOUBLE SNAPPY DO:866223 FPO:870944 SZ:42191/46450/1.10 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 13.07, max: 73.69, num_nulls: 0]
refrigerationUnitTemp: DOUBLE SNAPPY DO:908516 FPO:912705 SZ:38265/42143/1.10 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 15.76, max: 58.86, num_nulls: 0]
timestamp: BINARY SNAPPY DO:946885 FPO:1206643 SZ:310824/916935/2.95 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 2020-08-14T00:35:14.7359902Z, max: 2020-08-14T00:44:19.8774902Z, num_nulls: 0]
id: BINARY SNAPPY DO:1257823 FPO:2203836 SZ:1029368/1135450/1.10 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: 00015f80-038f-48a3-b95f-74af977b1c50, max: fffffc48-18e2-43e9-ab51-2f1667286ef2, num_nulls: 0]
_etag: BINARY SNAPPY DO:2287317 FPO:2425082 SZ:196850/1187415/6.03 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[min: "380100df-0000-0700-0000-5f35dc440000", max: "3901ffc6-0000-0700-0000-5f35de5b0000", num_nulls: 0]
__sys_value: BINARY SNAPPY DO:2484300 FPO:2484315 SZ:51/48/0.94 VC:27150 ENC:PLAIN,PLAIN_DICTIONARY,RLE ST:[num_nulls: 27150, min/max not defined]{color}

 

I'm using the following Rust code for reading the Parquet file metadata:

[https://github.com/spektom/parquet-rs-test/blob/master/src/main.rs]

 

Here's the panic I'm getting:

 


{color:#dcdfe4}thread 'main' panicked at 'Error when parsing Parquet file: Parquet error: Could not parse metadata: bad data', src\main.rs:20:19
stack backtrace:
 0: backtrace::backtrace::trace_unsynchronized
 at C:\Users\VssAdministrator\.cargo\registry\src\github.com-1ecc6299db9ec823\backtrace-0.3.46\src\backtrace\mod.rs:66
 1: std::sys_common::backtrace::_print_fmt
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\sys_common\backtrace.rs:78
 2: std::sys_common::backtrace::_print::\{{impl}}::fmt
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\sys_common\backtrace.rs:59
 3: core::fmt::write
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libcore\fmt\mod.rs:1069
 4: std::io::Write::write_fmt<std::sys::windows::stdio::Stderr>
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\io\mod.rs:1532
 5: std::sys_common::backtrace::_print
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\sys_common\backtrace.rs:62
 6: std::sys_common::backtrace::print
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\sys_common\backtrace.rs:49
 7: std::panicking::default_hook::\{{closure}}
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:198
 8: std::panicking::default_hook
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:218
 9: std::panicking::rust_panic_with_hook
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:477
 10: std::panicking::begin_panic_handler
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:385
 11: std::panicking::begin_panic_fmt
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:339
 12: parquet_rs_test::main
 at .\src\main.rs:20
 13: std::rt::lang_start::\{{closure}}<()>
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\src\libstd\rt.rs:67
 14: std::rt::lang_start_internal::\{{closure}}
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\rt.rs:52
 15: std::panicking::try::do_call
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:297
 16: std::panicking::try
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panicking.rs:274
 17: std::panic::catch_unwind
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\panic.rs:394
 18: std::rt::lang_start_internal
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\/src\libstd\rt.rs:51
 19: std::rt::lang_start<()>
 at /rustc/65b448273dd280401cd440a6740a7cd891525ba3\src\libstd\rt.rs:67
 20: main
 21: invoke_main
 at D:\agent\_work\9\s\src\vctools\crt\vcstartup\src\startup\exe_common.inl:78
 22: __scrt_common_main_seh
 at D:\agent\_work\9\s\src\vctools\crt\vcstartup\src\startup\exe_common.inl:288
 23: BaseThreadInitThunk
 24: RtlUserThreadStart
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.{color}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)