You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Alexey Kudinkin (Jira)" <ji...@apache.org> on 2022/01/25 21:58:00 UTC

[jira] [Created] (HUDI-3322) Rollback of Delta Commits performed incorrectly for MOR tables

Alexey Kudinkin created HUDI-3322:
-------------------------------------

             Summary: Rollback of Delta Commits performed incorrectly for MOR tables
                 Key: HUDI-3322
                 URL: https://issues.apache.org/jira/browse/HUDI-3322
             Project: Apache Hudi
          Issue Type: Bug
            Reporter: Alexey Kudinkin


Diving deeper into the issue of HUDI-3279, I've realized that the root cause of the problem is actually a Rollback of Delta Commits that is performed incorrectly for MOR tables. Consider the case below (we will continue to rely on the test {{TestHoodieSparkMergeOnReadTableRollback#testMORTableRestore}}):

Hoodie Timeline:
{code:java}
alexey.kudinkin@alexeys-mbp junit5494198038159268501 % ls -la .hoodie
total 400
drwxr-xr-x  52 alexey.kudinkin  staff  1664 Jan 25 13:08 .
drwx------   5 alexey.kudinkin  staff   160 Jan 25 12:56 ..
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:56 .001.deltacommit.crc
-rw-r--r--   1 alexey.kudinkin  staff    28 Jan 25 12:56 .001.deltacommit.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:56 .001.deltacommit.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff    52 Jan 25 12:56 .002.deltacommit.crc
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:56 .002.deltacommit.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:56 .002.deltacommit.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff    56 Jan 25 12:57 .003.deltacommit.crc
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:57 .003.deltacommit.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:56 .003.deltacommit.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff    56 Jan 25 12:57 .004.deltacommit.crc
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:57 .004.deltacommit.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:57 .004.deltacommit.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:57 .005.commit.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:57 .005.compaction.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff    28 Jan 25 12:57 .005.compaction.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff    52 Jan 25 12:57 .006.deltacommit.crc
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:57 .006.deltacommit.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:57 .006.deltacommit.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff    52 Jan 25 12:57 .007.deltacommit.crc
-rw-r--r--   1 alexey.kudinkin  staff    48 Jan 25 12:57 .007.deltacommit.inflight.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 12:57 .007.deltacommit.requested.crc
-rw-r--r--   1 alexey.kudinkin  staff     8 Jan 25 13:08 .20220125130818473.restore.inflight.crc
drwxr-xr-x   5 alexey.kudinkin  staff   160 Jan 25 12:57 .aux
-rw-r--r--   1 alexey.kudinkin  staff    12 Jan 25 12:56 .hoodie.properties.crc
drwxr-xr-x   2 alexey.kudinkin  staff    64 Jan 25 12:57 .temp
-rw-r--r--   1 alexey.kudinkin  staff  4822 Jan 25 12:56 001.deltacommit
-rw-r--r--   1 alexey.kudinkin  staff  2499 Jan 25 12:56 001.deltacommit.inflight
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:56 001.deltacommit.requested
-rw-r--r--   1 alexey.kudinkin  staff  5451 Jan 25 12:56 002.deltacommit
-rw-r--r--   1 alexey.kudinkin  staff  4620 Jan 25 12:56 002.deltacommit.inflight
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:56 002.deltacommit.requested
-rw-r--r--   1 alexey.kudinkin  staff  5646 Jan 25 12:57 003.deltacommit
-rw-r--r--   1 alexey.kudinkin  staff  4620 Jan 25 12:57 003.deltacommit.inflight
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:56 003.deltacommit.requested
-rw-r--r--   1 alexey.kudinkin  staff  5835 Jan 25 12:57 004.deltacommit
-rw-r--r--   1 alexey.kudinkin  staff  4620 Jan 25 12:57 004.deltacommit.inflight
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:57 004.deltacommit.requested
-rw-r--r--   1 alexey.kudinkin  staff  4756 Jan 25 12:57 005.commit
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:57 005.compaction.inflight
-rw-r--r--   1 alexey.kudinkin  staff  2507 Jan 25 12:57 005.compaction.requested
-rw-r--r--   1 alexey.kudinkin  staff  5362 Jan 25 12:57 006.deltacommit
-rw-r--r--   1 alexey.kudinkin  staff  4620 Jan 25 12:57 006.deltacommit.inflight
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:57 006.deltacommit.requested
-rw-r--r--   1 alexey.kudinkin  staff  5551 Jan 25 12:57 007.deltacommit
-rw-r--r--   1 alexey.kudinkin  staff  4620 Jan 25 12:57 007.deltacommit.inflight
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 12:57 007.deltacommit.requested
-rw-r--r--   1 alexey.kudinkin  staff     0 Jan 25 13:08 20220125130818473.restore.inflight
drwxr-xr-x   2 alexey.kudinkin  staff    64 Jan 25 12:56 archived
-rw-r--r--   1 alexey.kudinkin  staff   347 Jan 25 12:56 hoodie.properties
drwxr-xr-x   4 alexey.kudinkin  staff   128 Jan 25 12:56 metadata {code}
Commit "007" metadata:
{code:java}
alexey.kudinkin@alexeys-mbp junit5494198038159268501 % cat .hoodie/007.deltacommit
{
  "partitionToWriteStats" : {
    "2016/03/15" : [ {
      "fileId" : "a685909e-14e8-4adf-b6a2-e0b669e89e61-0",
      "path" : "2016/03/15/.a685909e-14e8-4adf-b6a2-e0b669e89e61-0_005.log.2_0-294-471",
      "prevCommit" : "005",
      "numWrites" : 57,
      "numDeletes" : 0,
      "numUpdateWrites" : 57,
      "numInserts" : 0,
      "totalWriteBytes" : 18054,
      "totalWriteErrors" : 0,
      "tempPath" : null,
      "partitionPath" : "2016/03/15",
      "totalLogRecords" : 0,
      "totalLogFilesCompacted" : 0,
      "totalLogSizeCompacted" : 0,
      "totalUpdatedRecordsCompacted" : 0,
      "totalLogBlocks" : 0,
      "totalCorruptLogBlock" : 0,
      "totalRollbackBlocks" : 0,
      "fileSizeInBytes" : 18054,
      "minEventTime" : null,
      "maxEventTime" : null,
      "logVersion" : 2,
      "logOffset" : 0,
      "baseFile" : "a685909e-14e8-4adf-b6a2-e0b669e89e61-0_2-200-323_005.parquet",
      "logFiles" : [ ".a685909e-14e8-4adf-b6a2-e0b669e89e61-0_005.log.1_0-247-398", ".a685909e-14e8-4adf-b6a2-e0b669e89e61-0_005.log.2_0-294-471" ]
    } ],
    "2015/03/16" : [ {
      "fileId" : "0ac88bb7-ebd4-4384-94f1-593e6827a868-0",
      "path" : "2015/03/16/.0ac88bb7-ebd4-4384-94f1-593e6827a868-0_005.log.2_1-294-472",
      "prevCommit" : "005",
      "numWrites" : 69,
      "numDeletes" : 0,
      "numUpdateWrites" : 69,
      "numInserts" : 0,
      "totalWriteBytes" : 21457,
      "totalWriteErrors" : 0,
      "tempPath" : null,
      "partitionPath" : "2015/03/16",
      "totalLogRecords" : 0,
      "totalLogFilesCompacted" : 0,
      "totalLogSizeCompacted" : 0,
      "totalUpdatedRecordsCompacted" : 0,
      "totalLogBlocks" : 0,
      "totalCorruptLogBlock" : 0,
      "totalRollbackBlocks" : 0,
      "fileSizeInBytes" : 21457,
      "minEventTime" : null,
      "maxEventTime" : null,
      "logVersion" : 2,
      "logOffset" : 0,
      "baseFile" : "0ac88bb7-ebd4-4384-94f1-593e6827a868-0_1-200-322_005.parquet",
      "logFiles" : [ ".0ac88bb7-ebd4-4384-94f1-593e6827a868-0_005.log.1_1-247-399", ".0ac88bb7-ebd4-4384-94f1-593e6827a868-0_005.log.2_1-294-472" ]
    } ],
    "2015/03/17" : [ {
      "fileId" : "52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0",
      "path" : "2015/03/17/.52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0_005.log.2_2-294-473",
      "prevCommit" : "005",
      "numWrites" : 74,
      "numDeletes" : 0,
      "numUpdateWrites" : 74,
      "numInserts" : 0,
      "totalWriteBytes" : 22889,
      "totalWriteErrors" : 0,
      "tempPath" : null,
      "partitionPath" : "2015/03/17",
      "totalLogRecords" : 0,
      "totalLogFilesCompacted" : 0,
      "totalLogSizeCompacted" : 0,
      "totalUpdatedRecordsCompacted" : 0,
      "totalLogBlocks" : 0,
      "totalCorruptLogBlock" : 0,
      "totalRollbackBlocks" : 0,
      "fileSizeInBytes" : 22889,
      "minEventTime" : null,
      "maxEventTime" : null,
      "logVersion" : 2,
      "logOffset" : 0,
      "baseFile" : "52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0_0-200-321_005.parquet",
      "logFiles" : [ ".52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0_005.log.1_2-247-400", ".52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0_005.log.2_2-294-473" ]
    } ]
  },
  "compacted" : false,
  "extraMetadata" : {
    "schema" : "{\"type\":\"record\",\"name\":\"triprec\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"long\"},{\"name\":\"_row_key\",\"type\":\"string\"},{\"name\":\"partition_path\",\"type\":\"string\"},{\"name\":\"rider\",\"type\":\"string\"},{\"name\":\"driver\",\"type\":\"string\"},{\"name\":\"begin_lat\",\"type\":\"double\"},{\"name\":\"begin_lon\",\"type\":\"double\"},{\"name\":\"end_lat\",\"type\":\"double\"},{\"name\":\"end_lon\",\"type\":\"double\"},{\"name\":\"distance_in_meters\",\"type\":\"int\"},{\"name\":\"seconds_since_epoch\",\"type\":\"long\"},{\"name\":\"weight\",\"type\":\"float\"},{\"name\":\"nation\",\"type\":\"bytes\"},{\"name\":\"current_date\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"current_ts\",\"type\":\"long\"},{\"name\":\"height\",\"type\":{\"type\":\"fixed\",\"name\":\"abc\",\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":6}},{\"name\":\"city_to_state\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"fare\",\"type\":{\"type\":\"record\",\"name\":\"fare\",\"fields\":[{\"name\":\"amount\",\"type\":\"double\"},{\"name\":\"currency\",\"type\":\"string\"}]}},{\"name\":\"tip_history\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"tip_history\",\"fields\":[{\"name\":\"amount\",\"type\":\"double\"},{\"name\":\"currency\",\"type\":\"string\"}],\"default\":null},\"default\":[]},\"default\":[]},{\"name\":\"_hoodie_is_deleted\",\"type\":\"boolean\",\"default\":false}]}"
  },
  "operationType" : "UPSERT",
  "totalLogFilesCompacted" : 0,
  "fileIdAndRelativePaths" : {
    "52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0" : "2015/03/17/.52b88ea6-09eb-4ab9-9f76-bfbf7cd5362f-0_005.log.2_2-294-473",
    "0ac88bb7-ebd4-4384-94f1-593e6827a868-0" : "2015/03/16/.0ac88bb7-ebd4-4384-94f1-593e6827a868-0_005.log.2_1-294-472",
    "a685909e-14e8-4adf-b6a2-e0b669e89e61-0" : "2016/03/15/.a685909e-14e8-4adf-b6a2-e0b669e89e61-0_005.log.2_0-294-471"
  },
  "totalLogRecordsCompacted" : 0,
  "totalCompactedRecordsUpdated" : 0,
  "totalRecordsDeleted" : 0,
  "totalLogFilesSize" : 0,
  "totalScanTime" : 0,
  "totalCreateTime" : 0,
  "totalUpsertTime" : 139,
  "minAndMaxEventTime" : {
    "Optional.empty" : {
      "val" : null,
      "present" : false
    }
  },
  "writePartitionPaths" : [ "2016/03/15", "2015/03/16", "2015/03/17" ]
}% {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)