You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ozone.apache.org by "Stephen O'Donnell (Jira)" <ji...@apache.org> on 2023/01/16 13:23:00 UTC
[jira] [Updated] (HDDS-7787) EC: GetChecksum for EC files can fail intermittently with IndexOutOfBounds exception
[ https://issues.apache.org/jira/browse/HDDS-7787?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Stephen O'Donnell updated HDDS-7787:
------------------------------------
Summary: EC: GetChecksum for EC files can fail intermittently with IndexOutOfBounds exception (was: GetChecksum for EC files can fail intermittently with IndexOutOfBounds exception)
> EC: GetChecksum for EC files can fail intermittently with IndexOutOfBounds exception
> ------------------------------------------------------------------------------------
>
> Key: HDDS-7787
> URL: https://issues.apache.org/jira/browse/HDDS-7787
> Project: Apache Ozone
> Issue Type: Sub-task
> Components: Ozone Datanode
> Reporter: Varsha Ravi
> Priority: Major
> Labels: pull-request-available
>
> When calculating a checksum for an EC file with Rack Topology enabled, you can get the following error intermittently:
> {code}
> ERROR : Failed with exception null
> java.lang.IndexOutOfBoundsException
> at java.nio.ByteBuffer.wrap(ByteBuffer.java:375)
> at org.apache.hadoop.ozone.client.checksum.ECBlockChecksumComputer.computeCompositeCrc(ECBlockChecksumComputer.java:163)
> at org.apache.hadoop.ozone.client.checksum.ECBlockChecksumComputer.compute(ECBlockChecksumComputer.java:65)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.getBlockChecksumFromChunkChecksums(ECFileChecksumHelper.java:148)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.checksumBlock(ECFileChecksumHelper.java:106)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.checksumBlocks(ECFileChecksumHelper.java:73)
> at org.apache.hadoop.ozone.client.checksum.BaseFileChecksumHelper.compute(BaseFileChecksumHelper.java:220)
> at org.apache.hadoop.fs.ozone.OzoneClientUtils.getFileChecksumWithCombineMode(OzoneClientUtils.java:223)
> at org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.getFileChecksum(BasicRootedOzoneClientAdapterImpl.java:1123)
> at org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.getFileChecksum(BasicRootedOzoneFileSystem.java:955)
> at org.apache.hadoop.fs.FileSystem.getFileChecksum(FileSystem.java:2831)
> at org.apache.hadoop.hive.ql.metadata.Hive.addInsertNonDirectoryInformation(Hive.java:3659)
> at org.apache.hadoop.hive.ql.metadata.Hive.addInsertFileInformation(Hive.java:3632)
> at org.apache.hadoop.hive.ql.metadata.Hive.addWriteNotificationLog(Hive.java:3578)
> at org.apache.hadoop.hive.ql.metadata.Hive.addWriteNotificationLog(Hive.java:3563)
> at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:3224)
> at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:418)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:769)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:504)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:498)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:226)
> at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:88)
> at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:327)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:345)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> ERROR : FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.exec.MoveTask. java.lang.IndexOutOfBoundsException
> at java.nio.ByteBuffer.wrap(ByteBuffer.java:375)
> at org.apache.hadoop.ozone.client.checksum.ECBlockChecksumComputer.computeCompositeCrc(ECBlockChecksumComputer.java:163)
> at org.apache.hadoop.ozone.client.checksum.ECBlockChecksumComputer.compute(ECBlockChecksumComputer.java:65)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.getBlockChecksumFromChunkChecksums(ECFileChecksumHelper.java:148)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.checksumBlock(ECFileChecksumHelper.java:106)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.checksumBlocks(ECFileChecksumHelper.java:73)
> at org.apache.hadoop.ozone.client.checksum.BaseFileChecksumHelper.compute(BaseFileChecksumHelper.java:220)
> at org.apache.hadoop.fs.ozone.OzoneClientUtils.getFileChecksumWithCombineMode(OzoneClientUtils.java:223)
> at org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.getFileChecksum(BasicRootedOzoneClientAdapterImpl.java:1123)
> at org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.getFileChecksum(BasicRootedOzoneFileSystem.java:955)
> at org.apache.hadoop.fs.FileSystem.getFileChecksum(FileSystem.java:2831)
> at org.apache.hadoop.hive.ql.metadata.Hive.addInsertNonDirectoryInformation(Hive.java:3659)
> at org.apache.hadoop.hive.ql.metadata.Hive.addInsertFileInformation(Hive.java:3632)
> at org.apache.hadoop.hive.ql.metadata.Hive.addWriteNotificationLog(Hive.java:3578)
> at org.apache.hadoop.hive.ql.metadata.Hive.addWriteNotificationLog(Hive.java:3563)
> at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:3224)
> at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:418)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:769)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:504)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:498)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:226)
> at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:88)
> at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:327)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:345)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> INFO : Completed executing command(queryId=hive_20221214035652_bc45477d-98df-408e-b945-a63b4ac6896a); Time taken: 22.167 seconds
> INFO : OK
> Error: Error while compiling statement: FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.exec.MoveTask. java.lang.IndexOutOfBoundsException
> at java.nio.ByteBuffer.wrap(ByteBuffer.java:375)
> at org.apache.hadoop.ozone.client.checksum.ECBlockChecksumComputer.computeCompositeCrc(ECBlockChecksumComputer.java:163)
> at org.apache.hadoop.ozone.client.checksum.ECBlockChecksumComputer.compute(ECBlockChecksumComputer.java:65)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.getBlockChecksumFromChunkChecksums(ECFileChecksumHelper.java:148)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.checksumBlock(ECFileChecksumHelper.java:106)
> at org.apache.hadoop.ozone.client.checksum.ECFileChecksumHelper.checksumBlocks(ECFileChecksumHelper.java:73)
> at org.apache.hadoop.ozone.client.checksum.BaseFileChecksumHelper.compute(BaseFileChecksumHelper.java:220)
> at org.apache.hadoop.fs.ozone.OzoneClientUtils.getFileChecksumWithCombineMode(OzoneClientUtils.java:223)
> at org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.getFileChecksum(BasicRootedOzoneClientAdapterImpl.java:1123)
> at org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.getFileChecksum(BasicRootedOzoneFileSystem.java:955)
> at org.apache.hadoop.fs.FileSystem.getFileChecksum(FileSystem.java:2831)
> at org.apache.hadoop.hive.ql.metadata.Hive.addInsertNonDirectoryInformation(Hive.java:3659)
> at org.apache.hadoop.hive.ql.metadata.Hive.addInsertFileInformation(Hive.java:3632)
> at org.apache.hadoop.hive.ql.metadata.Hive.addWriteNotificationLog(Hive.java:3578)
> at org.apache.hadoop.hive.ql.metadata.Hive.addWriteNotificationLog(Hive.java:3563)
> at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:3224)
> at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:418)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:769)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:504)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:498)
> at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:226)
> at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:88)
> at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:327)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:345)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748) (state=08S01,code=40000)
> {code}
> This happens because the wrong nodes are sometimes used to obtain the stripe checksum: the replicaIndex in the pipeline is not correctly used to order the nodes.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@ozone.apache.org
For additional commands, e-mail: issues-help@ozone.apache.org