Posted to commits@hbase.apache.org by wc...@apache.org on 2022/08/08 14:31:42 UTC

[hbase] branch branch-2 updated: HBASE-27265 : Tool to read StoreFileTrackerFile (#4673)

This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new a85de838ab3 HBASE-27265 : Tool to read StoreFileTrackerFile (#4673)
a85de838ab3 is described below

commit a85de838ab32b98356cb2b11701d3c5be4f0fd22
Author: Abhradeep Kundu <ab...@gmail.com>
AuthorDate: Mon Aug 8 16:13:06 2022 +0530

    HBASE-27265 : Tool to read StoreFileTrackerFile (#4673)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
    Signed-off-by: Duo Zhang <zh...@apache.org>
---
 bin/hbase                                          |   3 +
 bin/hbase.cmd                                      |   5 +
 .../storefiletracker/StoreFileListFile.java        |  10 +-
 .../StoreFileListFilePrettyPrinter.java            | 227 +++++++++++++++++++++
 .../TestStoreFileListFilePrinter.java              | 168 +++++++++++++++
 5 files changed, 410 insertions(+), 3 deletions(-)
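
For context, a usage sketch for the new `sft` subcommand (illustrative only; the table, region and file names below mirror the examples embedded in the option descriptions and in the new test, not a real cluster):

     # Show the usage/help text
     bin/hbase sft

     # Print the store files recorded in a single tracker file
     bin/hbase sft -f /hbase-data/data/default/tbl-sft/093fa06bf84b3b631007f951a14b8457/f/.filelist/f2.1655139542249

     # Print the tracker file contents for one column family of a region
     bin/hbase sft -t default:tbl-sft -r 093fa06bf84b3b631007f951a14b8457 -cf f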

diff --git a/bin/hbase b/bin/hbase
index 029ba4b75da..fa05a67e783 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -83,6 +83,7 @@ show_usage() {
   if [ "${in_omnibus_tarball}" = "true" ]; then
     echo "  wal              Write-ahead-log analyzer"
     echo "  hfile            Store file analyzer"
+    echo "  sft              Store file tracker viewer"
     echo "  zkcli            Run the ZooKeeper shell"
     echo "  master           Run an HBase HMaster node"
     echo "  regionserver     Run an HBase HRegionServer node"
@@ -595,6 +596,8 @@ elif [ "$COMMAND" = "wal" ] ; then
   CLASS='org.apache.hadoop.hbase.wal.WALPrettyPrinter'
 elif [ "$COMMAND" = "hfile" ] ; then
   CLASS='org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter'
+elif [ "$COMMAND" = "sft" ] ; then
+  CLASS='org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileListFilePrettyPrinter'
 elif [ "$COMMAND" = "zkcli" ] ; then
   CLASS="org.apache.hadoop.hbase.zookeeper.ZKMainServer"
   for f in $HBASE_HOME/lib/zkcli/*.jar; do
diff --git a/bin/hbase.cmd b/bin/hbase.cmd
index 240b63c7ec7..2d6604754e4 100644
--- a/bin/hbase.cmd
+++ b/bin/hbase.cmd
@@ -437,6 +437,10 @@ goto :eof
   set CLASS=org.apache.hadoop.hbase.io.hfile.HFile
   goto :eof
 
+:sft
+  set CLASS=org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileListFilePrettyPrinter
+  goto :eof
+
 :zkcli
   set CLASS=org.apache.hadoop.hbase.zookeeper.ZKMainServer
   set CLASSPATH=!CLASSPATH!;%HBASE_HOME%\lib\zkcli\*
@@ -470,6 +474,7 @@ goto :eof
   echo   hbck            Run the hbase 'fsck' tool
   echo   wal             Write-ahead-log analyzer
   echo   hfile           Store file analyzer
+  echo   sft             Store file tracker viewer
   echo   zkcli           Run the ZooKeeper shell
   echo   master          Run an HBase HMaster node
   echo   regionserver    Run an HBase HRegionServer node
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java
index 9328e5efb96..e3d2a182348 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFile.java
@@ -73,7 +73,7 @@ class StoreFileListFile {
 
   private static final char TRACK_FILE_SEPARATOR = '.';
 
-  private static final Pattern TRACK_FILE_PATTERN = Pattern.compile("^f(1|2)\\.\\d+$");
+  static final Pattern TRACK_FILE_PATTERN = Pattern.compile("^f(1|2)\\.\\d+$");
 
   // 16 MB, which is big enough for a tracker file
   private static final int MAX_FILE_SIZE = 16 * 1024 * 1024;
@@ -94,8 +94,7 @@ class StoreFileListFile {
     trackFileDir = new Path(ctx.getFamilyStoreDirectoryPath(), TRACK_FILE_DIR);
   }
 
-  private StoreFileList load(Path path) throws IOException {
-    FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
+  static StoreFileList load(FileSystem fs, Path path) throws IOException {
     byte[] data;
     int expectedChecksum;
     try (FSDataInputStream in = fs.open(path)) {
@@ -118,6 +117,11 @@ class StoreFileListFile {
     return StoreFileList.parseFrom(data);
   }
 
+  StoreFileList load(Path path) throws IOException {
+    FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
+    return load(fs, path);
+  }
+
   private int select(StoreFileList[] lists) {
     if (lists[0] == null) {
       return 1;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java
new file mode 100644
index 00000000000..9338f2f6332
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileListFilePrettyPrinter.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.storefiletracker;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList;
+
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Evolving
+public class StoreFileListFilePrettyPrinter extends Configured implements Tool {
+  private static final Logger LOG = LoggerFactory.getLogger(StoreFileListFilePrettyPrinter.class);
+
+  private Options options = new Options();
+
+  private final String fileOption = "f";
+  private final String columnFamilyOption = "cf";
+  private final String regionOption = "r";
+  private final String tableNameOption = "t";
+
+  private String namespace;
+  private String regionName;
+  private String columnFamily;
+  private String tableName;
+  private Path path;
+  private PrintStream err = System.err;
+  private PrintStream out = System.out;
+
+  public StoreFileListFilePrettyPrinter() {
+    super();
+    init();
+  }
+
+  public StoreFileListFilePrettyPrinter(Configuration conf) {
+    super(conf);
+    init();
+  }
+
+  private void init() {
+    OptionGroup files = new OptionGroup();
+    options.addOption(new Option(tableNameOption, "table", true,
+      "Table to scan. Pass table name; e.g. test_table"));
+    options.addOption(new Option(columnFamilyOption, "columnfamily", true,
+      "column family to scan. Pass column family name; e.g. f"));
+    files.addOption(new Option(regionOption, "region", true,
+      "Region to scan. Pass region name; e.g. '3d58e9067bf23e378e68c071f3dd39eb'"));
+    files.addOption(new Option(fileOption, "file", true,
+      "File to scan. Pass full-path; e.g. /root/hbase-3.0.0-alpha-4-SNAPSHOT/hbase-data/"
+        + "data/default/tbl-sft/093fa06bf84b3b631007f951a14b8457/f/.filelist/f2.1655139542249"));
+    options.addOptionGroup(files);
+  }
+
+  public boolean parseOptions(String[] args) throws ParseException, IOException {
+    HelpFormatter formatter = new HelpFormatter();
+    if (args.length == 0) {
+      formatter
+        .printHelp("sft [--file=</path/to/tracker/file> | --table=<namespace:tablename|tablename>"
+          + " --region=<regionname> [--columnFamily=<columnfamily>] ]", options, true);
+      return false;
+    }
+
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd = parser.parse(options, args);
+
+    if (cmd.hasOption(fileOption)) {
+      path = new Path(cmd.getOptionValue(fileOption));
+    } else {
+      regionName = cmd.getOptionValue(regionOption);
+      if (StringUtils.isEmpty(regionName)) {
+        err.println("Region name is not specified.");
+        formatter.printHelp("sft [--file=</path/to/tracker/file> | --table=<namespace:tablename|"
+          + "tablename> --region=<regionname> [--columnFamily=<columnfamily>] ]", options, true);
+        System.exit(1);
+      }
+      columnFamily = cmd.getOptionValue(columnFamilyOption);
+      if (StringUtils.isEmpty(columnFamily)) {
+        err.println("Column family is not specified.");
+        formatter.printHelp("sft [--file=</path/to/tracker/file> | --table=<namespace:tablename|"
+          + "tablename> --region=<regionname> [--columnFamily=<columnfamily>] ]", options, true);
+        System.exit(1);
+      }
+      String tableNameWithNS = cmd.getOptionValue(tableNameOption);
+      if (StringUtils.isEmpty(tableNameWithNS)) {
+        err.println("Table name is not specified.");
+        formatter.printHelp("sft [--file=</path/to/tracker/file> | --table=<namespace:tablename|"
+          + "tablename> --region=<regionname> [--columnFamily=<columnfamily>] ]", options, true);
+        System.exit(1);
+      }
+      TableName tn = TableName.valueOf(tableNameWithNS);
+      namespace = tn.getNamespaceAsString();
+      tableName = tn.getNameAsString();
+    }
+    return true;
+  }
+
+  public int run(String[] args) {
+    if (getConf() == null) {
+      throw new RuntimeException("A Configuration instance must be provided.");
+    }
+    boolean pass = true;
+    try {
+      CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf()));
+      if (!parseOptions(args)) {
+        return 1;
+      }
+    } catch (IOException ex) {
+      LOG.error("Error parsing command-line options", ex);
+      return 1;
+    } catch (ParseException ex) {
+      LOG.error("Error parsing command-line options", ex);
+      return 1;
+    }
+    FileSystem fs = null;
+    if (path != null) {
+      try {
+        fs = path.getFileSystem(getConf());
+        if (fs.isDirectory(path)) {
+          err.println("ERROR, wrong path given: " + path);
+          return 2;
+        }
+        return print(fs, path);
+      } catch (IOException e) {
+        LOG.error("Error reading " + path, e);
+        return 2;
+      }
+    } else {
+      try {
+        Path root = CommonFSUtils.getRootDir(getConf());
+        Path baseDir = new Path(root, HConstants.BASE_NAMESPACE_DIR);
+        Path nameSpacePath = new Path(baseDir, namespace);
+        Path tablePath = new Path(nameSpacePath, tableName);
+        Path regionPath = new Path(tablePath, regionName);
+        Path cfPath = new Path(regionPath, columnFamily);
+        Path sftPath = new Path(cfPath, StoreFileListFile.TRACK_FILE_DIR);
+
+        fs = FileSystem.newInstance(regionPath.toUri(), getConf());
+
+        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(sftPath, false);
+
+        while (iterator.hasNext()) {
+          LocatedFileStatus lfs = iterator.next();
+          if (
+            lfs.isFile()
+              && StoreFileListFile.TRACK_FILE_PATTERN.matcher(lfs.getPath().getName()).matches()
+          ) {
+            out.println("Printing contents for file " + lfs.getPath().toString());
+            int ret = print(fs, lfs.getPath());
+            if (ret != 0) {
+              pass = false;
+            }
+          }
+        }
+      } catch (IOException e) {
+        LOG.error("Error processing " + e);
+        return 2;
+      }
+    }
+    return pass ? 0 : 2;
+  }
+
+  private int print(FileSystem fs, Path path) throws IOException {
+    try {
+      if (!fs.exists(path)) {
+        err.println("ERROR, file doesnt exist: " + path);
+        return 2;
+      }
+    } catch (IOException e) {
+      err.println("ERROR, reading file: " + path + e);
+      return 2;
+    }
+    StoreFileList storeFile = StoreFileListFile.load(fs, path);
+    int end = storeFile.getStoreFileCount();
+    for (int i = 0; i < end; i++) {
+      out.println(storeFile.getStoreFile(i).getName());
+    }
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    int ret = ToolRunner.run(conf, new StoreFileListFilePrettyPrinter(), args);
+    System.exit(ret);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java
new file mode 100644
index 00000000000..ae44f86f3e4
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/storefiletracker/TestStoreFileListFilePrinter.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.storefiletracker;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.TableNameTestRule;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
+
+@Category({ RegionServerTests.class, MediumTests.class })
+public class TestStoreFileListFilePrinter {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestStoreFileListFilePrinter.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  @Rule
+  public final TableNameTestRule tableName = new TableNameTestRule();
+  public static byte[] family = Bytes.toBytes("F");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.startMiniCluster(1);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testPrintWithDirectPath() throws IOException {
+    createTable();
+    TableName tn = tableName.getTableName();
+    String fileName = getStoreFileName(tn, family);
+
+    String cf = new String(family);
+
+    Configuration conf = UTIL.getConfiguration();
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    PrintStream ps = new PrintStream(stream);
+    System.setOut(ps);
+    StoreFileListFilePrettyPrinter sftPrinter = new StoreFileListFilePrettyPrinter(conf);
+
+    FileSystem fs = Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(tn))
+      .getRegionFileSystem().getFileSystem();
+    Path regionPath = Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(tn))
+      .getRegionFileSystem().getRegionDir();
+    Path cfPath = new Path(regionPath, cf);
+    Path path = new Path(cfPath, StoreFileListFile.TRACK_FILE_DIR);
+    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, false);
+    while (iterator.hasNext()) {
+      LocatedFileStatus lfs = iterator.next();
+      if (lfs.getPath().getName().contains("f2") || lfs.getPath().getName().contains("f1")) {
+        String[] argsF = { "-f", lfs.getPath().toString() };
+        sftPrinter.run(argsF);
+        String result = new String(stream.toByteArray());
+        String expect = fileName + "\n";
+        assertEquals(expect, result);
+      }
+    }
+  }
+
+  @Test
+  public void testPrintWithRegionOption() throws IOException {
+    createTable();
+    String cf = new String(family);
+    TableName tn = tableName.getTableName();
+    String fileName = getStoreFileName(tn, family);
+
+    List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName.getTableName());
+    String rn = regions.get(0).getRegionInfo().getEncodedName();
+    String table = tableName.getTableName().toString();
+
+    Configuration conf = UTIL.getConfiguration();
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    PrintStream ps = new PrintStream(stream);
+    System.setOut(ps);
+    StoreFileListFilePrettyPrinter sftPrinter = new StoreFileListFilePrettyPrinter(conf);
+    String[] args = { "-r", rn, "-t", table, "-cf", cf };
+    sftPrinter.run(args);
+    String result = new String(stream.toByteArray());
+
+    FileSystem fs = Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(tn))
+      .getRegionFileSystem().getFileSystem();
+    Path regionPath = Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(tn))
+      .getRegionFileSystem().getRegionDir();
+    Path cfPath = new Path(regionPath, cf);
+    Path path = new Path(cfPath, StoreFileListFile.TRACK_FILE_DIR);
+    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, false);
+    String expect = "";
+    while (iterator.hasNext()) {
+      LocatedFileStatus lfs = iterator.next();
+      if (lfs.getPath().getName().contains("f2") || lfs.getPath().getName().contains("f1")) {
+        expect = expect + "Printing contents for file " + lfs.getPath() + "\n" + fileName + "\n";
+      }
+    }
+    assertEquals(expect, result);
+  }
+
+  private String getStoreFileName(TableName table, byte[] family) {
+    return Iterables
+      .getOnlyElement(Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(table))
+        .getStore(family).getStorefiles())
+      .getPath().getName();
+  }
+
+  private void createTable() throws IOException {
+    TableName tn = tableName.getTableName();
+    byte[] row = Bytes.toBytes("row");
+    byte[] qualifier = Bytes.toBytes("qualifier");
+    byte[] value = Bytes.toBytes("value");
+    TableDescriptor td = TableDescriptorBuilder.newBuilder(tn)
+      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family))
+      .setValue(StoreFileTrackerFactory.TRACKER_IMPL, StoreFileTrackerFactory.Trackers.FILE.name())
+      .build();
+    UTIL.getAdmin().createTable(td);
+    try (Table table = UTIL.getConnection().getTable(tn)) {
+      table.put(new Put(row).addColumn(family, qualifier, value));
+    }
+    UTIL.flush(tn);
+  }
+}