You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2020/03/26 01:10:15 UTC

[incubator-pinot] branch master updated: Add a simple PinotFS benchmark driver (#5160)

This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ed235d2  Add a simple PinotFS benchmark driver (#5160)
ed235d2 is described below

commit ed235d208d16d1e2a3f03c461f40bbc0df379f6c
Author: Seunghyun Lee <sn...@linkedin.com>
AuthorDate: Wed Mar 25 18:10:07 2020 -0700

    Add a simple PinotFS benchmark driver (#5160)
    
    * Add a simple PinotFS benchmark driver
    
    This benchmark tests the performance of pinotFS operations.
    
    * addressing comments
---
 .../org/apache/pinot/tools/PinotToolLauncher.java  |  10 +-
 .../tools/filesystem/PinotFSBenchmarkDriver.java   | 249 +++++++++++++++++++++
 .../tools/filesystem/PinotFSBenchmarkRunner.java   |  79 +++++++
 .../conf/sample_filesystem_benchmark.conf          |   5 +
 4 files changed, 342 insertions(+), 1 deletion(-)

diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/PinotToolLauncher.java b/pinot-tools/src/main/java/org/apache/pinot/tools/PinotToolLauncher.java
index 1cea9d1..ac8de23 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/PinotToolLauncher.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/PinotToolLauncher.java
@@ -20,6 +20,7 @@ package org.apache.pinot.tools;
 
 import java.lang.reflect.Field;
 import org.apache.pinot.spi.plugin.PluginManager;
+import org.apache.pinot.tools.filesystem.PinotFSBenchmarkRunner;
 import org.apache.pinot.tools.perf.PerfBenchmarkRunner;
 import org.apache.pinot.tools.perf.QueryRunner;
 import org.kohsuke.args4j.Argument;
@@ -38,7 +39,14 @@ public class PinotToolLauncher {
 
   // @formatter:off
   @Argument(handler = SubCommandHandler.class, metaVar = "<subCommand>")
-  @SubCommands({@SubCommand(name = "UpdateSegmentState", impl = UpdateSegmentState.class), @SubCommand(name = "AutoAddInvertedIndex", impl = AutoAddInvertedIndexTool.class), @SubCommand(name = "ValidateTableRetention", impl = ValidateTableRetention.class), @SubCommand(name = "PerfBenchmarkRunner", impl = PerfBenchmarkRunner.class), @SubCommand(name = "QueryRunner", impl = QueryRunner.class)})
+  @SubCommands({
+      @SubCommand(name = "UpdateSegmentState", impl = UpdateSegmentState.class),
+      @SubCommand(name = "AutoAddInvertedIndex", impl = AutoAddInvertedIndexTool.class),
+      @SubCommand(name = "ValidateTableRetention", impl = ValidateTableRetention.class),
+      @SubCommand(name = "PerfBenchmarkRunner", impl = PerfBenchmarkRunner.class),
+      @SubCommand(name = "QueryRunner", impl = QueryRunner.class),
+      @SubCommand(name = "PinotFSBenchmarkRunner", impl = PinotFSBenchmarkRunner.class)
+  })
   Command _subCommand;
   // @formatter:on
 
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/filesystem/PinotFSBenchmarkDriver.java b/pinot-tools/src/main/java/org/apache/pinot/tools/filesystem/PinotFSBenchmarkDriver.java
new file mode 100644
index 0000000..1be57bc
--- /dev/null
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/filesystem/PinotFSBenchmarkDriver.java
@@ -0,0 +1,249 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.tools.filesystem;
+
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.security.MessageDigest;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.spi.filesystem.PinotFS;
+import org.apache.pinot.spi.filesystem.PinotFSFactory;
+import org.apache.pinot.spi.plugin.PluginManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class PinotFSBenchmarkDriver {
+  private static final Logger LOGGER = LoggerFactory.getLogger(PinotFSBenchmarkDriver.class);
+
+  private static final int DEFAULT_NUM_SEGMENTS_FOR_LIST_TEST = 1000;
+  private static final int DEFAULT_DATA_SIZE_IN_MB_FOR_COPY_TEST = 1024; // 1GB
+  private static final int DEFAULT_NUM_OPS = 5; // 5
+
+  private String _mode;
+  private PinotFS _pinotFS;
+  private URI _baseDirectoryUri;
+  private File _localTempDir;
+  private int _numSegmentsForListFilesTest;
+  private int _numOps;
+  private int _dataSizeInMBsForCopyTest;
+
+  public PinotFSBenchmarkDriver(String mode, String configFilePath, String baseDirectoryUri, String localTempDir,
+      Integer numSegmentsForListFilesTest, Integer dataSizeInMBsForCopyTest, Integer numOps) throws ConfigurationException {
+    Configuration configuration = new PropertiesConfiguration(new File(configFilePath));
+    PinotFSFactory.init(configuration);
+    _mode = mode;
+    _baseDirectoryUri = URI.create(baseDirectoryUri);
+    _pinotFS = PinotFSFactory.create(_baseDirectoryUri.getScheme());
+    _localTempDir =
+        (localTempDir != null) ? new File(localTempDir) : new File(FileUtils.getTempDirectory(), "benchmark");
+    _numSegmentsForListFilesTest =
+        (numSegmentsForListFilesTest != null) ? numSegmentsForListFilesTest : DEFAULT_NUM_SEGMENTS_FOR_LIST_TEST;
+    _numOps = (numOps != null) ? numOps : DEFAULT_NUM_OPS;
+    _dataSizeInMBsForCopyTest =
+        (dataSizeInMBsForCopyTest != null) ? dataSizeInMBsForCopyTest : DEFAULT_DATA_SIZE_IN_MB_FOR_COPY_TEST;
+    LOGGER.info("PinotFS has been initialized sucessfully. (mode = {}, pinotFSClass = {}, configFile = {}, "
+            + "baseDirectoryUri = {}, localTempDir = {}, numSegmentsForListFilesTest = {}, "
+            + "dataSizeInMBsForCopyTest = {}, numOps = {})", _mode, _pinotFS.getClass().getSimpleName(), configFilePath,
+        baseDirectoryUri, _localTempDir, _numSegmentsForListFilesTest, _dataSizeInMBsForCopyTest, _numOps);
+  }
+
+  public void run() throws Exception {
+    prepareBenchmark();
+
+    switch (_mode.toUpperCase()) {
+      case "ALL":
+        testListFilesInMultipleDirectories();
+        testListFiles();
+        testCopies();
+        break;
+      case "LISTFILES":
+        testListFiles();
+        break;
+      case "COPY":
+        testCopies();
+        break;
+      default:
+        throw new RuntimeException("Not Supported Mode: " + _mode);
+    }
+    cleanUpBenchmark();
+  }
+
+  private void prepareBenchmark() throws IOException {
+    // Clean up base directory
+    if (_pinotFS.exists(_baseDirectoryUri)) {
+      _pinotFS.delete(_baseDirectoryUri, true);
+    }
+
+    if (_localTempDir.exists()) {
+      _localTempDir.delete();
+    }
+
+    // Set up the base directory
+    _pinotFS.mkdir(_baseDirectoryUri);
+    _localTempDir.mkdir();
+  }
+
+  private void cleanUpBenchmark() throws IOException {
+    _pinotFS.delete(_baseDirectoryUri, true);
+    FileUtils.deleteQuietly(_localTempDir);
+    LOGGER.info("Working directories have been cleaned up successfully. (baseDirectoryUri={}, localTempDir={})",
+        _baseDirectoryUri, _localTempDir);
+  }
+
+  private void testListFilesInMultipleDirectories() throws Exception {
+    LOGGER.info("========= List Files in Multiple Directories ==========");
+    long prepareTime = System.currentTimeMillis();
+    URI listTestUri = combinePath(_baseDirectoryUri, "listTestMultipleFile");
+    _pinotFS.mkdir(listTestUri);
+    LOGGER.info("Created {} for list test...", listTestUri);
+
+    int numSegments = 1;
+    for (int i = 0; i < 5; i++) {
+      String directoryPath = "directory_" + i;
+      File tmpDirectory = new File(_localTempDir.getPath(), directoryPath);
+      URI directoryUri = combinePath(listTestUri, directoryPath);
+      tmpDirectory.mkdir();
+
+      for (int j = 0; j < numSegments; j++) {
+        String relativePath = "segment_" + j;
+        File tmpFile = new File(tmpDirectory, relativePath);
+        tmpFile.createNewFile();
+        _pinotFS.copyFromLocalFile(tmpFile, combinePath(directoryUri, relativePath));
+      }
+      LOGGER.info("Took {} ms to create {} segments for directory_{}",
+          System.currentTimeMillis() - prepareTime, numSegments, i);
+      numSegments *= 10;
+    }
+
+    // reset numSegments
+    numSegments = 1;
+    for (int i = 0; i < 5; i++) {
+      for (int j = 0; j < _numOps; j++) {
+        URI directoryUri = combinePath(listTestUri, "directory_" + i);
+        long listFilesStart = System.currentTimeMillis();
+        String[] lists = _pinotFS.listFiles(directoryUri, true);
+        LOGGER.info("{}: took {} ms to listFiles. directory_{} ({} segments)", j,
+            System.currentTimeMillis() - listFilesStart, i, lists.length);
+        Preconditions.checkState(lists.length == numSegments);
+      }
+      numSegments *= 10;
+    }
+  }
+
+  private void testListFiles() throws Exception {
+    LOGGER.info("========= List Files ==========");
+    long testStartTime = System.currentTimeMillis();
+    URI listTestUri = combinePath(_baseDirectoryUri, "listTest");
+    _pinotFS.mkdir(listTestUri);
+    LOGGER.info("Created {} for list test...", listTestUri);
+
+    for (int i = 0; i < _numSegmentsForListFilesTest; i++) {
+      String relativePath = "segment_" + i;
+      File tmpFile = new File(_localTempDir.getPath(), relativePath);
+      tmpFile.createNewFile();
+      _pinotFS.copyFromLocalFile(tmpFile, combinePath(listTestUri, relativePath));
+    }
+    LOGGER.info("Took {} ms to create {} segments.",
+        System.currentTimeMillis() - testStartTime, _numSegmentsForListFilesTest);
+
+    for (int i = 0; i < _numOps; i++) {
+      long listFilesStart = System.currentTimeMillis();
+      String[] lists = _pinotFS.listFiles(listTestUri, true);
+      LOGGER.info("{}: took {} ms to listFiles.", i, System.currentTimeMillis() - listFilesStart);
+      Preconditions.checkState(lists.length == _numSegmentsForListFilesTest);
+    }
+  }
+
+  private void testCopies() throws Exception {
+    LOGGER.info("\n========= Uploads and Downloads ==========");
+    URI copyTestUri = combinePath(_baseDirectoryUri, "copyFiles");
+    _pinotFS.mkdir(copyTestUri);
+    LOGGER.info("Created {} for copy test...", copyTestUri);
+
+    long fileSizeInBytes = _dataSizeInMBsForCopyTest * 1024 * 1024;
+    File largeTmpFile = createFileWithSize("largeFile", fileSizeInBytes);
+    for (int i = 0; i < _numOps; i++) {
+      URI largeFileDstUri = combinePath(copyTestUri, largeTmpFile.getName() + "_" + i);
+      long copyStart = System.currentTimeMillis();
+      _pinotFS.copyFromLocalFile(largeTmpFile, largeFileDstUri);
+      LOGGER.info("{}: took {} ms to copyFromLocal, fileSize: {} MB.", i, System.currentTimeMillis() - copyStart,
+          _dataSizeInMBsForCopyTest);
+    }
+
+    for (int i = 0; i < _numOps; i++) {
+      URI largeFileSrcUri = combinePath(copyTestUri, largeTmpFile.getName() + "_" + i);
+      File localTmpLargeFile = new File(_localTempDir, "largeFile_" + i);
+      long copyStart = System.currentTimeMillis();
+      _pinotFS.copyToLocalFile(largeFileSrcUri, localTmpLargeFile);
+      LOGGER.info("{}: took {} ms to copyToLocal, fileSize: {} MB.", i, System.currentTimeMillis() - copyStart,
+          _dataSizeInMBsForCopyTest);
+    }
+
+    for (int i = 0; i < _numOps; i++) {
+      URI largeFileSrcUri = combinePath(copyTestUri, largeTmpFile.getName() + "_" + i);
+      URI largeFileDstUri = combinePath(copyTestUri, largeTmpFile.getName() + "_copy_" + i);
+
+      long copyStart = System.currentTimeMillis();
+      _pinotFS.copy(largeFileSrcUri, largeFileDstUri);
+      LOGGER.info("{}: took {} ms to copy, fileSize: {} MB.", i, System.currentTimeMillis() - copyStart,
+          _dataSizeInMBsForCopyTest);
+    }
+
+    for (int i = 0; i < _numOps; i++) {
+      URI largeFileSrcUri = combinePath(copyTestUri, largeTmpFile.getName() + "_copy_" + i);
+      URI largeFileDstUri = combinePath(copyTestUri, largeTmpFile.getName() + "_rename_" + i);
+
+      long renameStart = System.currentTimeMillis();
+      _pinotFS.move(largeFileSrcUri, largeFileDstUri, true);
+      LOGGER.info("{}: took {} ms to rename, fileSize: {} MB.", i, System.currentTimeMillis() - renameStart,
+          _dataSizeInMBsForCopyTest);
+    }
+
+    for (int i = 0; i < _numOps; i++) {
+      URI largeFileDstUri = combinePath(copyTestUri, largeTmpFile.getName() + "_" + i);
+      long deleteStart = System.currentTimeMillis();
+      _pinotFS.delete(largeFileDstUri, true);
+      LOGGER.info("{}: took {} ms to delete, fileSize: {} MB.", i, System.currentTimeMillis() - deleteStart,
+          _dataSizeInMBsForCopyTest);
+    }
+  }
+
+  private File createFileWithSize(String fileName, long sizeInBytes) throws IOException {
+    File tmpLargeFile = new File(_localTempDir, fileName);
+    tmpLargeFile.createNewFile();
+    RandomAccessFile raf = new RandomAccessFile(tmpLargeFile, "rw");
+    raf.setLength(sizeInBytes);
+    raf.close();
+    return tmpLargeFile;
+  }
+
+  private URI combinePath(URI baseUri, String path) throws URISyntaxException {
+    return new URI(baseUri.getScheme(), baseUri.getHost(), baseUri.getPath() + File.separator + path, null);
+  }
+}
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/filesystem/PinotFSBenchmarkRunner.java b/pinot-tools/src/main/java/org/apache/pinot/tools/filesystem/PinotFSBenchmarkRunner.java
new file mode 100644
index 0000000..ab76d4a
--- /dev/null
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/filesystem/PinotFSBenchmarkRunner.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.tools.filesystem;
+
+import org.apache.pinot.tools.AbstractBaseCommand;
+import org.apache.pinot.tools.Command;
+import org.kohsuke.args4j.Option;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class PinotFSBenchmarkRunner extends AbstractBaseCommand implements Command {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(PinotFSBenchmarkRunner.class);
+
+  @Option(name = "-mode", required = true, metaVar = "<String>", usage = "Test mode. (ALL|LISTFILES|READWRITE|DELETE|RENAME)")
+  private String _mode;
+
+  @Option(name = "-pinotFSConfigFile", required = true, metaVar = "<String>", usage = "Path for PinotFS configuration file")
+  private String _pinotFSConfigFile;
+
+  @Option(name = "-baseDirectoryUri", required = true, metaVar = "<String>", usage = "Temp directory path for running benchmark against. e.g. file:///path/to/test, abfss://host/path...")
+  private String _baseDirectoryUri;
+
+  @Option(name = "-localTempDir", required = false, metaVar = "<String>", usage = "Local temp directory for benchmark.")
+  private String _localTempDir;
+
+  @Option(name = "-numSegmentsForListTest", required = false, metaVar = "<Integer>", usage = "The number of segments to create before running listFiles test.")
+  private Integer _numSegmentsForListTest;
+
+  @Option(name = "-dataSizeInMBsForCopyTest", required = false, metaVar = "<Integer>", usage = "Data size in MB for copy test. (e.g. 1024 = 1GB)")
+  private Integer _dataSizeInMBsForCopyTest;
+
+  @Option(name = "-numOps", required = false, metaVar = "<Integer>", usage = "The number of trials of operations when running a benchmark.")
+  private Integer _numOps;
+
+  @Option(name = "-help", required = false, help = true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.")
+  private boolean _help = false;
+
+  @Override
+  public boolean execute() throws Exception {
+    try {
+      LOGGER.info("Run filesystem benchmark...");
+      PinotFSBenchmarkDriver driver = new PinotFSBenchmarkDriver(_mode, _pinotFSConfigFile, _baseDirectoryUri, _localTempDir,
+          _numSegmentsForListTest, _dataSizeInMBsForCopyTest, _numOps);
+      driver.run();
+    } catch (Exception e) {
+      LOGGER.error("Error while running benchmark: ", e);
+    }
+    return true;
+  }
+
+
+  @Override
+  public String description() {
+    return "Run Filesystem benchmark";
+  }
+
+  @Override
+  public boolean getHelp() {
+    return _help;
+  }
+}
diff --git a/pinot-tools/src/main/resources/conf/sample_filesystem_benchmark.conf b/pinot-tools/src/main/resources/conf/sample_filesystem_benchmark.conf
new file mode 100644
index 0000000..3583707
--- /dev/null
+++ b/pinot-tools/src/main/resources/conf/sample_filesystem_benchmark.conf
@@ -0,0 +1,5 @@
+# Azure Datalake Gen2
+class.abfss=org.apache.pinot.plugin.filesystem.AzureGen2PinotFS
+abfss.accountName=<account_name>
+abfss.accessKey=<access_key>
+abfss.fileSystemName=<file_system_name>
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org