Posted to commits@druid.apache.org by fj...@apache.org on 2020/02/01 07:51:04 UTC

[druid] branch master updated: Allow HdfsDataSegmentKiller to be instantiated without storageDirectory set. (#9296)

This is an automated email from the ASF dual-hosted git repository.

fjy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 660f883  Allow HdfsDataSegmentKiller to be instantiated without storageDirectory set. (#9296)
660f883 is described below

commit 660f8838f4b3c313f8608ed5ccf35ae25920ab9e
Author: Gian Merlino <gi...@gmail.com>
AuthorDate: Fri Jan 31 23:50:48 2020 -0800

    Allow HdfsDataSegmentKiller to be instantiated without storageDirectory set. (#9296)
    
    This is important because if a user has the hdfs extension loaded, but is not
    using hdfs deep storage, then they will not have storageDirectory set and will
    get the following error:
    
      IllegalArgumentException: Can not create a Path from an empty string
        at io.druid.storage.hdfs.HdfsDataSegmentKiller.<init>(HdfsDataSegmentKiller.java:47)
    
    This scenario is realistic: it comes up when someone has the hdfs extension
    loaded because they want to use HdfsInputSource, but don't want to use hdfs for
    deep storage.
    
    Fixes #4694.
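
The pre-patch failure is reproducible with Hadoop's Path class alone. Below is a
minimal sketch (the wrapper class is illustrative, not part of the patch) of what
the old constructor effectively did when storageDirectory was unset; an unset
value reaches the constructor as an empty string, which is why the patch adds a
Strings.isNullOrEmpty check:

    import org.apache.hadoop.fs.Path;

    public class EmptyStorageDirectoryRepro
    {
      public static void main(String[] args)
      {
        // The old constructor ran the equivalent of
        //   new Path(pusherConfig.getStorageDirectory())
        // unconditionally. Hadoop's Path rejects empty strings, so with
        // storageDirectory unset this line throws:
        //   IllegalArgumentException: Can not create a Path from an empty string
        new Path("");
      }
    }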
---
 .../druid/storage/hdfs/HdfsDataSegmentKiller.java  | 15 ++++-
 .../storage/hdfs/HdfsDataSegmentKillerTest.java    | 74 +++++++++++++++++++++-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKiller.java b/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKiller.java
index 08cd97f..ba150be 100644
--- a/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKiller.java
+++ b/extensions-core/hdfs-storage/src/main/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKiller.java
@@ -20,9 +20,11 @@
 package org.apache.druid.storage.hdfs;
 
 import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
 import com.google.inject.Inject;
 import org.apache.commons.lang.StringUtils;
 import org.apache.druid.guice.Hdfs;
+import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.emitter.EmittingLogger;
 import org.apache.druid.segment.loading.DataSegmentKiller;
 import org.apache.druid.segment.loading.SegmentLoadingException;
@@ -31,6 +33,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 
 public class HdfsDataSegmentKiller implements DataSegmentKiller
@@ -41,13 +44,19 @@ public class HdfsDataSegmentKiller implements DataSegmentKiller
 
   private final Configuration config;
 
+  @Nullable
   private final Path storageDirectory;
 
   @Inject
   public HdfsDataSegmentKiller(@Hdfs final Configuration config, final HdfsDataSegmentPusherConfig pusherConfig)
   {
     this.config = config;
-    this.storageDirectory = new Path(pusherConfig.getStorageDirectory());
+
+    if (!Strings.isNullOrEmpty(pusherConfig.getStorageDirectory())) {
+      this.storageDirectory = new Path(pusherConfig.getStorageDirectory());
+    } else {
+      this.storageDirectory = null;
+    }
   }
 
   private static Path getPath(DataSegment segment)
@@ -116,6 +125,10 @@ public class HdfsDataSegmentKiller implements DataSegmentKiller
   @Override
   public void killAll() throws IOException
   {
+    if (storageDirectory == null) {
+      throw new ISE("Cannot delete all segment files since druid.storage.storageDirectory is not set.");
+    }
+
     log.info("Deleting all segment files from hdfs dir [%s].", storageDirectory.toUri().toString());
     final FileSystem fs = storageDirectory.getFileSystem(config);
     fs.delete(storageDirectory, true);
diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKillerTest.java b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKillerTest.java
index dd9e7bc..746ebb5 100644
--- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKillerTest.java
+++ b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/storage/hdfs/HdfsDataSegmentKillerTest.java
@@ -23,21 +23,25 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.segment.loading.SegmentLoadingException;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.partition.NoneShardSpec;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.junit.Assert;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 import java.io.IOException;
 import java.util.UUID;
 
-/**
- */
 public class HdfsDataSegmentKillerTest
 {
+  @Rule
+  public ExpectedException expectedException = ExpectedException.none();
+
   @Test
   public void testKill() throws Exception
   {
@@ -193,9 +197,75 @@ public class HdfsDataSegmentKillerTest
           }
         }
     );
+
+    // Should do nothing.
     killer.kill(getSegmentWithPath(new Path("/xxx/", "index.zip").toString()));
   }
 
+  @Test
+  public void testKillNonZipSegment() throws Exception
+  {
+    Configuration config = new Configuration();
+    HdfsDataSegmentKiller killer = new HdfsDataSegmentKiller(
+        config,
+        new HdfsDataSegmentPusherConfig()
+        {
+          @Override
+          public String getStorageDirectory()
+          {
+            return "/tmp";
+          }
+        }
+    );
+
+    expectedException.expect(SegmentLoadingException.class);
+    expectedException.expectMessage("Unknown file type");
+    killer.kill(getSegmentWithPath(new Path("/xxx/", "index.beep").toString()));
+  }
+
+  @Test
+  public void testNoStorageDirectory() throws Exception
+  {
+    Configuration config = new Configuration();
+    HdfsDataSegmentKiller killer = new HdfsDataSegmentKiller(
+        config,
+        new HdfsDataSegmentPusherConfig()
+        {
+          @Override
+          public String getStorageDirectory()
+          {
+            return "";
+          }
+        }
+    );
+
+    FileSystem fs = FileSystem.get(config);
+    Path dataSourceDir = new Path("/tmp/dataSourceNew");
+
+    Path interval1Dir = new Path(dataSourceDir, "intervalNew");
+    Path version11Dir = new Path(interval1Dir, "v1");
+
+    Assert.assertTrue(fs.mkdirs(version11Dir));
+    fs.createNewFile(new Path(version11Dir, StringUtils.format("%s_index.zip", 3)));
+
+    // 'kill' should work even if storageDirectory is not set.
+    killer.kill(getSegmentWithPath(new Path(version11Dir, "3_index.zip").toString()));
+
+    // Verify the segment no longer exists, but that its datasource directory does.
+    // Then delete its datasource directory.
+    Assert.assertFalse(fs.exists(version11Dir));
+    Assert.assertFalse(fs.exists(interval1Dir));
+    Assert.assertTrue(fs.exists(dataSourceDir));
+    Assert.assertTrue(fs.exists(new Path("/tmp")));
+    Assert.assertTrue(fs.exists(dataSourceDir));
+    Assert.assertTrue(fs.delete(dataSourceDir, false));
+
+    // killAll should *not* work when storageDirectory is not set.
+    expectedException.expect(IllegalStateException.class);
+    expectedException.expectMessage("Cannot delete all segment files since druid.storage.storageDirectory is not set");
+    killer.killAll();
+  }
+
   private void makePartitionDirWithIndex(FileSystem fs, Path path) throws IOException
   {
     Assert.assertTrue(fs.mkdirs(path));

