You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2020/02/22 01:25:57 UTC

[GitHub] [druid] jon-wei commented on a change in pull request #9360: Create splits of multiple files for parallel indexing

jon-wei commented on a change in pull request #9360: Create splits of multiple files for parallel indexing
URL: https://github.com/apache/druid/pull/9360#discussion_r382869968
 
 

 ##########
 File path: core/src/main/java/org/apache/druid/data/input/impl/LocalInputSource.java
 ##########
 @@ -21,40 +21,64 @@
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOCase;
+import org.apache.commons.io.filefilter.AndFileFilter;
+import org.apache.commons.io.filefilter.IOFileFilter;
+import org.apache.commons.io.filefilter.NameFileFilter;
+import org.apache.commons.io.filefilter.NotFileFilter;
 import org.apache.commons.io.filefilter.TrueFileFilter;
 import org.apache.commons.io.filefilter.WildcardFileFilter;
 import org.apache.druid.data.input.AbstractInputSource;
+import org.apache.druid.data.input.InputFileAttribute;
 import org.apache.druid.data.input.InputFormat;
 import org.apache.druid.data.input.InputRowSchema;
 import org.apache.druid.data.input.InputSourceReader;
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.SplitHintSpec;
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.utils.CollectionUtils;
+import org.apache.druid.utils.Streams;
 
 import javax.annotation.Nullable;
 import java.io.File;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Objects;
-import java.util.Spliterator;
-import java.util.Spliterators;
+import java.util.Set;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
 
-public class LocalInputSource extends AbstractInputSource implements SplittableInputSource<File>
+public class LocalInputSource extends AbstractInputSource implements SplittableInputSource<List<File>>
 {
   private final File baseDir;
   private final String filter;
+  private final Set<File> files;
 
   @JsonCreator
   public LocalInputSource(
       @JsonProperty("baseDir") File baseDir,
-      @JsonProperty("filter") String filter
+      @JsonProperty("filter") String filter,
+      @JsonProperty("files") Set<File> files
 
 Review comment:
  Can you add the new `files` property to the LocalInputSource property docs?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org