You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by jk...@apache.org on 2017/03/28 23:37:50 UTC
[1/2] beam git commit: Improves logging of FileBasedSource size
estimates
Repository: beam
Updated Branches:
refs/heads/master c489686e4 -> 99056df36
Improves logging of FileBasedSource size estimates
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/91fb481b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/91fb481b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/91fb481b
Branch: refs/heads/master
Commit: 91fb481b3c0bf217320ad772b2c5a55eb90e1ac5
Parents: c489686
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Mar 28 15:20:05 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Mar 28 16:37:14 2017 -0700
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/FileBasedSource.java | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/91fb481b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java
index 5659d5b..35629d8 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSource.java
@@ -222,6 +222,11 @@ public abstract class FileBasedSource<T> extends OffsetBasedSource<T> {
fileOrPatternSpec,
System.currentTimeMillis() - startTime);
}
+ LOG.info(
+ "Filepattern {} matched {} files with total size {}",
+ fileOrPatternSpec.get(),
+ inputs.size(),
+ totalSize);
return totalSize;
} else {
long start = getStartOffset();
@@ -286,8 +291,18 @@ public abstract class FileBasedSource<T> extends OffsetBasedSource<T> {
Collections.shuffle(selectedFiles);
selectedFiles = selectedFiles.subList(0, sampleSize);
- return files.size() * getExactTotalSizeOfFiles(selectedFiles, ioChannelFactory)
- / selectedFiles.size();
+ long exactTotalSampleSize = getExactTotalSizeOfFiles(selectedFiles, ioChannelFactory);
+ double avgSize = 1.0 * exactTotalSampleSize / selectedFiles.size();
+ long totalSize = Math.round(files.size() * avgSize);
+ LOG.info(
+ "Sampling {} files gave {} total bytes ({} average per file), "
+ + "inferring total size of {} files to be {}",
+ selectedFiles.size(),
+ exactTotalSampleSize,
+ avgSize,
+ files.size(),
+ totalSize);
+ return totalSize;
}
@Override
[2/2] beam git commit: This closes #2351
Posted by jk...@apache.org.
This closes #2351
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/99056df3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/99056df3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/99056df3
Branch: refs/heads/master
Commit: 99056df36efccba26f4853912ffca41e12147949
Parents: c489686 91fb481
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Mar 28 16:37:29 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Mar 28 16:37:29 2017 -0700
----------------------------------------------------------------------
.../org/apache/beam/sdk/io/FileBasedSource.java | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------