You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by mm...@apache.org on 2021/02/11 12:45:25 UTC
[accumulo] 01/01: Merge remote-tracking branch 'upstream/1.10' into
main
This is an automated email from the ASF dual-hosted git repository.
mmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 2ff2618b1c49d01a3ded44173b522c5670cfdeac
Merge: c7b1d85 fbdbda9
Author: Mike Miller <mm...@apache.org>
AuthorDate: Thu Feb 11 07:42:33 2021 -0500
Merge remote-tracking branch 'upstream/1.10' into main
core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java | 4 ++--
.../main/java/org/apache/accumulo/core/client/rfile/RFileWriter.java | 3 ++-
.../org/apache/accumulo/core/client/rfile/RFileWriterBuilder.java | 4 ++++
3 files changed, 8 insertions(+), 3 deletions(-)
diff --cc core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java
index fbde1a8,7ea9b81..25c5a52
--- a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFile.java
@@@ -209,163 -196,17 +209,163 @@@ public class RFile
/**
* This is an intermediate interface in a larger builder pattern. Supports setting the required
+ * input sources for reading summary data from an RFile.
+ *
+ * @since 2.0.0
+ */
+ public interface SummaryInputArguments {
+ /**
+ * Specify RFiles to read from. When multiple inputs are specified the summary data will be
+ * merged.
+ *
+ * @param inputs
+ * one or more RFiles to read.
+ * @return this
+ */
+ SummaryOptions from(RFileSource... inputs);
+
+ /**
+ * Specify RFiles to read from. When multiple are specified the summary data will be merged.
+ *
+ * @param files
+ * one or more RFiles to read.
+ * @return this
+ */
+ SummaryFSOptions from(String... files);
+ }
+
+ /**
+ * This is an intermediate interface in a larger builder pattern. Enables optionally setting a
+ * FileSystem to read RFile summary data from.
+ *
+ * @since 2.0.0
+ */
+ public interface SummaryFSOptions extends SummaryOptions {
+ /**
+ * Optionally provide a FileSystem to open RFiles. If not specified, the FileSystem will be
+ * constructed using configuration on the classpath.
+ *
+ * @param fs
+ * use this FileSystem to open files.
+ * @return this
+ */
+ SummaryOptions withFileSystem(FileSystem fs);
+ }
+
+ /**
+ * This is an intermediate interface in a larger builder pattern. Allows setting options for
+ * retrieving summary data.
+ *
+ * @since 2.0.0
+ */
+ public interface SummaryOptions {
+ /**
+ * Retrieve summaries with provided table properties. Properties for a table can be obtained by
+ * calling {@link TableOperations#getProperties(String)}. Any property that impacts file
+ * behavior regardless of whether it has the {@link Property#TABLE_PREFIX} may be accepted and
+ * used. For example, cache and crypto properties could be passed here.
+ *
+ * @param props
+ * iterable over Accumulo table key value properties.
+ * @return this
+ */
+ SummaryOptions withTableProperties(Iterable<Entry<String,String>> props);
+
+ /**
+ * @see #withTableProperties(Iterable) Any property that impacts file behavior regardless of
+ * whether it has the {@link Property#TABLE_PREFIX} may be accepted and used. For example,
+ * cache and crypto properties could be passed here.
+ * @param props
+ * a map instead of an Iterable
+ * @return this
+ */
+ SummaryOptions withTableProperties(Map<String,String> props);
+
+ /**
+ * This method allows retrieving a subset of summary data from a file. If a file has lots of
+ * separate summaries, reading a subset may be faster.
+ *
+ * @param summarySelector
+ * Only read summary data that was generated with configuration that this predicate
+ * matches.
+ * @return this
+ */
+ SummaryOptions selectSummaries(Predicate<SummarizerConfiguration> summarySelector);
+
+ /**
+ * Summary data may possibly be stored at a more granular level than the entire file. However
+ * there is no guarantee of this. If the data was stored at a more granular level, then this
+ * will get a subset of the summary data. The subset will very likely be an inaccurate
+ * approximation.
+ *
+ * @param startRow
+ * A non-null start row. The startRow is used exclusively.
+ * @return this
+ *
+ * @see FileStatistics#getExtra()
+ */
+ SummaryOptions startRow(Text startRow);
+
+ /**
+ * @param startRow
+ * UTF-8 encodes startRow. The startRow is used exclusively.
+ * @return this
+ * @see #startRow(Text)
+ */
+ SummaryOptions startRow(CharSequence startRow);
+
+ /**
+ * Summary data may possibly be stored at a more granular level than the entire file. However
+ * there is no guarantee of this. If the data was stored at a more granular level, then this
+ * will get a subset of the summary data. The subset will very likely be an inaccurate
+ * approximation.
+ *
+ * @param endRow
+ * A non-null end row. The end row is used inclusively.
+ * @return this
+ *
+ * @see FileStatistics#getExtra()
+ */
+ SummaryOptions endRow(Text endRow);
+
+ /**
+ * @param endRow
+ * UTF-8 encodes endRow. The end row is used inclusively.
+ * @return this
+ * @see #endRow(Text)
+ */
+ SummaryOptions endRow(CharSequence endRow);
+
+ /**
+ * Reads summary data from file.
+ *
+ * @return The summary data in the file that satisfied the selection criteria.
+ */
+ Collection<Summary> read() throws IOException;
+ }
+
+ /**
+ * Entry point for reading summary data from RFiles.
+ *
+ * @since 2.0.0
+ */
+ public static SummaryInputArguments summaries() {
+ return new RFileSummariesRetriever();
+ }
+
+ /**
+ * This is an intermediate interface in a larger builder pattern. Supports setting the required
- * output sink to write a RFile to.
+ * output sink to write a RFile to. The filename parameter requires the ".rf" extension.
*
* @since 1.8.0
*/
- public static interface OutputArguments {
+ public interface OutputArguments {
/**
* @param filename
- * name of file to write RFile data
+ * name of file to write RFile data, ending with the ".rf" extension
* @return this
*/
- public WriterFSOptions to(String filename);
+ WriterFSOptions to(String filename);
/**
* @param out