You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by jw...@apache.org on 2012/12/07 20:51:05 UTC
[3/3] git commit: CRUNCH-124: Document From/At/To factory methods and
add additional convenience methods.
CRUNCH-124: Document From/At/To factory methods and add additional convenience methods.
Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/a988ce2d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/a988ce2d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/a988ce2d
Branch: refs/heads/master
Commit: a988ce2d261f4a3938e7bb71416724d5c6fdc711
Parents: 374bf3d
Author: Josh Wills <jw...@apache.org>
Authored: Tue Dec 4 20:43:06 2012 -0800
Committer: Josh Wills <jw...@apache.org>
Committed: Tue Dec 4 20:43:06 2012 -0800
----------------------------------------------------------------------
crunch/src/main/java/org/apache/crunch/io/At.java | 158 +++++++++++-
.../src/main/java/org/apache/crunch/io/From.java | 213 ++++++++++++++-
crunch/src/main/java/org/apache/crunch/io/To.java | 61 ++++-
3 files changed, 424 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a988ce2d/crunch/src/main/java/org/apache/crunch/io/At.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/At.java b/crunch/src/main/java/org/apache/crunch/io/At.java
index 951b740..702e3be 100644
--- a/crunch/src/main/java/org/apache/crunch/io/At.java
+++ b/crunch/src/main/java/org/apache/crunch/io/At.java
@@ -17,6 +17,7 @@
*/
package org.apache.crunch.io;
+import org.apache.avro.specific.SpecificRecord;
import org.apache.crunch.SourceTarget;
import org.apache.crunch.io.avro.AvroFileSourceTarget;
import org.apache.crunch.io.seq.SeqFileSourceTarget;
@@ -25,51 +26,204 @@ import org.apache.crunch.io.text.TextFileSourceTarget;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.PTypeFamily;
import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
/**
- * Static factory methods for creating various {@link SourceTarget} types.
- *
+ * Static factory methods for creating common {@link SourceTarget} types, which may be treated as both a {@code Source}
+ * and a {@code Target}.
*/
public class At {
+
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+ * @return A new {@code AvroFileSourceTarget<T>} instance
+ */
+ public static <T extends SpecificRecord> AvroFileSourceTarget<T> avroFile(String pathName, Class<T> avroClass) {
+ return avroFile(new Path(pathName), avroClass);
+ }
+
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+ * @return A new {@code AvroFileSourceTarget<T>} instance
+ */
+ public static <T extends SpecificRecord> AvroFileSourceTarget<T> avroFile(Path path, Class<T> avroClass) {
+ return avroFile(path, Avros.specifics(avroClass));
+ }
+
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param avroType The {@code AvroType} for the Avro records
+ * @return A new {@code AvroFileSourceTarget<T>} instance
+ */
public static <T> AvroFileSourceTarget<T> avroFile(String pathName, AvroType<T> avroType) {
return avroFile(new Path(pathName), avroType);
}
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the Avro file(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @param avroType The {@code AvroType} for the Avro records
+ * @return A new {@code AvroFileSourceTarget<T>} instance
+ */
public static <T> AvroFileSourceTarget<T> avroFile(Path path, AvroType<T> avroType) {
return new AvroFileSourceTarget<T>(path, avroType);
}
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given path name
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+ * @return A new {@code SeqFileSourceTarget<T>} instance
+ */
+ public static <T extends Writable> SeqFileSourceTarget<T> sequenceFile(String pathName, Class<T> valueClass) {
+ return sequenceFile(new Path(pathName), valueClass);
+ }
+
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given {@code Path}
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param path The {@code Path} to the data
+ * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+ * @return A new {@code SeqFileSourceTarget<T>} instance
+ */
+ public static <T extends Writable> SeqFileSourceTarget<T> sequenceFile(Path path, Class<T> valueClass) {
+ return sequenceFile(path, Writables.writables(valueClass));
+ }
+
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given path name
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param ptype The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code SeqFileSourceTarget<T>} instance
+ */
public static <T> SeqFileSourceTarget<T> sequenceFile(String pathName, PType<T> ptype) {
return sequenceFile(new Path(pathName), ptype);
}
+ /**
+ * Creates a {@code SourceTarget<T>} instance from the SequenceFile(s) at the given {@code Path}
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param path The {@code Path} to the data
+ * @param ptype The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code SeqFileSourceTarget<T>} instance
+ */
public static <T> SeqFileSourceTarget<T> sequenceFile(Path path, PType<T> ptype) {
return new SeqFileSourceTarget<T>(path, ptype);
}
+ /**
+ * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given path name
+ * from the key-value pairs in the SequenceFile(s).
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param keyClass The {@code Writable} type for the key of the SequenceFile entry
+ * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+ * @return A new {@code SeqFileTableSourceTarget<K, V>} instance
+ */
+ public static <K extends Writable, V extends Writable> SeqFileTableSourceTarget<K, V> sequenceFile(
+ String pathName, Class<K> keyClass, Class<V> valueClass) {
+ return sequenceFile(new Path(pathName), keyClass, valueClass);
+ }
+
+ /**
+ * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given {@code Path}
+ * from the key-value pairs in the SequenceFile(s).
+ *
+ * @param path The {@code Path} to the data
+ * @param keyClass The {@code Writable} type for the key of the SequenceFile entry
+ * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+ * @return A new {@code SeqFileTableSourceTarget<K, V>} instance
+ */
+ public static <K extends Writable, V extends Writable> SeqFileTableSourceTarget<K, V> sequenceFile(
+ Path path, Class<K> keyClass, Class<V> valueClass) {
+ return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass));
+ }
+
+ /**
+ * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given path name
+ * from the key-value pairs in the SequenceFile(s).
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param keyType The {@code PType} for the key of the SequenceFile entry
+ * @param valueType The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code SeqFileTableSourceTarget<K, V>} instance
+ */
public static <K, V> SeqFileTableSourceTarget<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) {
return sequenceFile(new Path(pathName), keyType, valueType);
}
+ /**
+ * Creates a {@code TableSourceTarget<K, V>} instance from the SequenceFile(s) at the given {@code Path}
+ * from the key-value pairs in the SequenceFile(s).
+ *
+ * @param path The {@code Path} to the data
+ * @param keyType The {@code PType} for the key of the SequenceFile entry
+ * @param valueType The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code SeqFileTableSourceTarget<K, V>} instance
+ */
public static <K, V> SeqFileTableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
PTypeFamily ptf = keyType.getFamily();
return new SeqFileTableSourceTarget<K, V>(path, ptf.tableOf(keyType, valueType));
}
+ /**
+ * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @return A new {@code TextFileSourceTarget<String>} instance
+ */
public static TextFileSourceTarget<String> textFile(String pathName) {
return textFile(new Path(pathName));
}
+ /**
+ * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @return A new {@code TextFileSourceTarget<String>} instance
+ */
public static TextFileSourceTarget<String> textFile(Path path) {
return textFile(path, Writables.strings());
}
+ /**
+ * Creates a {@code SourceTarget<T>} instance for the text file(s) at the given path name using
+ * the provided {@code PType<T>} to convert the input text.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param ptype The {@code PType<T>} to use to process the input text
+ * @return A new {@code TextFileSourceTarget<T>} instance
+ */
public static <T> TextFileSourceTarget<T> textFile(String pathName, PType<T> ptype) {
return textFile(new Path(pathName), ptype);
}
+ /**
+ * Creates a {@code SourceTarget<T>} instance for the text file(s) at the given {@code Path} using
+ * the provided {@code PType<T>} to convert the input text.
+ *
+ * @param path The {@code Path} to the data
+ * @param ptype The {@code PType<T>} to use to process the input text
+ * @return A new {@code TextFileSourceTarget<T>} instance
+ */
public static <T> TextFileSourceTarget<T> textFile(Path path, PType<T> ptype) {
return new TextFileSourceTarget<T>(path, ptype);
}
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a988ce2d/crunch/src/main/java/org/apache/crunch/io/From.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/From.java b/crunch/src/main/java/org/apache/crunch/io/From.java
index a59e869..371f934 100644
--- a/crunch/src/main/java/org/apache/crunch/io/From.java
+++ b/crunch/src/main/java/org/apache/crunch/io/From.java
@@ -17,6 +17,7 @@
*/
package org.apache.crunch.io;
+import org.apache.avro.specific.SpecificRecord;
import org.apache.crunch.Source;
import org.apache.crunch.TableSource;
import org.apache.crunch.io.avro.AvroFileSource;
@@ -28,64 +29,268 @@ import org.apache.crunch.types.PTableType;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.PTypeFamily;
import org.apache.crunch.types.avro.AvroType;
+import org.apache.crunch.types.avro.Avros;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/**
- * Static factory methods for creating various {@link Source} types.
- *
+ * Static factory methods for creating common {@link Source} types.
*/
public class From {
- public static <K, V> TableSource<K, V> formattedFile(String path, Class<? extends FileInputFormat> formatClass,
+ /**
+ * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+ * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
+ * and {@code Source} factory methods.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param formatClass The {@code FileInputFormat} implementation
+ * @param keyClass The {@code Writable} to use for the key
+ * @param valueClass The {@code Writable} to use for the value
+ * @return A new {@code TableSource<K, V>} instance
+ */
+ public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
+ String pathName, Class<? extends FileInputFormat> formatClass,
+ Class<K> keyClass, Class<V> valueClass) {
+ return formattedFile(new Path(pathName), formatClass, keyClass, valueClass);
+ }
+
+ /**
+ * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+ * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
+ * and {@code Source} factory methods.
+ *
+ * @param path The {@code Path} to the data
+ * @param formatClass The {@code FileInputFormat} implementation
+ * @param keyClass The {@code Writable} to use for the key
+ * @param valueClass The {@code Writable} to use for the value
+ * @return A new {@code TableSource<K, V>} instance
+ */
+ public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
+ Path path, Class<? extends FileInputFormat> formatClass,
+ Class<K> keyClass, Class<V> valueClass) {
+ return formattedFile(path, formatClass, Writables.writables(keyClass),
+ Writables.writables(valueClass));
+ }
+
+ /**
+ * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+ * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
+ * and {@code Source} factory methods.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param formatClass The {@code FileInputFormat} implementation
+ * @param keyType The {@code PType} to use for the key
+ * @param valueType The {@code PType} to use for the value
+ * @return A new {@code TableSource<K, V>} instance
+ */
+ public static <K, V> TableSource<K, V> formattedFile(String pathName, Class<? extends FileInputFormat> formatClass,
PType<K> keyType, PType<V> valueType) {
- return formattedFile(new Path(path), formatClass, keyType, valueType);
+ return formattedFile(new Path(pathName), formatClass, keyType, valueType);
}
+ /**
+ * Creates a {@code TableSource<K, V>} for reading data from files that have custom
+ * {@code FileInputFormat} implementations not covered by the provided {@code TableSource}
+ * and {@code Source} factory methods.
+ *
+ * @param path The {@code Path} to the data
+ * @param formatClass The {@code FileInputFormat} implementation
+ * @param keyType The {@code PType} to use for the key
+ * @param valueType The {@code PType} to use for the value
+ * @return A new {@code TableSource<K, V>} instance
+ */
public static <K, V> TableSource<K, V> formattedFile(Path path, Class<? extends FileInputFormat> formatClass,
PType<K> keyType, PType<V> valueType) {
PTableType<K, V> tableType = keyType.getFamily().tableOf(keyType, valueType);
return new FileTableSourceImpl<K, V>(path, tableType, formatClass);
}
+ /**
+ * Creates a {@code Source<T>} instance from the Avro file(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+ * @return A new {@code Source<T>} instance
+ */
+ public static <T extends SpecificRecord> Source<T> avroFile(String pathName, Class<T> avroClass) {
+ return avroFile(new Path(pathName), avroClass);
+ }
+
+ /**
+ * Creates a {@code Source<T>} instance from the Avro file(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file
+ * @return A new {@code Source<T>} instance
+ */
+ public static <T extends SpecificRecord> Source<T> avroFile(Path path, Class<T> avroClass) {
+ return avroFile(path, Avros.specifics(avroClass));
+ }
+
+ /**
+ * Creates a {@code Source<T>} instance from the Avro file(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param avroType The {@code AvroType} for the Avro records
+ * @return A new {@code Source<T>} instance
+ */
public static <T> Source<T> avroFile(String pathName, AvroType<T> avroType) {
return avroFile(new Path(pathName), avroType);
}
+ /**
+ * Creates a {@code Source<T>} instance from the Avro file(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @param avroType The {@code AvroType} for the Avro records
+ * @return A new {@code Source<T>} instance
+ */
public static <T> Source<T> avroFile(Path path, AvroType<T> avroType) {
return new AvroFileSource<T>(path, avroType);
}
+ /**
+ * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given path name
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+ * @return A new {@code Source<T>} instance
+ */
+ public static <T extends Writable> Source<T> sequenceFile(String pathName, Class<T> valueClass) {
+ return sequenceFile(new Path(pathName), valueClass);
+ }
+
+ /**
+ * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given {@code Path}
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param path The {@code Path} to the data
+ * @param valueClass The {@code Writable} type for the value of the SequenceFile entry
+ * @return A new {@code Source<T>} instance
+ */
+ public static <T extends Writable> Source<T> sequenceFile(Path path, Class<T> valueClass) {
+ return sequenceFile(path, Writables.writables(valueClass));
+ }
+
+ /**
+ * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given path name
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param ptype The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code Source<T>} instance
+ */
public static <T> Source<T> sequenceFile(String pathName, PType<T> ptype) {
return sequenceFile(new Path(pathName), ptype);
}
+ /**
+ * Creates a {@code Source<T>} instance from the SequenceFile(s) at the given {@code Path}
+ * from the value field of each key-value pair in the SequenceFile(s).
+ *
+ * @param path The {@code Path} to the data
+ * @param ptype The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code Source<T>} instance
+ */
public static <T> Source<T> sequenceFile(Path path, PType<T> ptype) {
return new SeqFileSource<T>(path, ptype);
}
+ /**
+ * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
+ * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
+ * @return A new {@code TableSource<K, V>} instance
+ */
+ public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
+ String pathName, Class<K> keyClass, Class<V> valueClass) {
+ return sequenceFile(new Path(pathName), keyClass, valueClass);
+ }
+
+ /**
+ * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
+ * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
+ * @return A new {@code TableSource<K, V>} instance
+ */
+ public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
+ Path path, Class<K> keyClass, Class<V> valueClass) {
+ return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass));
+ }
+
+ /**
+ * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param keyType The {@code PType} for the key of the SequenceFile entry
+ * @param valueType The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code TableSource<K, V>} instance
+ */
public static <K, V> TableSource<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) {
return sequenceFile(new Path(pathName), keyType, valueType);
}
+ /**
+ * Creates a {@code TableSource<K, V>} instance for the SequenceFile(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @param keyType The {@code PType} for the key of the SequenceFile entry
+ * @param valueType The {@code PType} for the value of the SequenceFile entry
+ * @return A new {@code TableSource<K, V>} instance
+ */
public static <K, V> TableSource<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
PTypeFamily ptf = keyType.getFamily();
return new SeqFileTableSource<K, V>(path, ptf.tableOf(keyType, valueType));
}
+ /**
+ * Creates a {@code Source<String>} instance for the text file(s) at the given path name.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @return A new {@code Source<String>} instance
+ */
public static Source<String> textFile(String pathName) {
return textFile(new Path(pathName));
}
+ /**
+ * Creates a {@code Source<String>} instance for the text file(s) at the given {@code Path}.
+ *
+ * @param path The {@code Path} to the data
+ * @return A new {@code Source<String>} instance
+ */
public static Source<String> textFile(Path path) {
return textFile(path, Writables.strings());
}
+ /**
+ * Creates a {@code Source<T>} instance for the text file(s) at the given path name using
+ * the provided {@code PType<T>} to convert the input text.
+ *
+ * @param pathName The name of the path to the data on the filesystem
+ * @param ptype The {@code PType<T>} to use to process the input text
+ * @return A new {@code Source<T>} instance
+ */
public static <T> Source<T> textFile(String pathName, PType<T> ptype) {
return textFile(new Path(pathName), ptype);
}
+ /**
+ * Creates a {@code Source<T>} instance for the text file(s) at the given {@code Path} using
+ * the provided {@code PType<T>} to convert the input text.
+ *
+ * @param path The {@code Path} to the data
+ * @param ptype The {@code PType<T>} to use to process the input text
+ * @return A new {@code Source<T>} instance
+ */
public static <T> Source<T> textFile(Path path, PType<T> ptype) {
return new TextFileSource<T>(path, ptype);
}
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a988ce2d/crunch/src/main/java/org/apache/crunch/io/To.java
----------------------------------------------------------------------
diff --git a/crunch/src/main/java/org/apache/crunch/io/To.java b/crunch/src/main/java/org/apache/crunch/io/To.java
index da92727..d7af01b 100644
--- a/crunch/src/main/java/org/apache/crunch/io/To.java
+++ b/crunch/src/main/java/org/apache/crunch/io/To.java
@@ -26,39 +26,96 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
- * Static factory methods for creating various {@link Target} types.
- *
+ * Static factory methods for creating common {@link Target} types.
*/
public class To {
+ /**
+ * Creates a {@code Target} at the given path name that writes data to
+ * a custom {@code FileOutputFormat}.
+ *
+ * @param pathName The name of the path to write the data to on the filesystem
+ * @param formatClass The {@code FileOutputFormat} to write the data to
+ * @return A new {@code Target} instance
+ */
public static Target formattedFile(String pathName, Class<? extends FileOutputFormat> formatClass) {
return formattedFile(new Path(pathName), formatClass);
}
+ /**
+ * Creates a {@code Target} at the given {@code Path} that writes data to
+ * a custom {@code FileOutputFormat}.
+ *
+ * @param path The {@code Path} to write the data to
+ * @param formatClass The {@code FileOutputFormat} to write the data to
+ * @return A new {@code Target} instance
+ */
public static Target formattedFile(Path path, Class<? extends FileOutputFormat> formatClass) {
return new FileTargetImpl(path, formatClass, new SequentialFileNamingScheme());
}
+ /**
+ * Creates a {@code Target} at the given path name that writes data to
+ * Avro files. The {@code PType} for the written data must be for Avro records.
+ *
+ * @param pathName The name of the path to write the data to on the filesystem
+ * @return A new {@code Target} instance
+ */
public static Target avroFile(String pathName) {
return avroFile(new Path(pathName));
}
+ /**
+ * Creates a {@code Target} at the given {@code Path} that writes data to
+ * Avro files. The {@code PType} for the written data must be for Avro records.
+ *
+ * @param path The {@code Path} to write the data to
+ * @return A new {@code Target} instance
+ */
public static Target avroFile(Path path) {
return new AvroFileTarget(path);
}
+ /**
+ * Creates a {@code Target} at the given path name that writes data to
+ * SequenceFiles.
+ *
+ * @param pathName The name of the path to write the data to on the filesystem
+ * @return A new {@code Target} instance
+ */
public static Target sequenceFile(String pathName) {
return sequenceFile(new Path(pathName));
}
+ /**
+ * Creates a {@code Target} at the given {@code Path} that writes data to
+ * SequenceFiles.
+ *
+ * @param path The {@code Path} to write the data to
+ * @return A new {@code Target} instance
+ */
public static Target sequenceFile(Path path) {
return new SeqFileTarget(path);
}
+ /**
+ * Creates a {@code Target} at the given path name that writes data to
+ * text files.
+ *
+ * @param pathName The name of the path to write the data to on the filesystem
+ * @return A new {@code Target} instance
+ */
public static Target textFile(String pathName) {
return textFile(new Path(pathName));
}
+ /**
+ * Creates a {@code Target} at the given {@code Path} that writes data to
+ * text files.
+ *
+ * @param path The {@code Path} to write the data to
+ * @return A new {@code Target} instance
+ */
public static Target textFile(Path path) {
return new TextFileTarget(path);
}