You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/01/08 19:42:13 UTC
[2/3] hive git commit: HIVE-16826: Improvements for
SeparatedValuesOutputFormat (BELUGA BEHR, reviewed by Aihua Xu)
HIVE-16826: Improvements for SeparatedValuesOutputFormat (BELUGA BEHR, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ea2f288
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ea2f288
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ea2f288
Branch: refs/heads/master
Commit: 0ea2f288a01768a4f431f7a5634b050bc8c3e47a
Parents: aa45b02
Author: Aihua Xu <ai...@apache.org>
Authored: Mon Jan 8 10:46:10 2018 -0800
Committer: Aihua Xu <ai...@apache.org>
Committed: Mon Jan 8 10:46:10 2018 -0800
----------------------------------------------------------------------
.../beeline/SeparatedValuesOutputFormat.java | 141 +++++++++----------
1 file changed, 70 insertions(+), 71 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0ea2f288/beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java b/beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java
index 9c24a20..172b5b5 100644
--- a/beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java
+++ b/beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java
@@ -22,112 +22,111 @@
*/
package org.apache.hive.beeline;
-import java.io.IOException;
-import java.io.StringWriter;
-
-import org.apache.hadoop.io.IOUtils;
+import org.apache.commons.io.output.StringBuilderWriter;
+import org.apache.commons.lang.BooleanUtils;
+import org.apache.commons.lang.StringUtils;
import org.supercsv.encoder.CsvEncoder;
+import org.supercsv.encoder.DefaultCsvEncoder;
import org.supercsv.encoder.SelectiveCsvEncoder;
import org.supercsv.io.CsvListWriter;
import org.supercsv.prefs.CsvPreference;
/**
- * OutputFormat for values separated by a delimiter.
+ * OutputFormat for values separated by a configurable delimiter.
*/
class SeparatedValuesOutputFormat implements OutputFormat {
+
+ public final static String DSV_OPT_OUTPUT_FORMAT = "dsv";
public final static String DISABLE_QUOTING_FOR_SV = "disable.quoting.for.sv";
+ private final static char DEFAULT_QUOTE_CHAR = '"';
private final BeeLine beeLine;
- private CsvPreference quotedCsvPreference;
- private CsvPreference unquotedCsvPreference;
+ private final StringBuilderWriter buffer;
+ private final char defaultSeparator;
SeparatedValuesOutputFormat(BeeLine beeLine, char separator) {
this.beeLine = beeLine;
- unquotedCsvPreference = getUnquotedCsvPreference(separator);
- quotedCsvPreference = new CsvPreference.Builder('"', separator, "").build();
+ this.defaultSeparator = separator;
+ this.buffer = new StringBuilderWriter();
}
- private static CsvPreference getUnquotedCsvPreference(char delimiter) {
- CsvEncoder noEncoder = new SelectiveCsvEncoder();
- return new CsvPreference.Builder('\0', delimiter, "").useEncoder(noEncoder).build();
- }
+ private CsvPreference getCsvPreference() {
+ char separator = this.defaultSeparator;
+ char quoteChar = DEFAULT_QUOTE_CHAR;
+ CsvEncoder encoder;
- private void updateCsvPreference() {
- if (beeLine.getOpts().getOutputFormat().equals("dsv")) {
- // check whether delimiter changed by user
- char curDel = (char) getCsvPreference().getDelimiterChar();
- char newDel = beeLine.getOpts().getDelimiterForDSV();
- // if delimiter changed, rebuild the csv preference
- if (newDel != curDel) {
- // "" is passed as the end of line symbol in following function, as
- // beeline itself adds newline
- if (isQuotingDisabled()) {
- unquotedCsvPreference = getUnquotedCsvPreference(newDel);
- } else {
- quotedCsvPreference = new CsvPreference.Builder('"', newDel, "").build();
- }
- }
+ if (DSV_OPT_OUTPUT_FORMAT.equals(beeLine.getOpts().getOutputFormat())) {
+ separator = beeLine.getOpts().getDelimiterForDSV();
}
+
+ if (isQuotingDisabled()) {
+ quoteChar = '\0';
+ encoder = new SelectiveCsvEncoder();
+ } else {
+ encoder = new DefaultCsvEncoder();
+ }
+
+ return new CsvPreference.Builder(quoteChar, separator, StringUtils.EMPTY).useEncoder(encoder).build();
}
@Override
public int print(Rows rows) {
- updateCsvPreference();
-
+ CsvPreference csvPreference = getCsvPreference();
+ CsvListWriter writer = new CsvListWriter(this.buffer, csvPreference);
int count = 0;
+
+ Rows.Row labels = (Rows.Row) rows.next();
+ if (beeLine.getOpts().getShowHeader()) {
+ fillBuffer(writer, labels);
+ String line = getLine(this.buffer);
+ beeLine.output(line);
+ }
+
while (rows.hasNext()) {
- if (count == 0 && !beeLine.getOpts().getShowHeader()) {
- rows.next();
- count++;
- continue;
- }
- printRow((Rows.Row) rows.next());
+ fillBuffer(writer, (Rows.Row) rows.next());
+ String line = getLine(this.buffer);
+ beeLine.output(line);
count++;
}
- return count - 1; // sans header row
+
+ return count;
}
- private String getFormattedStr(String[] vals) {
- StringWriter strWriter = new StringWriter();
- CsvListWriter writer = new CsvListWriter(strWriter, getCsvPreference());
- if (vals.length > 0) {
- try {
- writer.write(vals);
- } catch (IOException e) {
- beeLine.error(e);
- } finally {
- IOUtils.closeStream(writer);
- }
+ /**
+ * Fills the class's internal buffer with a DSV line.
+ */
+ private void fillBuffer(CsvListWriter writer, Rows.Row row) {
+ String[] vals = row.values;
+
+ try {
+ writer.write(vals);
+ writer.flush();
+ } catch (Exception e) {
+ beeLine.error(e);
}
- return strWriter.toString();
}
- private void printRow(Rows.Row row) {
- String[] vals = row.values;
- String formattedStr = getFormattedStr(vals);
- beeLine.output(formattedStr);
+ private String getLine(StringBuilderWriter buf) {
+ String line = buf.toString();
+ buf.getBuilder().setLength(0);
+ return line;
}
+ /**
+ * By default, double quoting is disabled for separated values.
+ */
private boolean isQuotingDisabled() {
+ Boolean quotingDisabled = Boolean.TRUE;
String quotingDisabledStr = System.getProperty(SeparatedValuesOutputFormat.DISABLE_QUOTING_FOR_SV);
- if (quotingDisabledStr == null || quotingDisabledStr.isEmpty()) {
- // default is disabling the double quoting for separated value
- return true;
- }
- String parsedOptionStr = quotingDisabledStr.toLowerCase();
- if (parsedOptionStr.equals("false") || parsedOptionStr.equals("true")) {
- return Boolean.parseBoolean(parsedOptionStr);
- } else {
- beeLine.error("System Property disable.quoting.for.sv is now " + parsedOptionStr
- + " which only accepts boolean value");
- return true;
- }
- }
- private CsvPreference getCsvPreference() {
- if (isQuotingDisabled()) {
- return unquotedCsvPreference;
- } else {
- return quotedCsvPreference;
+ if (StringUtils.isNotBlank(quotingDisabledStr)) {
+ quotingDisabled = BooleanUtils.toBooleanObject(quotingDisabledStr);
+
+ if (quotingDisabled == null) {
+ beeLine.error("System Property " + SeparatedValuesOutputFormat.DISABLE_QUOTING_FOR_SV + " is now "
+ + quotingDisabledStr + " which only accepts boolean values");
+ quotingDisabled = Boolean.TRUE;
+ }
}
+ return quotingDisabled;
}
}