You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2020/05/24 20:08:45 UTC
[commons-csv] branch master updated: Sort methods in AB order.
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push:
new 21f4f58 Sort methods in AB order.
21f4f58 is described below
commit 21f4f584ba1e7ebaef08dc797d9f102a896dd715
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sun May 24 16:08:41 2020 -0400
Sort methods in AB order.
---
.../java/org/apache/commons/csv/CSVParser.java | 44 +--
.../java/org/apache/commons/csv/CSVRecord.java | 22 +-
.../apache/commons/csv/ExtendedBufferedReader.java | 128 ++++-----
src/main/java/org/apache/commons/csv/Lexer.java | 316 ++++++++++-----------
src/main/java/org/apache/commons/csv/Token.java | 6 +-
5 files changed, 258 insertions(+), 258 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 8345a15..bf6eb6d 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -185,6 +185,26 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
}
/**
+ * Header information based on name and position.
+ */
+ private static final class Headers {
+ /**
+ * Header column positions (0-based)
+ */
+ final Map<String, Integer> headerMap;
+
+ /**
+ * Header names in column order
+ */
+ final List<String> headerNames;
+
+ Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
+ this.headerMap = headerMap;
+ this.headerNames = headerNames;
+ }
+ }
+
+ /**
* Creates a parser for the given {@link File}.
*
* @param file
@@ -281,6 +301,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(reader, format);
}
+ // the following objects are shared to reduce garbage
+
/**
* Creates a parser for the given {@link String}.
*
@@ -301,8 +323,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(new StringReader(string), format);
}
- // the following objects are shared to reduce garbage
-
/**
* Creates and returns a parser for the given URL, which the caller MUST close.
*
@@ -449,26 +469,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
}
/**
- * Header information based on name and position.
- */
- private static final class Headers {
- /**
- * Header column positions (0-based)
- */
- final Map<String, Integer> headerMap;
-
- /**
- * Header names in column order
- */
- final List<String> headerNames;
-
- Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
- this.headerMap = headerMap;
- this.headerNames = headerNames;
- }
- }
-
- /**
* Creates the name to index mapping if the format defines a header.
*
* @return null if the format has no header.
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index efd36bb..5181bc9 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -229,17 +229,6 @@ public final class CSVRecord implements Serializable, Iterable<String> {
}
/**
- * Checks whether a given columns is mapped and has a value.
- *
- * @param name
- * the name of the column to be retrieved.
- * @return whether a given columns is mapped and has a value
- */
- public boolean isSet(final String name) {
- return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
- }
-
- /**
* Checks whether a column with given index has a value.
*
* @param index
@@ -251,6 +240,17 @@ public final class CSVRecord implements Serializable, Iterable<String> {
}
/**
+ * Checks whether a given column is mapped and has a value.
+ *
+ * @param name
+ * the name of the column to be retrieved.
+ * @return whether a given column is mapped and has a value
+ */
+ public boolean isSet(final String name) {
+ return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
+ }
+
+ /**
* Returns an iterator over the values of this record.
*
* @return an iterator over the values of this record.
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 23a4042..b9ca79d 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -53,15 +53,31 @@ final class ExtendedBufferedReader extends BufferedReader {
super(reader);
}
+ /**
+ * Closes the stream.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
@Override
- public int read() throws IOException {
- final int current = super.read();
- if (current == CR || current == LF && lastChar != CR) {
- eolCounter++;
+ public void close() throws IOException {
+ // Set ivars before calling super close() in case close() throws an IOException.
+ closed = true;
+ lastChar = END_OF_STREAM;
+ super.close();
+ }
+
+ /**
+ * Returns the current line number
+ *
+ * @return the current line number
+ */
+ long getCurrentLineNumber() {
+ // Check if we are at EOL or EOF or just starting
+ if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
+ return eolCounter; // counter is accurate
}
- lastChar = current;
- this.position++;
- return lastChar;
+ return eolCounter + 1; // Allow for counter being incremented only at EOL
}
/**
@@ -76,6 +92,47 @@ final class ExtendedBufferedReader extends BufferedReader {
return lastChar;
}
+ /**
+ * Gets the character position in the reader.
+ *
+ * @return the current position in the reader (counting characters, not bytes since this is a Reader)
+ */
+ long getPosition() {
+ return this.position;
+ }
+
+ public boolean isClosed() {
+ return closed;
+ }
+
+ /**
+ * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
+ * still return this value. Does not affect line number or last character.
+ *
+ * @return the next character
+ *
+ * @throws IOException
+ * if there is an error in reading
+ */
+ int lookAhead() throws IOException {
+ super.mark(1);
+ final int c = super.read();
+ super.reset();
+
+ return c;
+ }
+
+ @Override
+ public int read() throws IOException {
+ final int current = super.read();
+ if (current == CR || current == LF && lastChar != CR) {
+ eolCounter++;
+ }
+ lastChar = current;
+ this.position++;
+ return lastChar;
+ }
+
@Override
public int read(final char[] buf, final int offset, final int length) throws IOException {
if (length == 0) {
@@ -131,61 +188,4 @@ final class ExtendedBufferedReader extends BufferedReader {
return line;
}
- /**
- * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
- * still return this value. Does not affect line number or last character.
- *
- * @return the next character
- *
- * @throws IOException
- * if there is an error in reading
- */
- int lookAhead() throws IOException {
- super.mark(1);
- final int c = super.read();
- super.reset();
-
- return c;
- }
-
- /**
- * Returns the current line number
- *
- * @return the current line number
- */
- long getCurrentLineNumber() {
- // Check if we are at EOL or EOF or just starting
- if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
- return eolCounter; // counter is accurate
- }
- return eolCounter + 1; // Allow for counter being incremented only at EOL
- }
-
- /**
- * Gets the character position in the reader.
- *
- * @return the current position in the reader (counting characters, not bytes since this is a Reader)
- */
- long getPosition() {
- return this.position;
- }
-
- public boolean isClosed() {
- return closed;
- }
-
- /**
- * Closes the stream.
- *
- * @throws IOException
- * If an I/O error occurs
- */
- @Override
- public void close() throws IOException {
- // Set ivars before calling super close() in case close() throws an IOException.
- closed = true;
- lastChar = END_OF_STREAM;
- super.close();
- }
-
}
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index b29fc14..2795ca2 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -60,10 +60,6 @@ final class Lexer implements Closeable {
private final ExtendedBufferedReader reader;
private String firstEol;
- String getFirstEol(){
- return firstEol;
- }
-
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
this.delimiter = format.getDelimiter();
@@ -75,6 +71,94 @@ final class Lexer implements Closeable {
}
/**
+ * Closes resources.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ /**
+ * Returns the current character position
+ *
+ * @return the current character position
+ */
+ long getCharacterPosition() {
+ return reader.getPosition();
+ }
+
+ /**
+ * Returns the current line number
+ *
+ * @return the current line number
+ */
+ long getCurrentLineNumber() {
+ return reader.getCurrentLineNumber();
+ }
+
+ String getFirstEol(){
+ return firstEol;
+ }
+
+ boolean isClosed() {
+ return reader.isClosed();
+ }
+
+ boolean isCommentStart(final int ch) {
+ return ch == commentStart;
+ }
+
+ boolean isDelimiter(final int ch) {
+ return ch == delimiter;
+ }
+
+ /**
+ * @return true if the given character indicates end of file
+ */
+ boolean isEndOfFile(final int ch) {
+ return ch == END_OF_STREAM;
+ }
+
+ boolean isEscape(final int ch) {
+ return ch == escape;
+ }
+
+ private boolean isMetaChar(final int ch) {
+ return ch == delimiter ||
+ ch == escape ||
+ ch == quoteChar ||
+ ch == commentStart;
+ }
+
+ boolean isQuoteChar(final int ch) {
+ return ch == quoteChar;
+ }
+
+ /**
+ * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
+ *
+ * @param ch the character to check
+ * @return true if the character is at the start of a line.
+ */
+ boolean isStartOfLine(final int ch) {
+ return ch == LF || ch == CR || ch == UNDEFINED;
+ }
+
+ /**
+ * @return true if the given char is a whitespace character
+ */
+ boolean isWhitespace(final int ch) {
+ return !isDelimiter(ch) && Character.isWhitespace((char) ch);
+ }
+
+ private char mapNullToDisabled(final Character c) {
+ return c == null ? DISABLED : c.charValue();
+ }
+
+ /**
* Returns the next token.
* <p>
* A token corresponds to a term, a record change or an end-of-file indicator.
@@ -171,59 +255,6 @@ final class Lexer implements Closeable {
}
/**
- * Parses a simple token.
- * <p/>
- * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
- * delimiters (as \, or \;). The token is finished when one of the following conditions become true:
- * <ul>
- * <li>end of line has been reached (EORECORD)</li>
- * <li>end of stream has been reached (EOF)</li>
- * <li>an unescaped delimiter has been reached (TOKEN)</li>
- * </ul>
- *
- * @param token
- * the current token
- * @param ch
- * the current character
- * @return the filled token
- * @throws IOException
- * on stream access error
- */
- private Token parseSimpleToken(final Token token, int ch) throws IOException {
- // Faster to use while(true)+break than while(token.type == INVALID)
- while (true) {
- if (readEndOfLine(ch)) {
- token.type = EORECORD;
- break;
- } else if (isEndOfFile(ch)) {
- token.type = EOF;
- token.isReady = true; // There is data at EOF
- break;
- } else if (isDelimiter(ch)) {
- token.type = TOKEN;
- break;
- } else if (isEscape(ch)) {
- final int unescaped = readEscape();
- if (unescaped == END_OF_STREAM) { // unexpected char after escape
- token.content.append((char) ch).append((char) reader.getLastChar());
- } else {
- token.content.append((char) unescaped);
- }
- ch = reader.read(); // continue
- } else {
- token.content.append((char) ch);
- ch = reader.read(); // continue
- }
- }
-
- if (ignoreSurroundingSpaces) {
- trimTrailingSpaces(token.content);
- }
-
- return token;
- }
-
- /**
* Parses an encapsulated token.
* <p/>
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
@@ -294,26 +325,84 @@ final class Lexer implements Closeable {
}
}
- private char mapNullToDisabled(final Character c) {
- return c == null ? DISABLED : c.charValue();
- }
-
/**
- * Returns the current line number
+ * Parses a simple token.
+ * <p/>
+ * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
+ * delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
+ * <ul>
+ * <li>end of line has been reached (EORECORD)</li>
+ * <li>end of stream has been reached (EOF)</li>
+ * <li>an unescaped delimiter has been reached (TOKEN)</li>
+ * </ul>
*
- * @return the current line number
+ * @param token
+ * the current token
+ * @param ch
+ * the current character
+ * @return the filled token
+ * @throws IOException
+ * on stream access error
*/
- long getCurrentLineNumber() {
- return reader.getCurrentLineNumber();
+ private Token parseSimpleToken(final Token token, int ch) throws IOException {
+ // Faster to use while(true)+break than while(token.type == INVALID)
+ while (true) {
+ if (readEndOfLine(ch)) {
+ token.type = EORECORD;
+ break;
+ } else if (isEndOfFile(ch)) {
+ token.type = EOF;
+ token.isReady = true; // There is data at EOF
+ break;
+ } else if (isDelimiter(ch)) {
+ token.type = TOKEN;
+ break;
+ } else if (isEscape(ch)) {
+ final int unescaped = readEscape();
+ if (unescaped == END_OF_STREAM) { // unexpected char after escape
+ token.content.append((char) ch).append((char) reader.getLastChar());
+ } else {
+ token.content.append((char) unescaped);
+ }
+ ch = reader.read(); // continue
+ } else {
+ token.content.append((char) ch);
+ ch = reader.read(); // continue
+ }
+ }
+
+ if (ignoreSurroundingSpaces) {
+ trimTrailingSpaces(token.content);
+ }
+
+ return token;
}
/**
- * Returns the current character position
+ * Greedily accepts \n, \r and \r\n. For a \r\n pair, this checker silently consumes the second control character.
*
- * @return the current character position
+ * @return true if the given or next character is a line-terminator
*/
- long getCharacterPosition() {
- return reader.getPosition();
+ boolean readEndOfLine(int ch) throws IOException {
+ // check if we have \r\n...
+ if (ch == CR && reader.lookAhead() == LF) {
+ // note: does not change ch outside of this method!
+ ch = reader.read();
+ // Save the EOL state
+ if (firstEol == null) {
+ this.firstEol = Constants.CRLF;
+ }
+ }
+ // save EOL state here.
+ if (firstEol == null) {
+ if (ch == LF) {
+ this.firstEol = LF_STRING;
+ } else if (ch == CR) {
+ this.firstEol = CR_STRING;
+ }
+ }
+
+ return ch == LF || ch == CR;
}
// TODO escape handling needs more work
@@ -369,93 +458,4 @@ final class Lexer implements Closeable {
buffer.setLength(length);
}
}
-
- /**
- * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
- *
- * @return true if the given or next character is a line-terminator
- */
- boolean readEndOfLine(int ch) throws IOException {
- // check if we have \r\n...
- if (ch == CR && reader.lookAhead() == LF) {
- // note: does not change ch outside of this method!
- ch = reader.read();
- // Save the EOL state
- if (firstEol == null) {
- this.firstEol = Constants.CRLF;
- }
- }
- // save EOL state here.
- if (firstEol == null) {
- if (ch == LF) {
- this.firstEol = LF_STRING;
- } else if (ch == CR) {
- this.firstEol = CR_STRING;
- }
- }
-
- return ch == LF || ch == CR;
- }
-
- boolean isClosed() {
- return reader.isClosed();
- }
-
- /**
- * @return true if the given char is a whitespace character
- */
- boolean isWhitespace(final int ch) {
- return !isDelimiter(ch) && Character.isWhitespace((char) ch);
- }
-
- /**
- * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
- *
- * @param ch the character to check
- * @return true if the character is at the start of a line.
- */
- boolean isStartOfLine(final int ch) {
- return ch == LF || ch == CR || ch == UNDEFINED;
- }
-
- /**
- * @return true if the given character indicates end of file
- */
- boolean isEndOfFile(final int ch) {
- return ch == END_OF_STREAM;
- }
-
- boolean isDelimiter(final int ch) {
- return ch == delimiter;
- }
-
- boolean isEscape(final int ch) {
- return ch == escape;
- }
-
- boolean isQuoteChar(final int ch) {
- return ch == quoteChar;
- }
-
- boolean isCommentStart(final int ch) {
- return ch == commentStart;
- }
-
- private boolean isMetaChar(final int ch) {
- return ch == delimiter ||
- ch == escape ||
- ch == quoteChar ||
- ch == commentStart;
- }
-
- /**
- * Closes resources.
- *
- * @throws IOException
- * If an I/O error occurs
- */
- @Override
- public void close() throws IOException {
- reader.close();
- }
}
diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java
index 861e097..dff7d01 100644
--- a/src/main/java/org/apache/commons/csv/Token.java
+++ b/src/main/java/org/apache/commons/csv/Token.java
@@ -26,9 +26,6 @@ import static org.apache.commons.csv.Token.Type.INVALID;
*/
final class Token {
- /** length of the initial token (content-)buffer */
- private static final int INITIAL_TOKEN_LENGTH = 50;
-
enum Type {
/** Token has no valid content, i.e. is in its initialized state. */
INVALID,
@@ -46,6 +43,9 @@ final class Token {
COMMENT
}
+ /** length of the initial token (content-)buffer */
+ private static final int INITIAL_TOKEN_LENGTH = 50;
+
/** Token type */
Token.Type type = INVALID;