You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2013/08/02 14:24:04 UTC

[2/9] git commit: Performance improvement to CSV reading when values are only single line based.

Performance improvement to CSV reading when values are only single line
based.

Project: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/commit/72d0608b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/tree/72d0608b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/diff/72d0608b

Branch: refs/heads/master
Commit: 72d0608b2ba1dd3878bcef3eb51e6369c4ccdaf3
Parents: 10b5b77
Author: kaspers <ka...@kaspers-think.humaninference.com>
Authored: Fri Jul 26 10:44:29 2013 +0200
Committer: kaspers <ka...@kaspers-think.humaninference.com>
Committed: Fri Jul 26 10:44:29 2013 +0200

----------------------------------------------------------------------
 .../apache/metamodel/csv/CsvConfiguration.java  | 270 ++++++++++---------
 .../apache/metamodel/csv/CsvDataContext.java    |  43 ++-
 .../org/apache/metamodel/csv/CsvDataSet.java    |   2 +-
 .../metamodel/csv/SingleLineCsvDataSet.java     | 124 +++++++++
 .../apache/metamodel/csv/SingleLineCsvRow.java  |  57 ++++
 .../metamodel/csv/CsvBigFileMemoryTest.java     | 173 +++++++-----
 6 files changed, 463 insertions(+), 206 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/72d0608b/csv/src/main/java/org/apache/metamodel/csv/CsvConfiguration.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/CsvConfiguration.java b/csv/src/main/java/org/apache/metamodel/csv/CsvConfiguration.java
index e2cd846..1662eab 100644
--- a/csv/src/main/java/org/apache/metamodel/csv/CsvConfiguration.java
+++ b/csv/src/main/java/org/apache/metamodel/csv/CsvConfiguration.java
@@ -31,130 +31,148 @@ import org.apache.metamodel.util.FileHelper;
  */
 public final class CsvConfiguration extends BaseObject implements Serializable {
 
-	private static final long serialVersionUID = 1L;
-
-	/**
-	 * The value is '\\uFFFF', the "not a character" value which should not
-	 * occur in any valid Unicode string. This special char can be used to
-	 * disable either quote chars or escape chars.
-	 */
-	public static final char NOT_A_CHAR = '\uFFFF';
-	public static final int NO_COLUMN_NAME_LINE = 0;
-	public static final int DEFAULT_COLUMN_NAME_LINE = 1;
-	public static final char DEFAULT_SEPARATOR_CHAR = ',';
-	public static final char DEFAULT_QUOTE_CHAR = '"';
-	public static final char DEFAULT_ESCAPE_CHAR = '\\';
-
-	private final int columnNameLineNumber;
-	private final String encoding;
-	private final char separatorChar;
-	private final char quoteChar;
-	private final char escapeChar;
-	private final boolean failOnInconsistentRowLength;
-
-	public CsvConfiguration() {
-		this(DEFAULT_COLUMN_NAME_LINE);
-	}
-
-	public CsvConfiguration(int columnNameLineNumber) {
-		this(columnNameLineNumber, FileHelper.DEFAULT_ENCODING,
-				DEFAULT_SEPARATOR_CHAR, DEFAULT_QUOTE_CHAR, DEFAULT_ESCAPE_CHAR);
-	}
-
-	public CsvConfiguration(int columnNameLineNumber, String encoding,
-			char separatorChar, char quoteChar, char escapeChar) {
-		this(columnNameLineNumber, encoding, separatorChar, quoteChar,
-				escapeChar, false);
-	}
-
-	public CsvConfiguration(int columnNameLineNumber, String encoding,
-			char separatorChar, char quoteChar, char escapeChar,
-			boolean failOnInconsistentRowLength) {
-		this.columnNameLineNumber = columnNameLineNumber;
-		this.encoding = encoding;
-		this.separatorChar = separatorChar;
-		this.quoteChar = quoteChar;
-		this.escapeChar = escapeChar;
-		this.failOnInconsistentRowLength = failOnInconsistentRowLength;
-	}
-
-	/**
-	 * Determines whether to fail (by throwing an
-	 * {@link InconsistentRowLengthException}) if a line in the CSV file has
-	 * inconsistent amounts of columns.
-	 * 
-	 * If set to false (default) MetaModel will gracefully fill in missing null
-	 * values in or ignore additional values in a line.
-	 * 
-	 * @return a boolean indicating whether to fail or gracefully compensate for
-	 *         inconsistent lines in the CSV files.
-	 */
-	public boolean isFailOnInconsistentRowLength() {
-		return failOnInconsistentRowLength;
-	}
-
-	/**
-	 * The line number (1 based) from which to get the names of the columns.
-	 * 
-	 * @return the line number (1 based)
-	 */
-	public int getColumnNameLineNumber() {
-		return columnNameLineNumber;
-	}
-
-	/**
-	 * Gets the file encoding to use for reading the file.
-	 * 
-	 * @return the text encoding of the file.
-	 */
-	public String getEncoding() {
-		return encoding;
-	}
-
-	/**
-	 * Gets the separator char (typically comma or semicolon) for separating
-	 * values.
-	 * 
-	 * @return the separator char
-	 */
-	public char getSeparatorChar() {
-		return separatorChar;
-	}
-
-	/**
-	 * Gets the quote char, used for encapsulating values.
-	 * 
-	 * @return the quote char
-	 */
-	public char getQuoteChar() {
-		return quoteChar;
-	}
-
-	/**
-	 * Gets the escape char, used for escaping eg. quote chars inside values.
-	 * 
-	 * @return the escape char
-	 */
-	public char getEscapeChar() {
-		return escapeChar;
-	}
-
-	@Override
-	protected void decorateIdentity(List<Object> identifiers) {
-		identifiers.add(columnNameLineNumber);
-		identifiers.add(encoding);
-		identifiers.add(separatorChar);
-		identifiers.add(quoteChar);
-		identifiers.add(escapeChar);
-		identifiers.add(failOnInconsistentRowLength);
-	}
-
-	@Override
-	public String toString() {
-		return "CsvConfiguration[columnNameLineNumber=" + columnNameLineNumber
-				+ ", encoding=" + encoding + ", separatorChar=" + separatorChar
-				+ ", quoteChar=" + quoteChar + ", escapeChar=" + escapeChar
-				+ ", failOnInconsistentRowLength="
-				+ failOnInconsistentRowLength + "]";
-	}
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The value is '\\uFFFF', the "not a character" value which should not
+     * occur in any valid Unicode string. This special char can be used to
+     * disable either quote chars or escape chars.
+     */
+    public static final char NOT_A_CHAR = '\uFFFF';
+    public static final int NO_COLUMN_NAME_LINE = 0;
+    public static final int DEFAULT_COLUMN_NAME_LINE = 1;
+    public static final char DEFAULT_SEPARATOR_CHAR = ',';
+    public static final char DEFAULT_QUOTE_CHAR = '"';
+    public static final char DEFAULT_ESCAPE_CHAR = '\\';
+
+    private final int columnNameLineNumber;
+    private final String encoding;
+    private final char separatorChar;
+    private final char quoteChar;
+    private final char escapeChar;
+    private final boolean failOnInconsistentRowLength;
+    private final boolean multilineValues;
+
+    public CsvConfiguration() {
+        this(DEFAULT_COLUMN_NAME_LINE);
+    }
+
+    public CsvConfiguration(int columnNameLineNumber) {
+        this(columnNameLineNumber, FileHelper.DEFAULT_ENCODING, DEFAULT_SEPARATOR_CHAR, DEFAULT_QUOTE_CHAR,
+                DEFAULT_ESCAPE_CHAR);
+    }
+
+    public CsvConfiguration(int columnNameLineNumber, boolean failOnInconsistentRowLength, boolean multilineValues) {
+        this(columnNameLineNumber, FileHelper.DEFAULT_ENCODING, DEFAULT_SEPARATOR_CHAR, DEFAULT_QUOTE_CHAR,
+                DEFAULT_ESCAPE_CHAR, failOnInconsistentRowLength, multilineValues);
+    }
+
+    public CsvConfiguration(int columnNameLineNumber, String encoding, char separatorChar, char quoteChar,
+            char escapeChar) {
+        this(columnNameLineNumber, encoding, separatorChar, quoteChar, escapeChar, false);
+    }
+
+    public CsvConfiguration(int columnNameLineNumber, String encoding, char separatorChar, char quoteChar,
+            char escapeChar, boolean failOnInconsistentRowLength) {
+        this(columnNameLineNumber, encoding, separatorChar, quoteChar, escapeChar, failOnInconsistentRowLength, true);
+    }
+
+    public CsvConfiguration(int columnNameLineNumber, String encoding, char separatorChar, char quoteChar,
+            char escapeChar, boolean failOnInconsistentRowLength, boolean multilineValues) {
+        this.columnNameLineNumber = columnNameLineNumber;
+        this.encoding = encoding;
+        this.separatorChar = separatorChar;
+        this.quoteChar = quoteChar;
+        this.escapeChar = escapeChar;
+        this.failOnInconsistentRowLength = failOnInconsistentRowLength;
+        this.multilineValues = multilineValues;
+    }
+
+    /**
+     * Determines whether to fail (by throwing an
+     * {@link InconsistentRowLengthException}) if a line in the CSV file has
+     * inconsistent amounts of columns.
+     * 
+     * If set to false (default) MetaModel will gracefully fill in missing null
+     * values in or ignore additional values in a line.
+     * 
+     * @return a boolean indicating whether to fail or gracefully compensate for
+     *         inconsistent lines in the CSV files.
+     */
+    public boolean isFailOnInconsistentRowLength() {
+        return failOnInconsistentRowLength;
+    }
+
+    /**
+     * Determines whether the CSV files read using this configuration should be
+     * allowed to have multiline values in them.
+     * 
+     * @return true if values may span multiple lines, false if every value is confined to a single line
+     */
+    public boolean isMultilineValues() {
+        return multilineValues;
+    }
+
+    /**
+     * The line number (1 based) from which to get the names of the columns.
+     * 
+     * @return the line number (1 based)
+     */
+    public int getColumnNameLineNumber() {
+        return columnNameLineNumber;
+    }
+
+    /**
+     * Gets the file encoding to use for reading the file.
+     * 
+     * @return the text encoding of the file.
+     */
+    public String getEncoding() {
+        return encoding;
+    }
+
+    /**
+     * Gets the separator char (typically comma or semicolon) for separating
+     * values.
+     * 
+     * @return the separator char
+     */
+    public char getSeparatorChar() {
+        return separatorChar;
+    }
+
+    /**
+     * Gets the quote char, used for encapsulating values.
+     * 
+     * @return the quote char
+     */
+    public char getQuoteChar() {
+        return quoteChar;
+    }
+
+    /**
+     * Gets the escape char, used for escaping eg. quote chars inside values.
+     * 
+     * @return the escape char
+     */
+    public char getEscapeChar() {
+        return escapeChar;
+    }
+
+    /** Registers every setting, multilineValues included, as part of this object's identity. */
+    @Override
+    protected void decorateIdentity(List<Object> identifiers) {
+        // A single addAll keeps the order of the identifiers; java.util.Arrays is
+        // spelled out because the import list is not part of this change.
+        identifiers.addAll(java.util.Arrays.<Object> asList(columnNameLineNumber, encoding, separatorChar,
+                quoteChar, escapeChar, failOnInconsistentRowLength, multilineValues));
+    }
+
+    @Override
+    public String toString() {
+        return "CsvConfiguration[columnNameLineNumber=" + columnNameLineNumber + ", encoding=" + encoding
+                + ", separatorChar=" + separatorChar + ", quoteChar=" + quoteChar + ", escapeChar=" + escapeChar
+                + ", failOnInconsistentRowLength=" + failOnInconsistentRowLength + ", multilineValues="
+                + multilineValues + "]";
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/72d0608b/csv/src/main/java/org/apache/metamodel/csv/CsvDataContext.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/CsvDataContext.java b/csv/src/main/java/org/apache/metamodel/csv/CsvDataContext.java
index 2b275a9..5effa2c 100644
--- a/csv/src/main/java/org/apache/metamodel/csv/CsvDataContext.java
+++ b/csv/src/main/java/org/apache/metamodel/csv/CsvDataContext.java
@@ -35,6 +35,7 @@ import org.apache.metamodel.QueryPostprocessDataContext;
 import org.apache.metamodel.UpdateScript;
 import org.apache.metamodel.UpdateableDataContext;
 import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.data.EmptyDataSet;
 import org.apache.metamodel.query.FilterItem;
 import org.apache.metamodel.schema.Column;
 import org.apache.metamodel.schema.Table;
@@ -46,6 +47,7 @@ import org.apache.metamodel.util.UrlResource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import au.com.bytecode.opencsv.CSVParser;
 import au.com.bytecode.opencsv.CSVReader;
 
 /**
@@ -336,23 +338,74 @@ public final class CsvDataContext extends QueryPostprocessDataContext implements
+    /**
+     * Opens the CSV resource, skips every line up to and including the column
+     * name line, and returns a data set over the lines that follow.
+     *
+     * @param table
+     *            the table being read; its column count is the number of
+     *            values expected on each line
+     * @param columns
+     *            the columns to expose in the returned data set
+     * @param maxRows
+     *            the maximum number of rows to return; zero or a negative
+     *            value means no limit
+     * @return a {@link CsvDataSet} if multiline values are enabled, otherwise
+     *         a {@link SingleLineCsvDataSet}; an {@link EmptyDataSet} if the
+     *         resource ends before the first data line
+     * @throws MetaModelException
+     *             if the resource cannot be read while skipping lines
+     */
     @Override
     public DataSet materializeMainSchemaTable(Table table, Column[] columns, int maxRows) {
         final int lineNumber = _configuration.getColumnNameLineNumber();
-        final CSVReader reader = createCsvReader(lineNumber);
         final int columnCount = table.getColumnCount();
+
+        final BufferedReader reader = FileHelper.getBufferedReader(_resource.read(), _configuration.getEncoding());
+
+        try {
+            // skip column header lines
+            for (int i = 0; i < lineNumber; i++) {
+                String line = reader.readLine();
+                if (line == null) {
+                    // nothing will ever consume this reader, so release it here
+                    FileHelper.safeClose(reader);
+                    return new EmptyDataSet(columns);
+                }
+            }
+        } catch (IOException e) {
+            // do not leak the open resource when skipping the header lines fails
+            FileHelper.safeClose(reader);
+            throw new MetaModelException("IOException occurred while reading from CSV resource: " + _resource, e);
+        }
+
         final boolean failOnInconsistentRowLength = _configuration.isFailOnInconsistentRowLength();
-        if (maxRows < 0) {
-            return new CsvDataSet(reader, columns, null, columnCount, failOnInconsistentRowLength);
-        } else {
-            return new CsvDataSet(reader, columns, maxRows, columnCount, failOnInconsistentRowLength);
+
+        // NOTE(review): zero is treated as "no limit" here, not as "zero rows" -
+        // confirm that callers never pass 0 expecting an empty result.
+        final Integer maxRowsOrNull = (maxRows > 0 ? maxRows : null);
+
+        if (_configuration.isMultilineValues()) {
+            final CSVReader csvReader = createCsvReader(reader);
+            return new CsvDataSet(csvReader, columns, maxRowsOrNull, columnCount, failOnInconsistentRowLength);
         }
+
+        final CSVParser csvParser = new CSVParser(_configuration.getSeparatorChar(), _configuration.getQuoteChar(),
+                _configuration.getEscapeChar());
+        return new SingleLineCsvDataSet(reader, csvParser, columns, maxRowsOrNull, columnCount,
+                failOnInconsistentRowLength);
     }
 
     protected CSVReader createCsvReader(int skipLines) {
-        final Reader fileReader = FileHelper.getReader(_resource.read(), _configuration.getEncoding());
-        final CSVReader csvReader = new CSVReader(fileReader, _configuration.getSeparatorChar(),
+        final Reader reader = FileHelper.getReader(_resource.read(), _configuration.getEncoding());
+        final CSVReader csvReader = new CSVReader(reader, _configuration.getSeparatorChar(),
                 _configuration.getQuoteChar(), _configuration.getEscapeChar(), skipLines);
         return csvReader;
     }
 
+    protected CSVReader createCsvReader(BufferedReader reader) {
+        final CSVReader csvReader = new CSVReader(reader, _configuration.getSeparatorChar(),
+                _configuration.getQuoteChar(), _configuration.getEscapeChar());
+        return csvReader;
+    }
+
     @Override
     protected CsvSchema getMainSchema() throws MetaModelException {
         CsvSchema schema = new CsvSchema(getMainSchemaName(), this);

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/72d0608b/csv/src/main/java/org/apache/metamodel/csv/CsvDataSet.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/CsvDataSet.java b/csv/src/main/java/org/apache/metamodel/csv/CsvDataSet.java
index b69ab13..4bfb18c 100644
--- a/csv/src/main/java/org/apache/metamodel/csv/CsvDataSet.java
+++ b/csv/src/main/java/org/apache/metamodel/csv/CsvDataSet.java
@@ -83,7 +83,7 @@ final class CsvDataSet extends AbstractDataSet {
 			return false;
 		}
 	}
-
+	
 	private boolean nextInternal() {
 		if (_reader == null) {
 			return false;

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/72d0608b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvDataSet.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvDataSet.java b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvDataSet.java
new file mode 100644
index 0000000..2d8a800
--- /dev/null
+++ b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvDataSet.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.csv;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+
+import org.apache.metamodel.MetaModelException;
+import org.apache.metamodel.data.AbstractDataSet;
+import org.apache.metamodel.data.DataSetHeader;
+import org.apache.metamodel.data.Row;
+import org.apache.metamodel.schema.Column;
+import org.apache.metamodel.util.FileHelper;
+
+import au.com.bytecode.opencsv.CSVParser;
+
+/**
+ * A specialized DataSet implementation for the CSV module under circumstances
+ * where multiline values are disabled. In this case we can use an optimized
+ * CSVParser and also lazily evaluate lines read from the file.
+ */
+final class SingleLineCsvDataSet extends AbstractDataSet {
+
+    private final BufferedReader _reader;
+    private final CSVParser _csvParser;
+    private final int _columnsInTable;
+    private final boolean _failOnInconsistentRowLength;
+
+    private volatile int _rowNumber;
+    private volatile Integer _rowsRemaining;
+    private volatile Row _row;
+    // set once close() has run; guards against reading from the closed reader
+    private volatile boolean _closed;
+
+    /**
+     * Creates a data set that reads one row per line from the given reader.
+     *
+     * @param reader
+     *            the reader to pull lines from; it is closed by {@link #close()}
+     * @param csvParser
+     *            the parser used to split each line into values
+     * @param columns
+     *            the columns that make up the header of this data set
+     * @param maxRows
+     *            the maximum number of rows to return, or null for no limit
+     * @param columnsInTable
+     *            the number of values expected on every line
+     * @param failOnInconsistentRowLength
+     *            whether a line with a different number of values is an error
+     */
+    public SingleLineCsvDataSet(BufferedReader reader, CSVParser csvParser, Column[] columns, Integer maxRows,
+            int columnsInTable, boolean failOnInconsistentRowLength) {
+        super(columns);
+        _reader = reader;
+        _csvParser = csvParser;
+        _columnsInTable = columnsInTable;
+        _failOnInconsistentRowLength = failOnInconsistentRowLength;
+        _rowNumber = 0;
+        _rowsRemaining = maxRows;
+    }
+
+    /**
+     * Closes the underlying reader and drops the current row. Further calls to
+     * {@link #next()} return false.
+     */
+    @Override
+    public void close() {
+        // Mark the data set closed instead of nulling _rowsRemaining: null means
+        // "no row limit" to next(), which would then read from the closed reader.
+        _closed = true;
+        FileHelper.safeClose(_reader);
+        _row = null;
+    }
+
+    /**
+     * Advances to the next line, honoring the row limit if one was given.
+     *
+     * @return true if a new row is available through {@link #getRow()}
+     */
+    @Override
+    public boolean next() {
+        if (_closed) {
+            return false;
+        }
+        final Integer rowsRemaining = _rowsRemaining;
+        if (rowsRemaining == null) {
+            // no limit
+            return nextInternal();
+        }
+        if (rowsRemaining > 0) {
+            _rowsRemaining = rowsRemaining - 1;
+            return nextInternal();
+        }
+        return false;
+    }
+
+    @Override
+    protected DataSetHeader getHeader() {
+        // re-make this method protected so that it's visible for
+        // SingleLineCsvRow.
+        return super.getHeader();
+    }
+
+    protected boolean isFailOnInconsistentRowLength() {
+        return _failOnInconsistentRowLength;
+    }
+
+    protected int getColumnsInTable() {
+        return _columnsInTable;
+    }
+
+    protected CSVParser getCsvParser() {
+        return _csvParser;
+    }
+
+    /**
+     * Reads the next line, ignoring the row limit, and wraps it in a row. The
+     * data set closes itself when the reader is exhausted or fails.
+     *
+     * @return true if a line was read, false at end of input or after close
+     * @throws MetaModelException
+     *             if reading from the underlying reader fails
+     */
+    public boolean nextInternal() {
+        if (_closed || _reader == null) {
+            return false;
+        }
+
+        try {
+            final String line = _reader.readLine();
+            if (line == null) {
+                close();
+                return false;
+            }
+
+            _rowNumber++;
+            _row = new SingleLineCsvRow(this, line, _columnsInTable, _failOnInconsistentRowLength, _rowNumber);
+            return true;
+        } catch (IOException e) {
+            close();
+            throw new MetaModelException("IOException occurred while reading next line of CSV resource", e);
+        }
+    }
+
+    /**
+     * @return the row produced by the last successful {@link #next()}, or null
+     *         before the first call and after {@link #close()}
+     */
+    @Override
+    public Row getRow() {
+        return _row;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/72d0608b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
new file mode 100644
index 0000000..ee93974
--- /dev/null
+++ b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.csv;
+
+import org.apache.metamodel.data.AbstractRow;
+import org.apache.metamodel.data.DataSetHeader;
+import org.apache.metamodel.data.Style;
+import org.apache.metamodel.schema.Column;
+import org.apache.metamodel.util.LazyRef;
+
+import au.com.bytecode.opencsv.CSVParser;
+
+/**
+ * Specialized row implementation for single-line CSV values. The line is
+ * split into values on first access, and the values are then mapped to the
+ * columns of the data set header.
+ */
+final class SingleLineCsvRow extends AbstractRow {
+
+    private static final long serialVersionUID = 1L;
+
+    private final SingleLineCsvDataSet _dataSet;
+    private final LazyRef<String[]> _valuesRef;
+
+    /**
+     * @param dataSet
+     *            the data set that produced this row; provides the parser and
+     *            the header
+     * @param line
+     *            the raw, unparsed line of the CSV resource
+     * @param columnsInTable
+     *            the number of values a consistent line holds
+     * @param failOnInconsistentRowLength
+     *            whether to raise an {@link InconsistentRowLengthException}
+     *            when the line holds a different number of values
+     * @param rowNumber
+     *            the number of this row, reported if the row is inconsistent
+     */
+    public SingleLineCsvRow(SingleLineCsvDataSet dataSet, final String line, final int columnsInTable,
+            final boolean failOnInconsistentRowLength, final int rowNumber) {
+        _dataSet = dataSet;
+        _valuesRef = new LazyRef<String[]>() {
+            @Override
+            protected String[] fetch() throws Throwable {
+                final CSVParser parser = _dataSet.getCsvParser();
+                final String[] csvValues = parser.parseLine(line);
+
+                if (failOnInconsistentRowLength) {
+                    if (columnsInTable != csvValues.length) {
+                        throw new InconsistentRowLengthException(columnsInTable, SingleLineCsvRow.this, csvValues,
+                                rowNumber);
+                    }
+                }
+
+                // The header may hold only a subset of the table's columns, so
+                // pick each value by the column number of the selected column.
+                // Missing trailing values stay null; surplus values are ignored.
+                final DataSetHeader header = _dataSet.getHeader();
+                final int size = header.size();
+                final String[] rowValues = new String[size];
+                for (int i = 0; i < size; i++) {
+                    final Column column = header.getSelectItem(i).getColumn();
+                    final int columnNumber = column.getColumnNumber();
+                    if (columnNumber < csvValues.length) {
+                        rowValues[i] = csvValues[columnNumber];
+                    }
+                }
+                return rowValues;
+            }
+        };
+    }
+
+    @Override
+    public Object getValue(int index) throws IndexOutOfBoundsException {
+        String[] values = _valuesRef.get();
+        return values[index];
+    }
+
+    @Override
+    public Style getStyle(int index) throws IndexOutOfBoundsException {
+        return Style.NO_STYLE;
+    }
+
+    @Override
+    protected DataSetHeader getHeader() {
+        return _dataSet.getHeader();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/72d0608b/csv/src/test/java/org/apache/metamodel/csv/CsvBigFileMemoryTest.java
----------------------------------------------------------------------
diff --git a/csv/src/test/java/org/apache/metamodel/csv/CsvBigFileMemoryTest.java b/csv/src/test/java/org/apache/metamodel/csv/CsvBigFileMemoryTest.java
index 54c6987..c7243bd 100644
--- a/csv/src/test/java/org/apache/metamodel/csv/CsvBigFileMemoryTest.java
+++ b/csv/src/test/java/org/apache/metamodel/csv/CsvBigFileMemoryTest.java
@@ -19,85 +19,114 @@
 package org.apache.metamodel.csv;
 
 import java.io.File;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import junit.framework.TestCase;
 
 import org.apache.metamodel.DataContext;
-import org.apache.metamodel.csv.CsvConfiguration;
-import org.apache.metamodel.csv.CsvDataContext;
 import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.data.Row;
 import org.apache.metamodel.query.Query;
 import org.apache.metamodel.query.SelectItem;
 import org.apache.metamodel.schema.Table;
 
-import junit.framework.TestCase;
-
 public class CsvBigFileMemoryTest extends TestCase {
 
-	private final int hugeFileRows = 3000;
-	private final int hugeFileCols = 2000;
-
-	private File getHugeFile() {
-		final File file = new File("target/huge_csv.csv");
-		if (!file.exists()) {
-
-			final ExampleDataGenerator exampleDataGenerator = new ExampleDataGenerator(
-					hugeFileRows, hugeFileCols);
-			exampleDataGenerator.createFile(file);
-		}
-		return file;
-	}
-
-	/**
-	 * Runs a performance test based on the data created by the
-	 * ExampleDataCreator utility.
-	 * 
-	 * @see ExampleDataGenerator
-	 * @throws Exception
-	 */
-	public void testHugeFile() throws Exception {
-		final File file = getHugeFile();
-
-		final long timeAtStart = System.currentTimeMillis();
-		System.out.println("time at start: " + timeAtStart);
-
-		final DataContext dc = new CsvDataContext(file, new CsvConfiguration());
-		final Table t = dc.getDefaultSchema().getTables()[0];
-
-		final long timeAfterDataContext = System.currentTimeMillis();
-		System.out.println("time after DataContext: " + timeAfterDataContext);
-
-		final Query q = new Query().select(t.getColumns()).from(t);
-		DataSet ds = dc.executeQuery(q);
-
-		long timeAfterQuery = System.currentTimeMillis();
-		System.out.println("time after query: " + timeAfterQuery);
-
-		while (ds.next()) {
-			assertEquals(hugeFileCols, ds.getRow().getValues().length);
-		}
-		ds.close();
-
-		long timeAfterDataSet = System.currentTimeMillis();
-		System.out.println("time after dataSet: " + timeAfterDataSet);
-
-		if (!file.delete()) {
-			file.deleteOnExit();
-		}
-	}
-
-	public void testApproximatedCountHugeFile() throws Exception {
-		DataContext dc = new CsvDataContext(getHugeFile());
-
-		Table table = dc.getDefaultSchema().getTables()[0];
-		Query q = dc.query().from(table).selectCount().toQuery();
-		SelectItem selectItem = q.getSelectClause().getItem(0);
-		selectItem.setFunctionApproximationAllowed(true);
-
-		DataSet ds = dc.executeQuery(q);
-		assertTrue(ds.next());
-		Object[] values = ds.getRow().getValues();
-		assertEquals(1, values.length);
-		assertEquals(3332, ((Long) ds.getRow().getValue(selectItem)).intValue());
-		assertEquals(3332, ((Long) values[0]).intValue());
-		assertFalse(ds.next());
-	}
+    private final int hugeFileRows = 30000;
+    private final int hugeFileCols = 2000;
+
+    private File getHugeFile() {
+        final File file = new File("target/huge_csv.csv");
+        if (!file.exists()) {
+
+            final ExampleDataGenerator exampleDataGenerator = new ExampleDataGenerator(hugeFileRows, hugeFileCols);
+            exampleDataGenerator.createFile(file);
+        }
+        return file;
+    }
+
+    /**
+     * Runs a performance test based on the data created by the
+     * ExampleDataGenerator utility.
+     * 
+     * @see ExampleDataGenerator
+     * @throws Exception
+     */
+    public void testHugeFile() throws Exception {
+        final File file = getHugeFile();
+
+        final long timeAtStart = System.currentTimeMillis();
+        System.out.println("time at start: " + timeAtStart);
+
+        final DataContext dc = new CsvDataContext(file, new CsvConfiguration(1, false, false));
+        final Table t = dc.getDefaultSchema().getTables()[0];
+
+        final long timeAfterDataContext = System.currentTimeMillis();
+        System.out.println("time after DataContext: " + timeAfterDataContext);
+
+        final Query q = new Query().select(t.getColumns()).from(t);
+        DataSet ds = dc.executeQuery(q);
+
+        long timeAfterQuery = System.currentTimeMillis();
+        System.out.println("time after query: " + timeAfterQuery);
+
+        final CountDownLatch countDown = new CountDownLatch(hugeFileRows);
+        final AtomicBoolean success = new AtomicBoolean(true);
+
+        ExecutorService executorService = Executors.newFixedThreadPool(30);
+
+        while (ds.next()) {
+            final Row row = ds.getRow();
+            executorService.submit(new Runnable() {
+                @Override
+                public void run() {
+                    if (hugeFileCols != row.getValues().length) {
+                        System.out.println("Weird row: " + row);
+                        success.set(false);
+                    }
+                    countDown.countDown();
+                }
+            });
+        }
+        ds.close();
+
+        countDown.await();
+        assertTrue(success.get());
+
+        executorService.shutdown();
+
+        long timeAfterDataSet = System.currentTimeMillis();
+        System.out.println("time after dataSet: " + timeAfterDataSet);
+
+        long totalTime = timeAfterDataSet - timeAfterDataContext;
+        System.out.println("Total time to process large file: " + totalTime + " millis");
+
+        // results with old impl: [13908, 13827, 14577]
+        
+        // results with new impl: [8567, 8965, 8154]
+
+        if (!file.delete()) {
+            file.deleteOnExit();
+        }
+    }
+
+    public void testApproximatedCountHugeFile() throws Exception {
+        DataContext dc = new CsvDataContext(getHugeFile());
+
+        Table table = dc.getDefaultSchema().getTables()[0];
+        Query q = dc.query().from(table).selectCount().toQuery();
+        SelectItem selectItem = q.getSelectClause().getItem(0);
+        selectItem.setFunctionApproximationAllowed(true);
+
+        DataSet ds = dc.executeQuery(q);
+        assertTrue(ds.next());
+        Object[] values = ds.getRow().getValues();
+        assertEquals(1, values.length);
+        assertEquals(3332, ((Long) ds.getRow().getValue(selectItem)).intValue());
+        assertEquals(3332, ((Long) values[0]).intValue());
+        assertFalse(ds.next());
+    }
 }