You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2019/03/25 02:29:57 UTC
[metamodel] 03/09: METAMODEL-1210: Added support for applying date format.
This is an automated email from the ASF dual-hosted git repository.
kaspersor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/metamodel.git
commit fa98e2958709a85b53280e5cb592c820d0a779e8
Author: Kasper Sørensen <i....@gmail.com>
AuthorDate: Wed Mar 13 03:10:55 2019 -0700
METAMODEL-1210: Added support for applying date format.
---
.../org/apache/metamodel/arff/ArffDataContext.java | 6 +-
.../org/apache/metamodel/arff/ArffDataSet.java | 72 +++++++++++++---------
2 files changed, 46 insertions(+), 32 deletions(-)
diff --git a/arff/src/main/java/org/apache/metamodel/arff/ArffDataContext.java b/arff/src/main/java/org/apache/metamodel/arff/ArffDataContext.java
index 6d4f5ed..0b4ca5d 100644
--- a/arff/src/main/java/org/apache/metamodel/arff/ArffDataContext.java
+++ b/arff/src/main/java/org/apache/metamodel/arff/ArffDataContext.java
@@ -55,7 +55,7 @@ public class ArffDataContext extends QueryPostprocessDataContext {
private static final String SECTION_ANNOTATION_DATA = "@data";
private static final Charset CHARSET = FileHelper.UTF_8_CHARSET;
private static final Pattern ATTRIBUTE_DEF_W_DATATYPE_PARAM =
- Pattern.compile("\\'?(.+)\\'?\\s+([a-zA-Z]+)\\s+\\'?(.+)\\'?");
+ Pattern.compile("\\'?(.+)\\'?\\s+([a-zA-Z]+)\\s+\\'(.+)\\'");
private final Splitter whitespaceSplitter = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
@@ -188,7 +188,7 @@ public class ArffDataContext extends QueryPostprocessDataContext {
if (isIgnoreLine(line)) {
continue;
}
- if (line.equals(SECTION_ANNOTATION_DATA)) {
+ if (line.toLowerCase().equals(SECTION_ANNOTATION_DATA)) {
// start of the data
break;
}
@@ -196,7 +196,7 @@ public class ArffDataContext extends QueryPostprocessDataContext {
} catch (IOException e) {
throw new UncheckedIOException(e);
}
- final ArffDataSet dataSet = new ArffDataSet(columns, reader);
+ final ArffDataSet dataSet = new ArffDataSet(resource, columns, reader);
if (maxRows > -1) {
return new MaxRowsDataSet(dataSet, maxRows);
} else {
diff --git a/arff/src/main/java/org/apache/metamodel/arff/ArffDataSet.java b/arff/src/main/java/org/apache/metamodel/arff/ArffDataSet.java
index abf1e5d..2d1df07 100644
--- a/arff/src/main/java/org/apache/metamodel/arff/ArffDataSet.java
+++ b/arff/src/main/java/org/apache/metamodel/arff/ArffDataSet.java
@@ -15,6 +15,7 @@ import org.apache.metamodel.query.SelectItem;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.util.NumberComparator;
+import org.apache.metamodel.util.Resource;
import com.opencsv.CSVParser;
import com.opencsv.ICSVParser;
@@ -22,16 +23,16 @@ import com.opencsv.ICSVParser;
public class ArffDataSet extends AbstractDataSet {
private final ICSVParser csvParser = new CSVParser(',', '\'');
+ private final Resource resource;
private final BufferedReader reader;
- private final int[] valueIndices;
- private final ColumnType[] valueTypes;
+ private final List<Column> columns;
private String line;
- public ArffDataSet(List<Column> columns, BufferedReader reader) {
+ public ArffDataSet(Resource resource, List<Column> columns, BufferedReader reader) {
super(columns.stream().map(c -> new SelectItem(c)).collect(Collectors.toList()));
- this.valueIndices = columns.stream().mapToInt(Column::getColumnNumber).toArray();
- this.valueTypes = columns.stream().map(Column::getType).toArray(ColumnType[]::new);
+ this.resource = resource;
+ this.columns = columns;
this.reader = reader;
}
@@ -57,38 +58,51 @@ public class ArffDataSet extends AbstractDataSet {
try {
stringValues = csvParser.parseLine(line);
} catch (IOException e) {
- throw new UncheckedIOException(e);
+ throw new UncheckedIOException(resource.getName() + ": Failed to CSV-parse data line: " + line, e);
}
- final Object[] values = new Object[valueIndices.length];
- for (int i = 0; i < valueIndices.length; i++) {
- final int index = valueIndices[i];
+ final Object[] values = new Object[columns.size()];
+ for (int i = 0; i < values.length; i++) {
+ final Column column = columns.get(i);
+ final int index = column.getColumnNumber();
final String stringValue = stringValues[index];
- final ColumnType type = valueTypes[i];
- if (type.isNumber()) {
- if (stringValue.isEmpty() || "?".equals(stringValue)) {
- values[i] = null;
+ values[i] = convertValue(stringValue, column);
+ }
+ return new DefaultRow(getHeader(), values);
+ }
+
+ private Object convertValue(String stringValue, Column column) {
+ final ColumnType type = column.getType();
+ if (type.isNumber()) {
+ if (stringValue.isEmpty() || "?".equals(stringValue)) {
+ return null;
+ } else {
+ final Number n = NumberComparator.toNumber(stringValue);
+ if (type == ColumnType.INTEGER) {
+ return n.intValue();
} else {
- final Number n = NumberComparator.toNumber(stringValue);
- if (type == ColumnType.INTEGER) {
- values[i] = n.intValue();
- } else {
- values[i] = n;
- }
- }
- } else if (type.isTimeBased()) {
- // TODO: extract format from column remarks
- try {
- values[i] = new SimpleDateFormat("yyyy-MM-dd").parse(stringValue);
- } catch (ParseException e) {
- throw new IllegalStateException(e);
+ return n;
}
+ }
+ } else if (type.isTimeBased()) {
+ final String columnRemarks = column.getRemarks();
+ final SimpleDateFormat dateFormat;
+ if (columnRemarks.toLowerCase().startsWith("date ")) {
+ // date format follows "date "
+ dateFormat = new SimpleDateFormat(columnRemarks.substring(5));
} else {
- values[i] = stringValue;
+ // assume standard date format
+ dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ }
+ try {
+ return dateFormat.parse(stringValue);
+ } catch (ParseException e) {
+ throw new IllegalStateException(resource.getName() + ": Failed to parse '" + stringValue
+ + "' using format '" + dateFormat.toPattern() + "'", e);
}
+ } else {
+ return stringValue;
}
-
- return new DefaultRow(getHeader(), values);
}
@Override