You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@taverna.apache.org by st...@apache.org on 2015/02/17 12:37:09 UTC
[49/70] [abbrv] incubator-taverna-common-activities git commit:
taverna-spreadsheet-import-activity/
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java b/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java
deleted file mode 100644
index 1c3a8f5..0000000
--- a/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2009 The University of Manchester
- *
- * Modifications to the initial code base are copyright of their
- * respective authors, or their employers as appropriate.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- ******************************************************************************/
-package net.sf.taverna.t2.activities.spreadsheet;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.node.ArrayNode;
-import com.fasterxml.jackson.databind.node.JsonNodeFactory;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-/**
- * Unit tests for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils}.
- *
- * @author David Withers
- */
-public class SpreadsheetUtilsTest {
-
- @Test
- public void testSpreadsheetUtils() {
- assertNotNull(new SpreadsheetUtils());
- }
-
- /**
- * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getColumnIndex(java.lang.String)}.
- */
- @Test
- public void testGetColumnIndex() {
- assertEquals(0, SpreadsheetUtils.getColumnIndex("A"));
- assertEquals(4, SpreadsheetUtils.getColumnIndex("E"));
- assertEquals(25, SpreadsheetUtils.getColumnIndex("Z"));
- assertEquals(26, SpreadsheetUtils.getColumnIndex("AA"));
- assertEquals(457833, SpreadsheetUtils.getColumnIndex("ZAFZ"));
- }
-
- /**
- * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getColumnLabel(int)}.
- */
- @Test
- public void testGetColumnLabel() {
- assertEquals("A", SpreadsheetUtils.getColumnLabel(0));
- assertEquals("E", SpreadsheetUtils.getColumnLabel(4));
- assertEquals("Z", SpreadsheetUtils.getColumnLabel(25));
- assertEquals("AA", SpreadsheetUtils.getColumnLabel(26));
- assertEquals("ZAFZ", SpreadsheetUtils.getColumnLabel(457833));
- }
-
- /**
- * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getPortName(java.lang.String, java.util.Map)}.
- */
- @Test
- public void testGetPortNameStringMapOfStringString() {
- assertEquals("A", SpreadsheetUtils.getPortName("A", null));
- assertEquals("AABR", SpreadsheetUtils.getPortName("AABR", null));
- ObjectNode configuration = JsonNodeFactory.instance.objectNode();
- ArrayNode columnNames = configuration.arrayNode();
- columnNames.addObject().put("column", "B").put("port", "beta");
- configuration.put("columnNames", columnNames);
- assertEquals("beta", SpreadsheetUtils.getPortName("B", configuration));
- assertEquals("T", SpreadsheetUtils.getPortName("T", configuration));
- }
-
- /**
- * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getPortName(int, java.util.Map)}.
- */
- @Test
- public void testGetPortNameIntMapOfStringString() {
- assertEquals("A", SpreadsheetUtils.getPortName(0, null));
- assertEquals("AA", SpreadsheetUtils.getPortName(26, null));
- ObjectNode configuration = JsonNodeFactory.instance.objectNode();
- ArrayNode columnNames = configuration.arrayNode();
- columnNames.addObject().put("column", "D").put("port", "delta");
- configuration.put("columnNames", columnNames);
- assertEquals("delta", SpreadsheetUtils.getPortName(3, configuration));
- assertEquals("AB", SpreadsheetUtils.getPortName(27, configuration));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.csv
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.csv b/src/test/resources/test-spreadsheet.csv
deleted file mode 100644
index bbbc3f3..0000000
--- a/src/test/resources/test-spreadsheet.csv
+++ /dev/null
@@ -1 +0,0 @@
-A,5,C,1
A,5,C,1
TRUE,15/06/09,,2
X,X,X,X
X,X,X,X
y,y,y,y
y,y,y,y
y,y,y,y
y,y,y,y
y,y,y,y
y,y,y,y
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.ods
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.ods b/src/test/resources/test-spreadsheet.ods
deleted file mode 100644
index e4dbd98..0000000
Binary files a/src/test/resources/test-spreadsheet.ods and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.xls
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.xls b/src/test/resources/test-spreadsheet.xls
deleted file mode 100644
index 12a2536..0000000
Binary files a/src/test/resources/test-spreadsheet.xls and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.xlsx
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.xlsx b/src/test/resources/test-spreadsheet.xlsx
deleted file mode 100644
index 2accc38..0000000
Binary files a/src/test/resources/test-spreadsheet.xlsx and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/pom.xml
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/pom.xml b/taverna-spreadsheet-import-activity/pom.xml
new file mode 100644
index 0000000..7369bf6
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/pom.xml
@@ -0,0 +1,138 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>net.sf.taverna</groupId>
+ <artifactId>taverna-parent</artifactId>
+ <version>3.0.1-SNAPSHOT</version>
+ </parent>
+ <groupId>net.sf.taverna.t2.activities</groupId>
+ <artifactId>spreadsheet-import-activity</artifactId>
+ <version>2.0.1-SNAPSHOT</version>
+ <packaging>bundle</packaging>
+ <name>Taverna 2 Spreadsheet Import Activity</name>
+ <description>Activity to import data from spreadsheet files</description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Private-Package>org.apache.poi.*;-split-package:=merge-first,org.openxmlformats.schemas.*,schemasMicrosoftCom*,schemaorg_apache_xmlbeans.system.*,org.odftoolkit.*,com.csvreader.*</Private-Package>
+ <Import-Package>!org.apache.poi.hdgf.extractor,
+ !org.apache.poi.hslf.extractor,
+ !org.apache.poi.hwpf.extractor,
+ !org.apache.poi.sl.*,
+ org.apache.xmlbeans.impl.schema,
+ !junit.*,
+ !org.openxmlformats.schemas.*,
+ !schemasMicrosoftComOfficePowerpoint,
+ !schemasMicrosoftComOfficeWord,
+ *
+ </Import-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>net.sf.taverna.t2.core</groupId>
+ <artifactId>reference-api</artifactId>
+ <version>${t2.core.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.taverna.t2.core</groupId>
+ <artifactId>workflowmodel-api</artifactId>
+ <version>${t2.core.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>${poi.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>${poi.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.xmlbeans</groupId>
+ <artifactId>xmlbeans</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.odftoolkit</groupId>
+ <artifactId>odfdom-java</artifactId>
+ <version>${odfdom.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.javacsv</groupId>
+ <artifactId>javacsv</artifactId>
+ <version>${javacsv.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.xmlbeans</groupId>
+ <artifactId>com.springsource.org.apache.xmlbeans</artifactId>
+ <version>${xmlbeans.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.log4j</groupId>
+ <artifactId>com.springsource.org.apache.log4j</artifactId>
+ <version>${log4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.taverna.t2.activities</groupId>
+ <artifactId>activity-test-utils</artifactId>
+ <version>${t2.activities.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <repositories>
+ <!-- This repository is down (2012-03-19) and as the project has moved
+ to apache it's unclear if it will return. odfdom-java-0.7 artifact has been
+ deployed to the mygrid repository. -->
+ <!-- <repository> <id>odftoolkit</id> <name>Odftoolkit Repository</name>
+ <url>https://svn.odftoolkit.org/svn/odfdom~maven2/release</url> <releases>
+ <checksumPolicy>fail</checksumPolicy> </releases> <snapshots> <checksumPolicy>fail</checksumPolicy>
+ </snapshots> </repository> -->
+ <repository>
+ <releases />
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ <id>mygrid-repository</id>
+ <name>myGrid Repository</name>
+ <url>http://www.mygrid.org.uk/maven/repository</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>false</enabled>
+ </releases>
+ <snapshots />
+ <id>mygrid-snapshot-repository</id>
+ <name>myGrid Snapshot Repository</name>
+ <url>http://www.mygrid.org.uk/maven/snapshot-repository</url>
+ </repository>
+ </repositories>
+ <scm>
+ <connection>scm:git:https://github.com/taverna/taverna-spreadsheet-import-activity.git</connection>
+ <developerConnection>scm:git:ssh://git@github.com/taverna/taverna-spreadsheet-import-activity.git</developerConnection>
+ <url>https://github.com/taverna/taverna-spreadsheet-import-activity/</url>
+ <tag>HEAD</tag>
+ </scm>
+
+</project>
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java
new file mode 100644
index 0000000..154019a
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import com.csvreader.CsvReader;
+
+/**
+ * Reads CSV files.
+ *
+ * @author David Withers
+ */
+public class CSVSpreadsheetReader implements SpreadsheetReader {
+
+    /**
+     * Reads CSV records from a stream and passes the selected rows to a row processor.
+     * <p>
+     * Every record whose index is contained in <code>rowRange</code> is converted into a map
+     * from column index to cell text, covering the columns contained in
+     * <code>columnRange</code>; empty cells are stored as <code>null</code>. Reading stops once
+     * the in-range record at the end of <code>rowRange</code> has been handled, or when the
+     * stream is exhausted.
+     * <p>
+     * The stream is decoded with the platform default charset (no charset is passed to the
+     * <code>InputStreamReader</code>) and is not closed by this method.
+     *
+     * @param inputStream
+     *            the CSV data
+     * @param rowRange
+     *            the rows to read
+     * @param columnRange
+     *            the columns to read
+     * @param ignoreBlankRows
+     *            if <code>true</code>, rows whose selected cells are all empty are not passed on
+     * @param rowProcessor
+     *            receives each selected row
+     * @throws SpreadsheetReadException
+     *             if the CSV data cannot be read
+     */
+    public void read(InputStream inputStream, Range rowRange,
+            Range columnRange, boolean ignoreBlankRows,
+            SpreadsheetRowProcessor rowProcessor)
+            throws SpreadsheetReadException {
+        CsvReader csvReader = new CsvReader(new InputStreamReader(inputStream));
+        // do not let the reader drop empty records; blank rows are filtered below instead
+        csvReader.setSkipEmptyRecords(false);
+
+        try {
+            while (csvReader.readRecord()) {
+                int rowIndex = (int) csvReader.getCurrentRecord();
+                if (!rowRange.contains(rowIndex)) {
+                    continue;
+                }
+
+                // a fresh map per row, so values can never carry over from a previous row
+                SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>();
+                boolean blankRow = true;
+                int firstColumn = columnRange.getStart();
+                int lastColumn = columnRange.getEnd();
+                for (int columnIndex = firstColumn; columnIndex <= lastColumn; columnIndex++) {
+                    if (!columnRange.contains(columnIndex)) {
+                        continue;
+                    }
+                    String value = csvReader.get(columnIndex);
+                    if ("".equals(value)) {
+                        value = null;
+                    }
+                    if (value != null) {
+                        blankRow = false;
+                    }
+                    currentDataRow.put(columnIndex, value);
+                }
+
+                // Deliver the row after all columns have been visited. This used to happen
+                // inside the contains() check, so a row was silently dropped (and its values
+                // leaked into the next row) whenever the last column of the range was not
+                // contained in it. ODFSpreadsheetReader already checks end-of-row outside
+                // that guard.
+                // NOTE(review): assumes Range.contains() can be false for the range end
+                // (e.g. via excludes) - confirm against Range.
+                // An empty column range (start > end) delivered nothing before; keep that.
+                if (firstColumn <= lastColumn && (!ignoreBlankRows || !blankRow)) {
+                    rowProcessor.processRow(rowIndex, currentDataRow);
+                }
+
+                if (rowIndex == rowRange.getEnd()) {
+                    break;
+                }
+            }
+        } catch (IOException e) {
+            throw new SpreadsheetReadException("Unable to read CSV file", e);
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java
new file mode 100644
index 0000000..8237ec1
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java
@@ -0,0 +1,324 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.log4j.Logger;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+
+/**
+ * Reads Excel '97 (.xls) or Excel '07 (.xlsx) spreadsheet files.
+ *
+ * @author David Withers
+ */
+public class ExcelSpreadsheetReader implements SpreadsheetReader {
+
+ private static Logger logger = Logger.getLogger(ExcelSpreadsheetReader.class);
+
+ /**
+  * Reads the first sheet of an Excel workbook and passes the selected rows to a row processor.
+  * <p>
+  * If <code>rowRange</code> has a negative end, the end is set to the sheet's last row number;
+  * this modifies the <code>Range</code> object supplied by the caller. Every row index contained
+  * in <code>rowRange</code> is converted into a map from column index to cell text, covering the
+  * columns contained in <code>columnRange</code>; missing rows and cells without a value are
+  * stored as <code>null</code>.
+  *
+  * @param inputStream
+  *            the workbook data (.xls or .xlsx)
+  * @param rowRange
+  *            the rows to read
+  * @param columnRange
+  *            the columns to read
+  * @param ignoreBlankRows
+  *            if <code>true</code>, rows whose selected cells all have no value are not passed on
+  * @param rowProcessor
+  *            receives each selected row
+  * @throws SpreadsheetReadException
+  *             if the stream cannot be read or is not a compatible spreadsheet format
+  */
+ public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor)
+         throws SpreadsheetReadException {
+     Workbook workbook;
+     try {
+         workbook = WorkbookFactory.create(inputStream);
+     } catch (InvalidFormatException e) {
+         throw new SpreadsheetReadException(
+                 "The file does not have a compatible spreadsheet format", e);
+     } catch (IOException e) {
+         throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
+     } catch (IllegalArgumentException e) {
+         throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
+     }
+
+     DataFormatter dataFormatter = new DataFormatter();
+
+     workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
+     Sheet sheet = workbook.getSheetAt(0);
+
+     if (rowRange.getEnd() < 0) {
+         rowRange.setEnd(sheet.getLastRowNum());
+         logger.debug("No end of row range specified, setting to " + rowRange.getEnd());
+     }
+
+     int firstColumn = columnRange.getStart();
+     int lastColumn = columnRange.getEnd();
+
+     for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
+         if (!rowRange.contains(rowIndex)) {
+             continue;
+         }
+
+         // may be null when the sheet has no such row; all cells are then reported as null
+         Row row = sheet.getRow(rowIndex);
+         // a fresh map per row, so values can never carry over from a previous row
+         SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>();
+         boolean blankRow = true;
+
+         for (int columnIndex = firstColumn; columnIndex <= lastColumn; columnIndex++) {
+             if (!columnRange.contains(columnIndex)) {
+                 continue;
+             }
+             String value = null;
+             if (row != null) {
+                 Cell cell = row.getCell(columnIndex);
+                 if (cell != null) {
+                     value = getCellValue(cell, dataFormatter);
+                 }
+             }
+             if (value != null) {
+                 blankRow = false;
+             }
+             currentDataRow.put(columnIndex, value);
+         }
+
+         // Deliver the row after all columns have been visited. This used to happen inside
+         // the contains() check, so a row was silently dropped (and its values leaked into
+         // the next row) whenever the last column of the range was not contained in it.
+         // ODFSpreadsheetReader already checks end-of-row outside that guard.
+         // NOTE(review): assumes Range.contains() can be false for the range end
+         // (e.g. via excludes) - confirm against Range.
+         // An empty column range (start > end) delivered nothing before; keep that.
+         if (firstColumn <= lastColumn && (!ignoreBlankRows || !blankRow)) {
+             rowProcessor.processRow(rowIndex, currentDataRow);
+         }
+     }
+
+ }
+
+ /**
+  * Converts the content of a cell to text.
+  * <p>
+  * Boolean, numeric and string cells are converted directly. A formula cell is converted from
+  * its cached result type in the same way. Date-formatted numeric cells are rendered with the
+  * supplied formatter, except for formula cells, whose date result is rendered with
+  * <code>Date.toString()</code>. Any other cell type yields <code>null</code>.
+  *
+  * @param cell
+  *            the cell to convert
+  * @param dataFormatter
+  *            used to render date-formatted, non-formula numeric cells
+  * @return the cell's text, or <code>null</code> if the cell type is not handled
+  */
+ private String getCellValue(Cell cell, DataFormatter dataFormatter) {
+     int cellType = cell.getCellType();
+     boolean isFormula = cellType == Cell.CELL_TYPE_FORMULA;
+     // a formula cell is rendered according to the type of its cached result
+     int valueType = isFormula ? cell.getCachedFormulaResultType() : cellType;
+
+     switch (valueType) {
+     case Cell.CELL_TYPE_BOOLEAN:
+         return Boolean.toString(cell.getBooleanCellValue());
+     case Cell.CELL_TYPE_NUMERIC:
+         if (!DateUtil.isCellDateFormatted(cell)) {
+             return Double.toString(cell.getNumericCellValue());
+         }
+         // dates: formula results use Date.toString(), plain cells go through the formatter
+         return isFormula ? cell.getDateCellValue().toString()
+                 : dataFormatter.formatCellValue(cell);
+     case Cell.CELL_TYPE_STRING:
+         return cell.getStringCellValue();
+     default:
+         return null;
+     }
+ }
+
+
+// /**
+// * Reads data from an HSSF stream.
+// *
+// * @param inputStream
+// * @param spreradsheetRowProcessor
+// * @throws IOException
+// * @deprecated can't generalize for XSSF streams and not much advantage as all the (non
+// * duplicated) data is contained in one event so memory footprint isn't much smaller
+// */
+// public void readHSSF(InputStream inputStream, SpreadsheetRowProcessor spreradsheetRowProcessor)
+// throws IOException {
+// POIFSFileSystem poifs = new POIFSFileSystem(inputStream);
+//
+// // get the workbook part of the stream
+// InputStream documentInputStream = poifs.createDocumentInputStream("Workbook");
+//
+// RecordProcessor recordProcessor = new RecordProcessor(spreradsheetRowProcessor);
+// MissingRecordAwareHSSFListener hssfListener = new MissingRecordAwareHSSFListener(
+// recordProcessor);
+//
+// // listen for all records
+// HSSFRequest request = new HSSFRequest();
+// request.addListenerForAllRecords(hssfListener);
+//
+// HSSFEventFactory factory = new HSSFEventFactory();
+// factory.processEvents(request, documentInputStream);
+//
+// inputStream.close();
+// documentInputStream.close();
+// }
+//
+// /**
+// * Listener for processing events from an HSSF stream.
+// *
+// * @author David Withers
+// * @deprecated can't generalize for XSSF streams and not much advantage as all the (non
+// * duplicated) data is contained in one event so memory footprint isn't much smaller
+// */
+// class RecordProcessor implements HSSFListener {
+//
+// private SpreadsheetRowProcessor spreradsheetRowProcessor;
+//
+// private SSTRecord sstrec;
+//
+// private boolean worksheetOpen = false;
+//
+// private int row, column;
+//
+// private Map<Integer, String> currentDataRow = new HashMap<Integer, String>();
+//
+// public RecordProcessor(SpreadsheetRowProcessor spreradsheetRowProcessor) {
+// this.spreradsheetRowProcessor = spreradsheetRowProcessor;
+// }
+//
+// public void processRecord(Record record) {
+// switch (record.getSid()) {
+// // the BOFRecord can represent either the beginning of a sheet or
+// // the workbook
+// case BOFRecord.sid:
+// BOFRecord bof = (BOFRecord) record;
+// if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
+// worksheetOpen = true;
+// }
+// break;
+// case EOFRecord.sid:
+// if (worksheetOpen) {
+// while (row < rowRange.getEnd()) {
+// row++;
+// if (rowRange.contains(row)) {
+// for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+// processCell(row, column, null);
+// }
+// spreradsheetRowProcessor.processRow(row, currentDataRow);
+// }
+// currentDataRow = new HashMap<Integer, String>();
+// }
+// worksheetOpen = false;
+// }
+// break;
+// // don't care about sheet name for now
+// // case BoundSheetRecord.sid:
+// // BoundSheetRecord bsr = (BoundSheetRecord) record;
+// // logger.info("New sheet named: " + bsr.getSheetname());
+// // break;
+// case RowRecord.sid:
+// // RowRecord rowRecord = (RowRecord) record;
+// // if (readAllRows) {
+// // int rowNumber = row.getRowNumber();
+// // if (rowNumber < minRow) {
+// // minRow = rowNumber;
+// // currentRow = rowNumber;
+// // }
+// // if (rowNumber > maxRow) {
+// // maxRow = rowNumber;
+// // }
+// // }
+// // if (readAllColumns) {
+// // int firstColumn = row.getFirstCol();
+// // int lastColumn = row.getLastCol() - 1;
+// // if (firstColumn < minColumn) {
+// // minColumn = firstColumn;
+// // currentColumn = firstColumn;
+// // }
+// // if (lastColumn > maxColumn) {
+// // maxColumn = lastColumn;
+// // }
+// // }
+//
+// break;
+// case NumberRecord.sid:
+// NumberRecord number = (NumberRecord) record;
+// row = number.getRow();
+// column = number.getColumn();
+// processCell(row, column, String.valueOf(number.getValue()));
+// break;
+// case SSTRecord.sid:
+// // SSTRecords store a array of unique strings used in Excel.
+// sstrec = (SSTRecord) record;
+// break;
+// case LabelSSTRecord.sid:
+// LabelSSTRecord label = (LabelSSTRecord) record;
+// row = label.getRow();
+// column = label.getColumn();
+// processCell(row, column, sstrec.getString(label.getSSTIndex()).getString());
+// break;
+// case BlankRecord.sid:
+// BlankRecord blank = (BlankRecord) record;
+// row = blank.getRow();
+// column = blank.getColumn();
+// processCell(row, column, null);
+// break;
+// }
+//
+// // Missing column
+// if (record instanceof MissingCellDummyRecord) {
+// MissingCellDummyRecord cell = (MissingCellDummyRecord) record;
+// row = cell.getRow();
+// column = cell.getColumn();
+// processCell(row, column, null);
+// }
+//
+// // Missing row
+// if (record instanceof MissingRowDummyRecord) {
+// MissingRowDummyRecord missingRow = (MissingRowDummyRecord) record;
+// row = missingRow.getRowNumber();
+// if (rowRange.contains(row)) {
+// for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+// processCell(row, column, null);
+// }
+// spreradsheetRowProcessor.processRow(row, currentDataRow);
+// }
+// currentDataRow = new HashMap<Integer, String>();
+// }
+//
+// // End of row
+// if (record instanceof LastCellOfRowDummyRecord) {
+// LastCellOfRowDummyRecord lastCell = (LastCellOfRowDummyRecord) record;
+// row = lastCell.getRow();
+// if (rowRange.contains(row)) {
+// int lastColumn = lastCell.getLastColumnNumber();
+// for (column = lastColumn + 1; column <= columnRange.getEnd(); column++) {
+// processCell(row, column, null);
+// }
+// spreradsheetRowProcessor.processRow(row, currentDataRow);
+// }
+// currentDataRow = new HashMap<Integer, String>();
+// }
+// }
+//
+// private void processCell(int row, int column, String value) {
+// if (rowRange.contains(row) && columnRange.contains(column)) {
+// currentDataRow.put(column, value);
+// }
+// }
+//
+// }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java
new file mode 100644
index 0000000..2b01e00
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java
@@ -0,0 +1,193 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.InputStream;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+
+import org.apache.log4j.Logger;
+import org.odftoolkit.odfdom.OdfFileDom;
+import org.odftoolkit.odfdom.doc.OdfDocument;
+import org.odftoolkit.odfdom.doc.table.OdfTableCell;
+import org.odftoolkit.odfdom.doc.table.OdfTableRow;
+import org.w3c.dom.NodeList;
+
+/**
+ * Reads Open Document Format (ODF) spreadsheet files.
+ *
+ * @author David Withers
+ */
+public class ODFSpreadsheetReader implements SpreadsheetReader {
+
+ private static Logger logger = Logger.getLogger(ODFSpreadsheetReader.class);
+
+ /**
+  * Reads the first table of an ODF spreadsheet and passes the selected rows to a row processor.
+  * <p>
+  * The table rows are obtained with an XPath query over the document content. If
+  * <code>rowRange</code> has a negative end, the end is set to the row count computed by
+  * {@link #calculateRowCount(NodeList)} minus one; this modifies the <code>Range</code> object
+  * supplied by the caller. Cells and rows carrying a repeat count are expanded into the
+  * corresponding number of columns / rows, up to the end of the respective range.
+  *
+  * @param inputStream
+  *            the ODF document
+  * @param rowRange
+  *            the rows to read
+  * @param columnRange
+  *            the columns to read
+  * @param ignoreBlankRows
+  *            if <code>true</code>, rows in which no visited cell has a value are not passed on
+  * @param rowProcessor
+  *            receives each selected row
+  * @throws SpreadsheetReadException
+  *             if loading or querying the document fails with a checked exception; runtime
+  *             exceptions are rethrown unchanged
+  */
+ public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor)
+ throws SpreadsheetReadException {
+ NodeList rowList = null;
+ try {
+ // Load the ODF document
+ OdfDocument odfDoc = OdfDocument.loadDocument(inputStream);
+ logger.debug("Reading document of type : " + odfDoc.getMediaType());
+ // Get the content as DOM tree
+ OdfFileDom odfContent = odfDoc.getContentDom();
+ // Initialize XPath
+ XPath xpath = odfDoc.getXPath();
+ // Get the rows of the first table
+ String rowsPath = ("//table:table[1]/table:table-row");
+ rowList = (NodeList) xpath.evaluate(rowsPath, odfContent, XPathConstants.NODESET);
+ } catch (Exception e) {
+ // runtime exceptions pass through untouched; only checked failures are wrapped
+ if (e instanceof RuntimeException) {
+ throw (RuntimeException) e;
+ }
+ throw new SpreadsheetReadException("The spreadsheet file could not be read", e);
+ }
+
+ // an open-ended row range is closed at the last row counted by calculateRowCount
+ if (rowRange.getEnd() < 0) {
+ rowRange.setEnd(calculateRowCount(rowList) - 1);
+ }
+
+ SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>();
+ // rowRep: number of extra rows produced so far by expanding repeated rows;
+ // rowIndex + rowRep is the row number reported to the processor
+ // NOTE(review): rowIndex is used both as an index into rowList and as a bound against
+ // rowRange; these differ once repeated rows have been expanded - confirm this is intended
+ int rowRep = 0;
+ for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
+ boolean blankRow = true;
+ // item() yields null when rowIndex is beyond the end of the list; handled below
+ OdfTableRow row = (OdfTableRow) rowList.item(rowIndex);
+ // columnRep: number of extra columns produced so far in this row by repeated cells
+ int columnRep = 0;
+ for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
+ String value = null;
+ OdfTableCell cell = null;
+ if (row != null) {
+ cell = (OdfTableCell) row.getCellAt(columnIndex);
+ if (cell != null) {
+ // pick the attribute that carries the value for the cell's declared type
+ String type = cell.getOfficeValueTypeAttribute();
+ if ("float".equals(type)) {
+ value = cell.getOfficeValueAttribute().toString();
+ } else if ("percentage".equals(type)) {
+ value = cell.getOfficeValueAttribute().toString();
+ } else if ("currency".equals(type)) {
+ value = cell.getOfficeValueAttribute().toString();
+ } else if ("date".equals(type)) {
+ value = cell.getOfficeDateValueAttribute();
+ } else if ("time".equals(type)) {
+ value = cell.getOfficeTimeValueAttribute();
+ } else if ("boolean".equals(type)) {
+ value = cell.getOfficeBooleanValueAttribute().toString();
+ } else if ("string".equals(type)) {
+ value = cell.getOfficeStringValueAttribute();
+ // no string attribute: fall back to the cell's text content
+ if (value == null) {
+ value = cell.getTextContent();
+ }
+ } else {
+ // unknown or missing type: use the cell's text content
+ value = cell.getTextContent();
+ }
+ }
+ }
+ // empty text counts as no value
+ value = "".equals(value) ? null : value;
+ // note: this is evaluated before the column range check below, so any visited
+ // cell with a value marks the row as non-blank
+ if (value != null) {
+ blankRow = false;
+ }
+ // if the cell is within the column range add it to the row values
+ if (columnRange.contains(columnIndex + columnRep)) {
+ currentDataRow.put(columnIndex + columnRep, value);
+ }
+ // check if this cell is repeated
+ int repeatedCells = cell == null ? 0 : cell
+ .getTableNumberColumnsRepeatedAttribute() - 1;
+ // copy the value into the following columns, stopping at the end of the column range
+ while (repeatedCells > 0 && columnIndex + columnRep < columnRange.getEnd()) {
+ columnRep++;
+ if (columnRange.contains(columnIndex + columnRep)) {
+ currentDataRow
+ .put(columnIndex + columnRep, value);
+ }
+ repeatedCells--;
+ }
+ // if it's the last cell in the range process the row
+ if (columnIndex == columnRange.getEnd()) {
+ if (rowRange.contains(rowIndex + rowRep)) {
+ if (!ignoreBlankRows || !blankRow) {
+ rowProcessor.processRow(rowIndex + rowRep, currentDataRow);
+ }
+ }
+ // check if this row is repeated
+ int repeatedRows = row == null ? 0
+ : row.getTableNumberRowsRepeatedAttribute() - 1;
+ // deliver the same row data again for each repeat, stopping at the end of the row range
+ while (repeatedRows > 0 && rowIndex + rowRep < rowRange.getEnd()) {
+ rowRep++;
+ if (rowRange.contains(rowIndex + rowRep)) {
+ if (!ignoreBlankRows || !blankRow) {
+ rowProcessor.processRow(rowIndex + rowRep, currentDataRow);
+ }
+ }
+ repeatedRows--;
+ }
+ // start a new map for the next row
+ currentDataRow = new TreeMap<Integer, String>();
+ }
+
+ }
+ }
+
+ }
+
+ /**
+  * Counts the rows of a table up to and including the last non-blank row.
+  * <p>
+  * Each row node contributes its repeat count. Blank rows are only included in the total once
+  * a non-blank row follows them, so blank rows at the end of the table are not counted.
+  *
+  * @param rowList
+  *            the row nodes of a table
+  * @return the number of rows, excluding trailing blank rows
+  */
+ private int calculateRowCount(NodeList rowList) {
+     int total = 0;
+     // blank rows seen since the last non-blank row; added only if more data follows
+     int pendingBlank = 0;
+     for (int index = 0; index < rowList.getLength(); index++) {
+         OdfTableRow current = (OdfTableRow) rowList.item(index);
+         int span = current.getTableNumberRowsRepeatedAttribute();
+         if (!isBlankRow(current)) {
+             total += span + pendingBlank;
+             pendingBlank = 0;
+             continue;
+         }
+         pendingBlank += span;
+     }
+     return total;
+ }
+
+ /**
+  * Tests whether a row is blank, i.e. none of its cells has any text content.
+  * <p>
+  * Cells are inspected from index 0 upwards until <code>getCellAt</code> returns
+  * <code>null</code>.
+  *
+  * @param row
+  *            the row to inspect
+  * @return <code>true</code> if no cell of the row has non-empty text content
+  */
+ private boolean isBlankRow(OdfTableRow row) {
+     for (int position = 0;; position++) {
+         OdfTableCell candidate = (OdfTableCell) row.getCellAt(position);
+         if (candidate == null) {
+             // ran out of cells without finding any text
+             return true;
+         }
+         String text = candidate.getTextContent();
+         if (text != null && text.length() > 0) {
+             return false;
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java
new file mode 100644
index 0000000..29f5e05
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java
@@ -0,0 +1,298 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationBean;
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationProperty;
+
+/**
+ * A range of integer values.
+ *
+ * @author David Withers
+ */
+@ConfigurationBean(uri = SpreadsheetImportActivity.URI + "/Range")
+public class Range {
+
+ /**
+ * The (inclusive) start and end of this <code>Range</code>.
+ */
+ private int start, end;
+
+ /**
+ * <code>Range</code>s that are excluded from this <code>Range</code>.
+ */
+ private List<Range> excludes = new ArrayList<Range>();
+
+ /**
+ * Constructs a <code>Range</code>.
+ */
+ public Range() {
+ }
+
+ /**
+ * Constructs a <code>Range</code> with the specified start and end values.
+ *
+ * @param start
+ * the start of the range
+ * @param end
+ * the end of the range
+ */
+ public Range(int start, int end) {
+ this.start = start;
+ this.end = end;
+ }
+
+ /**
+ * Constructs a <code>Range</code> with the specified start and end values and a
+ * <code>Range</code> of excluded values.
+ *
+ * @param start
+ * the start of the range
+ * @param end
+ * the end of the range
+ * @param exclude
+ * the range to exclude
+ */
+ public Range(int start, int end, Range exclude) {
+ this.start = start;
+ this.end = end;
+ if (exclude != null) {
+ excludes.add(exclude);
+ }
+ }
+
+ /**
+ * Constructs a <code>Range</code> with the specified start and end values and
+ * <code>Range</code>s of excluded values.
+ *
+ * @param start
+ * the start of the range
+ * @param end
+ * the end of the range
+ * @param excludes
+ * the ranges to exclude
+ */
+ public Range(int start, int end, List<Range> excludes) {
+ this.start = start;
+ this.end = end;
+ if (excludes != null) {
+ for (Range range : excludes) {
+ this.excludes.add(range);
+ }
+ }
+ }
+
+ /**
+ * Constructs a <code>Range</code> that is a deep copy of the specified range.
+ *
+ * @param range
+ * the <code>Range</code> to copy
+ */
+ public Range(Range range) {
+ this.start = range.start;
+ this.end = range.end;
+ if (range.excludes != null) {
+ for (Range excludeRange : range.excludes) {
+ excludes.add(new Range(excludeRange));
+ }
+ }
+ }
+
+ /**
+ * Returns <code>true</code> if <code>value</code> is included in this <code>Range</code>.
+ *
+ * @param value
+ * @return
+ */
+ public boolean contains(int value) {
+ if (value >= start && (value <= end || end < 0)) {
+ for (Range exclude : excludes) {
+ if (exclude.contains(value)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Returns an array of the values contained in this <code>Range</code>.
+ *
+ * @return an array of the values contained in this <code>Range</code>
+ */
+ public int[] getRangeValues() {
+ List<Integer> rangeList = new ArrayList<Integer>();
+ for (int i = start; i <= end; i++) {
+ boolean excluded = false;
+ for (Range range : excludes) {
+ if (range.contains(i)) {
+ excluded = true;
+ break;
+ }
+ }
+ if (!excluded) {
+ rangeList.add(i);
+ }
+ }
+ int[] rangeArray = new int[rangeList.size()];
+ for (int i = 0; i < rangeArray.length; i++) {
+ rangeArray[i] = rangeList.get(i);
+ }
+ return rangeArray;
+ }
+
+ /**
+ * Returns the start of the <code>Range</code>.
+ *
+ * @return the start of the <code>Range</code>
+ */
+ public int getStart() {
+ return start;
+ }
+
+ /**
+ * Sets the start of the <code>Range</code>.
+ *
+ * @param start
+ * the new value for start of the <code>Range</code>
+ */
+ @ConfigurationProperty(name = "start", label = "Start", description = "The start of the range")
+ public void setStart(int start) {
+ this.start = start;
+ }
+
+ /**
+ * Returns the end of the <code>Range</code>.
+ *
+ * @return the end of the <code>Range</code>
+ */
+ public int getEnd() {
+ return end;
+ }
+
+ /**
+ * Sets the end of the range.
+ *
+ * @param end
+ * the new value for end of the <code>Range</code>
+ */
+ @ConfigurationProperty(name = "end", label = "End", description = "The end of the range")
+ public void setEnd(int end) {
+ this.end = end;
+ }
+
+ /**
+ * Adds a <code>Range</code> to be excluded from this <code>Range</code>.
+ *
+ * @param exclude
+ * a <code>Range</code> to be excluded
+ */
+ public void addExclude(Range exclude) {
+ excludes.add(exclude);
+ }
+
+ /**
+ * Removes a <code>Range</code> from the exclusions for this range.
+ *
+ * @param exclude
+ * a <code>Range</code> to be removed from the exclusions
+ */
+ public void removeExclude(Range exclude) {
+ excludes.remove(exclude);
+ }
+
+ /**
+ * Returns the exclusions for this range.
+ *
+ * @return the exclusions for this range
+ */
+ public List<Range> getExcludes() {
+ return excludes;
+ }
+
+ /**
+ * Sets the exclusions for this range.
+ *
+ * @param excludes
+ * the exclusions for this range
+ */
+ @ConfigurationProperty(name = "excludes", label = "Excludes Ranges", description = "The ranges the exclude from this range", required = false)
+ public void setExcludes(List<Range> excludes) {
+ this.excludes = excludes;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder();
+ result.append('[');
+ result.append(start);
+ result.append("..");
+ result.append(end);
+ result.append(']');
+ return result.toString();
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see java.lang.Object#hashCode()
+ */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + end;
+ result = prime * result + ((excludes == null) ? 0 : excludes.hashCode());
+ result = prime * result + start;
+ return result;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ Range other = (Range) obj;
+ if (end != other.end)
+ return false;
+ if (excludes == null) {
+ if (other.excludes != null)
+ return false;
+ } else if (!excludes.equals(other.excludes))
+ return false;
+ if (start != other.start)
+ return false;
+ return true;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java
new file mode 100644
index 0000000..ece75e5
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java
@@ -0,0 +1,39 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+/**
+ * Enumeration of the policies for handling empty/missing cells in a spreadsheet.
+ * <p>
+ * <dl>
+ * <dt>EMPTY_STRING</dt>
+ * <dd>Use an empty string value ("")</dd>
+ * <dt>USER_DEFINED</dt>
+ * <dd>Use a value defined by the user</dd>
+ * <dt>GENERATE_ERROR</dt>
+ * <dd>Generate an ErrorDocument</dd>
+ * </dl>
+ *
+ * @author David Withers
+ */
+public enum SpreadsheetEmptyCellPolicy {
+ EMPTY_STRING, USER_DEFINED, GENERATE_ERROR
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java
new file mode 100644
index 0000000..1f48ce8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java
@@ -0,0 +1,358 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedMap;
+
+import net.sf.taverna.t2.invocation.InvocationContext;
+import net.sf.taverna.t2.reference.ExternalReferenceSPI;
+import net.sf.taverna.t2.reference.Identified;
+import net.sf.taverna.t2.reference.ReferenceService;
+import net.sf.taverna.t2.reference.ReferenceServiceException;
+import net.sf.taverna.t2.reference.ReferenceSet;
+import net.sf.taverna.t2.reference.T2Reference;
+import net.sf.taverna.t2.reference.ValueCarryingExternalReference;
+import net.sf.taverna.t2.workflowmodel.OutputPort;
+import net.sf.taverna.t2.workflowmodel.Port;
+import net.sf.taverna.t2.workflowmodel.processor.activity.AbstractAsynchronousActivity;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityConfigurationException;
+import net.sf.taverna.t2.workflowmodel.processor.activity.AsynchronousActivityCallback;
+
+import org.apache.log4j.Logger;
+
+import com.csvreader.CsvWriter;
+import com.fasterxml.jackson.databind.JsonNode;
+
+
+/**
+ * An {@link net.sf.taverna.t2.workflowmodel.processor.activity.Activity} that reads spreadsheet
+ * files.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetImportActivity extends AbstractAsynchronousActivity<JsonNode> {
+
+ public static final String URI = "http://ns.taverna.org.uk/2010/activity/spreadsheet-import";
+
+ public static final String INPUT_PORT_NAME = "fileurl";
+
+ public static final String OUTPUT_PORT_NAME = "output";
+
+ private static Logger logger = Logger.getLogger(SpreadsheetImportActivity.class);
+
+ private JsonNode configurationBean;
+
+ private Range rowRange, columnRange;
+
+ private boolean ignoreBlankRows;
+
+ private String missingCellValue;
+
+ private SpreadsheetEmptyCellPolicy emptyCellPolicy;
+
+ private SpreadsheetOutputFormat outputFormat;
+
+ private String csvDelimiter;
+
+ /**
+ * Constructs a SpreadsheetImport activity.
+ */
+ public SpreadsheetImportActivity() {
+ }
+
+ @Override
+ public void configure(JsonNode configurationBean)
+ throws ActivityConfigurationException {
+ this.configurationBean = configurationBean;
+ rowRange = SpreadsheetUtils.getRange(configurationBean.get("rowRange"));
+ logger.debug("Setting row range to " + rowRange);
+ columnRange = SpreadsheetUtils.getRange(configurationBean.get("columnRange"));
+ logger.debug("Setting column range to " + columnRange);
+ ignoreBlankRows = configurationBean.get("ignoreBlankRows").booleanValue();
+ missingCellValue = configurationBean.get("emptyCellValue").textValue();
+ logger.debug("Setting empty cell value to '" + missingCellValue + "'");
+ emptyCellPolicy = SpreadsheetEmptyCellPolicy.valueOf(configurationBean.get("emptyCellPolicy").textValue());
+ logger.debug("Setting empty cell policy to " + emptyCellPolicy);
+ outputFormat = SpreadsheetOutputFormat.valueOf(configurationBean.get("outputFormat").textValue());
+ logger.debug("Setting output format to " + outputFormat);
+ csvDelimiter = configurationBean.get("csvDelimiter").textValue();
+ logger.debug("Setting csv delimiter to '" + csvDelimiter + "'");
+// configurePorts();
+ }
+
+ private void configurePorts() {
+ removeInputs();
+ addInput(INPUT_PORT_NAME, 0, false, null, null);
+
+ removeOutputs();
+ if (outputFormat.equals(SpreadsheetOutputFormat.PORT_PER_COLUMN)) {
+ for (int column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+ if (columnRange.contains(column)) {
+ addOutput(SpreadsheetUtils.getPortName(column, configurationBean), 1, 1);
+ }
+ }
+ } else {
+ addOutput(OUTPUT_PORT_NAME, 0, 0);
+ }
+ }
+
+ @Override
+ public JsonNode getConfiguration() {
+ return configurationBean;
+ }
+
+ @Override
+ public void executeAsynch(final Map<String, T2Reference> data,
+ final AsynchronousActivityCallback callback) {
+ callback.requestRun(new Runnable() {
+
+ public void run() {
+
+ Map<String, T2Reference> outputData = new HashMap<String, T2Reference>();
+
+ InvocationContext context = callback.getContext();
+ ReferenceService referenceService = context.getReferenceService();
+
+ try {
+ T2Reference inputRef = data.get(INPUT_PORT_NAME);
+
+ SpreadsheetRowProcessor spreadsheetRowProcessor = null;
+ Map<String, List<T2Reference>> outputLists = null;
+ StringWriter output = null;
+
+ if (outputFormat.equals(SpreadsheetOutputFormat.PORT_PER_COLUMN)) {
+ outputLists = new HashMap<String, List<T2Reference>>();
+ for (Port port : getOutputPorts()) {
+ outputLists.put(port.getName(), new ArrayList<T2Reference>());
+ }
+ spreadsheetRowProcessor = new MultiplePortRowProcessor(referenceService, outputLists, context);
+ } else {
+ output = new StringWriter();
+ char csvDelimiterCharacter = ',';
+ if (csvDelimiter != null && csvDelimiter.length() > 0) {
+ csvDelimiterCharacter = csvDelimiter.charAt(0);
+ }
+ CsvWriter csvWriter = new CsvWriter(output, csvDelimiterCharacter);
+ csvWriter.setEscapeMode(CsvWriter.ESCAPE_MODE_DOUBLED);
+ csvWriter.setTextQualifier('"');
+ csvWriter.setUseTextQualifier(true);
+ spreadsheetRowProcessor = new SingleOutputRowProcessor(csvWriter);
+ }
+
+ InputStream inputStream = getInputStream(context, referenceService, inputRef);
+ if (inputStream == null) {
+ logger.warn("Input is not a file reference or a file name");
+ callback.fail("Input is not a file reference or a file name");
+ return;
+ }
+ try {
+ try {
+ new ExcelSpreadsheetReader().read(inputStream, new Range(rowRange),
+ new Range(columnRange), ignoreBlankRows, spreadsheetRowProcessor);
+ } catch (SpreadsheetReadException e) {
+ inputStream.close();
+ inputStream = getInputStream(context, referenceService, inputRef);
+ try {
+ new ODFSpreadsheetReader().read(inputStream, new Range(rowRange),
+ new Range(columnRange), ignoreBlankRows, spreadsheetRowProcessor);
+ } catch (SpreadsheetReadException e2) {
+ inputStream.close();
+ inputStream = getInputStream(context, referenceService, inputRef);
+ new CSVSpreadsheetReader().read(inputStream, new Range(rowRange),
+ new Range(columnRange), ignoreBlankRows, spreadsheetRowProcessor);
+ }
+ } finally {
+ inputStream.close();
+ }
+ } catch (IOException e1) {
+ logger.warn("Failed to close spereadsheet stream", e1);
+ }
+
+ // get outputs
+ if (outputFormat.equals(SpreadsheetOutputFormat.PORT_PER_COLUMN)) {
+ for (OutputPort outputPort : getOutputPorts()) {
+ String name = outputPort.getName();
+ Object value = outputLists.get(name);
+ T2Reference id = referenceService.register(value, outputPort.getDepth(),
+ true, context);
+ outputData.put(name, id);
+ }
+ } else {
+ T2Reference id = referenceService.register(output.toString(), 0, true, context);
+ outputData.put(OUTPUT_PORT_NAME, id);
+ }
+ callback.receiveResult(outputData, new int[0]);
+ } catch (ReferenceServiceException e) {
+ logger.warn("Error accessing spreadsheet input/output data", e);
+ callback.fail("Error accessing spreadsheet input/output data", e);
+ } catch (SpreadsheetReadException e) {
+ logger.warn("Spreadsheet input cannot be read", e);
+ callback.fail("Spreadsheet input cannot be read", e);
+ } catch (FileNotFoundException e) {
+ logger.warn("Input spreadsheet file does not exist", e);
+ callback.fail("Input spreadsheet file does not exist", e);
+ } catch (IOException e) {
+ logger.warn("Error reading spreadsheet", e);
+ callback.fail("Error reading spreadsheet", e);
+ }
+ }
+
+
+ });
+ }
+
+ private InputStream getInputStream(InvocationContext context,
+ ReferenceService referenceService, T2Reference inputRef)
+ throws IOException {
+ InputStream inputStream = null;
+
+ Identified identified = referenceService.resolveIdentifier(inputRef, null, context);
+ if (identified instanceof ReferenceSet) {
+ ReferenceSet referenceSet = (ReferenceSet) identified;
+ Set<ExternalReferenceSPI> externalReferences = referenceSet
+ .getExternalReferences();
+ for (ExternalReferenceSPI externalReference : externalReferences) {
+ if (externalReference instanceof ValueCarryingExternalReference<?>) {
+ ValueCarryingExternalReference<?> vcer = (ValueCarryingExternalReference<?>) externalReference;
+ if (String.class.isAssignableFrom(vcer.getValueType())) {
+ String input = (String) vcer.getValue();
+ try {
+ URL url = new URL(input);
+ inputStream = url.openStream();
+ logger.debug("Input spreadsheet url is '" + input + "'");
+ } catch (MalformedURLException e) {
+ logger.debug("Input spreadsheet file name is '" + input + "'");
+ inputStream = new FileInputStream(input);
+ }
+ }
+ break;
+ } else {
+ inputStream = externalReference.openStream(context);
+ break;
+ }
+ }
+ }
+ return inputStream;
+ }
+
+ /**
+ * SpreadsheetRowProcessor for handling a single output formatted as csv.
+ *
+ * @author David Withers
+ */
+ private final class SingleOutputRowProcessor implements SpreadsheetRowProcessor {
+
+ private final CsvWriter csvWriter;
+
+ /**
+ * Constructs a new SingleOutputRowProcessor.
+ *
+ * @param csvWriter
+ */
+ private SingleOutputRowProcessor(CsvWriter csvWriter) {
+ this.csvWriter = csvWriter;
+ }
+
+ public void processRow(int rowIndex, SortedMap<Integer, String> row) {
+ try {
+ for (String value : row.values()) {
+ if (value == null) {
+ if (emptyCellPolicy.equals(SpreadsheetEmptyCellPolicy.GENERATE_ERROR)) {
+ value = "ERROR";
+ } else if (emptyCellPolicy.equals(SpreadsheetEmptyCellPolicy.EMPTY_STRING)) {
+ value = "";
+ } else {
+ value = missingCellValue;
+ }
+ }
+ csvWriter.write(value, true);
+ }
+ csvWriter.endRecord();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ /**
+ * SpreadsheetRowProcessor for handling multiple outputs.
+ *
+ * @author David Withers
+ */
+ private final class MultiplePortRowProcessor implements SpreadsheetRowProcessor {
+
+ private final ReferenceService referenceService;
+ private final Map<String, List<T2Reference>> outputLists;
+ private final InvocationContext context;
+
+ /**
+ * Constructs a new MultiplePortRowProcessor.
+ *
+ * @param referenceService
+ * @param outputLists
+ * @param context
+ */
+ private MultiplePortRowProcessor(ReferenceService referenceService,
+ Map<String, List<T2Reference>> outputLists, InvocationContext context) {
+ this.referenceService = referenceService;
+ this.outputLists = outputLists;
+ this.context = context;
+ }
+
+ public void processRow(int rowIndex, SortedMap<Integer, String> row) {
+ for (Entry<Integer, String> entry : row.entrySet()) {
+ String column = SpreadsheetUtils.getPortName(entry.getKey(),
+ configurationBean);
+ Object value = entry.getValue();
+ if (value == null) {
+ if (emptyCellPolicy
+ .equals(SpreadsheetEmptyCellPolicy.GENERATE_ERROR)) {
+ value = referenceService.getErrorDocumentService()
+ .registerError(
+ "Missing data for spreadsheet cell "
+ + column + row, 0, context);
+ } else if (emptyCellPolicy
+ .equals(SpreadsheetEmptyCellPolicy.EMPTY_STRING)) {
+ value = "";
+ } else {
+ value = missingCellValue;
+ }
+ }
+ T2Reference id = referenceService.register(value, 0, true, context);
+ outputLists.get(column).add(id);
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java
new file mode 100644
index 0000000..a4b1ab8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * Copyright (C) 2011 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.HashSet;
+import java.util.Set;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import net.sf.taverna.t2.workflowmodel.Edits;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityConfigurationException;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityFactory;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityInputPort;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityOutputPort;
+
+/**
+ * An {@link ActivityFactory} for creating <code>SpreadsheetImportActivity</code>.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetImportActivityFactory implements ActivityFactory {
+
+ private Edits edits;
+
+ @Override
+ public SpreadsheetImportActivity createActivity() {
+ return new SpreadsheetImportActivity();
+ }
+
+ @Override
+ public URI getActivityType() {
+ return URI.create(SpreadsheetImportActivity.URI);
+ }
+
+ @Override
+ public JsonNode getActivityConfigurationSchema() {
+ ObjectMapper objectMapper = new ObjectMapper();
+ try {
+ return objectMapper.readTree(getClass().getResource("/schema.json"));
+ } catch (IOException e) {
+ return objectMapper.createObjectNode();
+ }
+ }
+
+ @Override
+ public Set<ActivityInputPort> getInputPorts(JsonNode configuration)
+ throws ActivityConfigurationException {
+ Set<ActivityInputPort> inputPorts = new HashSet<>();
+ inputPorts.add(edits.createActivityInputPort(SpreadsheetImportActivity.INPUT_PORT_NAME, 0, false, null, null));
+ return inputPorts;
+ }
+
+ @Override
+ public Set<ActivityOutputPort> getOutputPorts(JsonNode configuration)
+ throws ActivityConfigurationException {
+ Set<ActivityOutputPort> outputPorts = new HashSet<>();
+ if ("PORT_PER_COLUMN".equals(configuration.get("outputFormat").textValue())) {
+ Range columnRange = SpreadsheetUtils.getRange(configuration.get("columnRange"));
+ for (int column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+ if (columnRange.contains(column)) {
+ outputPorts.add(edits.createActivityOutputPort(SpreadsheetUtils.getPortName(column, configuration), 1, 1));
+ }
+ }
+ } else {
+ outputPorts.add(edits.createActivityOutputPort(SpreadsheetImportActivity.OUTPUT_PORT_NAME, 0, 0));
+ }
+ return outputPorts;
+ }
+
+ public void setEdits(Edits edits) {
+ this.edits = edits;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java
new file mode 100644
index 0000000..1cf37ae
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java
@@ -0,0 +1,416 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationBean;
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationProperty;
+
+/**
+ * Configuration for the SpreadsheetImport activity.
+ *
+ * @author David Withers
+ */
+@ConfigurationBean(uri = SpreadsheetImportActivity.URI + "#Config")
+public class SpreadsheetImportConfiguration {
+
+    private Range columnRange;
+    private Range rowRange;
+    private String emptyCellValue;
+    private Map<String, String> columnNames;
+    private boolean allRows;
+    private boolean excludeFirstRow;
+    private boolean ignoreBlankRows;
+    private SpreadsheetEmptyCellPolicy emptyCellPolicy;
+    private SpreadsheetOutputFormat outputFormat;
+    private String csvDelimiter;
+
+    /**
+     * Constructs a new SpreadsheetImportConfiguration with default values: column range
+     * <code>new Range(0, 1)</code>, row range <code>new Range(0, -1)</code>, an empty string
+     * as the empty cell value, no column name mappings, all rows imported, the first row and
+     * blank rows included, <code>SpreadsheetEmptyCellPolicy.EMPTY_STRING</code>,
+     * <code>SpreadsheetOutputFormat.PORT_PER_COLUMN</code> and <code>","</code> as the CSV
+     * delimiter.
+     */
+    public SpreadsheetImportConfiguration() {
+        columnRange = new Range(0, 1);
+        rowRange = new Range(0, -1);
+        emptyCellValue = "";
+        columnNames = new HashMap<>();
+        allRows = true;
+        excludeFirstRow = false;
+        ignoreBlankRows = false;
+        emptyCellPolicy = SpreadsheetEmptyCellPolicy.EMPTY_STRING;
+        outputFormat = SpreadsheetOutputFormat.PORT_PER_COLUMN;
+        csvDelimiter = ",";
+    }
+
+    /**
+     * Constructs a new SpreadsheetImportConfiguration that copies the values from the given
+     * configuration. The ranges and the column name map are copied rather than shared; a
+     * <code>null</code> range or map in the source stays <code>null</code> in the copy, so the
+     * copy is always equal to the source.
+     *
+     * @param configuration
+     *            the configuration to copy; must not be <code>null</code>
+     */
+    public SpreadsheetImportConfiguration(SpreadsheetImportConfiguration configuration) {
+        columnRange = copyOf(configuration.columnRange);
+        rowRange = copyOf(configuration.rowRange);
+        emptyCellValue = configuration.emptyCellValue;
+        columnNames = configuration.columnNames == null
+                ? null
+                : new HashMap<>(configuration.columnNames);
+        allRows = configuration.allRows;
+        excludeFirstRow = configuration.excludeFirstRow;
+        emptyCellPolicy = configuration.emptyCellPolicy;
+        ignoreBlankRows = configuration.ignoreBlankRows;
+        outputFormat = configuration.outputFormat;
+        csvDelimiter = configuration.csvDelimiter;
+    }
+
+    /** Returns a copy of the given range, or <code>null</code> if the range is <code>null</code>. */
+    private static Range copyOf(Range range) {
+        return range == null ? null : new Range(range);
+    }
+
+    /**
+     * Returns the columnRange.
+     *
+     * @return the value of columnRange
+     */
+    public Range getColumnRange() {
+        return columnRange;
+    }
+
+    /**
+     * Sets the columnRange.
+     *
+     * @param columnRange
+     *            the new value for columnRange
+     */
+    @ConfigurationProperty(name = "columnRange", label = "Column Range", description = "The range of columns to be imported (e.g. columns 2 to 7)")
+    public void setColumnRange(Range columnRange) {
+        this.columnRange = columnRange;
+    }
+
+    /**
+     * Returns the rowRange.
+     *
+     * @return the value of rowRange
+     */
+    public Range getRowRange() {
+        return rowRange;
+    }
+
+    /**
+     * Sets the rowRange.
+     *
+     * @param rowRange
+     *            the new value for rowRange
+     */
+    @ConfigurationProperty(name = "rowRange", label = "Row Range", description = "The range of rows to be imported (e.g. rows 1 to 15)", required = false)
+    public void setRowRange(Range rowRange) {
+        this.rowRange = rowRange;
+    }
+
+    /**
+     * Returns the emptyCellValue. The default value is "".
+     *
+     * @return the value of emptyCellValue
+     */
+    public String getEmptyCellValue() {
+        return emptyCellValue;
+    }
+
+    /**
+     * Sets the emptyCellValue.
+     *
+     * @param emptyCellValue
+     *            the new value for emptyCellValue
+     */
+    @ConfigurationProperty(name = "emptyCellValue", label = "Empty Cell Value", description = "The value to use for empty cells. The default is \"\"", required = false)
+    public void setEmptyCellValue(String emptyCellValue) {
+        this.emptyCellValue = emptyCellValue;
+    }
+
+    /**
+     * Returns the columnNames. The default value is an empty map.
+     *
+     * @return the value of columnNames
+     */
+    public Map<String, String> getColumnNames() {
+        return columnNames;
+    }
+
+    /**
+     * Sets the columnNames. The given map is stored as is, without copying.
+     *
+     * @param columnNames
+     *            the new value for columnNames
+     */
+    public void setColumnNames(Map<String, String> columnNames) {
+        this.columnNames = columnNames;
+    }
+
+    /**
+     * Sets the columnNames from a set of column-to-port mappings. A <code>null</code> set is
+     * treated like an empty set and results in an empty map.
+     *
+     * @param columnNames
+     *            the mappings that make up the new value for columnNames
+     */
+    @ConfigurationProperty(name = "columnNames", label = "Column Name Mapping", description = "Mapping from column to port names", required = false)
+    public void setColumnNames(Set<Mapping> columnNames) {
+        Map<String, String> columnNamesMap = new HashMap<>();
+        if (columnNames != null) {
+            for (Mapping mapping : columnNames) {
+                columnNamesMap.put(mapping.column, mapping.port);
+            }
+        }
+        this.columnNames = columnNamesMap;
+    }
+
+    /**
+     * Returns the allRows property. The default value is <code>true</code>.
+     *
+     * @return the value of allRows
+     */
+    public boolean isAllRows() {
+        return allRows;
+    }
+
+    /**
+     * Sets the allRows property.
+     *
+     * @param allRows
+     *            the new value for allRows
+     */
+    @ConfigurationProperty(name = "allRows", label = "Import All Rows", description = "Imports all the rows containing data", required = false)
+    public void setAllRows(boolean allRows) {
+        this.allRows = allRows;
+    }
+
+    /**
+     * Returns the excludeFirstRow property. The default value is <code>false</code>.
+     *
+     * @return the value of excludeFirstRow
+     */
+    public boolean isExcludeFirstRow() {
+        return excludeFirstRow;
+    }
+
+    /**
+     * Sets the excludeFirstRow property.
+     *
+     * @param excludeFirstRow
+     *            the new value for excludeFirstRow
+     */
+    @ConfigurationProperty(name = "excludeFirstRow", label = "Exclude First Row", description = "Excludes the first row from the import", required = false)
+    public void setExcludeFirstRow(boolean excludeFirstRow) {
+        this.excludeFirstRow = excludeFirstRow;
+    }
+
+    /**
+     * Returns the ignoreBlankRows property. The default value is <code>false</code>.
+     *
+     * @return the value of ignoreBlankRows
+     */
+    public boolean isIgnoreBlankRows() {
+        return ignoreBlankRows;
+    }
+
+    /**
+     * Sets the ignoreBlankRows property.
+     *
+     * @param ignoreBlankRows
+     *            the new value for ignoreBlankRows
+     */
+    @ConfigurationProperty(name = "ignoreBlankRows", label = "Ignore Blank Rows", description = "Excludes blank rows from the import", required = false)
+    public void setIgnoreBlankRows(boolean ignoreBlankRows) {
+        this.ignoreBlankRows = ignoreBlankRows;
+    }
+
+    /**
+     * Returns the emptyCellPolicy. The default value is
+     * <code>SpreadsheetEmptyCellPolicy.EMPTY_STRING</code>.
+     *
+     * @return the value of emptyCellPolicy
+     */
+    public SpreadsheetEmptyCellPolicy getEmptyCellPolicy() {
+        return emptyCellPolicy;
+    }
+
+    /**
+     * Sets the emptyCellPolicy.
+     *
+     * @param emptyCellPolicy
+     *            the new value for emptyCellPolicy
+     */
+    @ConfigurationProperty(name = "emptyCellPolicy", label = "Empty Cell Policy", description = "Policy for handling empty cells", required = false)
+    public void setEmptyCellPolicy(SpreadsheetEmptyCellPolicy emptyCellPolicy) {
+        this.emptyCellPolicy = emptyCellPolicy;
+    }
+
+    /**
+     * Returns the outputFormat. If no format has been set (the stored value is
+     * <code>null</code>), <code>SpreadsheetOutputFormat.PORT_PER_COLUMN</code> is returned.
+     *
+     * @return the value of outputFormat, never <code>null</code>
+     */
+    public SpreadsheetOutputFormat getOutputFormat() {
+        return outputFormat == null ? SpreadsheetOutputFormat.PORT_PER_COLUMN : outputFormat;
+    }
+
+    /**
+     * Sets the outputFormat.
+     *
+     * @param outputFormat
+     *            the new value for outputFormat
+     */
+    @ConfigurationProperty(name = "outputFormat", label = "Output Format", description = "How the activity outputs are to be formatted", required = false)
+    public void setOutputFormat(SpreadsheetOutputFormat outputFormat) {
+        this.outputFormat = outputFormat;
+    }
+
+    /**
+     * Returns the delimiter for CSV formatted output. If no delimiter has been set (the
+     * stored value is <code>null</code>), <code>","</code> is returned.
+     *
+     * @return the delimiter for CSV formatted output, never <code>null</code>
+     */
+    public String getCsvDelimiter() {
+        return csvDelimiter == null ? "," : csvDelimiter;
+    }
+
+    /**
+     * Sets the delimiter for CSV formatted output.
+     *
+     * @param csvDelimiter
+     *            the new delimiter for CSV formatted output
+     */
+    @ConfigurationProperty(name = "csvDelimiter", label = "CSV Delimiter", description = "The delimiter to use for CSV input files. The default is ','", required = false)
+    public void setCsvDelimiter(String csvDelimiter) {
+        this.csvDelimiter = csvDelimiter;
+    }
+
+    /**
+     * Computes the hash code from the stored field values. The fields are combined in the
+     * same order and with the same multiplier as before, so hash values are unchanged.
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hash(allRows, columnNames, columnRange, csvDelimiter, emptyCellPolicy,
+                emptyCellValue, excludeFirstRow, ignoreBlankRows, outputFormat, rowRange);
+    }
+
+    /**
+     * Two configurations are equal when they are of the same class and all stored field
+     * values are equal. The raw stored values are compared, not the defaults substituted by
+     * {@link #getOutputFormat()} and {@link #getCsvDelimiter()}.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        SpreadsheetImportConfiguration other = (SpreadsheetImportConfiguration) obj;
+        return allRows == other.allRows
+                && excludeFirstRow == other.excludeFirstRow
+                && ignoreBlankRows == other.ignoreBlankRows
+                && Objects.equals(columnNames, other.columnNames)
+                && Objects.equals(columnRange, other.columnRange)
+                && Objects.equals(csvDelimiter, other.csvDelimiter)
+                && Objects.equals(emptyCellPolicy, other.emptyCellPolicy)
+                && Objects.equals(emptyCellValue, other.emptyCellValue)
+                && Objects.equals(outputFormat, other.outputFormat)
+                && Objects.equals(rowRange, other.rowRange);
+    }
+
+    /**
+     * A single column-to-port name mapping, as accepted by
+     * {@link SpreadsheetImportConfiguration#setColumnNames(Set)}.
+     */
+    @ConfigurationBean(uri = SpreadsheetImportActivity.URI + "/Mapping")
+    public static class Mapping {
+        private String column;
+        private String port;
+
+        public Mapping() {
+        }
+
+        /**
+         * Returns the column name.
+         *
+         * @return the name of the column
+         */
+        public String getColumn() {
+            return column;
+        }
+
+        /**
+         * Sets the column name.
+         *
+         * @param column
+         *            the name of the column
+         */
+        @ConfigurationProperty(name = "column", label = "Column", description = "The name of the column")
+        public void setColumn(String column) {
+            this.column = column;
+        }
+
+        /**
+         * Returns the port name.
+         *
+         * @return the name of the port
+         */
+        public String getPort() {
+            return port;
+        }
+
+        /**
+         * Sets the port name.
+         *
+         * @param port
+         *            the name of the port
+         */
+        @ConfigurationProperty(name = "port", label = "Port", description = "The name of the port")
+        public void setPort(String port) {
+            this.port = port;
+        }
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java
new file mode 100644
index 0000000..35db532
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java
@@ -0,0 +1,61 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.util.List;
+
+import com.fasterxml.jackson.databind.JsonNode;
+
+import net.sf.taverna.t2.workflowmodel.Processor;
+import net.sf.taverna.t2.workflowmodel.health.HealthCheck;
+import net.sf.taverna.t2.workflowmodel.health.HealthChecker;
+import net.sf.taverna.t2.visit.VisitReport;
+import net.sf.taverna.t2.visit.VisitReport.Status;
+
+
+/**
+ * Health checker for SpreadsheetImport activities.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetImportHealthChecker implements HealthChecker<SpreadsheetImportActivity> {
+
+    /**
+     * Accepts only non-null subjects that are SpreadsheetImportActivity instances.
+     *
+     * @param subject
+     *            the candidate object
+     * @return <code>true</code> if the subject is a SpreadsheetImportActivity
+     */
+    public boolean canVisit(Object subject) {
+        // instanceof is already false for null, so no separate null test is needed.
+        return subject instanceof SpreadsheetImportActivity;
+    }
+
+    /**
+     * Reports whether the activity has a configuration.
+     *
+     * @param activity
+     *            the activity to check
+     * @param ancestors
+     *            the ancestors of the activity, searched for the enclosing Processor
+     * @return <code>null</code> if no Processor ancestor is found; otherwise a SEVERE report
+     *         when the activity has no configuration, or an OK report when it has one
+     */
+    public VisitReport visit(SpreadsheetImportActivity activity, List<Object> ancestors) {
+        Processor processor = (Processor) VisitReport.findAncestor(ancestors, Processor.class);
+        if (processor == null) {
+            return null;
+        }
+        if (activity.getConfiguration() != null) {
+            return new VisitReport(HealthCheck.getInstance(), processor, "Spreadsheet OK",
+                    HealthCheck.NO_PROBLEM, Status.OK);
+        }
+        return new VisitReport(HealthCheck.getInstance(), processor,
+                "Spreadsheet import has not been configured", HealthCheck.NO_CONFIGURATION,
+                Status.SEVERE);
+    }
+
+    /**
+     * Always returns <code>false</code>.
+     *
+     * @return <code>false</code>
+     */
+    public boolean isTimeConsuming() {
+        return false;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java
new file mode 100644
index 0000000..d1aaca8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java
@@ -0,0 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+/**
+ * The ways in which imported spreadsheet cell values can be delivered as activity output.
+ *
+ * @author David Withers
+ */
+public enum SpreadsheetOutputFormat {
+
+    /** One port of depth 1 (a list) per column. */
+    PORT_PER_COLUMN,
+
+    /** A single port of depth 0, formatted as CSV. */
+    SINGLE_PORT
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java
new file mode 100644
index 0000000..9cd4ba8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java
@@ -0,0 +1,69 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+/**
+ * Exception thrown when a spreadsheet cannot be read due to an IO error or when the file format is
+ * not supported.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetReadException extends Exception {
+
+    private static final long serialVersionUID = -823966225836697180L;
+
+    /**
+     * Creates an exception that has neither a detail message nor a cause.
+     */
+    public SpreadsheetReadException() {
+        super();
+    }
+
+    /**
+     * Creates an exception carrying the given detail message.
+     *
+     * @param message
+     *            the detail message
+     */
+    public SpreadsheetReadException(String message) {
+        super(message);
+    }
+
+    /**
+     * Creates an exception wrapping the given cause. The detail message is
+     * <code>cause.toString()</code>, or <code>null</code> when the cause is <code>null</code>.
+     *
+     * @param cause
+     *            the underlying cause, may be <code>null</code>
+     */
+    public SpreadsheetReadException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * Creates an exception carrying both a detail message and a cause.
+     *
+     * @param message
+     *            the detail message
+     * @param cause
+     *            the underlying cause, may be <code>null</code>
+     */
+    public SpreadsheetReadException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java
new file mode 100644
index 0000000..79c92d1
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ * Modifications to the initial code base are copyright of their
+ * respective authors, or their employers as appropriate.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.InputStream;
+
+/**
+ * Interface for reading a spreadsheet from an input stream.
+ *
+ * @author David Withers
+ */
+public interface SpreadsheetReader {
+
+    /**
+     * Reads spreadsheet data from a stream and hands the cell values, one row at a time, to
+     * the given row processor.
+     *
+     * @param inputStream
+     *            the stream to read
+     * @param rowRange
+     *            the rows to read
+     * @param columnRange
+     *            the columns to read
+     * @param ignoreBlankRows
+     *            whether to ignore blank rows
+     * @param rowProcessor
+     *            the rowProcessor to write rows of data values to
+     * @throws SpreadsheetReadException
+     *             if there's an error reading the stream or the stream is not a valid
+     *             spreadsheet
+     */
+    void read(InputStream inputStream, Range rowRange, Range columnRange,
+            boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor)
+            throws SpreadsheetReadException;
+
+}
\ No newline at end of file