You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@taverna.apache.org by st...@apache.org on 2015/02/17 12:37:09 UTC

[49/70] [abbrv] incubator-taverna-common-activities git commit: taverna-spreadsheet-import-activity/

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java b/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java
deleted file mode 100644
index 1c3a8f5..0000000
--- a/src/test/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetUtilsTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2009 The University of Manchester
- *
- *  Modifications to the initial code base are copyright of their
- *  respective authors, or their employers as appropriate.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public License
- *  as published by the Free Software Foundation; either version 2.1 of
- *  the License, or (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public
- *  License along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- ******************************************************************************/
-package net.sf.taverna.t2.activities.spreadsheet;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.node.ArrayNode;
-import com.fasterxml.jackson.databind.node.JsonNodeFactory;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-/**
- * Unit tests for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils}.
- *
- * @author David Withers
- */
-public class SpreadsheetUtilsTest {
-
-	@Test
-	public void testSpreadsheetUtils() {
-		assertNotNull(new SpreadsheetUtils());
-	}
-
-	/**
-	 * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getColumnIndex(java.lang.String)}.
-	 */
-	@Test
-	public void testGetColumnIndex() {
-		assertEquals(0, SpreadsheetUtils.getColumnIndex("A"));
-		assertEquals(4, SpreadsheetUtils.getColumnIndex("E"));
-		assertEquals(25, SpreadsheetUtils.getColumnIndex("Z"));
-		assertEquals(26, SpreadsheetUtils.getColumnIndex("AA"));
-		assertEquals(457833, SpreadsheetUtils.getColumnIndex("ZAFZ"));
-	}
-
-	/**
-	 * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getColumnLabel(int)}.
-	 */
-	@Test
-	public void testGetColumnLabel() {
-		assertEquals("A", SpreadsheetUtils.getColumnLabel(0));
-		assertEquals("E", SpreadsheetUtils.getColumnLabel(4));
-		assertEquals("Z", SpreadsheetUtils.getColumnLabel(25));
-		assertEquals("AA", SpreadsheetUtils.getColumnLabel(26));
-		assertEquals("ZAFZ", SpreadsheetUtils.getColumnLabel(457833));
-	}
-
-	/**
-	 * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getPortName(java.lang.String, java.util.Map)}.
-	 */
-	@Test
-	public void testGetPortNameStringMapOfStringString() {
-		assertEquals("A", SpreadsheetUtils.getPortName("A", null));
-		assertEquals("AABR", SpreadsheetUtils.getPortName("AABR", null));
-		ObjectNode configuration = JsonNodeFactory.instance.objectNode();
-		ArrayNode columnNames = configuration.arrayNode();
-		columnNames.addObject().put("column", "B").put("port", "beta");
-		configuration.put("columnNames", columnNames);
-		assertEquals("beta", SpreadsheetUtils.getPortName("B", configuration));
-		assertEquals("T", SpreadsheetUtils.getPortName("T", configuration));
-	}
-
-	/**
-	 * Test method for {@link net.sf.taverna.t2.activities.spreadsheet.SpreadsheetUtils#getPortName(int, java.util.Map)}.
-	 */
-	@Test
-	public void testGetPortNameIntMapOfStringString() {
-		assertEquals("A", SpreadsheetUtils.getPortName(0, null));
-		assertEquals("AA", SpreadsheetUtils.getPortName(26, null));
-		ObjectNode configuration = JsonNodeFactory.instance.objectNode();
-		ArrayNode columnNames = configuration.arrayNode();
-		columnNames.addObject().put("column", "D").put("port", "delta");
-		configuration.put("columnNames", columnNames);
-		assertEquals("delta", SpreadsheetUtils.getPortName(3, configuration));
-		assertEquals("AB", SpreadsheetUtils.getPortName(27, configuration));
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.csv
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.csv b/src/test/resources/test-spreadsheet.csv
deleted file mode 100644
index bbbc3f3..0000000
--- a/src/test/resources/test-spreadsheet.csv
+++ /dev/null
@@ -1 +0,0 @@
-A,5,C,1
A,5,C,1
TRUE,15/06/09,,2
X,X,X,X
X,X,X,X




y,y,y,y
y,y,y,y
y,y,y,y
y,y,y,y
y,y,y,y
y,y,y,y
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.ods
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.ods b/src/test/resources/test-spreadsheet.ods
deleted file mode 100644
index e4dbd98..0000000
Binary files a/src/test/resources/test-spreadsheet.ods and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.xls
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.xls b/src/test/resources/test-spreadsheet.xls
deleted file mode 100644
index 12a2536..0000000
Binary files a/src/test/resources/test-spreadsheet.xls and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/src/test/resources/test-spreadsheet.xlsx
----------------------------------------------------------------------
diff --git a/src/test/resources/test-spreadsheet.xlsx b/src/test/resources/test-spreadsheet.xlsx
deleted file mode 100644
index 2accc38..0000000
Binary files a/src/test/resources/test-spreadsheet.xlsx and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/pom.xml
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/pom.xml b/taverna-spreadsheet-import-activity/pom.xml
new file mode 100644
index 0000000..7369bf6
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/pom.xml
@@ -0,0 +1,138 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<parent>
+		<groupId>net.sf.taverna</groupId>
+		<artifactId>taverna-parent</artifactId>
+		<version>3.0.1-SNAPSHOT</version>
+	</parent>
+	<groupId>net.sf.taverna.t2.activities</groupId>
+	<artifactId>spreadsheet-import-activity</artifactId>
+	<version>2.0.1-SNAPSHOT</version>
+	<packaging>bundle</packaging>
+	<name>Taverna 2 Spreadsheet Import Activity</name>
+	<description>Activity to import data from spreadsheet files</description>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<extensions>true</extensions>
+				<configuration>
+					<instructions>
+						<Private-Package>org.apache.poi.*;-split-package:=merge-first,org.openxmlformats.schemas.*,schemasMicrosoftCom*,schemaorg_apache_xmlbeans.system.*,org.odftoolkit.*,com.csvreader.*</Private-Package>
+						<Import-Package>!org.apache.poi.hdgf.extractor,
+                                                        !org.apache.poi.hslf.extractor,
+                                                        !org.apache.poi.hwpf.extractor,
+                                                        !org.apache.poi.sl.*,
+                                                        org.apache.xmlbeans.impl.schema,
+                                                        !junit.*,
+                                                        !org.openxmlformats.schemas.*,
+                                                        !schemasMicrosoftComOfficePowerpoint,
+                                                        !schemasMicrosoftComOfficeWord,
+                                                        *
+                                                </Import-Package>
+					</instructions>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
+	<dependencies>
+		<dependency>
+			<groupId>net.sf.taverna.t2.core</groupId>
+			<artifactId>reference-api</artifactId>
+			<version>${t2.core.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>net.sf.taverna.t2.core</groupId>
+			<artifactId>workflowmodel-api</artifactId>
+			<version>${t2.core.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.poi</groupId>
+			<artifactId>poi</artifactId>
+			<version>${poi.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.poi</groupId>
+			<artifactId>poi-ooxml</artifactId>
+			<version>${poi.version}</version>
+			<exclusions>
+				<exclusion>
+					<groupId>org.apache.xmlbeans</groupId>
+					<artifactId>xmlbeans</artifactId>
+				</exclusion>
+			</exclusions>
+		</dependency>
+		<dependency>
+			<groupId>org.odftoolkit</groupId>
+			<artifactId>odfdom-java</artifactId>
+			<version>${odfdom.version}</version>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>net.sourceforge.javacsv</groupId>
+			<artifactId>javacsv</artifactId>
+			<version>${javacsv.version}</version>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.xmlbeans</groupId>
+			<artifactId>com.springsource.org.apache.xmlbeans</artifactId>
+			<version>${xmlbeans.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.log4j</groupId>
+			<artifactId>com.springsource.org.apache.log4j</artifactId>
+			<version>${log4j.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>${junit.version}</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>net.sf.taverna.t2.activities</groupId>
+			<artifactId>activity-test-utils</artifactId>
+			<version>${t2.activities.version}</version>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+	<repositories>
+		<!-- This repository is down (2012-03-19) and as the project has moved 
+			to apache it's unclear if it will return. odfdom-java-0.7 artifact has been 
+			deployed to the mygrid repository. -->
+		<!-- <repository> <id>odftoolkit</id> <name>Odftoolkit Repository</name> 
+			<url>https://svn.odftoolkit.org/svn/odfdom~maven2/release</url> <releases> 
+			<checksumPolicy>fail</checksumPolicy> </releases> <snapshots> <checksumPolicy>fail</checksumPolicy> 
+			</snapshots> </repository> -->
+		<repository>
+			<releases />
+			<snapshots>
+				<enabled>false</enabled>
+			</snapshots>
+			<id>mygrid-repository</id>
+			<name>myGrid Repository</name>
+			<url>http://www.mygrid.org.uk/maven/repository</url>
+		</repository>
+		<repository>
+			<releases>
+				<enabled>false</enabled>
+			</releases>
+			<snapshots />
+			<id>mygrid-snapshot-repository</id>
+			<name>myGrid Snapshot Repository</name>
+			<url>http://www.mygrid.org.uk/maven/snapshot-repository</url>
+		</repository>
+	</repositories>
+	<scm>
+		<connection>scm:git:https://github.com/taverna/taverna-spreadsheet-import-activity.git</connection>
+		<developerConnection>scm:git:ssh://git@github.com/taverna/taverna-spreadsheet-import-activity.git</developerConnection>
+		<url>https://github.com/taverna/taverna-spreadsheet-import-activity/</url>
+		<tag>HEAD</tag>
+	</scm>
+
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java
new file mode 100644
index 0000000..154019a
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/CSVSpreadsheetReader.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import com.csvreader.CsvReader;
+
+/**
+ * Reads CSV files, handing the selected cells of each selected row to a {@link SpreadsheetRowProcessor}.
+ * Empty cells are reported as {@code null}; the stream is decoded with the platform default charset.
+ * @author David Withers
+ */
+public class CSVSpreadsheetReader implements SpreadsheetReader {
+
+	public void read(InputStream inputStream, Range rowRange,
+			Range columnRange, boolean ignoreBlankRows,
+			SpreadsheetRowProcessor rowProcessor)
+			throws SpreadsheetReadException {
+		CsvReader csvReader = new CsvReader(new InputStreamReader(inputStream)); // no charset argument: platform default is used
+		csvReader.setSkipEmptyRecords(false); // empty records are returned too; blank-row filtering is done below
+
+		SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>(); // column index -> cell value for the row being built
+
+		try {
+			while(csvReader.readRecord()) {
+				int rowIndex = (int) csvReader.getCurrentRecord(); // record index as reported by the CSV reader, narrowed to int
+				boolean blankRow = true; // cleared as soon as one selected cell is non-empty
+				if (rowRange.contains(rowIndex)) {
+					for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
+						if (columnRange.contains(columnIndex)) {
+							String value = csvReader.get(columnIndex);
+							value = "".equals(value) ? null : value; // empty strings are passed on as null
+							if (value != null) {
+								blankRow = false;
+							}
+							currentDataRow.put(columnIndex, value);
+							if (columnIndex == columnRange.getEnd()) { // last column of the range: hand the row over
+								if (!ignoreBlankRows || !blankRow) {
+									rowProcessor.processRow(rowIndex, currentDataRow);
+								}
+								currentDataRow = new TreeMap<Integer, String>(); // start a fresh map; the one handed over is not reused
+							}
+						}
+					}
+					// NOTE(review): a row is only emitted if columnRange.getEnd() itself passes contains() - confirm Range cannot exclude its own end
+					if (rowIndex == rowRange.getEnd()) {
+						break; // last requested row handled: stop reading further records
+					}
+				}
+			}
+		} catch (IOException e) {
+			throw new SpreadsheetReadException("Unable to read CSV file", e);
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java
new file mode 100644
index 0000000..8237ec1
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ExcelSpreadsheetReader.java
@@ -0,0 +1,324 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.log4j.Logger;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+
+/**
+ * Reads Excel '97 (.xls) or Excel '07 (.xlsx) spreadsheet files.
+ * 
+ * @author David Withers
+ */
+public class ExcelSpreadsheetReader implements SpreadsheetReader {
+
+	private static Logger logger = Logger.getLogger(ExcelSpreadsheetReader.class);
+	/** Reads the first sheet of an Excel workbook and passes the selected cells of each selected row to rowProcessor. */
+	public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor)
+			throws SpreadsheetReadException {
+		Workbook workbook;
+		try {
+			workbook = WorkbookFactory.create(inputStream);
+		} catch (InvalidFormatException e) {
+			throw new SpreadsheetReadException(
+					"The file does not have a compatible spreadsheet format", e);
+		} catch (IOException e) {
+			throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
+		} catch (IllegalArgumentException e) {
+			throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
+		}
+
+		DataFormatter dataFormatter = new DataFormatter();
+		// Missing cells are created as blank cells, and only the sheet at index 0 is read.
+		workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
+		Sheet sheet = workbook.getSheetAt(0);
+
+		if (rowRange.getEnd() < 0) { // a negative end is treated as "no end specified"
+			rowRange.setEnd(sheet.getLastRowNum()); // note: this modifies the Range object passed in by the caller
+			logger.debug("No end of row range specified, setting to " + rowRange.getEnd());
+		}
+
+		SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>(); // column index -> cell value for the row being built
+
+		for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
+			boolean blankRow = true; // cleared as soon as one selected cell yields a non-null value
+			if (rowRange.contains(rowIndex)) {
+				Row row = sheet.getRow(rowIndex);
+				for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
+					if (columnRange.contains(columnIndex)) {
+						String value = null;
+						if (row != null) { // when no row object is returned, every selected cell stays null
+							Cell cell = row.getCell(columnIndex);
+							if (cell != null) {
+								value = getCellValue(cell, dataFormatter);
+							}
+						}
+						if (value != null) {
+							blankRow = false;
+						}
+						currentDataRow.put(columnIndex, value);
+						if (columnIndex == columnRange.getEnd()) { // last column of the range: hand the row over
+							if (!ignoreBlankRows || !blankRow) {
+								rowProcessor.processRow(rowIndex, currentDataRow);
+							}
+							currentDataRow = new TreeMap<Integer, String>(); // start a fresh map; the one handed over is not reused
+						}
+					}
+				}
+			}
+		}
+
+	}
+	/** Returns the cell's value as text, or null for any cell type (or cached formula result type) not handled by the switch. */
+	private String getCellValue(Cell cell, DataFormatter dataFormatter) {
+		String value = null;
+		switch (cell.getCellType()) {
+		case Cell.CELL_TYPE_BOOLEAN:
+			value = Boolean.toString(cell.getBooleanCellValue());
+			break;
+		case Cell.CELL_TYPE_NUMERIC:
+			if (DateUtil.isCellDateFormatted(cell)) {
+//				value = cell.getDateCellValue().toString();
+				value = dataFormatter.formatCellValue(cell); // date-formatted cell: use the formatter's text rather than Date.toString()
+			} else {
+				value = Double.toString(cell.getNumericCellValue()); // Double.toString, so an integral value reads e.g. "5.0"
+			}
+			break;
+		case Cell.CELL_TYPE_STRING:
+			value = cell.getStringCellValue();
+			break;
+		case Cell.CELL_TYPE_FORMULA: // formulas: convert the cached result according to its own type
+			switch (cell.getCachedFormulaResultType()) {
+			case Cell.CELL_TYPE_BOOLEAN:
+				value = Boolean.toString(cell.getBooleanCellValue());
+				break;
+			case Cell.CELL_TYPE_NUMERIC:
+				if (DateUtil.isCellDateFormatted(cell)) {
+					value = cell.getDateCellValue().toString(); // NOTE(review): unlike the non-formula branch this uses Date.toString() - confirm intended
+				} else {
+					value = Double.toString(cell.getNumericCellValue());
+				}
+				break;
+			case Cell.CELL_TYPE_STRING:
+				value = cell.getStringCellValue();
+				break;
+			default:
+				break;
+			}
+		default: // the formula case has no break and falls through to here, which only breaks
+			break;
+		}
+		// value = dataFormatter.formatCellValue(cell);
+		// if ("".equals(value)) value = null;
+		return value;
+	}
+
+
+//	/**
+//	 * Reads data from an HSSF stream.
+//	 * 
+//	 * @param inputStream
+//	 * @param spreradsheetRowProcessor
+//	 * @throws IOException
+//	 * @deprecated can't generalize for XSSF streams and not much advantage as all the (non
+//	 *             duplicated) data is contained in one event so memory footprint isn't much smaller
+//	 */
+//	public void readHSSF(InputStream inputStream, SpreadsheetRowProcessor spreradsheetRowProcessor)
+//			throws IOException {
+//		POIFSFileSystem poifs = new POIFSFileSystem(inputStream);
+//
+//		// get the workbook part of the stream
+//		InputStream documentInputStream = poifs.createDocumentInputStream("Workbook");
+//
+//		RecordProcessor recordProcessor = new RecordProcessor(spreradsheetRowProcessor);
+//		MissingRecordAwareHSSFListener hssfListener = new MissingRecordAwareHSSFListener(
+//				recordProcessor);
+//
+//		// listen for all records
+//		HSSFRequest request = new HSSFRequest();
+//		request.addListenerForAllRecords(hssfListener);
+//
+//		HSSFEventFactory factory = new HSSFEventFactory();
+//		factory.processEvents(request, documentInputStream);
+//
+//		inputStream.close();
+//		documentInputStream.close();
+//	}
+//
+//	/**
+//	 * Listener for processing events from an HSSF stream.
+//	 * 
+//	 * @author David Withers
+//	 * @deprecated can't generalize for XSSF streams and not much advantage as all the (non
+//	 *             duplicated) data is contained in one event so memory footprint isn't much smaller
+//	 */
+//	class RecordProcessor implements HSSFListener {
+//
+//		private SpreadsheetRowProcessor spreradsheetRowProcessor;
+//
+//		private SSTRecord sstrec;
+//
+//		private boolean worksheetOpen = false;
+//
+//		private int row, column;
+//
+//		private Map<Integer, String> currentDataRow = new HashMap<Integer, String>();
+//
+//		public RecordProcessor(SpreadsheetRowProcessor spreradsheetRowProcessor) {
+//			this.spreradsheetRowProcessor = spreradsheetRowProcessor;
+//		}
+//
+//		public void processRecord(Record record) {
+//			switch (record.getSid()) {
+//			// the BOFRecord can represent either the beginning of a sheet or
+//			// the workbook
+//			case BOFRecord.sid:
+//				BOFRecord bof = (BOFRecord) record;
+//				if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
+//					worksheetOpen = true;
+//				}
+//				break;
+//			case EOFRecord.sid:
+//				if (worksheetOpen) {
+//					while (row < rowRange.getEnd()) {
+//						row++;
+//						if (rowRange.contains(row)) {
+//							for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+//								processCell(row, column, null);
+//							}
+//							spreradsheetRowProcessor.processRow(row, currentDataRow);
+//						}
+//						currentDataRow = new HashMap<Integer, String>();
+//					}
+//					worksheetOpen = false;
+//				}
+//				break;
+//			// don't care about sheet name for now
+//			// case BoundSheetRecord.sid:
+//			// BoundSheetRecord bsr = (BoundSheetRecord) record;
+//			// logger.info("New sheet named: " + bsr.getSheetname());
+//			// break;
+//			case RowRecord.sid:
+//				// RowRecord rowRecord = (RowRecord) record;
+//				// if (readAllRows) {
+//				// int rowNumber = row.getRowNumber();
+//				// if (rowNumber < minRow) {
+//				// minRow = rowNumber;
+//				// currentRow = rowNumber;
+//				// }
+//				// if (rowNumber > maxRow) {
+//				// maxRow = rowNumber;
+//				// }
+//				// }
+//				// if (readAllColumns) {
+//				// int firstColumn = row.getFirstCol();
+//				// int lastColumn = row.getLastCol() - 1;
+//				// if (firstColumn < minColumn) {
+//				// minColumn = firstColumn;
+//				// currentColumn = firstColumn;
+//				// }
+//				// if (lastColumn > maxColumn) {
+//				// maxColumn = lastColumn;
+//				// }
+//				// }
+//
+//				break;
+//			case NumberRecord.sid:
+//				NumberRecord number = (NumberRecord) record;
+//				row = number.getRow();
+//				column = number.getColumn();
+//				processCell(row, column, String.valueOf(number.getValue()));
+//				break;
+//			case SSTRecord.sid:
+//				// SSTRecords store a array of unique strings used in Excel.
+//				sstrec = (SSTRecord) record;
+//				break;
+//			case LabelSSTRecord.sid:
+//				LabelSSTRecord label = (LabelSSTRecord) record;
+//				row = label.getRow();
+//				column = label.getColumn();
+//				processCell(row, column, sstrec.getString(label.getSSTIndex()).getString());
+//				break;
+//			case BlankRecord.sid:
+//				BlankRecord blank = (BlankRecord) record;
+//				row = blank.getRow();
+//				column = blank.getColumn();
+//				processCell(row, column, null);
+//				break;
+//			}
+//
+//			// Missing column
+//			if (record instanceof MissingCellDummyRecord) {
+//				MissingCellDummyRecord cell = (MissingCellDummyRecord) record;
+//				row = cell.getRow();
+//				column = cell.getColumn();
+//				processCell(row, column, null);
+//			}
+//
+//			// Missing row
+//			if (record instanceof MissingRowDummyRecord) {
+//				MissingRowDummyRecord missingRow = (MissingRowDummyRecord) record;
+//				row = missingRow.getRowNumber();
+//				if (rowRange.contains(row)) {
+//					for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+//						processCell(row, column, null);
+//					}
+//					spreradsheetRowProcessor.processRow(row, currentDataRow);
+//				}
+//				currentDataRow = new HashMap<Integer, String>();
+//			}
+//
+//			// End of row
+//			if (record instanceof LastCellOfRowDummyRecord) {
+//				LastCellOfRowDummyRecord lastCell = (LastCellOfRowDummyRecord) record;
+//				row = lastCell.getRow();
+//				if (rowRange.contains(row)) {
+//					int lastColumn = lastCell.getLastColumnNumber();
+//					for (column = lastColumn + 1; column <= columnRange.getEnd(); column++) {
+//						processCell(row, column, null);
+//					}
+//					spreradsheetRowProcessor.processRow(row, currentDataRow);
+//				}
+//				currentDataRow = new HashMap<Integer, String>();
+//			}
+//		}
+//
+//		private void processCell(int row, int column, String value) {
+//			if (rowRange.contains(row) && columnRange.contains(column)) {
+//				currentDataRow.put(column, value);
+//			}
+//		}
+//
+//	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java
new file mode 100644
index 0000000..2b01e00
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/ODFSpreadsheetReader.java
@@ -0,0 +1,193 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.InputStream;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+
+import org.apache.log4j.Logger;
+import org.odftoolkit.odfdom.OdfFileDom;
+import org.odftoolkit.odfdom.doc.OdfDocument;
+import org.odftoolkit.odfdom.doc.table.OdfTableCell;
+import org.odftoolkit.odfdom.doc.table.OdfTableRow;
+import org.w3c.dom.NodeList;
+
+/**
+ * A {@link SpreadsheetReader} for Open Document Format (ODF) spreadsheet files.
+ * <p>
+ * Only the rows of the first table in the document are read.
+ * 
+ * @author David Withers
+ */
+public class ODFSpreadsheetReader implements SpreadsheetReader {
+
+	private static Logger logger = Logger.getLogger(ODFSpreadsheetReader.class);
+
+	/**
+	 * Reads the cells of the first table that fall inside the given row and column ranges and
+	 * hands them, one row at a time, to the row processor.
+	 * 
+	 * @param inputStream
+	 *            the stream containing the ODF document
+	 * @param rowRange
+	 *            the rows to read; if its end is negative the end is set (on the passed object) to
+	 *            the index of the last non-blank row
+	 * @param columnRange
+	 *            the columns to read
+	 * @param ignoreBlankRows
+	 *            if <code>true</code>, rows where every cell in the column range is empty are
+	 *            not passed to the row processor
+	 * @param rowProcessor
+	 *            receives each row as a map of column index to cell value (<code>null</code> for
+	 *            an empty cell)
+	 * @throws SpreadsheetReadException
+	 *             if the document cannot be loaded or its rows cannot be located
+	 */
+	public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor)
+			throws SpreadsheetReadException {
+		NodeList tableRows = loadTableRows(inputStream);
+
+		// a negative end means "read up to the last row that contains data"
+		if (rowRange.getEnd() < 0) {
+			rowRange.setEnd(calculateRowCount(tableRows) - 1);
+		}
+
+		SortedMap<Integer, String> rowValues = new TreeMap<Integer, String>();
+		// number of extra rows produced so far by table:number-rows-repeated
+		int rowOffset = 0;
+		for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
+			OdfTableRow row = (OdfTableRow) tableRows.item(rowIndex);
+			boolean rowIsBlank = true;
+			// number of extra columns produced so far in this row by repeated cells
+			int columnOffset = 0;
+			for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
+				OdfTableCell cell = null;
+				if (row != null) {
+					cell = (OdfTableCell) row.getCellAt(columnIndex);
+				}
+				String value = readCellValue(cell);
+				if (value != null) {
+					rowIsBlank = false;
+				}
+
+				// store the value if its (shifted) column lies within the column range
+				int shiftedColumn = columnIndex + columnOffset;
+				if (columnRange.contains(shiftedColumn)) {
+					rowValues.put(shiftedColumn, value);
+				}
+
+				// a repeated cell contributes the same value to the following columns
+				int extraCells = (cell == null) ? 0 : cell
+						.getTableNumberColumnsRepeatedAttribute() - 1;
+				for (; extraCells > 0 && columnIndex + columnOffset < columnRange.getEnd(); extraCells--) {
+					columnOffset++;
+					if (columnRange.contains(columnIndex + columnOffset)) {
+						rowValues.put(columnIndex + columnOffset, value);
+					}
+				}
+
+				// everything below only applies to the last column of the range
+				if (columnIndex != columnRange.getEnd()) {
+					continue;
+				}
+
+				boolean emitRow = !(ignoreBlankRows && rowIsBlank);
+				if (rowRange.contains(rowIndex + rowOffset) && emitRow) {
+					rowProcessor.processRow(rowIndex + rowOffset, rowValues);
+				}
+
+				// a repeated row is delivered again for each repetition inside the row range
+				int extraRows = (row == null) ? 0
+						: row.getTableNumberRowsRepeatedAttribute() - 1;
+				for (; extraRows > 0 && rowIndex + rowOffset < rowRange.getEnd(); extraRows--) {
+					rowOffset++;
+					if (rowRange.contains(rowIndex + rowOffset) && emitRow) {
+						rowProcessor.processRow(rowIndex + rowOffset, rowValues);
+					}
+				}
+				rowValues = new TreeMap<Integer, String>();
+			}
+		}
+
+	}
+
+	/**
+	 * Loads the ODF document and returns the row elements of its first table.
+	 * 
+	 * @param inputStream
+	 *            the stream containing the ODF document
+	 * @return the <code>table:table-row</code> nodes of the first table
+	 * @throws SpreadsheetReadException
+	 *             if loading or querying the document fails with a checked exception; runtime
+	 *             exceptions are propagated unchanged
+	 */
+	private NodeList loadTableRows(InputStream inputStream) throws SpreadsheetReadException {
+		try {
+			OdfDocument document = OdfDocument.loadDocument(inputStream);
+			logger.debug("Reading document of type : " + document.getMediaType());
+			OdfFileDom content = document.getContentDom();
+			XPath xpath = document.getXPath();
+			String rowsPath = ("//table:table[1]/table:table-row");
+			return (NodeList) xpath.evaluate(rowsPath, content, XPathConstants.NODESET);
+		} catch (RuntimeException e) {
+			throw e;
+		} catch (Exception e) {
+			throw new SpreadsheetReadException("The spreadsheet file could not be read", e);
+		}
+	}
+
+	/**
+	 * Extracts the value of a cell as a string, chosen according to the cell's office value type.
+	 * 
+	 * @param cell
+	 *            the cell to read, may be <code>null</code>
+	 * @return the cell value, or <code>null</code> if there is no cell or its value is empty
+	 */
+	private String readCellValue(OdfTableCell cell) {
+		if (cell == null) {
+			return null;
+		}
+		String value;
+		String type = cell.getOfficeValueTypeAttribute();
+		if ("float".equals(type) || "percentage".equals(type) || "currency".equals(type)) {
+			value = cell.getOfficeValueAttribute().toString();
+		} else if ("date".equals(type)) {
+			value = cell.getOfficeDateValueAttribute();
+		} else if ("time".equals(type)) {
+			value = cell.getOfficeTimeValueAttribute();
+		} else if ("boolean".equals(type)) {
+			value = cell.getOfficeBooleanValueAttribute().toString();
+		} else if ("string".equals(type)) {
+			value = cell.getOfficeStringValueAttribute();
+			if (value == null) {
+				value = cell.getTextContent();
+			}
+		} else {
+			value = cell.getTextContent();
+		}
+		// an empty string is reported the same way as a missing value
+		if ("".equals(value)) {
+			return null;
+		}
+		return value;
+	}
+
+	/**
+	 * Counts the rows of a table, expanding repeated rows and leaving out the blank rows that
+	 * trail the last row containing data.
+	 * 
+	 * @param rowList
+	 *            the list of rows in a table
+	 * @return the number of rows up to and including the last non-blank row
+	 */
+	private int calculateRowCount(NodeList rowList) {
+		int counted = 0;
+		// blank rows seen since the last non-blank row; only counted once data follows them
+		int pendingBlanks = 0;
+		for (int index = 0; index < rowList.getLength(); index++) {
+			OdfTableRow row = (OdfTableRow) rowList.item(index);
+			int span = row.getTableNumberRowsRepeatedAttribute();
+			if (!isBlankRow(row)) {
+				counted += span + pendingBlanks;
+				pendingBlanks = 0;
+			} else {
+				pendingBlanks += span;
+			}
+		}
+		return counted;
+	}
+
+	/**
+	 * Tests whether a row is blank, i.e. none of its cells has any text content.
+	 * 
+	 * @param row
+	 *            the row to inspect
+	 * @return <code>true</code> if no cell of the row contains text
+	 */
+	private boolean isBlankRow(OdfTableRow row) {
+		int index = 0;
+		OdfTableCell cell = (OdfTableCell) row.getCellAt(index);
+		while (cell != null) {
+			String text = cell.getTextContent();
+			if (text != null && !"".equals(text)) {
+				return false;
+			}
+			index++;
+			cell = (OdfTableCell) row.getCellAt(index);
+		}
+		return true;
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java
new file mode 100644
index 0000000..29f5e05
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/Range.java
@@ -0,0 +1,298 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationBean;
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationProperty;
+
+/**
+ * A range of integer values, optionally with sub-ranges that are excluded from it.
+ * <p>
+ * Both <code>start</code> and <code>end</code> are inclusive. A negative <code>end</code> is
+ * treated by {@link #contains(int)} as "no upper bound".
+ *
+ * @author David Withers
+ */
+@ConfigurationBean(uri = SpreadsheetImportActivity.URI + "/Range")
+public class Range {
+
+	/**
+	 * The (inclusive) start and end of this <code>Range</code>.
+	 */
+	private int start, end;
+
+	/**
+	 * <code>Range</code>s that are excluded from this <code>Range</code>.
+	 */
+	private List<Range> excludes = new ArrayList<Range>();
+
+	/**
+	 * Constructs a <code>Range</code> with start and end both 0 and no exclusions.
+	 */
+	public Range() {
+	}
+
+	/**
+	 * Constructs a <code>Range</code> with the specified start and end values.
+	 *
+	 * @param start
+	 *            the start of the range
+	 * @param end
+	 *            the end of the range
+	 */
+	public Range(int start, int end) {
+		this.start = start;
+		this.end = end;
+	}
+
+	/**
+	 * Constructs a <code>Range</code> with the specified start and end values and a
+	 * <code>Range</code> of excluded values.
+	 *
+	 * @param start
+	 *            the start of the range
+	 * @param end
+	 *            the end of the range
+	 * @param exclude
+	 *            the range to exclude, ignored if <code>null</code>
+	 */
+	public Range(int start, int end, Range exclude) {
+		this.start = start;
+		this.end = end;
+		if (exclude != null) {
+			excludes.add(exclude);
+		}
+	}
+
+	/**
+	 * Constructs a <code>Range</code> with the specified start and end values and
+	 * <code>Range</code>s of excluded values.
+	 *
+	 * @param start
+	 *            the start of the range
+	 * @param end
+	 *            the end of the range
+	 * @param excludes
+	 *            the ranges to exclude, ignored if <code>null</code>; the list itself is not
+	 *            retained, its elements are
+	 */
+	public Range(int start, int end, List<Range> excludes) {
+		this.start = start;
+		this.end = end;
+		if (excludes != null) {
+			this.excludes.addAll(excludes);
+		}
+	}
+
+	/**
+	 * Constructs a <code>Range</code> that is a deep copy of the specified range.
+	 *
+	 * @param range
+	 *            the <code>Range</code> to copy
+	 */
+	public Range(Range range) {
+		this.start = range.start;
+		this.end = range.end;
+		if (range.excludes != null) {
+			for (Range excludeRange : range.excludes) {
+				excludes.add(new Range(excludeRange));
+			}
+		}
+	}
+
+	/**
+	 * Returns <code>true</code> if <code>value</code> is included in this <code>Range</code>.
+	 * <p>
+	 * A value is included if it is not below the start, not above the end (a negative end
+	 * imposes no upper limit) and not contained in any of the excluded ranges.
+	 *
+	 * @param value
+	 *            the value to test
+	 * @return <code>true</code> if the value lies within this range and outside all exclusions
+	 */
+	public boolean contains(int value) {
+		if (value >= start && (value <= end || end < 0)) {
+			for (Range exclude : excludes) {
+				if (exclude.contains(value)) {
+					return false;
+				}
+			}
+			return true;
+		}
+		return false;
+	}
+
+	/**
+	 * Returns an array of the values contained in this <code>Range</code>.
+	 * <p>
+	 * The values are enumerated from start to end, so a range whose end is below its start
+	 * (including a negative, open-ended end) yields an empty array.
+	 *
+	 * @return an array of the values contained in this <code>Range</code>
+	 */
+	public int[] getRangeValues() {
+		List<Integer> rangeList = new ArrayList<Integer>();
+		for (int i = start; i <= end; i++) {
+			boolean excluded = false;
+			for (Range range : excludes) {
+				if (range.contains(i)) {
+					excluded = true;
+					break;
+				}
+			}
+			if (!excluded) {
+				rangeList.add(i);
+			}
+		}
+		int[] rangeArray = new int[rangeList.size()];
+		for (int i = 0; i < rangeArray.length; i++) {
+			rangeArray[i] = rangeList.get(i);
+		}
+		return rangeArray;
+	}
+
+	/**
+	 * Returns the start of the <code>Range</code>.
+	 *
+	 * @return the start of the <code>Range</code>
+	 */
+	public int getStart() {
+		return start;
+	}
+
+	/**
+	 * Sets the start of the <code>Range</code>.
+	 *
+	 * @param start
+	 *            the new value for start of the <code>Range</code>
+	 */
+	@ConfigurationProperty(name = "start", label = "Start", description = "The start of the range")
+	public void setStart(int start) {
+		this.start = start;
+	}
+
+	/**
+	 * Returns the end of the <code>Range</code>.
+	 *
+	 * @return the end of the <code>Range</code>
+	 */
+	public int getEnd() {
+		return end;
+	}
+
+	/**
+	 * Sets the end of the range.
+	 *
+	 * @param end
+	 *            the new value for end of the <code>Range</code>
+	 */
+	@ConfigurationProperty(name = "end", label = "End", description = "The end of the range")
+	public void setEnd(int end) {
+		this.end = end;
+	}
+
+	/**
+	 * Adds a <code>Range</code> to be excluded from this <code>Range</code>.
+	 *
+	 * @param exclude
+	 *            a <code>Range</code> to be excluded
+	 */
+	public void addExclude(Range exclude) {
+		excludes.add(exclude);
+	}
+
+	/**
+	 * Removes a <code>Range</code> from the exclusions for this range.
+	 *
+	 * @param exclude
+	 *            a <code>Range</code> to be removed from the exclusions
+	 */
+	public void removeExclude(Range exclude) {
+		excludes.remove(exclude);
+	}
+
+	/**
+	 * Returns the exclusions for this range.
+	 *
+	 * @return the exclusions for this range (the live list, not a copy)
+	 */
+	public List<Range> getExcludes() {
+		return excludes;
+	}
+
+	/**
+	 * Sets the exclusions for this range.
+	 * <p>
+	 * The property is optional, so <code>null</code> is accepted and means "no exclusions". It
+	 * is stored as an empty list because {@link #contains(int)}, {@link #getRangeValues()},
+	 * {@link #addExclude(Range)} and {@link #removeExclude(Range)} all dereference the list.
+	 *
+	 * @param excludes
+	 *            the exclusions for this range, or <code>null</code> for none
+	 */
+	@ConfigurationProperty(name = "excludes", label = "Excludes Ranges", description = "The ranges the exclude from this range", required = false)
+	public void setExcludes(List<Range> excludes) {
+		this.excludes = (excludes == null) ? new ArrayList<Range>() : excludes;
+	}
+
+	/**
+	 * Returns the range in the form <code>[start..end]</code>; exclusions are not shown.
+	 */
+	@Override
+	public String toString() {
+		StringBuilder result = new StringBuilder();
+		result.append('[');
+		result.append(start);
+		result.append("..");
+		result.append(end);
+		result.append(']');
+		return result.toString();
+	}
+
+	/*
+	 * (non-Javadoc)
+	 *
+	 * @see java.lang.Object#hashCode()
+	 */
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result + end;
+		// null check kept as a defensive measure in case the field is populated without the setter
+		result = prime * result + ((excludes == null) ? 0 : excludes.hashCode());
+		result = prime * result + start;
+		return result;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 *
+	 * @see java.lang.Object#equals(java.lang.Object)
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		Range other = (Range) obj;
+		if (end != other.end)
+			return false;
+		if (excludes == null) {
+			if (other.excludes != null)
+				return false;
+		} else if (!excludes.equals(other.excludes))
+			return false;
+		if (start != other.start)
+			return false;
+		return true;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java
new file mode 100644
index 0000000..ece75e5
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetEmptyCellPolicy.java
@@ -0,0 +1,39 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+/**
+ * The policies available for handling empty/missing cells in a spreadsheet.
+ * 
+ * @author David Withers
+ */
+public enum SpreadsheetEmptyCellPolicy {
+
+	/** Use an empty string value (""). */
+	EMPTY_STRING,
+
+	/** Use a value defined by the user. */
+	USER_DEFINED,
+
+	/** Generate an ErrorDocument. */
+	GENERATE_ERROR
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java
new file mode 100644
index 0000000..1f48ce8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivity.java
@@ -0,0 +1,358 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedMap;
+
+import net.sf.taverna.t2.invocation.InvocationContext;
+import net.sf.taverna.t2.reference.ExternalReferenceSPI;
+import net.sf.taverna.t2.reference.Identified;
+import net.sf.taverna.t2.reference.ReferenceService;
+import net.sf.taverna.t2.reference.ReferenceServiceException;
+import net.sf.taverna.t2.reference.ReferenceSet;
+import net.sf.taverna.t2.reference.T2Reference;
+import net.sf.taverna.t2.reference.ValueCarryingExternalReference;
+import net.sf.taverna.t2.workflowmodel.OutputPort;
+import net.sf.taverna.t2.workflowmodel.Port;
+import net.sf.taverna.t2.workflowmodel.processor.activity.AbstractAsynchronousActivity;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityConfigurationException;
+import net.sf.taverna.t2.workflowmodel.processor.activity.AsynchronousActivityCallback;
+
+import org.apache.log4j.Logger;
+
+import com.csvreader.CsvWriter;
+import com.fasterxml.jackson.databind.JsonNode;
+
+
+/**
+ * An {@link net.sf.taverna.t2.workflowmodel.processor.activity.Activity} that reads spreadsheet
+ * files.
+ * <p>
+ * The input is tried as an Excel file first, then as an ODF spreadsheet and finally as CSV. The
+ * cells are delivered either as one list per column (one output port per column) or as a single
+ * CSV formatted string, depending on the configured output format.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetImportActivity extends AbstractAsynchronousActivity<JsonNode> {
+
+	public static final String URI = "http://ns.taverna.org.uk/2010/activity/spreadsheet-import";
+
+	public static final String INPUT_PORT_NAME = "fileurl";
+
+	public static final String OUTPUT_PORT_NAME = "output";
+
+	private static final Logger logger = Logger.getLogger(SpreadsheetImportActivity.class);
+
+	private JsonNode configurationBean;
+
+	private Range rowRange, columnRange;
+
+	private boolean ignoreBlankRows;
+
+	private String missingCellValue;
+
+	private SpreadsheetEmptyCellPolicy emptyCellPolicy;
+
+	private SpreadsheetOutputFormat outputFormat;
+
+	private String csvDelimiter;
+
+	/**
+	 * Constructs a SpreadsheetImport activity.
+	 */
+	public SpreadsheetImportActivity() {
+	}
+
+	/**
+	 * Stores the configuration and reads the settings used during execution from it: the
+	 * <code>rowRange</code>, <code>columnRange</code>, <code>ignoreBlankRows</code>,
+	 * <code>emptyCellValue</code>, <code>emptyCellPolicy</code>, <code>outputFormat</code> and
+	 * <code>csvDelimiter</code> properties.
+	 *
+	 * @param configurationBean
+	 *            the JSON configuration of this activity
+	 * @throws ActivityConfigurationException
+	 *             declared by the overridden method
+	 */
+	@Override
+	public void configure(JsonNode configurationBean)
+			throws ActivityConfigurationException {
+		this.configurationBean = configurationBean;
+		rowRange = SpreadsheetUtils.getRange(configurationBean.get("rowRange"));
+		logger.debug("Setting row range to " + rowRange);
+		columnRange = SpreadsheetUtils.getRange(configurationBean.get("columnRange"));
+		logger.debug("Setting column range to " + columnRange);
+		ignoreBlankRows = configurationBean.get("ignoreBlankRows").booleanValue();
+		missingCellValue = configurationBean.get("emptyCellValue").textValue();
+		logger.debug("Setting empty cell value to '" + missingCellValue + "'");
+		emptyCellPolicy = SpreadsheetEmptyCellPolicy.valueOf(configurationBean.get("emptyCellPolicy").textValue());
+		logger.debug("Setting empty cell policy to " + emptyCellPolicy);
+		outputFormat = SpreadsheetOutputFormat.valueOf(configurationBean.get("outputFormat").textValue());
+		logger.debug("Setting output format to " + outputFormat);
+		csvDelimiter = configurationBean.get("csvDelimiter").textValue();
+		logger.debug("Setting csv delimiter to '" + csvDelimiter + "'");
+//		configurePorts();
+	}
+
+	/**
+	 * Replaces the activity's ports with the single input port and either one output port per
+	 * column in the column range or a single output port, depending on the output format.
+	 * <p>
+	 * NOTE(review): the only call to this method (in <code>configure</code>) is commented out.
+	 */
+	private void configurePorts() {
+		removeInputs();
+		addInput(INPUT_PORT_NAME, 0, false, null, null);
+
+		removeOutputs();
+		if (outputFormat.equals(SpreadsheetOutputFormat.PORT_PER_COLUMN)) {
+			for (int column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+				if (columnRange.contains(column)) {
+					addOutput(SpreadsheetUtils.getPortName(column, configurationBean), 1, 1);
+				}
+			}
+		} else {
+			addOutput(OUTPUT_PORT_NAME, 0, 0);
+		}
+	}
+
+	@Override
+	public JsonNode getConfiguration() {
+		return configurationBean;
+	}
+
+	/**
+	 * Reads the spreadsheet referenced by the input port and reports the result, or the failure,
+	 * through the callback.
+	 *
+	 * @param data
+	 *            the input references, keyed by port name
+	 * @param callback
+	 *            used to schedule the work and to deliver the outputs or the failure
+	 */
+	@Override
+	public void executeAsynch(final Map<String, T2Reference> data,
+			final AsynchronousActivityCallback callback) {
+		callback.requestRun(new Runnable() {
+
+			public void run() {
+
+				Map<String, T2Reference> outputData = new HashMap<String, T2Reference>();
+
+				InvocationContext context = callback.getContext();
+				ReferenceService referenceService = context.getReferenceService();
+
+				try {
+					T2Reference inputRef = data.get(INPUT_PORT_NAME);
+
+					SpreadsheetRowProcessor spreadsheetRowProcessor = null;
+					Map<String, List<T2Reference>> outputLists = null;
+					StringWriter output = null;
+
+					if (outputFormat.equals(SpreadsheetOutputFormat.PORT_PER_COLUMN)) {
+						outputLists = new HashMap<String, List<T2Reference>>();
+						for (Port port : getOutputPorts()) {
+							outputLists.put(port.getName(), new ArrayList<T2Reference>());
+						}
+						spreadsheetRowProcessor = new MultiplePortRowProcessor(referenceService, outputLists, context);
+					} else {
+						output = new StringWriter();
+						// only the first character of the configured delimiter is used
+						char csvDelimiterCharacter = ',';
+						if (csvDelimiter != null && csvDelimiter.length() > 0) {
+							csvDelimiterCharacter = csvDelimiter.charAt(0);
+						}
+						CsvWriter csvWriter = new CsvWriter(output, csvDelimiterCharacter);
+						csvWriter.setEscapeMode(CsvWriter.ESCAPE_MODE_DOUBLED);
+						csvWriter.setTextQualifier('"');
+						csvWriter.setUseTextQualifier(true);
+						spreadsheetRowProcessor = new SingleOutputRowProcessor(csvWriter);
+					}
+
+					InputStream inputStream = getInputStream(context, referenceService, inputRef);
+					if (inputStream == null) {
+						logger.warn("Input is not a file reference or a file name");
+						callback.fail("Input is not a file reference or a file name");
+						return;
+					}
+					try {
+						try {
+							new ExcelSpreadsheetReader().read(inputStream, new Range(rowRange),
+									new Range(columnRange), ignoreBlankRows, spreadsheetRowProcessor);
+						} catch (SpreadsheetReadException e) {
+							// not readable as Excel: retry as ODF on a fresh stream; a failure
+							// to re-open the input is reported by the IOException handlers below
+							inputStream = reopenInputStream(inputStream, context, referenceService, inputRef);
+							try {
+								new ODFSpreadsheetReader().read(inputStream, new Range(rowRange),
+										new Range(columnRange), ignoreBlankRows, spreadsheetRowProcessor);
+							} catch (SpreadsheetReadException e2) {
+								// not readable as ODF either: last attempt is CSV
+								inputStream = reopenInputStream(inputStream, context, referenceService, inputRef);
+								new CSVSpreadsheetReader().read(inputStream, new Range(rowRange),
+										new Range(columnRange), ignoreBlankRows, spreadsheetRowProcessor);
+							}
+						}
+					} finally {
+						// a failure to close must not hide the outcome of the read
+						closeQuietly(inputStream);
+					}
+
+					// get outputs
+					if (outputFormat.equals(SpreadsheetOutputFormat.PORT_PER_COLUMN)) {
+						for (OutputPort outputPort : getOutputPorts()) {
+							String name = outputPort.getName();
+							Object value = outputLists.get(name);
+							T2Reference id = referenceService.register(value, outputPort.getDepth(),
+									true, context);
+							outputData.put(name, id);
+						}
+					} else {
+						T2Reference id = referenceService.register(output.toString(), 0, true, context);
+						outputData.put(OUTPUT_PORT_NAME, id);
+					}
+					callback.receiveResult(outputData, new int[0]);
+				} catch (ReferenceServiceException e) {
+					logger.warn("Error accessing spreadsheet input/output data", e);
+					callback.fail("Error accessing spreadsheet input/output data", e);
+				} catch (SpreadsheetReadException e) {
+					logger.warn("Spreadsheet input cannot be read", e);
+					callback.fail("Spreadsheet input cannot be read", e);
+				} catch (FileNotFoundException e) {
+					logger.warn("Input spreadsheet file does not exist", e);
+					callback.fail("Input spreadsheet file does not exist", e);
+				} catch (IOException e) {
+					logger.warn("Error reading spreadsheet", e);
+					callback.fail("Error reading spreadsheet", e);
+				}
+			}
+
+
+		});
+	}
+
+	/**
+	 * Closes the previous stream and opens the input again so that the next reader starts at the
+	 * beginning of the data.
+	 *
+	 * @param previous
+	 *            the stream used by the reader that failed, may be <code>null</code>
+	 * @param context
+	 *            the invocation context
+	 * @param referenceService
+	 *            the service used to resolve the input reference
+	 * @param inputRef
+	 *            the reference to the input
+	 * @return a new stream on the input, never <code>null</code>
+	 * @throws IOException
+	 *             if the input cannot be opened again
+	 */
+	private InputStream reopenInputStream(InputStream previous, InvocationContext context,
+			ReferenceService referenceService, T2Reference inputRef) throws IOException {
+		closeQuietly(previous);
+		InputStream reopened = getInputStream(context, referenceService, inputRef);
+		if (reopened == null) {
+			throw new IOException("Input is not a file reference or a file name");
+		}
+		return reopened;
+	}
+
+	/**
+	 * Closes a stream, logging instead of propagating any failure to do so.
+	 *
+	 * @param inputStream
+	 *            the stream to close, may be <code>null</code>
+	 */
+	private void closeQuietly(InputStream inputStream) {
+		if (inputStream == null) {
+			return;
+		}
+		try {
+			inputStream.close();
+		} catch (IOException e) {
+			logger.warn("Failed to close spreadsheet stream", e);
+		}
+	}
+
+	/**
+	 * Opens a stream on the spreadsheet identified by the input reference.
+	 * <p>
+	 * Only the first external reference of the resolved reference set is considered. If it carries
+	 * a string value, that value is opened as a URL or, if it is not a well formed URL, as a file
+	 * name. Any other kind of reference is asked to open its own stream.
+	 *
+	 * @param context
+	 *            the invocation context
+	 * @param referenceService
+	 *            the service used to resolve the input reference
+	 * @param inputRef
+	 *            the reference to the input
+	 * @return a stream on the input, or <code>null</code> if the reference does not resolve to a
+	 *         reference set, the set is empty, or the carried value is not a string
+	 * @throws IOException
+	 *             if the URL or file cannot be opened
+	 */
+	private InputStream getInputStream(InvocationContext context,
+			ReferenceService referenceService, T2Reference inputRef)
+			throws IOException {
+		InputStream inputStream = null;
+
+		Identified identified = referenceService.resolveIdentifier(inputRef, null, context);
+		if (identified instanceof ReferenceSet) {
+			ReferenceSet referenceSet = (ReferenceSet) identified;
+			Set<ExternalReferenceSPI> externalReferences = referenceSet
+					.getExternalReferences();
+			for (ExternalReferenceSPI externalReference : externalReferences) {
+				if (externalReference instanceof ValueCarryingExternalReference<?>) {
+					ValueCarryingExternalReference<?> vcer = (ValueCarryingExternalReference<?>) externalReference;
+					if (String.class.isAssignableFrom(vcer.getValueType())) {
+						String input = (String) vcer.getValue();
+						try {
+							URL url = new URL(input);
+							inputStream = url.openStream();
+							logger.debug("Input spreadsheet url is '" + input + "'");
+						} catch (MalformedURLException e) {
+							// not a URL, so treat the value as a file name
+							logger.debug("Input spreadsheet file name is '" + input + "'");
+							inputStream = new FileInputStream(input);
+						}
+					}
+					break;
+				} else {
+					inputStream = externalReference.openStream(context);
+					break;
+				}
+			}
+		}
+		return inputStream;
+	}
+
+	/**
+	 * SpreadsheetRowProcessor for handling a single output formatted as csv.
+	 *
+	 * @author David Withers
+	 */
+	private final class SingleOutputRowProcessor implements SpreadsheetRowProcessor {
+
+		private final CsvWriter csvWriter;
+
+		/**
+		 * Constructs a new SingleOutputRowProcessor.
+		 *
+		 * @param csvWriter
+		 *            the writer each row is appended to
+		 */
+		private SingleOutputRowProcessor(CsvWriter csvWriter) {
+			this.csvWriter = csvWriter;
+		}
+
+		/**
+		 * Writes the values of the row as one CSV record, substituting empty cells according to
+		 * the empty cell policy.
+		 *
+		 * @param rowIndex
+		 *            the index of the row (not used)
+		 * @param row
+		 *            the cell values keyed by column index, <code>null</code> for an empty cell
+		 */
+		public void processRow(int rowIndex, SortedMap<Integer, String> row) {
+			try {
+				for (String value : row.values()) {
+					if (value == null) {
+						if (emptyCellPolicy.equals(SpreadsheetEmptyCellPolicy.GENERATE_ERROR)) {
+							value = "ERROR";
+						} else if (emptyCellPolicy.equals(SpreadsheetEmptyCellPolicy.EMPTY_STRING)) {
+							value = "";
+						} else {
+							value = missingCellValue;
+						}
+					}
+					csvWriter.write(value, true);
+				}
+				csvWriter.endRecord();
+			} catch (IOException e) {
+				throw new RuntimeException(e);
+			}
+		}
+	}
+
+	/**
+	 * SpreadsheetRowProcessor for handling multiple outputs.
+	 *
+	 * @author David Withers
+	 */
+	private final class MultiplePortRowProcessor implements SpreadsheetRowProcessor {
+
+		private final ReferenceService referenceService;
+		private final Map<String, List<T2Reference>> outputLists;
+		private final InvocationContext context;
+
+		/**
+		 * Constructs a new MultiplePortRowProcessor.
+		 *
+		 * @param referenceService
+		 *            the service used to register the cell values
+		 * @param outputLists
+		 *            the per port lists the registered references are appended to
+		 * @param context
+		 *            the invocation context
+		 */
+		private MultiplePortRowProcessor(ReferenceService referenceService,
+				Map<String, List<T2Reference>> outputLists, InvocationContext context) {
+			this.referenceService = referenceService;
+			this.outputLists = outputLists;
+			this.context = context;
+		}
+
+		/**
+		 * Registers every cell of the row and appends the reference to the list of the port
+		 * that belongs to the cell's column, substituting empty cells according to the empty
+		 * cell policy.
+		 *
+		 * @param rowIndex
+		 *            the index of the row, used to name the cell in error documents
+		 * @param row
+		 *            the cell values keyed by column index, <code>null</code> for an empty cell
+		 */
+		public void processRow(int rowIndex, SortedMap<Integer, String> row) {
+			for (Entry<Integer, String> entry : row.entrySet()) {
+				String column = SpreadsheetUtils.getPortName(entry.getKey(),
+						configurationBean);
+				Object value = entry.getValue();
+				if (value == null) {
+					if (emptyCellPolicy
+							.equals(SpreadsheetEmptyCellPolicy.GENERATE_ERROR)) {
+						// name the cell by column and row number, not by the row's contents;
+						// presumably rowIndex is zero-based for all readers (the ODF reader
+						// uses it that way), hence the + 1 to match spreadsheet row numbers
+						value = referenceService.getErrorDocumentService()
+								.registerError(
+										"Missing data for spreadsheet cell "
+												+ column + (rowIndex + 1), 0, context);
+					} else if (emptyCellPolicy
+							.equals(SpreadsheetEmptyCellPolicy.EMPTY_STRING)) {
+						value = "";
+					} else {
+						value = missingCellValue;
+					}
+				}
+				T2Reference id = referenceService.register(value, 0, true, context);
+				outputLists.get(column).add(id);
+			}
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java
new file mode 100644
index 0000000..a4b1ab8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportActivityFactory.java
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * Copyright (C) 2011 The University of Manchester
+ *
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.HashSet;
+import java.util.Set;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import net.sf.taverna.t2.workflowmodel.Edits;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityConfigurationException;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityFactory;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityInputPort;
+import net.sf.taverna.t2.workflowmodel.processor.activity.ActivityOutputPort;
+
+/**
+ * An {@link ActivityFactory} for creating <code>SpreadsheetImportActivity</code>.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetImportActivityFactory implements ActivityFactory {
+
+	private Edits edits;
+
+	@Override
+	public SpreadsheetImportActivity createActivity() {
+		return new SpreadsheetImportActivity();
+	}
+
+	@Override
+	public URI getActivityType() {
+		return URI.create(SpreadsheetImportActivity.URI);
+	}
+
+	@Override
+	public JsonNode getActivityConfigurationSchema() {
+		ObjectMapper objectMapper = new ObjectMapper();
+		try {
+ 			return objectMapper.readTree(getClass().getResource("/schema.json"));
+		} catch (IOException e) {
+			return objectMapper.createObjectNode();
+		}
+	}
+
+	@Override
+	public Set<ActivityInputPort> getInputPorts(JsonNode configuration)
+			throws ActivityConfigurationException {
+		Set<ActivityInputPort> inputPorts = new HashSet<>();
+		inputPorts.add(edits.createActivityInputPort(SpreadsheetImportActivity.INPUT_PORT_NAME, 0, false, null, null));
+		return inputPorts;
+	}
+
+	@Override
+	public Set<ActivityOutputPort> getOutputPorts(JsonNode configuration)
+			throws ActivityConfigurationException {
+		Set<ActivityOutputPort> outputPorts = new HashSet<>();
+		if ("PORT_PER_COLUMN".equals(configuration.get("outputFormat").textValue())) {
+			Range columnRange = SpreadsheetUtils.getRange(configuration.get("columnRange"));
+			for (int column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
+				if (columnRange.contains(column)) {
+					outputPorts.add(edits.createActivityOutputPort(SpreadsheetUtils.getPortName(column, configuration), 1, 1));
+				}
+			}
+		} else {
+			outputPorts.add(edits.createActivityOutputPort(SpreadsheetImportActivity.OUTPUT_PORT_NAME, 0, 0));
+		}
+		return outputPorts;
+	}
+
+	public void setEdits(Edits edits) {
+		this.edits = edits;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java
new file mode 100644
index 0000000..1cf37ae
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportConfiguration.java
@@ -0,0 +1,416 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationBean;
+import net.sf.taverna.t2.workflowmodel.processor.config.ConfigurationProperty;
+
+/**
+ * Configuration for the SpreadsheetImport activity.
+ * <p>
+ * A mutable bean holding the row and column ranges to import, the column to port name mapping,
+ * the handling of empty cells and blank rows, the output format and the CSV delimiter. Two
+ * configurations are equal when all of these properties are equal.
+ *
+ * @author David Withers
+ */
+@ConfigurationBean(uri = SpreadsheetImportActivity.URI + "#Config")
+public class SpreadsheetImportConfiguration {
+
+	private Range columnRange;
+	private Range rowRange;
+	private String emptyCellValue;
+	private Map<String, String> columnNames;
+	private boolean allRows;
+	private boolean excludeFirstRow;
+	private boolean ignoreBlankRows;
+	private SpreadsheetEmptyCellPolicy emptyCellPolicy;
+	private SpreadsheetOutputFormat outputFormat;
+	private String csvDelimiter;
+
+	/**
+	 * Constructs a new SpreadsheetImportConfiguration with the default values documented on
+	 * the individual getters.
+	 */
+	public SpreadsheetImportConfiguration() {
+		columnRange = new Range(0, 1);
+		rowRange = new Range(0, -1);
+		emptyCellValue = "";
+		columnNames = new HashMap<String, String>();
+		allRows = true;
+		excludeFirstRow = false;
+		ignoreBlankRows = false;
+		emptyCellPolicy = SpreadsheetEmptyCellPolicy.EMPTY_STRING;
+		outputFormat = SpreadsheetOutputFormat.PORT_PER_COLUMN;
+		csvDelimiter = ",";
+	}
+
+	/**
+	 * Constructs a new SpreadsheetImportConfiguration that copies the values from the given
+	 * configuration. The ranges and the column name map are copied rather than shared.
+	 *
+	 * @param configuration
+	 *            the configuration to copy
+	 */
+	public SpreadsheetImportConfiguration(SpreadsheetImportConfiguration configuration) {
+		// The setters accept null for these properties, so a source configuration may
+		// legitimately hold nulls; copy them through instead of failing.
+		columnRange = configuration.columnRange == null ? null : new Range(configuration.columnRange);
+		rowRange = configuration.rowRange == null ? null : new Range(configuration.rowRange);
+		emptyCellValue = configuration.emptyCellValue;
+		columnNames = configuration.columnNames == null ? null
+				: new HashMap<String, String>(configuration.columnNames);
+		allRows = configuration.allRows;
+		excludeFirstRow = configuration.excludeFirstRow;
+		emptyCellPolicy = configuration.emptyCellPolicy;
+		ignoreBlankRows = configuration.ignoreBlankRows;
+		outputFormat = configuration.outputFormat;
+		csvDelimiter = configuration.csvDelimiter;
+	}
+
+	/**
+	 * Returns the columnRange.
+	 *
+	 * @return the value of columnRange
+	 */
+	public Range getColumnRange() {
+		return columnRange;
+	}
+
+	/**
+	 * Sets the columnRange.
+	 *
+	 * @param columnRange
+	 *            the new value for columnRange
+	 */
+	@ConfigurationProperty(name = "columnRange", label = "Column Range", description = "The range of columns to be imported (e.g. columns 2 to 7")
+	public void setColumnRange(Range columnRange) {
+		this.columnRange = columnRange;
+	}
+
+	/**
+	 * Returns the rowRange.
+	 *
+	 * @return the value of rowRange
+	 */
+	public Range getRowRange() {
+		return rowRange;
+	}
+
+	/**
+	 * Sets the rowRange.
+	 *
+	 * @param rowRange
+	 *            the new value for rowRange
+	 */
+	@ConfigurationProperty(name = "rowRange", label = "Row Range", description = "The range of rows to be imported (e.g. rows 1 to 15", required = false)
+	public void setRowRange(Range rowRange) {
+		this.rowRange = rowRange;
+	}
+
+	/**
+	 * Returns the emptyCellValue. The default value is "".
+	 *
+	 * @return the value of emptyCellValue
+	 */
+	public String getEmptyCellValue() {
+		return emptyCellValue;
+	}
+
+	/**
+	 * Sets the emptyCellValue.
+	 *
+	 * @param emptyCellValue
+	 *            the new value for emptyCellValue
+	 */
+	@ConfigurationProperty(name = "emptyCellValue", label = "Empty Cell Value", description = "The value to use for empty cells. The default is \"\"", required = false)
+	public void setEmptyCellValue(String emptyCellValue) {
+		this.emptyCellValue = emptyCellValue;
+	}
+
+	/**
+	 * Returns the columnNames. The default value is an empty map.
+	 *
+	 * @return the value of columnNames
+	 */
+	public Map<String, String> getColumnNames() {
+		return columnNames;
+	}
+
+	/**
+	 * Sets the columnNames. The given map is stored as is, not copied.
+	 *
+	 * @param columnNames
+	 *            the new value for columnNames
+	 */
+	public void setColumnNames(Map<String, String> columnNames) {
+		this.columnNames = columnNames;
+	}
+
+	/**
+	 * Sets the columnNames from a set of column to port mappings. The mappings are collected
+	 * into a new map keyed by column.
+	 *
+	 * @param columnNames
+	 *            the mappings that make up the new value for columnNames
+	 */
+	@ConfigurationProperty(name = "columnNames", label = "Column Name Mapping", description = "Mapping from column to port names", required = false)
+	public void setColumnNames(Set<Mapping> columnNames) {
+		Map<String, String> columnNamesMap = new HashMap<String, String>();
+		for (Mapping mapping : columnNames) {
+			columnNamesMap.put(mapping.column, mapping.port);
+		}
+		this.columnNames = columnNamesMap;
+	}
+
+	/**
+	 * Returns the allRows property. The default value is <code>true</code>.
+	 *
+	 * @return the value of allRows
+	 */
+	public boolean isAllRows() {
+		return allRows;
+	}
+
+	/**
+	 * Sets the allRows property.
+	 *
+	 * @param allRows
+	 *            the new value for allRows
+	 */
+	@ConfigurationProperty(name = "allRows", label = "Import All Rows", description = "Imports all the rows containing data", required = false)
+	public void setAllRows(boolean allRows) {
+		this.allRows = allRows;
+	}
+
+	/**
+	 * Returns the excludeFirstRow property. The default value is <code>false</code>.
+	 *
+	 * @return the value of excludeFirstRow
+	 */
+	public boolean isExcludeFirstRow() {
+		return excludeFirstRow;
+	}
+
+	/**
+	 * Sets the excludeFirstRow property.
+	 *
+	 * @param excludeFirstRow
+	 *            the new value for excludeFirstRow
+	 */
+	@ConfigurationProperty(name = "excludeFirstRow", label = "Exclude First Row", description = "Excludes the first row from the import", required = false)
+	public void setExcludeFirstRow(boolean excludeFirstRow) {
+		this.excludeFirstRow = excludeFirstRow;
+	}
+
+	/**
+	 * Returns the ignoreBlankRows property. The default value is <code>false</code>.
+	 *
+	 * @return the value of ignoreBlankRows
+	 */
+	public boolean isIgnoreBlankRows() {
+		return ignoreBlankRows;
+	}
+
+	/**
+	 * Sets the ignoreBlankRows property.
+	 *
+	 * @param ignoreBlankRows the new value for ignoreBlankRows
+	 */
+	@ConfigurationProperty(name = "ignoreBlankRows", label = "Ignore Blank Rows", description = "Excludes blank rows from the import", required = false)
+	public void setIgnoreBlankRows(boolean ignoreBlankRows) {
+		this.ignoreBlankRows = ignoreBlankRows;
+	}
+
+	/**
+	 * Returns the emptyCellPolicy. The default value is
+	 * <code>SpreadsheetEmptyCellPolicy.EMPTY_STRING</code>.
+	 *
+	 * @return the value of emptyCellPolicy
+	 */
+	public SpreadsheetEmptyCellPolicy getEmptyCellPolicy() {
+		return emptyCellPolicy;
+	}
+
+	/**
+	 * Sets the emptyCellPolicy.
+	 *
+	 * @param emptyCellPolicy
+	 *            the new value for emptyCellPolicy
+	 */
+	@ConfigurationProperty(name = "emptyCellPolicy", label = "Empty Cell Policy", description = "Policy for handling empty cells", required = false)
+	public void setEmptyCellPolicy(SpreadsheetEmptyCellPolicy emptyCellPolicy) {
+		this.emptyCellPolicy = emptyCellPolicy;
+	}
+
+	/**
+	 * Returns the outputFormat. The default value is
+	 * <code>SpreadsheetOutputFormat.PORT_PER_COLUMN</code>, which is also returned when the
+	 * property has been set to <code>null</code>.
+	 *
+	 * @return the value of outputFormat, never <code>null</code>
+	 */
+	public SpreadsheetOutputFormat getOutputFormat() {
+		return outputFormat == null ? SpreadsheetOutputFormat.PORT_PER_COLUMN : outputFormat;
+	}
+
+	/**
+	 * Sets the outputFormat.
+	 *
+	 * @param outputFormat
+	 *            the new value for outputFormat
+	 */
+	@ConfigurationProperty(name = "outputFormat", label = "Output Format", description = "How the activity outputs are to be formatted", required = false)
+	public void setOutputFormat(SpreadsheetOutputFormat outputFormat) {
+		this.outputFormat = outputFormat;
+	}
+
+	/**
+	 * Returns the CSV delimiter. The default value is <code>","</code>, which is also
+	 * returned when the property has been set to <code>null</code>.
+	 *
+	 * @return the CSV delimiter, never <code>null</code>
+	 */
+	public String getCsvDelimiter() {
+		return csvDelimiter == null ? "," : csvDelimiter;
+	}
+
+	/**
+	 * Sets the CSV delimiter.
+	 *
+	 * @param csvDelimiter
+	 *            the new CSV delimiter
+	 */
+	@ConfigurationProperty(name = "csvDelimiter", label = "CSV Delimiter", description = "The delimiter to use for CSV input files. The default is ','", required = false)
+	public void setCsvDelimiter(String csvDelimiter) {
+		this.csvDelimiter = csvDelimiter;
+	}
+
+	/**
+	 * Computes the hash code from the raw field values (not the defaulting getters), in
+	 * keeping with {@link #equals(Object)}.
+	 */
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result + hashOf(allRows);
+		result = prime * result + hashOf(columnNames);
+		result = prime * result + hashOf(columnRange);
+		result = prime * result + hashOf(csvDelimiter);
+		result = prime * result + hashOf(emptyCellPolicy);
+		result = prime * result + hashOf(emptyCellValue);
+		result = prime * result + hashOf(excludeFirstRow);
+		result = prime * result + hashOf(ignoreBlankRows);
+		result = prime * result + hashOf(outputFormat);
+		result = prime * result + hashOf(rowRange);
+		return result;
+	}
+
+	/**
+	 * Two configurations are equal when they are of the same class and every property field
+	 * is equal; <code>null</code> fields are only equal to <code>null</code> fields.
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		SpreadsheetImportConfiguration other = (SpreadsheetImportConfiguration) obj;
+		return allRows == other.allRows
+				&& sameValue(columnNames, other.columnNames)
+				&& sameValue(columnRange, other.columnRange)
+				&& sameValue(csvDelimiter, other.csvDelimiter)
+				&& sameValue(emptyCellPolicy, other.emptyCellPolicy)
+				&& sameValue(emptyCellValue, other.emptyCellValue)
+				&& excludeFirstRow == other.excludeFirstRow
+				&& ignoreBlankRows == other.ignoreBlankRows
+				&& sameValue(outputFormat, other.outputFormat)
+				&& sameValue(rowRange, other.rowRange);
+	}
+
+	/** Null-safe equality: null only equals null, otherwise delegates to equals. */
+	private static boolean sameValue(Object left, Object right) {
+		return left == null ? right == null : left.equals(right);
+	}
+
+	/** Hash contribution of an object field; null contributes 0. */
+	private static int hashOf(Object value) {
+		return value == null ? 0 : value.hashCode();
+	}
+
+	/** Hash contribution of a boolean field (the same values as Boolean.hashCode). */
+	private static int hashOf(boolean value) {
+		return value ? 1231 : 1237;
+	}
+
+	/**
+	 * A single column to port name mapping, used when the column names are supplied as a set
+	 * of beans rather than as a map.
+	 */
+	@ConfigurationBean(uri = SpreadsheetImportActivity.URI + "/Mapping")
+	public static class Mapping {
+		private String column, port;
+
+		public Mapping() {
+		}
+
+		public String getColumn() {
+			return column;
+		}
+
+		@ConfigurationProperty(name = "column", label = "Column", description = "The name of the column")
+		public void setColumn(String column) {
+			this.column = column;
+		}
+
+		public String getPort() {
+			return port;
+		}
+
+		@ConfigurationProperty(name = "port", label = "Port", description = "The name of the port")
+		public void setPort(String port) {
+			this.port = port;
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java
new file mode 100644
index 0000000..35db532
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetImportHealthChecker.java
@@ -0,0 +1,61 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester
+ *
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.util.List;
+
+import com.fasterxml.jackson.databind.JsonNode;
+
+import net.sf.taverna.t2.workflowmodel.Processor;
+import net.sf.taverna.t2.workflowmodel.health.HealthCheck;
+import net.sf.taverna.t2.workflowmodel.health.HealthChecker;
+import net.sf.taverna.t2.visit.VisitReport;
+import net.sf.taverna.t2.visit.VisitReport.Status;
+
+
+/**
+ * Health checker for SpreadsheetImport activities. The only condition checked is whether the
+ * activity has a configuration.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetImportHealthChecker implements HealthChecker<SpreadsheetImportActivity> {
+
+	/**
+	 * Accepts only SpreadsheetImportActivity subjects.
+	 */
+	public boolean canVisit(Object subject) {
+		// instanceof is already false for null, so no separate null test is needed
+		return subject instanceof SpreadsheetImportActivity;
+	}
+
+	/**
+	 * Reports against the Processor found among the ancestors: SEVERE when the activity has
+	 * no configuration, OK otherwise. Returns <code>null</code> when there is no Processor
+	 * ancestor.
+	 */
+	public VisitReport visit(SpreadsheetImportActivity activity, List<Object> ancestors) {
+		Processor processor = (Processor) VisitReport.findAncestor(ancestors, Processor.class);
+		if (processor == null) {
+			return null;
+		}
+		JsonNode configuration = activity.getConfiguration();
+		if (configuration != null) {
+			return new VisitReport(HealthCheck.getInstance(), processor, "Spreadsheet OK", HealthCheck.NO_PROBLEM, Status.OK);
+		}
+		return new VisitReport(HealthCheck.getInstance(), processor, "Spreadsheet import has not been configured", HealthCheck.NO_CONFIGURATION, Status.SEVERE);
+	}
+
+	/**
+	 * The check involves no expensive work.
+	 */
+	public boolean isTimeConsuming() {
+		return false;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java
new file mode 100644
index 0000000..d1aaca8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetOutputFormat.java
@@ -0,0 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+/**
+ * Enumeration of the formats for output of spreadsheet cell values.
+ *
+ * @author David Withers
+ */
+public enum SpreadsheetOutputFormat {
+
+	/** One port of depth 1 (a list) per column. */
+	PORT_PER_COLUMN,
+
+	/** A single port of depth 0, formatted as CSV. */
+	SINGLE_PORT
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java
new file mode 100644
index 0000000..9cd4ba8
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReadException.java
@@ -0,0 +1,69 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+/**
+ * Signals that a spreadsheet could not be read, either because of an IO error or because the
+ * file format is not supported.
+ *
+ * @author David Withers
+ */
+public class SpreadsheetReadException extends Exception {
+
+	private static final long serialVersionUID = -823966225836697180L;
+
+	/**
+	 * Creates an exception with neither a detail message nor a cause.
+	 */
+	public SpreadsheetReadException() {
+		super();
+	}
+
+	/**
+	 * Creates an exception carrying the given detail message.
+	 *
+	 * @param message
+	 *            the detail message
+	 */
+	public SpreadsheetReadException(String message) {
+		super(message);
+	}
+
+	/**
+	 * Creates an exception wrapping the given cause. The detail message is
+	 * <code>cause.toString()</code>, or <code>null</code> when the cause is <code>null</code>.
+	 *
+	 * @param cause
+	 *            the underlying failure
+	 */
+	public SpreadsheetReadException(Throwable cause) {
+		super(cause);
+	}
+
+	/**
+	 * Creates an exception carrying both a detail message and the underlying cause.
+	 *
+	 * @param message
+	 *            the detail message
+	 * @param cause
+	 *            the underlying failure
+	 */
+	public SpreadsheetReadException(String message, Throwable cause) {
+		super(message, cause);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-taverna-common-activities/blob/b7e29f54/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java
----------------------------------------------------------------------
diff --git a/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java
new file mode 100644
index 0000000..79c92d1
--- /dev/null
+++ b/taverna-spreadsheet-import-activity/src/main/java/net/sf/taverna/t2/activities/spreadsheet/SpreadsheetReader.java
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * Copyright (C) 2009 The University of Manchester   
+ * 
+ *  Modifications to the initial code base are copyright of their
+ *  respective authors, or their employers as appropriate.
+ * 
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2.1 of
+ *  the License, or (at your option) any later version.
+ *    
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *    
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ ******************************************************************************/
+package net.sf.taverna.t2.activities.spreadsheet;
+
+import java.io.InputStream;
+
+/**
+ * Interface for reading a spreadsheet from an input stream.
+ * 
+ * @author David Withers
+ */
+public interface SpreadsheetReader {
+
+	/**
+	 * Reads an InputStream and passes spreadsheet cell data values, row by row, to the
+	 * rowProcessor.
+	 * 
+	 * @param inputStream
+	 *            the stream to read
+	 * @param rowProcessor
+	 *            the rowProcessor to write rows of data values to
+	 * @param rowRange
+	 *            the rows to read
+	 * @param columnRange
+	 *            the columns to read
+	 * @param ignoreBlankRows
+	 *            whether to ignore blank rows
+	 * @throws SpreadsheetReadException
+	 *             if there's an error reading the stream or the stream is not a valid spreadsheet
+	 */
+	public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows,
+			SpreadsheetRowProcessor rowProcessor) throws SpreadsheetReadException;
+
+}
\ No newline at end of file