You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/17 12:15:47 UTC
svn commit: r1171936 - in
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser:
hdf/HDFParser.java netcdf/NetCDFParser.java
Author: jukka
Date: Sat Sep 17 10:15:46 2011
New Revision: 1171936
URL: http://svn.apache.org/viewvc?rev=1171936&view=rev
Log:
TIKA-598: Update the HDF and NetCDF parsers to emit minimal XHTML.
Also streamline the stream-copying code and improve error handling.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java?rev=1171936&r1=1171935&r2=1171936&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java Sat Sep 17 10:15:46 2011
@@ -26,11 +26,13 @@ import java.util.Set;
//TIKA imports
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.netcdf.NetCDFParser;
+import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -50,8 +52,8 @@ import ucar.nc2.NetcdfFile;
*/
public class HDFParser extends AbstractParser {
- private static final Set<MediaType> SUPPORTED_TYPES = Collections
- .singleton(MediaType.application("x-hdf"));
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.singleton(MediaType.application("x-hdf"));
/*
* (non-Javadoc)
@@ -76,10 +78,22 @@ public class HDFParser extends AbstractP
Metadata metadata, ParseContext context) throws IOException,
SAXException, TikaException {
ByteArrayOutputStream os = new ByteArrayOutputStream();
- this.writeStreamToMemory(stream, os);
+ IOUtils.copy(stream, os);
- NetcdfFile ncFile = NetcdfFile.openInMemory("", os.toByteArray());
- this.unravelStringMet(ncFile, null, metadata);
+ String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ if (name == null) {
+ name = "";
+ }
+ try {
+ NetcdfFile ncFile = NetcdfFile.openInMemory(name, os.toByteArray());
+ unravelStringMet(ncFile, null, metadata);
+ } catch (IOException e) {
+ throw new TikaException("HDF parse error", e);
+ }
+
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ xhtml.startDocument();
+ xhtml.endDocument();
}
protected void unravelStringMet(NetcdfFile ncFile, Group group, Metadata met) {
@@ -103,18 +117,4 @@ public class HDFParser extends AbstractP
}
}
- protected void writeStreamToMemory(InputStream is, ByteArrayOutputStream os)
- throws TikaException {
- byte[] buf = new byte[512];
-
- try {
- while ((is.read(buf, 0, 512)) != -1) {
- os.write(buf);
- }
- } catch (Exception e) {
- e.printStackTrace();
- throw new TikaException(e.getMessage());
- }
- }
-
}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java?rev=1171936&r1=1171935&r2=1171936&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java Sat Sep 17 10:15:46 2011
@@ -25,11 +25,13 @@ import java.util.Set;
//TIKA imports
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -46,8 +48,8 @@ import ucar.nc2.NetcdfFile;
*/
public class NetCDFParser extends AbstractParser {
- private final Set<MediaType> SUPPORTED_TYPES = Collections
- .singleton(MediaType.application("x-netcdf"));
+ private final Set<MediaType> SUPPORTED_TYPES =
+ Collections.singleton(MediaType.application("x-netcdf"));
/*
* (non-Javadoc)
@@ -70,37 +72,34 @@ public class NetCDFParser extends Abstra
public void parse(InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context) throws IOException,
SAXException, TikaException {
-
ByteArrayOutputStream os = new ByteArrayOutputStream();
- this.writeStreamToMemory(stream, os);
-
- NetcdfFile ncFile = NetcdfFile.openInMemory("", os.toByteArray());
+ IOUtils.copy(stream, os);
- // first parse out the set of global attributes
- for (Attribute attr : ncFile.getGlobalAttributes()) {
- String attrName = attr.getName();
- if (attr.getDataType().isString()) {
- metadata.add(attrName, attr.getStringValue());
- } else if (attr.getDataType().isNumeric()) {
- metadata.add(attrName, String.valueOf(attr.getNumericValue()
- .intValue()));
- }
+ String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ if (name == null) {
+ name = "";
}
- }
-
- protected void writeStreamToMemory(InputStream is, ByteArrayOutputStream os)
- throws TikaException {
- byte[] buf = new byte[512];
-
try {
- while ((is.read(buf, 0, 512)) != -1) {
- os.write(buf);
+ NetcdfFile ncFile = NetcdfFile.openInMemory(name, os.toByteArray());
+
+ // first parse out the set of global attributes
+ for (Attribute attr : ncFile.getGlobalAttributes()) {
+ String attrName = attr.getName();
+ if (attr.getDataType().isString()) {
+ metadata.add(attrName, attr.getStringValue());
+ } else if (attr.getDataType().isNumeric()) {
+ int value = attr.getNumericValue().intValue();
+ metadata.add(attrName, String.valueOf(value));
+ }
}
- } catch (Exception e) {
- e.printStackTrace();
- throw new TikaException(e.getMessage());
- }
+ } catch (IOException e) {
+ throw new TikaException("NetCDF parse error", e);
+ }
+
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ xhtml.startDocument();
+ xhtml.endDocument();
}
}