You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/02/20 20:29:43 UTC
svn commit: r1661200 [2/3] - in /tika/trunk/tika-server/src:
main/java/org/apache/tika/server/ test/java/org/apache/tika/server/
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Fri Feb 20 19:29:42 2015
@@ -75,288 +75,170 @@ import org.xml.sax.SAXException;
@Path("/tika")
public class TikaResource {
- public static final String GREETING = "This is Tika Server. Please PUT\n";
- public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
- public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
-
-
- private final Log logger = LogFactory.getLog(TikaResource.class);
-
- private TikaConfig tikaConfig;
- public TikaResource(TikaConfig tikaConfig) {
- this.tikaConfig = tikaConfig;
- }
-
- static {
- ExtractorFactory.setAllThreadsPreferEventExtractors(true);
- }
-
- @GET
- @Produces("text/plain")
- public String getMessage() {
- return GREETING;
- }
-
- @SuppressWarnings("serial")
- public static AutoDetectParser createParser(TikaConfig tikaConfig) {
- final AutoDetectParser parser = new AutoDetectParser(tikaConfig);
-
- Map<MediaType,Parser> parsers = parser.getParsers();
- parsers.put(MediaType.APPLICATION_XML, new HtmlParser());
- parser.setParsers(parsers);
-
- parser.setFallback(new Parser() {
- public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
- return parser.getSupportedTypes(parseContext);
- }
-
- public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
- throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
- }
- });
-
- return parser;
- }
-
- public static String detectFilename(MultivaluedMap<String, String> httpHeaders) {
-
- String disposition = httpHeaders.getFirst("Content-Disposition");
- if (disposition != null) {
- try {
- ContentDisposition c = new ContentDisposition(disposition);
-
- // only support "attachment" dispositions
- if ("attachment".equals(c.getDisposition())) {
- String fn = c.getParameter("filename");
- if (fn != null) {
- return fn;
- }
- }
- } catch (ParseException e) {
- // not a valid content-disposition field
- }
- }
-
- // this really should not be used, since it's not an official field
- return httpHeaders.getFirst("File-Name");
- }
-
- public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders) {
- TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
- PDFParserConfig pdfParserConfig = new PDFParserConfig();
- for (String key : httpHeaders.keySet()) {
- if (StringUtils.startsWith(key, X_TIKA_OCR_HEADER_PREFIX)) {
- processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX);
- } else if (StringUtils.startsWith(key, X_TIKA_PDF_HEADER_PREFIX)) {
- processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX);
- }
- }
- parseContext.set(TesseractOCRConfig.class, ocrConfig);
- parseContext.set(PDFParserConfig.class, pdfParserConfig);
- }
-
- /**
- * Utility method to set a property on a class via reflection.
- *
- * @param httpHeaders the HTTP headers set.
- * @param object the <code>Object</code> to set the property on.
- * @param key the key of the HTTP Header.
- * @param prefix the name of the HTTP Header prefix used to find property.
- * @throws WebApplicationException thrown when field cannot be found.
- */
- private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
- try {
- String property = StringUtils.removeStart(key, prefix);
- Field field = object.getClass().getDeclaredField(StringUtils.uncapitalize(property));
- field.setAccessible(true);
- if (field.getType() == String.class) {
- field.set(object, httpHeaders.getFirst(key));
- } else if (field.getType() == int.class) {
- field.setInt(object, Integer.parseInt(httpHeaders.getFirst(key)));
- } else if (field.getType() == double.class) {
- field.setDouble(object, Double.parseDouble(httpHeaders.getFirst(key)));
- } else if (field.getType() == boolean.class) {
- field.setBoolean(object, Boolean.parseBoolean(httpHeaders.getFirst(key)));
- }
- } catch (Throwable ex) {
- throw new WebApplicationException(String.format(Locale.ROOT,
- "%s is an invalid %s header", key, X_TIKA_OCR_HEADER_PREFIX));
- }
- }
-
- @SuppressWarnings("serial")
-public static void fillMetadata(AutoDetectParser parser, Metadata metadata, ParseContext context, MultivaluedMap<String, String> httpHeaders) {
- String fileName = detectFilename(httpHeaders);
- if (fileName != null) {
- metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
- }
-
- String contentTypeHeader = httpHeaders.getFirst(HttpHeaders.CONTENT_TYPE);
- javax.ws.rs.core.MediaType mediaType = contentTypeHeader == null ? null
- : javax.ws.rs.core.MediaType.valueOf(contentTypeHeader);
- if (mediaType!=null && "xml".equals(mediaType.getSubtype()) ) {
- mediaType = null;
- }
-
- if (mediaType !=null && mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
- mediaType = null;
- }
-
- if (mediaType !=null) {
- metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString());
-
- final Detector detector = parser.getDetector();
-
- parser.setDetector(new Detector() {
- public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
- String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
-
- if (ct!=null) {
- return MediaType.parse(ct);
- } else {
- return detector.detect(inputStream, metadata);
- }
- }
- });
+ public static final String GREETING = "This is Tika Server. Please PUT\n";
+ public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
+ public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
+
+
+ private final Log logger = LogFactory.getLog(TikaResource.class);
+
+ private TikaConfig tikaConfig;
+
+ public TikaResource(TikaConfig tikaConfig) {
+ this.tikaConfig = tikaConfig;
+ }
+
+ static {
+ ExtractorFactory.setAllThreadsPreferEventExtractors(true);
}
-
- final String password = httpHeaders.getFirst("Password");
- if (password != null) {
- context.set(PasswordProvider.class, new PasswordProvider() {
- @Override
- public String getPassword(Metadata metadata) {
- return password;
+
+ @SuppressWarnings("serial")
+ public static AutoDetectParser createParser(TikaConfig tikaConfig) {
+ final AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+
+ Map<MediaType, Parser> parsers = parser.getParsers();
+ parsers.put(MediaType.APPLICATION_XML, new HtmlParser());
+ parser.setParsers(parsers);
+
+ parser.setFallback(new Parser() {
+ public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
+ return parser.getSupportedTypes(parseContext);
+ }
+
+ public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
+ throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
}
});
+
+ return parser;
}
- }
- @POST
- @Consumes("multipart/form-data")
- @Produces("text/plain")
- @Path("form")
- public StreamingOutput getTextFromMultipart(Attachment att, @Context final UriInfo info) {
- return produceText(att.getObject(InputStream.class), att.getHeaders(), info);
- }
-
- @PUT
- @Consumes("*/*")
- @Produces("text/plain")
- public StreamingOutput getText(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
- return produceText(is, httpHeaders.getRequestHeaders(), info);
- }
- public StreamingOutput produceText(final InputStream is, MultivaluedMap<String, String> httpHeaders, final UriInfo info) {
- final AutoDetectParser parser = createParser(tikaConfig);
- final Metadata metadata = new Metadata();
- final ParseContext context = new ParseContext();
-
- fillMetadata(parser, metadata, context, httpHeaders);
- fillParseContext(context, httpHeaders);
-
- logRequest(logger, info, metadata);
-
- return new StreamingOutput() {
- public void write(OutputStream outputStream) throws IOException, WebApplicationException {
- Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8);
+ public static String detectFilename(MultivaluedMap<String, String> httpHeaders) {
- BodyContentHandler body = new BodyContentHandler(new RichTextContentHandler(writer));
+ String disposition = httpHeaders.getFirst("Content-Disposition");
+ if (disposition != null) {
+ try {
+ ContentDisposition c = new ContentDisposition(disposition);
+
+ // only support "attachment" dispositions
+ if ("attachment".equals(c.getDisposition())) {
+ String fn = c.getParameter("filename");
+ if (fn != null) {
+ return fn;
+ }
+ }
+ } catch (ParseException e) {
+ // not a valid content-disposition field
+ }
+ }
- TikaInputStream tis = TikaInputStream.get(is);
+ // this really should not be used, since it's not an official field
+ return httpHeaders.getFirst("File-Name");
+ }
- try {
- parse(parser, logger, info.getPath(), tis, body, metadata, context);
- } finally {
- tis.close();
+ public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders) {
+ TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
+ PDFParserConfig pdfParserConfig = new PDFParserConfig();
+ for (String key : httpHeaders.keySet()) {
+ if (StringUtils.startsWith(key, X_TIKA_OCR_HEADER_PREFIX)) {
+ processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX);
+ } else if (StringUtils.startsWith(key, X_TIKA_PDF_HEADER_PREFIX)) {
+ processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX);
+ }
}
- }
- };
- }
-
- @POST
- @Consumes("multipart/form-data")
- @Produces("text/html")
- @Path("form")
- public StreamingOutput getHTMLFromMultipart(Attachment att, @Context final UriInfo info) {
- return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "html");
- }
-
- @PUT
- @Consumes("*/*")
- @Produces("text/html")
- public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
- return produceOutput(is, httpHeaders.getRequestHeaders(), info, "html");
- }
-
- @POST
- @Consumes("multipart/form-data")
- @Produces("text/xml")
- @Path("form")
- public StreamingOutput getXMLFromMultipart(Attachment att, @Context final UriInfo info) {
- return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "xml");
- }
-
- @PUT
- @Consumes("*/*")
- @Produces("text/xml")
- public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
- return produceOutput(is, httpHeaders.getRequestHeaders(), info, "xml");
- }
-
- private StreamingOutput produceOutput(final InputStream is, final MultivaluedMap<String, String> httpHeaders,
- final UriInfo info, final String format) {
- final AutoDetectParser parser = createParser(tikaConfig);
- final Metadata metadata = new Metadata();
- final ParseContext context = new ParseContext();
-
- fillMetadata(parser, metadata, context, httpHeaders);
- fillParseContext(context, httpHeaders);
-
-
- logRequest(logger, info, metadata);
-
- return new StreamingOutput() {
- public void write(OutputStream outputStream)
- throws IOException, WebApplicationException {
- Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8);
- ContentHandler content;
+ parseContext.set(TesseractOCRConfig.class, ocrConfig);
+ parseContext.set(PDFParserConfig.class, pdfParserConfig);
+ }
+ /**
+ * Utility method to set a property on a class via reflection.
+ *
+ * @param httpHeaders the HTTP headers set.
+ * @param object the <code>Object</code> to set the property on.
+ * @param key the key of the HTTP Header.
+ * @param prefix the name of the HTTP Header prefix used to find property.
+ * @throws WebApplicationException thrown when field cannot be found.
+ */
+ private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
try {
- SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance( );
- TransformerHandler handler = factory.newTransformerHandler( );
- handler.getTransformer().setOutputProperty(OutputKeys.METHOD, format);
- handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
- handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, IOUtils.UTF_8.name());
- handler.setResult(new StreamResult(writer));
- content = new ExpandedTitleContentHandler( handler );
+ String property = StringUtils.removeStart(key, prefix);
+ Field field = object.getClass().getDeclaredField(StringUtils.uncapitalize(property));
+ field.setAccessible(true);
+ if (field.getType() == String.class) {
+ field.set(object, httpHeaders.getFirst(key));
+ } else if (field.getType() == int.class) {
+ field.setInt(object, Integer.parseInt(httpHeaders.getFirst(key)));
+ } else if (field.getType() == double.class) {
+ field.setDouble(object, Double.parseDouble(httpHeaders.getFirst(key)));
+ } else if (field.getType() == boolean.class) {
+ field.setBoolean(object, Boolean.parseBoolean(httpHeaders.getFirst(key)));
+ }
+ } catch (Throwable ex) {
+ throw new WebApplicationException(String.format(Locale.ROOT,
+ "%s is an invalid %s header", key, X_TIKA_OCR_HEADER_PREFIX));
}
- catch ( TransformerConfigurationException e ) {
- throw new WebApplicationException( e );
+ }
+
+ @SuppressWarnings("serial")
+ public static void fillMetadata(AutoDetectParser parser, Metadata metadata, ParseContext context, MultivaluedMap<String, String> httpHeaders) {
+ String fileName = detectFilename(httpHeaders);
+ if (fileName != null) {
+ metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
}
- TikaInputStream tis = TikaInputStream.get(is);
+ String contentTypeHeader = httpHeaders.getFirst(HttpHeaders.CONTENT_TYPE);
+ javax.ws.rs.core.MediaType mediaType = contentTypeHeader == null ? null
+ : javax.ws.rs.core.MediaType.valueOf(contentTypeHeader);
+ if (mediaType != null && "xml".equals(mediaType.getSubtype())) {
+ mediaType = null;
+ }
- try {
- parse(parser, logger, info.getPath(), tis, content, metadata, context);
- } finally {
- tis.close();
+ if (mediaType != null && mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
+ mediaType = null;
+ }
+
+ if (mediaType != null) {
+ metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString());
+
+ final Detector detector = parser.getDetector();
+
+ parser.setDetector(new Detector() {
+ public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
+ String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
+
+ if (ct != null) {
+ return MediaType.parse(ct);
+ } else {
+ return detector.detect(inputStream, metadata);
+ }
+ }
+ });
+ }
+
+ final String password = httpHeaders.getFirst("Password");
+ if (password != null) {
+ context.set(PasswordProvider.class, new PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return password;
+ }
+ });
}
- }
- };
- }
+ }
public static void parse(Parser parser, Log logger, String path, InputStream inputStream,
ContentHandler handler, Metadata metadata, ParseContext parseContext) throws IOException {
try {
parser.parse(inputStream, handler, metadata, parseContext);
} catch (SAXException e) {
- throw new TikaServerParseException(e);
+ throw new TikaServerParseException(e);
} catch (EncryptedDocumentException e) {
logger.warn(String.format(
- Locale.ROOT,
- "%s: Encrypted document",
- path
- ), e);
- throw new TikaServerParseException(e);
+ Locale.ROOT,
+ "%s: Encrypted document",
+ path
+ ), e);
+ throw new TikaServerParseException(e);
} catch (Exception e) {
logger.warn(String.format(
Locale.ROOT,
@@ -367,21 +249,139 @@ public static void fillMetadata(AutoDete
}
}
+ public static void logRequest(Log logger, UriInfo info, Metadata metadata) {
+ if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE) == null) {
+ logger.info(String.format(
+ Locale.ROOT,
+ "%s (autodetecting type)",
+ info.getPath()
+ ));
+ } else {
+ logger.info(String.format(
+ Locale.ROOT,
+ "%s (%s)",
+ info.getPath(),
+ metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)
+ ));
+ }
+ }
+
+ @GET
+ @Produces("text/plain")
+ public String getMessage() {
+ return GREETING;
+ }
+
+ @POST
+ @Consumes("multipart/form-data")
+ @Produces("text/plain")
+ @Path("form")
+ public StreamingOutput getTextFromMultipart(Attachment att, @Context final UriInfo info) {
+ return produceText(att.getObject(InputStream.class), att.getHeaders(), info);
+ }
+
+ @PUT
+ @Consumes("*/*")
+ @Produces("text/plain")
+ public StreamingOutput getText(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
+ return produceText(is, httpHeaders.getRequestHeaders(), info);
+ }
+
+ public StreamingOutput produceText(final InputStream is, MultivaluedMap<String, String> httpHeaders, final UriInfo info) {
+ final AutoDetectParser parser = createParser(tikaConfig);
+ final Metadata metadata = new Metadata();
+ final ParseContext context = new ParseContext();
+
+ fillMetadata(parser, metadata, context, httpHeaders);
+ fillParseContext(context, httpHeaders);
+
+ logRequest(logger, info, metadata);
- public static void logRequest(Log logger, UriInfo info, Metadata metadata) {
- if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)==null) {
- logger.info(String.format(
- Locale.ROOT,
- "%s (autodetecting type)",
- info.getPath()
- ));
- } else {
- logger.info(String.format(
- Locale.ROOT,
- "%s (%s)",
- info.getPath(),
- metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)
- ));
+ return new StreamingOutput() {
+ public void write(OutputStream outputStream) throws IOException, WebApplicationException {
+ Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8);
+
+ BodyContentHandler body = new BodyContentHandler(new RichTextContentHandler(writer));
+
+ TikaInputStream tis = TikaInputStream.get(is);
+
+ try {
+ parse(parser, logger, info.getPath(), tis, body, metadata, context);
+ } finally {
+ tis.close();
+ }
+ }
+ };
+ }
+
+ @POST
+ @Consumes("multipart/form-data")
+ @Produces("text/html")
+ @Path("form")
+ public StreamingOutput getHTMLFromMultipart(Attachment att, @Context final UriInfo info) {
+ return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "html");
+ }
+
+ @PUT
+ @Consumes("*/*")
+ @Produces("text/html")
+ public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
+ return produceOutput(is, httpHeaders.getRequestHeaders(), info, "html");
+ }
+
+ @POST
+ @Consumes("multipart/form-data")
+ @Produces("text/xml")
+ @Path("form")
+ public StreamingOutput getXMLFromMultipart(Attachment att, @Context final UriInfo info) {
+ return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "xml");
+ }
+
+ @PUT
+ @Consumes("*/*")
+ @Produces("text/xml")
+ public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) {
+ return produceOutput(is, httpHeaders.getRequestHeaders(), info, "xml");
+ }
+
+ private StreamingOutput produceOutput(final InputStream is, final MultivaluedMap<String, String> httpHeaders,
+ final UriInfo info, final String format) {
+ final AutoDetectParser parser = createParser(tikaConfig);
+ final Metadata metadata = new Metadata();
+ final ParseContext context = new ParseContext();
+
+ fillMetadata(parser, metadata, context, httpHeaders);
+ fillParseContext(context, httpHeaders);
+
+
+ logRequest(logger, info, metadata);
+
+ return new StreamingOutput() {
+ public void write(OutputStream outputStream)
+ throws IOException, WebApplicationException {
+ Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8);
+ ContentHandler content;
+
+ try {
+ SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
+ TransformerHandler handler = factory.newTransformerHandler();
+ handler.getTransformer().setOutputProperty(OutputKeys.METHOD, format);
+ handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
+ handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, IOUtils.UTF_8.name());
+ handler.setResult(new StreamResult(writer));
+ content = new ExpandedTitleContentHandler(handler);
+ } catch (TransformerConfigurationException e) {
+ throw new WebApplicationException(e);
+ }
+
+ TikaInputStream tis = TikaInputStream.get(is);
+
+ try {
+ parse(parser, logger, info.getPath(), tis, content, metadata, context);
+ } finally {
+ tis.close();
+ }
+ }
+ };
}
- }
}
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Fri Feb 20 19:29:42 2015
@@ -39,111 +39,111 @@ import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
public class TikaServerCli {
- private static final Log logger = LogFactory.getLog(TikaServerCli.class);
- public static final int DEFAULT_PORT = 9998;
- public static final String DEFAULT_HOST = "localhost";
- public static final Set<String> LOG_LEVELS =
- new HashSet<String>(Arrays.asList("debug", "info"));
-
- private static Options getOptions() {
- Options options = new Options();
- options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ')');
- options.addOption("p", "port", true, "listen port (default = " + DEFAULT_PORT + ')');
- options.addOption("l", "log", true, "request URI log level ('debug' or 'info')");
- options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'");
- options.addOption("?", "help", false, "this help message");
-
- return options;
- }
-
- public static void main(String[] args) {
-
- logger.info("Starting " + new Tika().toString() + " server");
-
- try {
- Options options = getOptions();
-
- CommandLineParser cliParser = new GnuParser();
- CommandLine line = cliParser.parse(options, args);
-
- if (line.hasOption("help")) {
- HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.printHelp("tikaserver", options);
- System.exit(-1);
- }
-
- String host = DEFAULT_HOST;
-
- if (line.hasOption("host")) {
- host = line.getOptionValue("host");
- }
-
- int port = DEFAULT_PORT;
-
- if (line.hasOption("port")) {
- port = Integer.valueOf(line.getOptionValue("port"));
- }
-
- boolean returnStackTrace = false;
- if (line.hasOption("includeStack")) {
- returnStackTrace = true;
- }
-
- TikaLoggingFilter logFilter = null;
- if (line.hasOption("log")) {
- String logLevel = line.getOptionValue("log");
- if (LOG_LEVELS.contains(logLevel)) {
- boolean isInfoLevel = "info".equals(logLevel);
- logFilter = new TikaLoggingFilter(isInfoLevel);
- } else {
- logger.info("Unsupported request URI log level: " + logLevel);
+ public static final int DEFAULT_PORT = 9998;
+ public static final String DEFAULT_HOST = "localhost";
+ public static final Set<String> LOG_LEVELS =
+ new HashSet<String>(Arrays.asList("debug", "info"));
+ private static final Log logger = LogFactory.getLog(TikaServerCli.class);
+
+ private static Options getOptions() {
+ Options options = new Options();
+ options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ')');
+ options.addOption("p", "port", true, "listen port (default = " + DEFAULT_PORT + ')');
+ options.addOption("l", "log", true, "request URI log level ('debug' or 'info')");
+ options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'");
+ options.addOption("?", "help", false, "this help message");
+
+ return options;
+ }
+
+ public static void main(String[] args) {
+
+ logger.info("Starting " + new Tika().toString() + " server");
+
+ try {
+ Options options = getOptions();
+
+ CommandLineParser cliParser = new GnuParser();
+ CommandLine line = cliParser.parse(options, args);
+
+ if (line.hasOption("help")) {
+ HelpFormatter helpFormatter = new HelpFormatter();
+ helpFormatter.printHelp("tikaserver", options);
+ System.exit(-1);
+ }
+
+ String host = DEFAULT_HOST;
+
+ if (line.hasOption("host")) {
+ host = line.getOptionValue("host");
+ }
+
+ int port = DEFAULT_PORT;
+
+ if (line.hasOption("port")) {
+ port = Integer.valueOf(line.getOptionValue("port"));
+ }
+
+ boolean returnStackTrace = false;
+ if (line.hasOption("includeStack")) {
+ returnStackTrace = true;
+ }
+
+ TikaLoggingFilter logFilter = null;
+ if (line.hasOption("log")) {
+ String logLevel = line.getOptionValue("log");
+ if (LOG_LEVELS.contains(logLevel)) {
+ boolean isInfoLevel = "info".equals(logLevel);
+ logFilter = new TikaLoggingFilter(isInfoLevel);
+ } else {
+ logger.info("Unsupported request URI log level: " + logLevel);
+ }
+ }
+ // The Tika Configuration to use throughout
+ TikaConfig tika = TikaConfig.getDefaultConfig();
+
+ JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
+
+ List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>();
+ rCoreProviders.add(new SingletonResourceProvider(new MetadataResource(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new TikaParsers(tika)));
+ rCoreProviders.add(new SingletonResourceProvider(new TikaVersion(tika)));
+ List<ResourceProvider> rAllProviders = new ArrayList<ResourceProvider>(rCoreProviders);
+ rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(tika, rCoreProviders)));
+ sf.setResourceProviders(rAllProviders);
+
+ List<Object> providers = new ArrayList<Object>();
+ providers.add(new TarWriter());
+ providers.add(new ZipWriter());
+ providers.add(new CSVMessageBodyWriter());
+ providers.add(new MetadataListMessageBodyWriter());
+ providers.add(new JSONMessageBodyWriter());
+ providers.add(new XMPMessageBodyWriter());
+ providers.add(new TextMessageBodyWriter());
+ providers.add(new TikaServerParseExceptionMapper(returnStackTrace));
+ if (logFilter != null) {
+ providers.add(logFilter);
+ }
+ sf.setProviders(providers);
+
+ sf.setAddress("http://" + host + ":" + port + "/");
+ BindingFactoryManager manager = sf.getBus().getExtension(
+ BindingFactoryManager.class);
+ JAXRSBindingFactory factory = new JAXRSBindingFactory();
+ factory.setBus(sf.getBus());
+ manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID,
+ factory);
+ sf.create();
+ logger.info("Started");
+ } catch (Exception ex) {
+ logger.fatal("Can't start", ex);
+ System.exit(-1);
}
- }
- // The Tika Configuration to use throughout
- TikaConfig tika = TikaConfig.getDefaultConfig();
-
- JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
-
- List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>();
- rCoreProviders.add(new SingletonResourceProvider(new MetadataResource(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new TikaParsers(tika)));
- rCoreProviders.add(new SingletonResourceProvider(new TikaVersion(tika)));
- List<ResourceProvider> rAllProviders = new ArrayList<ResourceProvider>(rCoreProviders);
- rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(tika, rCoreProviders)));
- sf.setResourceProviders(rAllProviders);
-
- List<Object> providers = new ArrayList<Object>();
- providers.add(new TarWriter());
- providers.add(new ZipWriter());
- providers.add(new CSVMessageBodyWriter());
- providers.add(new MetadataListMessageBodyWriter());
- providers.add(new JSONMessageBodyWriter());
- providers.add(new XMPMessageBodyWriter());
- providers.add(new TextMessageBodyWriter());
- providers.add(new TikaServerParseExceptionMapper(returnStackTrace));
- if (logFilter != null) {
- providers.add(logFilter);
- }
- sf.setProviders(providers);
-
- sf.setAddress("http://" + host + ":" + port + "/");
- BindingFactoryManager manager = sf.getBus().getExtension(
- BindingFactoryManager.class);
- JAXRSBindingFactory factory = new JAXRSBindingFactory();
- factory.setBus(sf.getBus());
- manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID,
- factory);
- sf.create();
- logger.info("Started");
- } catch (Exception ex) {
- logger.fatal("Can't start", ex);
- System.exit(-1);
}
- }
}
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java Fri Feb 20 19:29:42 2015
@@ -31,6 +31,7 @@ import org.apache.tika.exception.TikaExc
@Provider
public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServerParseException> {
+
private final boolean returnStack;
public TikaServerParseExceptionMapper(boolean returnStack) {
@@ -51,12 +52,12 @@ public class TikaServerParseExceptionMap
//unsupported media type
Throwable causeOfCause = cause.getCause();
if (causeOfCause instanceof WebApplicationException) {
- return ((WebApplicationException)causeOfCause).getResponse();
+ return ((WebApplicationException) causeOfCause).getResponse();
}
return buildResponse(cause, 422);
} else if (cause instanceof IllegalStateException) {
return buildResponse(cause, 422);
- } else if(cause instanceof OldWordFileFormatException) {
+ } else if (cause instanceof OldWordFileFormatException) {
return buildResponse(cause, 422);
} else if (cause instanceof WebApplicationException) {
return ((WebApplicationException) e.getCause()).getResponse();
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java Fri Feb 20 19:29:42 2015
@@ -23,16 +23,17 @@ import javax.ws.rs.Produces;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
-@Path("/version")
-public class TikaVersion {
- private Tika tika;
- public TikaVersion(TikaConfig tika) {
- this.tika = new Tika(tika);
- }
-
- @GET
- @Produces("text/plain")
- public String getVersion() {
- return tika.toString();
- }
-}
+@Path("/version")
+public class TikaVersion {
+ private Tika tika;
+
+ public TikaVersion(TikaConfig tika) {
+ this.tika = new Tika(tika);
+ }
+
+ @GET
+ @Produces("text/plain")
+ public String getVersion() {
+ return tika.toString();
+ }
+}
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java Fri Feb 20 19:29:42 2015
@@ -13,12 +13,21 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
-package org.apache.tika.server;
-
-import java.lang.annotation.Annotation;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
+ */
+package org.apache.tika.server;
+
+import javax.ws.rs.DELETE;
+import javax.ws.rs.GET;
+import javax.ws.rs.HEAD;
+import javax.ws.rs.OPTIONS;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
@@ -26,83 +35,74 @@ import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.ws.rs.DELETE;
-import javax.ws.rs.GET;
-import javax.ws.rs.HEAD;
-import javax.ws.rs.OPTIONS;
-import javax.ws.rs.POST;
-import javax.ws.rs.PUT;
-import javax.ws.rs.Path;
-import javax.ws.rs.Produces;
-
-import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import sun.misc.Regexp;
-
-/**
- * <p>Provides a basic welcome to the Apache Tika Server.</p>
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+
+/**
+ * <p>Provides a basic welcome to the Apache Tika Server.</p>
*/
-@Path("/")
-public class TikaWelcome {
- private static final String DOCS_URL = "https://wiki.apache.org/tika/TikaJAXRS";
-
- private static final Map<Class<? extends Annotation>, String> HTTP_METHODS =
- new HashMap<Class<? extends Annotation>, String>();
- static {
- HTTP_METHODS.put(DELETE.class , "DELETE");
- HTTP_METHODS.put(GET.class, "GET");
- HTTP_METHODS.put(HEAD.class, "HEAD");
- HTTP_METHODS.put(OPTIONS.class, "OPTIONS");
- HTTP_METHODS.put(POST.class, "POST");
- HTTP_METHODS.put(PUT.class, "PUT");
- }
-
- private Tika tika;
- private HTMLHelper html;
- private List<Class<?>> endpoints = new LinkedList<Class<?>>();
-
- public TikaWelcome(TikaConfig tika, List<ResourceProvider> rCoreProviders) {
- this.tika = new Tika(tika);
- this.html = new HTMLHelper();
+@Path("/")
+public class TikaWelcome {
+ private static final String DOCS_URL = "https://wiki.apache.org/tika/TikaJAXRS";
+
+ private static final Map<Class<? extends Annotation>, String> HTTP_METHODS =
+ new HashMap<Class<? extends Annotation>, String>();
+
+ static {
+ HTTP_METHODS.put(DELETE.class, "DELETE");
+ HTTP_METHODS.put(GET.class, "GET");
+ HTTP_METHODS.put(HEAD.class, "HEAD");
+ HTTP_METHODS.put(OPTIONS.class, "OPTIONS");
+ HTTP_METHODS.put(POST.class, "POST");
+ HTTP_METHODS.put(PUT.class, "PUT");
+ }
+
+ private Tika tika;
+ private HTMLHelper html;
+ private List<Class<?>> endpoints = new LinkedList<Class<?>>();
+
+ public TikaWelcome(TikaConfig tika, List<ResourceProvider> rCoreProviders) {
+ this.tika = new Tika(tika);
+ this.html = new HTMLHelper();
for (ResourceProvider rp : rCoreProviders) {
- this.endpoints.add(rp.getResourceClass());
- }
- }
-
- protected List<Endpoint> identifyEndpoints() {
- List<Endpoint> found = new ArrayList<Endpoint>();
- for (Class<?> endpoint : endpoints) {
+ this.endpoints.add(rp.getResourceClass());
+ }
+ }
+
+ protected List<Endpoint> identifyEndpoints() {
+ List<Endpoint> found = new ArrayList<Endpoint>();
+ for (Class<?> endpoint : endpoints) {
Path p = endpoint.getAnnotation(Path.class);
String basePath = null;
if (p != null)
basePath = p.value();
for (Method m : endpoint.getMethods()) {
- String httpMethod = null;
- String methodPath = null;
- String[] produces = null;
-
- for (Annotation a : m.getAnnotations()) {
- for (Class<? extends Annotation> httpMethAnn : HTTP_METHODS.keySet()) {
- if (httpMethAnn.isInstance(a)) {
+ String httpMethod = null;
+ String methodPath = null;
+ String[] produces = null;
+
+ for (Annotation a : m.getAnnotations()) {
+ for (Class<? extends Annotation> httpMethAnn : HTTP_METHODS.keySet()) {
+ if (httpMethAnn.isInstance(a)) {
httpMethod = HTTP_METHODS.get(httpMethAnn);
- }
- }
- if (a instanceof Path) {
- methodPath = ((Path)a).value();
- }
- if (a instanceof Produces) {
- produces = ((Produces)a).value();
- }
- }
-
- if (httpMethod != null) {
- String mPath = basePath;
- if (mPath == null) {
+ }
+ }
+ if (a instanceof Path) {
+ methodPath = ((Path) a).value();
+ }
+ if (a instanceof Produces) {
+ produces = ((Produces) a).value();
+ }
+ }
+
+ if (httpMethod != null) {
+ String mPath = basePath;
+ if (mPath == null) {
mPath = "";
}
if (methodPath != null) {
@@ -124,21 +124,21 @@ public class TikaWelcome {
}
return res;
}
- });
- return found;
- }
-
- @GET
- @Produces("text/html")
- public String getWelcomeHTML() {
+ });
+ return found;
+ }
+
+ @GET
+ @Produces("text/html")
+ public String getWelcomeHTML() {
StringBuffer h = new StringBuffer();
- String tikaVersion = tika.toString();
-
- html.generateHeader(h, "Welcome to the " + tikaVersion + " Server");
-
- h.append("<p>For endpoints, please see <a href=\"");
- h.append(DOCS_URL);
- h.append("\">");
+ String tikaVersion = tika.toString();
+
+ html.generateHeader(h, "Welcome to the " + tikaVersion + " Server");
+
+ h.append("<p>For endpoints, please see <a href=\"");
+ h.append(DOCS_URL);
+ h.append("\">");
h.append(DOCS_URL);
h.append("</a>");
@@ -184,19 +184,19 @@ public class TikaWelcome {
}
@GET
- @Produces("text/plain")
- public String getWelcomePlain() {
- StringBuffer text = new StringBuffer();
-
- text.append(tika.toString());
- text.append("\n");
- text.append("For endpoints, please see ");
- text.append(DOCS_URL);
- text.append("\n\n");
-
- for (Endpoint e : identifyEndpoints()) {
- text.append(e.httpMethod);
- text.append(" ");
+ @Produces("text/plain")
+ public String getWelcomePlain() {
+ StringBuffer text = new StringBuffer();
+
+ text.append(tika.toString());
+ text.append("\n");
+ text.append("For endpoints, please see ");
+ text.append(DOCS_URL);
+ text.append("\n\n");
+
+ for (Endpoint e : identifyEndpoints()) {
+ text.append(e.httpMethod);
+ text.append(" ");
text.append(e.path);
text.append("\n");
for (String produces : e.produces) {
@@ -205,19 +205,20 @@ public class TikaWelcome {
text.append("\n");
}
}
-
- return text.toString();
- }
-
- protected class Endpoint {
- public final String className;
- public final String methodName;
- public final String path;
- public final String httpMethod;
- public final List<String> produces;
- protected Endpoint(Class<?> endpoint, Method method, String path,
- String httpMethod, String[] produces) {
- this.className = endpoint.getCanonicalName();
+
+ return text.toString();
+ }
+
+ protected class Endpoint {
+ public final String className;
+ public final String methodName;
+ public final String path;
+ public final String httpMethod;
+ public final List<String> produces;
+
+ protected Endpoint(Class<?> endpoint, Method method, String path,
+ String httpMethod, String[] produces) {
+ this.className = endpoint.getCanonicalName();
this.methodName = method.getName();
this.path = path;
this.httpMethod = httpMethod;
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java Fri Feb 20 19:29:42 2015
@@ -65,199 +65,200 @@ import org.xml.sax.helpers.DefaultHandle
@Path("/unpack")
public class UnpackerResource {
- private static final Log logger = LogFactory.getLog(UnpackerResource.class);
- public static final String TEXT_FILENAME = "__TEXT__";
- private static final String META_FILENAME = "__METADATA__";
+ public static final String TEXT_FILENAME = "__TEXT__";
+ private static final Log logger = LogFactory.getLog(UnpackerResource.class);
+ private static final String META_FILENAME = "__METADATA__";
- private TikaConfig tikaConfig;
- public UnpackerResource(TikaConfig tikaConfig) {
- this.tikaConfig = tikaConfig;
- }
+ private TikaConfig tikaConfig;
- @Path("/{id:(/.*)?}")
- @PUT
- @Produces({"application/zip", "application/x-tar"})
- public Map<String, byte[]> unpack(
- InputStream is,
- @Context HttpHeaders httpHeaders,
- @Context UriInfo info
- ) throws Exception {
- return process(is, httpHeaders, info, false);
- }
-
- @Path("/all{id:(/.*)?}")
- @PUT
- @Produces({"application/zip", "application/x-tar"})
- public Map<String, byte[]> unpackAll(
- InputStream is,
- @Context HttpHeaders httpHeaders,
- @Context UriInfo info
- ) throws Exception {
- return process(is, httpHeaders, info, true);
- }
-
- private Map<String, byte[]> process(
- InputStream is,
- @Context HttpHeaders httpHeaders,
- @Context UriInfo info,
- boolean saveAll
- ) throws Exception {
- Metadata metadata = new Metadata();
- ParseContext pc = new ParseContext();
-
- AutoDetectParser parser = TikaResource.createParser(tikaConfig);
-
- TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders());
- TikaResource.logRequest(logger, info, metadata);
-
- ContentHandler ch;
- ByteArrayOutputStream text = new ByteArrayOutputStream();
-
- if (saveAll) {
- ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, org.apache.tika.io.IOUtils.UTF_8)));
- } else {
- ch = new DefaultHandler();
+ public UnpackerResource(TikaConfig tikaConfig) {
+ this.tikaConfig = tikaConfig;
}
- Map<String, byte[]> files = new HashMap<String, byte[]>();
- MutableInt count = new MutableInt();
+ public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException {
+ CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, org.apache.tika.io.IOUtils.UTF_8));
- pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files));
- TikaResource.parse(parser, logger, info.getPath(), is, ch, metadata, pc);
+ for (String name : metadata.names()) {
+ String[] values = metadata.getValues(name);
+ ArrayList<String> list = new ArrayList<String>(values.length + 1);
+ list.add(name);
+ list.addAll(Arrays.asList(values));
+ writer.writeNext(list.toArray(values));
+ }
- if (count.intValue() == 0 && !saveAll) {
- throw new WebApplicationException(Response.Status.NO_CONTENT);
+ writer.close();
}
- if (saveAll) {
- files.put(TEXT_FILENAME, text.toByteArray());
+ @Path("/{id:(/.*)?}")
+ @PUT
+ @Produces({"application/zip", "application/x-tar"})
+ public Map<String, byte[]> unpack(
+ InputStream is,
+ @Context HttpHeaders httpHeaders,
+ @Context UriInfo info
+ ) throws Exception {
+ return process(is, httpHeaders, info, false);
+ }
+
+ @Path("/all{id:(/.*)?}")
+ @PUT
+ @Produces({"application/zip", "application/x-tar"})
+ public Map<String, byte[]> unpackAll(
+ InputStream is,
+ @Context HttpHeaders httpHeaders,
+ @Context UriInfo info
+ ) throws Exception {
+ return process(is, httpHeaders, info, true);
+ }
+
+ private Map<String, byte[]> process(
+ InputStream is,
+ @Context HttpHeaders httpHeaders,
+ @Context UriInfo info,
+ boolean saveAll
+ ) throws Exception {
+ Metadata metadata = new Metadata();
+ ParseContext pc = new ParseContext();
+
+ AutoDetectParser parser = TikaResource.createParser(tikaConfig);
- ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
- metadataToCsv(metadata, metaStream);
+ TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders());
+ TikaResource.logRequest(logger, info, metadata);
- files.put(META_FILENAME, metaStream.toByteArray());
- }
-
- return files;
- }
+ ContentHandler ch;
+ ByteArrayOutputStream text = new ByteArrayOutputStream();
- public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException {
- CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, org.apache.tika.io.IOUtils.UTF_8));
-
- for (String name : metadata.names()) {
- String[] values = metadata.getValues(name);
- ArrayList<String> list = new ArrayList<String>(values.length+1);
- list.add(name);
- list.addAll(Arrays.asList(values));
- writer.writeNext(list.toArray(values));
- }
+ if (saveAll) {
+ ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, org.apache.tika.io.IOUtils.UTF_8)));
+ } else {
+ ch = new DefaultHandler();
+ }
- writer.close();
- }
+ Map<String, byte[]> files = new HashMap<String, byte[]>();
+ MutableInt count = new MutableInt();
- private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
- private final MutableInt count;
- private final Map<String, byte[]> zout;
+ pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files));
+ TikaResource.parse(parser, logger, info.getPath(), is, ch, metadata, pc);
- MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
- this.count = count;
- this.zout = zout;
- }
+ if (count.intValue() == 0 && !saveAll) {
+ throw new WebApplicationException(Response.Status.NO_CONTENT);
+ }
- public boolean shouldParseEmbedded(Metadata metadata) {
- return true;
- }
+ if (saveAll) {
+ files.put(TEXT_FILENAME, text.toByteArray());
- public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b) throws SAXException, IOException {
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(inputStream, bos);
- byte[] data = bos.toByteArray();
+ ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
+ metadataToCsv(metadata, metaStream);
- String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
- String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
+ files.put(META_FILENAME, metaStream.toByteArray());
+ }
- if (name == null) {
- name = Integer.toString(count.intValue());
- }
+ return files;
+ }
- if (!name.contains(".") && contentType!=null) {
- try {
- String ext = tikaConfig.getMimeRepository().forName(contentType).getExtension();
+ private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
+ private final MutableInt count;
+ private final Map<String, byte[]> zout;
- if (ext!=null) {
- name += ext;
- }
- } catch (MimeTypeException e) {
- logger.warn("Unexpected MimeTypeException", e);
+ MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
+ this.count = count;
+ this.zout = zout;
}
- }
- if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) {
- POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data));
- OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
-
- if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
- try {
- Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
- if (ole.getDataSize()>0) {
- String label = ole.getLabel();
+ public boolean shouldParseEmbedded(Metadata metadata) {
+ return true;
+ }
- if (label.startsWith("ole-")) {
- label = Integer.toString(count.intValue()) + '-' + label;
- }
+ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b) throws SAXException, IOException {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ IOUtils.copy(inputStream, bos);
+ byte[] data = bos.toByteArray();
- name = label;
+ String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
+ String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
- data = ole.getDataBuffer();
+ if (name == null) {
+ name = Integer.toString(count.intValue());
}
- } catch (Ole10NativeException ex) {
- logger.warn("Skipping invalid part", ex);
- }
- } else {
- name += '.' + type.getExtension();
- }
- }
- final String finalName = name;
+ if (!name.contains(".") && contentType != null) {
+ try {
+ String ext = tikaConfig.getMimeRepository().forName(contentType).getExtension();
+
+ if (ext != null) {
+ name += ext;
+ }
+ } catch (MimeTypeException e) {
+ logger.warn("Unexpected MimeTypeException", e);
+ }
+ }
- if (data.length > 0) {
- zout.put(finalName, data);
+ if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) {
+ POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data));
+ OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
+
+ if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
+ try {
+ Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
+ if (ole.getDataSize() > 0) {
+ String label = ole.getLabel();
+
+ if (label.startsWith("ole-")) {
+ label = Integer.toString(count.intValue()) + '-' + label;
+ }
+
+ name = label;
+
+ data = ole.getDataBuffer();
+ }
+ } catch (Ole10NativeException ex) {
+ logger.warn("Skipping invalid part", ex);
+ }
+ } else {
+ name += '.' + type.getExtension();
+ }
+ }
- count.increment();
- } else {
- if (inputStream instanceof TikaInputStream) {
- TikaInputStream tin = (TikaInputStream) inputStream;
+ final String finalName = name;
- if (tin.getOpenContainer()!=null && tin.getOpenContainer() instanceof DirectoryEntry) {
- POIFSFileSystem fs = new POIFSFileSystem();
- copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
- ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
- fs.writeFilesystem(bos2);
- bos2.close();
+ if (data.length > 0) {
+ zout.put(finalName, data);
- zout.put(finalName, bos2.toByteArray());
- }
+ count.increment();
+ } else {
+ if (inputStream instanceof TikaInputStream) {
+ TikaInputStream tin = (TikaInputStream) inputStream;
+
+ if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) {
+ POIFSFileSystem fs = new POIFSFileSystem();
+ copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
+ ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
+ fs.writeFilesystem(bos2);
+ bos2.close();
+
+ zout.put(finalName, bos2.toByteArray());
+ }
+ }
+ }
}
- }
- }
- protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
- throws IOException {
- for (Entry entry : sourceDir) {
- if (entry instanceof DirectoryEntry) {
- // Need to recurse
- DirectoryEntry newDir = destDir.createDirectory(entry.getName());
- copy((DirectoryEntry) entry, newDir);
- } else {
- // Copy entry
- InputStream contents = new DocumentInputStream((DocumentEntry) entry);
- try {
- destDir.createDocument(entry.getName(), contents);
- } finally {
- contents.close();
- }
+ protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
+ throws IOException {
+ for (Entry entry : sourceDir) {
+ if (entry instanceof DirectoryEntry) {
+ // Need to recurse
+ DirectoryEntry newDir = destDir.createDirectory(entry.getName());
+ copy((DirectoryEntry) entry, newDir);
+ } else {
+ // Copy entry
+ InputStream contents = new DocumentInputStream((DocumentEntry) entry);
+ try {
+ destDir.createDocument(entry.getName(), contents);
+ } finally {
+ contents.close();
+ }
+ }
+ }
}
- }
}
- }
}
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java Fri Feb 20 19:29:42 2015
@@ -49,19 +49,19 @@ public class XMPMessageBodyWriter implem
public long getSize(Metadata data, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
return -1;
}
-
- @Override
- public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations,
- MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException,
- WebApplicationException {
- try {
- Writer writer = new OutputStreamWriter(entityStream, IOUtils.UTF_8);
- XMPMetadata xmp = new XMPMetadata(metadata);
- writer.write(xmp.toString());
- writer.flush();
- } catch (TikaException e) {
- throw new IOException(e);
- }
- entityStream.flush();
- }
-}
+
+ @Override
+ public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations,
+ MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException,
+ WebApplicationException {
+ try {
+ Writer writer = new OutputStreamWriter(entityStream, IOUtils.UTF_8);
+ XMPMetadata xmp = new XMPMetadata(metadata);
+ writer.write(xmp.toString());
+ writer.flush();
+ } catch (TikaException e) {
+ throw new IOException(e);
+ }
+ entityStream.flush();
+ }
+}
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java Fri Feb 20 19:29:42 2015
@@ -14,72 +14,73 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-package org.apache.tika.server;
-
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
-
-import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.MediaType;
-import javax.ws.rs.core.MultivaluedMap;
-import javax.ws.rs.ext.MessageBodyWriter;
-import javax.ws.rs.ext.Provider;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.lang.annotation.Annotation;
+
+package org.apache.tika.server;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
import java.lang.reflect.Type;
import java.util.Map;
import java.util.UUID;
import java.util.zip.CRC32;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipException;
-import java.util.zip.ZipOutputStream;
-
-@Provider
-@Produces("application/zip")
-public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> {
- private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException {
- ZipEntry zipEntry = new ZipEntry(name!=null?name: UUID.randomUUID().toString());
- zipEntry.setMethod(ZipOutputStream.STORED);
-
- zipEntry.setSize(dataBuffer.length);
- CRC32 crc32 = new CRC32();
- crc32.update(dataBuffer);
- zipEntry.setCrc(crc32.getValue());
-
- try {
- zip.putArchiveEntry(new ZipArchiveEntry(zipEntry));
- } catch (ZipException ex) {
- if (name!=null) {
- zipStoreBuffer(zip, "x-"+name, dataBuffer);
- return;
- }
- }
-
- zip.write(dataBuffer);
-
- zip.closeArchiveEntry();
- }
-
- public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
- return Map.class.isAssignableFrom(type);
- }
-
- public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
- return -1;
- }
-
- public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
- ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream);
-
- zip.setMethod(ZipArchiveOutputStream.STORED);
-
- for (Map.Entry<String, byte[]> entry : parts.entrySet()) {
- zipStoreBuffer(zip, entry.getKey(), entry.getValue());
- }
-
- zip.close();
- }
-}
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+
+@Provider
+@Produces("application/zip")
+public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> {
+ private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException {
+ ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString());
+ zipEntry.setMethod(ZipOutputStream.STORED);
+
+ zipEntry.setSize(dataBuffer.length);
+ CRC32 crc32 = new CRC32();
+ crc32.update(dataBuffer);
+ zipEntry.setCrc(crc32.getValue());
+
+ try {
+ zip.putArchiveEntry(new ZipArchiveEntry(zipEntry));
+ } catch (ZipException ex) {
+ if (name != null) {
+ zipStoreBuffer(zip, "x-" + name, dataBuffer);
+ return;
+ }
+ }
+
+ zip.write(dataBuffer);
+
+ zip.closeArchiveEntry();
+ }
+
+ public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
+ return Map.class.isAssignableFrom(type);
+ }
+
+ public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
+ return -1;
+ }
+
+ public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
+ ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream);
+
+ zip.setMethod(ZipArchiveOutputStream.STORED);
+
+ for (Map.Entry<String, byte[]> entry : parts.entrySet()) {
+ zipStoreBuffer(zip, entry.getKey(), entry.getValue());
+ }
+
+ zip.close();
+ }
+}
Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java Fri Feb 20 19:29:42 2015
@@ -42,115 +42,21 @@ import org.apache.cxf.jaxrs.JAXRSServerF
import org.apache.tika.config.TikaConfig;
import org.apache.tika.io.IOUtils;
import org.junit.After;
-import org.junit.Before;
-
-public abstract class CXFTestBase {
- protected static final String endPoint =
- "http://localhost:" + TikaServerCli.DEFAULT_PORT;
- protected Server server;
- protected TikaConfig tika;
-
- @Before
- public void setUp() {
- this.tika = TikaConfig.getDefaultConfig();
-
- JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
- setUpResources(sf);
- setUpProviders(sf);
- sf.setAddress(endPoint + "/");
-
- BindingFactoryManager manager = sf.getBus().getExtension(
- BindingFactoryManager.class
- );
-
- JAXRSBindingFactory factory = new JAXRSBindingFactory();
- factory.setBus(sf.getBus());
-
- manager.registerBindingFactory(
- JAXRSBindingFactory.JAXRS_BINDING_ID,
- factory
- );
-
- server = sf.create();
- }
-
- /**
- * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)}
- * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)}
- */
- protected abstract void setUpResources(JAXRSServerFactoryBean sf);
- /**
- * Have the test do {@link JAXRSServerFactoryBean#setProviders(java.util.List)}, if needed
- */
- protected abstract void setUpProviders(JAXRSServerFactoryBean sf);
-
- @After
- public void tearDown() throws Exception {
- server.stop();
- server.destroy();
- }
-
- public static void assertContains(String needle, String haystack) {
- assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
- }
- public static void assertNotFound(String needle, String haystack) {
- assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
- }
-
- protected String getStringFromInputStream(InputStream in) throws Exception {
- return IOUtils.toString(in);
- }
-
- protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException {
- Map<String, String> data = new HashMap<String, String>();
- File tempFile = writeTemporaryArchiveFile(inputStream, "zip");
- ZipFile zip = new ZipFile(tempFile);
- Enumeration<ZipArchiveEntry> entries = zip.getEntries();
- while (entries.hasMoreElements()) {
- ZipArchiveEntry entry = entries.nextElement();
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(zip.getInputStream(entry), bos);
- data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray()));
- }
-
- zip.close();
- tempFile.delete();
- return data;
- }
-
- protected String readArchiveText(InputStream inputStream) throws IOException {
- File tempFile = writeTemporaryArchiveFile(inputStream, "zip");
- ZipFile zip = new ZipFile(tempFile);
- zip.getEntry(UnpackerResource.TEXT_FILENAME);
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(zip.getInputStream(zip.getEntry(UnpackerResource.TEXT_FILENAME)), bos);
-
- zip.close();
- tempFile.delete();
- return bos.toString(IOUtils.UTF_8.name());
- }
-
- protected Map<String, String> readArchiveFromStream(ArchiveInputStream zip) throws IOException {
- Map<String, String> data = new HashMap<String, String>();
- while (true) {
- ArchiveEntry entry = zip.getNextEntry();
- if (entry == null) {
- break;
- }
-
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(zip, bos);
- data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray()));
- }
-
- return data;
- }
-
- private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException {
- File tempFile = File.createTempFile("tmp-", "." + archiveType);
- IOUtils.copy(inputStream, new FileOutputStream(tempFile));
- return tempFile;
- }
+import org.junit.Before;
+
+public abstract class CXFTestBase {
+ protected static final String endPoint =
+ "http://localhost:" + TikaServerCli.DEFAULT_PORT;
+ protected Server server;
+ protected TikaConfig tika;
+
+ public static void assertContains(String needle, String haystack) {
+ assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
+ }
+
+ public static void assertNotFound(String needle, String haystack) {
+ assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
+ }
protected static InputStream copy(InputStream in, int remaining) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
@@ -166,4 +72,100 @@ public abstract class CXFTestBase {
return new ByteArrayInputStream(out.toByteArray());
}
+ /**
+  * Loads the default Tika configuration and starts a JAX-RS server at
+  * {@code endPoint + "/"}, configured through {@link #setUpResources} and
+  * {@link #setUpProviders}.
+  */
+ @Before
+ public void setUp() {
+     this.tika = TikaConfig.getDefaultConfig();
+
+     JAXRSServerFactoryBean serverFactory = new JAXRSServerFactoryBean();
+     setUpResources(serverFactory);
+     setUpProviders(serverFactory);
+     serverFactory.setAddress(endPoint + "/");
+
+     // Look up the bus-level binding manager, then register a JAX-RS binding factory with it.
+     BindingFactoryManager bindingManager =
+             serverFactory.getBus().getExtension(BindingFactoryManager.class);
+
+     JAXRSBindingFactory bindingFactory = new JAXRSBindingFactory();
+     bindingFactory.setBus(serverFactory.getBus());
+
+     bindingManager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, bindingFactory);
+
+     this.server = serverFactory.create();
+ }
+
+ /**
+  * Registers the JAX-RS resources under test on the given factory. Invoked by
+  * {@link #setUp()} before {@link #setUpProviders} and before the server is created.
+  * <p>
+  * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)}
+  * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)}
+  *
+  * @param sf the server factory being configured for the current test
+  */
+ protected abstract void setUpResources(JAXRSServerFactoryBean sf);
+
+ /**
+  * Registers any JAX-RS providers the test needs on the given factory. Invoked by
+  * {@link #setUp()} after {@link #setUpResources} and before the server is created.
+  * <p>
+  * Have the test do {@link JAXRSServerFactoryBean#setProviders(java.util.List)}, if needed
+  *
+  * @param sf the server factory being configured for the current test
+  */
+ protected abstract void setUpProviders(JAXRSServerFactoryBean sf);
+
+ /**
+  * Stops and destroys the server created in {@link #setUp()}.
+  *
+  * @throws Exception if stopping or destroying the server fails
+  */
+ @After
+ public void tearDown() throws Exception {
+     // JUnit runs @After methods even when @Before threw, so the server may
+     // never have been created; an NPE here would hide the real failure.
+     if (server == null) {
+         return;
+     }
+     try {
+         server.stop();
+     } finally {
+         // Always attempt destroy(), even if stop() threw.
+         server.destroy();
+     }
+ }
+
+ protected String getStringFromInputStream(InputStream in) throws Exception {
+ return IOUtils.toString(in);
+ }
+
+ /**
+  * Spools the stream to a temporary zip file and returns, for every entry in
+  * it, the MD5 hex digest of the entry's bytes keyed by entry name.
+  * <p>
+  * The zip file, each entry stream and the temporary file are released even
+  * when reading fails part-way through.
+  *
+  * @param inputStream zip archive content
+  * @return map from entry name to MD5 hex digest of the entry content
+  * @throws IOException if the archive cannot be written or read
+  */
+ protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException {
+     Map<String, String> data = new HashMap<String, String>();
+     File tempFile = writeTemporaryArchiveFile(inputStream, "zip");
+     try {
+         ZipFile zip = new ZipFile(tempFile);
+         try {
+             Enumeration<ZipArchiveEntry> entries = zip.getEntries();
+             while (entries.hasMoreElements()) {
+                 ZipArchiveEntry entry = entries.nextElement();
+                 ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                 InputStream entryStream = zip.getInputStream(entry);
+                 try {
+                     IOUtils.copy(entryStream, bos);
+                 } finally {
+                     entryStream.close();
+                 }
+                 data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray()));
+             }
+         } finally {
+             zip.close();
+         }
+     } finally {
+         // Best-effort cleanup; runs after the ZipFile has been closed.
+         tempFile.delete();
+     }
+     return data;
+ }
+
+ /**
+  * Spools the stream to a temporary zip file and returns the content of the
+  * entry named {@code UnpackerResource.TEXT_FILENAME}, decoded as UTF-8.
+  * <p>
+  * The zip file, the entry stream and the temporary file are released even
+  * when reading fails.
+  *
+  * @param inputStream zip archive content
+  * @return the text entry's content
+  * @throws IOException if the archive cannot be written or read, or if it
+  *                     has no entry with the expected name
+  */
+ protected String readArchiveText(InputStream inputStream) throws IOException {
+     File tempFile = writeTemporaryArchiveFile(inputStream, "zip");
+     try {
+         ZipFile zip = new ZipFile(tempFile);
+         try {
+             ZipArchiveEntry textEntry = zip.getEntry(UnpackerResource.TEXT_FILENAME);
+             if (textEntry == null) {
+                 // Fail with a descriptive message instead of an NPE further down.
+                 throw new IOException("Archive has no entry named " + UnpackerResource.TEXT_FILENAME);
+             }
+             ByteArrayOutputStream bos = new ByteArrayOutputStream();
+             InputStream entryStream = zip.getInputStream(textEntry);
+             try {
+                 IOUtils.copy(entryStream, bos);
+             } finally {
+                 entryStream.close();
+             }
+             return bos.toString(IOUtils.UTF_8.name());
+         } finally {
+             zip.close();
+         }
+     } finally {
+         // Best-effort cleanup; runs after the ZipFile has been closed.
+         tempFile.delete();
+     }
+ }
+
+ /**
+  * Walks every entry of the archive stream and returns the MD5 hex digest of
+  * each entry's bytes keyed by entry name. The stream is not closed here.
+  *
+  * @param zip archive stream positioned before its first entry
+  * @return map from entry name to MD5 hex digest of the entry content
+  * @throws IOException if reading the archive fails
+  */
+ protected Map<String, String> readArchiveFromStream(ArchiveInputStream zip) throws IOException {
+     Map<String, String> digestsByName = new HashMap<String, String>();
+     for (ArchiveEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
+         // Copying from the archive stream itself reads the current entry's bytes.
+         ByteArrayOutputStream entryBytes = new ByteArrayOutputStream();
+         IOUtils.copy(zip, entryBytes);
+         digestsByName.put(entry.getName(), DigestUtils.md5Hex(entryBytes.toByteArray()));
+     }
+     return digestsByName;
+ }
+
+ /**
+  * Copies the stream into a newly created temporary file named
+  * {@code tmp-*.<archiveType>} and returns that file. The caller is
+  * responsible for deleting it.
+  *
+  * @param inputStream content to spool to disk
+  * @param archiveType file extension to use, without the leading dot
+  * @return the temporary file holding the copied content
+  * @throws IOException if the file cannot be created or written
+  */
+ private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException {
+     File tempFile = File.createTempFile("tmp-", "." + archiveType);
+     FileOutputStream out = new FileOutputStream(tempFile);
+     try {
+         IOUtils.copy(inputStream, out);
+     } finally {
+         // Close the handle so it does not leak and the callers' later delete() is not blocked.
+         out.close();
+     }
+     return tempFile;
+ }
+
}
Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java?rev=1661200&r1=1661199&r2=1661200&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java Fri Feb 20 19:29:42 2015
@@ -17,88 +17,88 @@
package org.apache.tika.server;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import javax.ws.rs.core.Response;
-
-import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
-import org.apache.cxf.jaxrs.client.WebClient;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import javax.ws.rs.core.Response;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.junit.Test;
-
-public class DetectorResourceTest extends CXFTestBase {
-
- private static final String DETECT_PATH = "/detect";
- private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream";
- private static final String FOO_CSV = "foo.csv";
- private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02";
-
- @Override
- protected void setUpResources(JAXRSServerFactoryBean sf) {
- sf.setResourceClasses(DetectorResource.class);
- sf.setResourceProvider(DetectorResource.class,
- new SingletonResourceProvider(new DetectorResource(tika)));
-
- }
-
- @Override
- protected void setUpProviders(JAXRSServerFactoryBean sf) {
- List<Object> providers = new ArrayList<Object>();
- providers.add(new TarWriter());
- providers.add(new ZipWriter());
- providers.add(new TikaServerParseExceptionMapper(false));
- sf.setProviders(providers);
- }
-
- @Test
- public void testDetectCsvWithExt() throws Exception {
- String url = endPoint + DETECT_STREAM_PATH;
- Response response = WebClient
- .create(endPoint + DETECT_STREAM_PATH)
- .type("text/csv")
- .accept("*/*")
- .header("Content-Disposition",
- "attachment; filename=" + FOO_CSV)
- .put(ClassLoader.getSystemResourceAsStream(FOO_CSV));
- assertNotNull(response);
- String readMime = getStringFromInputStream((InputStream) response
- .getEntity());
- assertEquals("text/csv", readMime);
-
- }
-
- @Test
- public void testDetectCsvNoExt() throws Exception {
- String url = endPoint + DETECT_STREAM_PATH;
- Response response = WebClient
- .create(endPoint + DETECT_STREAM_PATH)
- .type("text/csv")
- .accept("*/*")
- .header("Content-Disposition",
- "attachment; filename=" + CDEC_CSV_NO_EXT)
- .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT));
- assertNotNull(response);
- String readMime = getStringFromInputStream((InputStream) response
- .getEntity());
- assertEquals("text/plain", readMime);
-
- // now trick it by adding .csv to the end
- response = WebClient
- .create(endPoint + DETECT_STREAM_PATH)
- .type("text/csv")
- .accept("*/*")
- .header("Content-Disposition",
- "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv")
- .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT));
- assertNotNull(response);
- readMime = getStringFromInputStream((InputStream) response.getEntity());
- assertEquals("text/csv", readMime);
-
- }
-}
+/**
+ * Tests for the {@code /detect/stream} endpoint served by {@link DetectorResource}:
+ * each test PUTs a classpath resource with a {@code Content-Disposition} file name
+ * and checks the media type returned in the response body.
+ */
+public class DetectorResourceTest extends CXFTestBase {
+
+    private static final String DETECT_PATH = "/detect";
+    private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream";
+    private static final String FOO_CSV = "foo.csv";
+    private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02";
+
+    @Override
+    protected void setUpResources(JAXRSServerFactoryBean sf) {
+        sf.setResourceClasses(DetectorResource.class);
+        sf.setResourceProvider(DetectorResource.class,
+                new SingletonResourceProvider(new DetectorResource(tika)));
+    }
+
+    @Override
+    protected void setUpProviders(JAXRSServerFactoryBean sf) {
+        List<Object> providers = new ArrayList<Object>();
+        providers.add(new TarWriter());
+        providers.add(new ZipWriter());
+        providers.add(new TikaServerParseExceptionMapper(false));
+        sf.setProviders(providers);
+    }
+
+    @Test
+    public void testDetectCsvWithExt() throws Exception {
+        assertEquals("text/csv", detectMimeType(FOO_CSV, FOO_CSV));
+    }
+
+    @Test
+    public void testDetectCsvNoExt() throws Exception {
+        assertEquals("text/plain", detectMimeType(CDEC_CSV_NO_EXT, CDEC_CSV_NO_EXT));
+
+        // now trick it by adding .csv to the end
+        assertEquals("text/csv", detectMimeType(CDEC_CSV_NO_EXT + ".csv", CDEC_CSV_NO_EXT));
+    }
+
+    /**
+     * PUTs a classpath resource to the detect endpoint and returns the response body.
+     *
+     * @param fileName     file name announced in the Content-Disposition header
+     * @param resourceName classpath resource whose bytes are sent as the request body
+     * @return the response entity read as a String
+     * @throws Exception if the request or reading the response fails
+     */
+    private String detectMimeType(String fileName, String resourceName) throws Exception {
+        String url = endPoint + DETECT_STREAM_PATH;
+        InputStream payload = ClassLoader.getSystemResourceAsStream(resourceName);
+        assertNotNull("Test resource not found: " + resourceName, payload);
+        try {
+            Response response = WebClient
+                    .create(url)
+                    .type("text/csv")
+                    .accept("*/*")
+                    .header("Content-Disposition",
+                            "attachment; filename=" + fileName)
+                    .put(payload);
+            assertNotNull(response);
+            return getStringFromInputStream((InputStream) response.getEntity());
+        } finally {
+            payload.close();
+        }
+    }
+}