You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ti...@apache.org on 2022/05/29 10:12:56 UTC
[tika] branch branch_1x updated: TIKA-3780: update opencsv

This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 5585fcafa TIKA-3780: update opencsv
5585fcafa is described below

commit 5585fcafaac49e5efee97ed8f3bd0591a87051ba
Author: Tilman Hausherr <ti...@snafu.de>
AuthorDate: Sun May 29 12:12:38 2022 +0200

    TIKA-3780: update opencsv
---
 tika-server/pom.xml                                |  16 +-
 .../tika/server/resource/UnpackerResource.java     | 598 ++++++++++-----------
 .../tika/server/writer/CSVMessageBodyWriter.java   |   2 +-
 .../apache/tika/server/MetadataResourceTest.java   |   2 +-
 4 files changed, 315 insertions(+), 303 deletions(-)

diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index 56862f962..f7ab2cb1e 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -68,9 +68,21 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>net.sf.opencsv</groupId>
+      <groupId>com.opencsv</groupId>
       <artifactId>opencsv</artifactId>
-      <version>2.3</version>
+      <version>5.6</version>
+    </dependency>
+    <!-- org.apache.commons:commons-text:1.9 depends on org.apache.commons:commons-lang3:3.11; exclude that transitive dependency -->
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-text</artifactId>
+      <version>1.9</version>
+      <exclusions>
+          <exclusion>
+              <groupId>org.apache.commons</groupId>
+              <artifactId>commons-lang3</artifactId>
+          </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.cxf</groupId>
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index 76acde946..269508713 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -1,299 +1,299 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.server.resource;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-import javax.ws.rs.PUT;
-import javax.ws.rs.Path;
-import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.Context;
-import javax.ws.rs.core.HttpHeaders;
-import javax.ws.rs.core.Response;
-import javax.ws.rs.core.UriInfo;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.UUID;
-
-import au.com.bytecode.opencsv.CSVWriter;
-import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DocumentEntry;
-import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.Ole10Native;
-import org.apache.poi.poifs.filesystem.Ole10NativeException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
-import org.apache.tika.exception.TikaMemoryLimitException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.io.BoundedInputStream;
-import org.apache.tika.io.IOExceptionWithCause;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
-import org.apache.tika.mime.MimeTypeException;
-import org.apache.tika.parser.DigestingParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.microsoft.OfficeParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.RichTextContentHandler;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-@Path("/unpack")
-public class UnpackerResource {
-    private static final long MAX_ATTACHMENT_BYTES = 100*1024*1024;
-
-    public static final String TEXT_FILENAME = "__TEXT__";
-    private static final String META_FILENAME = "__METADATA__";
-
-    private static final Logger LOG = LoggerFactory.getLogger(UnpackerResource.class);
-
-    public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException {
-        CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, UTF_8));
-
-        for (String name : metadata.names()) {
-            String[] values = metadata.getValues(name);
-            ArrayList<String> list = new ArrayList<>(values.length + 1);
-            list.add(name);
-            list.addAll(Arrays.asList(values));
-            writer.writeNext(list.toArray(values));
-        }
-
-        writer.close();
-    }
-
-    @Path("/{id:(/.*)?}")
-    @PUT
-    @Produces({"application/zip", "application/x-tar"})
-    public Map<String, byte[]> unpack(
-            InputStream is,
-            @Context HttpHeaders httpHeaders,
-            @Context UriInfo info
-    ) throws Exception {
-        return process(TikaResource.getInputStream(is, new Metadata(), httpHeaders), httpHeaders, info, false);
-    }
-
-    @Path("/all{id:(/.*)?}")
-    @PUT
-    @Produces({"application/zip", "application/x-tar"})
-    public Map<String, byte[]> unpackAll(
-            InputStream is,
-            @Context HttpHeaders httpHeaders,
-            @Context UriInfo info
-    ) throws Exception {
-        return process(TikaResource.getInputStream(is, new Metadata(), httpHeaders), httpHeaders, info, true);
-    }
-
-    private Map<String, byte[]> process(
-            InputStream is,
-            @Context HttpHeaders httpHeaders,
-            @Context UriInfo info,
-            boolean saveAll
-    ) throws Exception {
-        Metadata metadata = new Metadata();
-        ParseContext pc = new ParseContext();
-
-        Parser parser = TikaResource.createParser();
-        if (parser instanceof DigestingParser) {
-            //no need to digest for unwrapping
-            parser = ((DigestingParser)parser).getWrappedParser();
-        }
-        TikaResource.fillParseContext(pc, httpHeaders.getRequestHeaders(), null);
-        TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders());
-        TikaResource.logRequest(LOG, info, metadata);
-        //even though we aren't currently parsing embedded documents,
-        //we need to add this to allow for "inline" use of other parsers.
-        pc.set(Parser.class, parser);
-        ContentHandler ch;
-        ByteArrayOutputStream text = new ByteArrayOutputStream();
-
-        if (saveAll) {
-            ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, UTF_8)));
-        } else {
-            ch = new DefaultHandler();
-        }
-
-        Map<String, byte[]> files = new HashMap<>();
-        MutableInt count = new MutableInt();
-
-        pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files));
-        TikaResource.parse(parser, LOG, info.getPath(), is, ch, metadata, pc);
-
-        if (count.intValue() == 0 && !saveAll) {
-            throw new WebApplicationException(Response.Status.NO_CONTENT);
-        }
-
-        if (saveAll) {
-            files.put(TEXT_FILENAME, text.toByteArray());
-
-            ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
-            metadataToCsv(metadata, metaStream);
-
-            files.put(META_FILENAME, metaStream.toByteArray());
-        }
-
-        return files;
-    }
-
-    private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
-        private final MutableInt count;
-        private final Map<String, byte[]> zout;
-
-        MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
-            this.count = count;
-            this.zout = zout;
-        }
-
-        public boolean shouldParseEmbedded(Metadata metadata) {
-            return true;
-        }
-
-        public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b)
-                throws SAXException, IOException {
-            ByteArrayOutputStream bos = new ByteArrayOutputStream();
-            BoundedInputStream bis = new BoundedInputStream(MAX_ATTACHMENT_BYTES, inputStream);
-            IOUtils.copy(bis, bos);
-            if (bis.hasHitBound()) {
-                throw new IOExceptionWithCause(
-                        new TikaMemoryLimitException(MAX_ATTACHMENT_BYTES+1, MAX_ATTACHMENT_BYTES));
-            }
-            byte[] data = bos.toByteArray();
-
-            String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
-            String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
-
-            if (name == null) {
-                name = Integer.toString(count.intValue());
-            }
-
-            if (!name.contains(".") && contentType != null) {
-                try {
-                    String ext = TikaResource.getConfig().getMimeRepository().forName(contentType).getExtension();
-
-                    if (ext != null) {
-                        name += ext;
-                    }
-                } catch (MimeTypeException e) {
-                    LOG.warn("Unexpected MimeTypeException", e);
-                }
-            }
-
-            if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) {
-                POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data));
-                OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
-
-                if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
-                    try {
-                        Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
-                        if (ole.getDataSize() > 0) {
-                            String label = ole.getLabel();
-
-                            if (label.startsWith("ole-")) {
-                                label = Integer.toString(count.intValue()) + '-' + label;
-                            }
-
-                            name = label;
-
-                            data = ole.getDataBuffer();
-                        }
-                    } catch (Ole10NativeException ex) {
-                        LOG.warn("Skipping invalid part", ex);
-                    }
-                } else {
-                    name += '.' + type.getExtension();
-                }
-            }
-
-            final String finalName = getFinalName(name, zout);
-
-            if (data.length > 0) {
-                zout.put(finalName, data);
-
-                count.increment();
-            } else {
-                if (inputStream instanceof TikaInputStream) {
-                    TikaInputStream tin = (TikaInputStream) inputStream;
-
-                    if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) {
-                        POIFSFileSystem fs = new POIFSFileSystem();
-                        copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
-                        ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
-                        fs.writeFilesystem(bos2);
-                        bos2.close();
-
-                        zout.put(finalName, bos2.toByteArray());
-                    }
-                }
-            }
-        }
-
-        private String getFinalName(String name, Map<String, byte[]> zout) {
-            name = name.replaceAll("\u0000", " ");
-            String normalizedName = FilenameUtils.normalize(name);
-
-            if (normalizedName == null) {
-                normalizedName = FilenameUtils.getName(name);
-            }
-
-            if (normalizedName == null) {
-                normalizedName = count.toString();
-            }
-            //strip off initial C:/ or ~/ or /
-            int prefixLength = FilenameUtils.getPrefixLength(normalizedName);
-            if (prefixLength > -1) {
-                normalizedName = normalizedName.substring(prefixLength);
-            }
-            if (zout.containsKey(normalizedName)) {
-                return UUID.randomUUID().toString()+"-"+normalizedName;
-            }
-            return normalizedName;
-        }
-
-        protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
-                throws IOException {
-            for (Entry entry : sourceDir) {
-                if (entry instanceof DirectoryEntry) {
-                    // Need to recurse
-                    DirectoryEntry newDir = destDir.createDirectory(entry.getName());
-                    copy((DirectoryEntry) entry, newDir);
-                } else {
-                    // Copy entry
-                    try (InputStream contents = new DocumentInputStream((DocumentEntry) entry)) {
-                        destDir.createDocument(entry.getName(), contents);
-                    }
-                }
-            }
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server.resource;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.UriInfo;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+import com.opencsv.CSVWriter;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.mutable.MutableInt;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.IOUtils;
+import org.apache.tika.exception.TikaMemoryLimitException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.BoundedInputStream;
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.parser.DigestingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.RichTextContentHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+@Path("/unpack")
+public class UnpackerResource {
+    private static final long MAX_ATTACHMENT_BYTES = 100*1024*1024;
+
+    public static final String TEXT_FILENAME = "__TEXT__";
+    private static final String META_FILENAME = "__METADATA__";
+
+    private static final Logger LOG = LoggerFactory.getLogger(UnpackerResource.class);
+
+    public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException {
+        CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, UTF_8));
+
+        for (String name : metadata.names()) {
+            String[] values = metadata.getValues(name);
+            ArrayList<String> list = new ArrayList<>(values.length + 1);
+            list.add(name);
+            list.addAll(Arrays.asList(values));
+            writer.writeNext(list.toArray(values));
+        }
+
+        writer.close();
+    }
+
+    @Path("/{id:(/.*)?}")
+    @PUT
+    @Produces({"application/zip", "application/x-tar"})
+    public Map<String, byte[]> unpack(
+            InputStream is,
+            @Context HttpHeaders httpHeaders,
+            @Context UriInfo info
+    ) throws Exception {
+        return process(TikaResource.getInputStream(is, new Metadata(), httpHeaders), httpHeaders, info, false);
+    }
+
+    @Path("/all{id:(/.*)?}")
+    @PUT
+    @Produces({"application/zip", "application/x-tar"})
+    public Map<String, byte[]> unpackAll(
+            InputStream is,
+            @Context HttpHeaders httpHeaders,
+            @Context UriInfo info
+    ) throws Exception {
+        return process(TikaResource.getInputStream(is, new Metadata(), httpHeaders), httpHeaders, info, true);
+    }
+
+    private Map<String, byte[]> process(
+            InputStream is,
+            @Context HttpHeaders httpHeaders,
+            @Context UriInfo info,
+            boolean saveAll
+    ) throws Exception {
+        Metadata metadata = new Metadata();
+        ParseContext pc = new ParseContext();
+
+        Parser parser = TikaResource.createParser();
+        if (parser instanceof DigestingParser) {
+            //no need to digest for unwrapping
+            parser = ((DigestingParser)parser).getWrappedParser();
+        }
+        TikaResource.fillParseContext(pc, httpHeaders.getRequestHeaders(), null);
+        TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders());
+        TikaResource.logRequest(LOG, info, metadata);
+        //even though we aren't currently parsing embedded documents,
+        //we need to add this to allow for "inline" use of other parsers.
+        pc.set(Parser.class, parser);
+        ContentHandler ch;
+        ByteArrayOutputStream text = new ByteArrayOutputStream();
+
+        if (saveAll) {
+            ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, UTF_8)));
+        } else {
+            ch = new DefaultHandler();
+        }
+
+        Map<String, byte[]> files = new HashMap<>();
+        MutableInt count = new MutableInt();
+
+        pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files));
+        TikaResource.parse(parser, LOG, info.getPath(), is, ch, metadata, pc);
+
+        if (count.intValue() == 0 && !saveAll) {
+            throw new WebApplicationException(Response.Status.NO_CONTENT);
+        }
+
+        if (saveAll) {
+            files.put(TEXT_FILENAME, text.toByteArray());
+
+            ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
+            metadataToCsv(metadata, metaStream);
+
+            files.put(META_FILENAME, metaStream.toByteArray());
+        }
+
+        return files;
+    }
+
+    private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
+        private final MutableInt count;
+        private final Map<String, byte[]> zout;
+
+        MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
+            this.count = count;
+            this.zout = zout;
+        }
+
+        public boolean shouldParseEmbedded(Metadata metadata) {
+            return true;
+        }
+
+        public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b)
+                throws SAXException, IOException {
+            ByteArrayOutputStream bos = new ByteArrayOutputStream();
+            BoundedInputStream bis = new BoundedInputStream(MAX_ATTACHMENT_BYTES, inputStream);
+            IOUtils.copy(bis, bos);
+            if (bis.hasHitBound()) {
+                throw new IOExceptionWithCause(
+                        new TikaMemoryLimitException(MAX_ATTACHMENT_BYTES+1, MAX_ATTACHMENT_BYTES));
+            }
+            byte[] data = bos.toByteArray();
+
+            String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
+            String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
+
+            if (name == null) {
+                name = Integer.toString(count.intValue());
+            }
+
+            if (!name.contains(".") && contentType != null) {
+                try {
+                    String ext = TikaResource.getConfig().getMimeRepository().forName(contentType).getExtension();
+
+                    if (ext != null) {
+                        name += ext;
+                    }
+                } catch (MimeTypeException e) {
+                    LOG.warn("Unexpected MimeTypeException", e);
+                }
+            }
+
+            if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) {
+                POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data));
+                OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs);
+
+                if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) {
+                    try {
+                        Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
+                        if (ole.getDataSize() > 0) {
+                            String label = ole.getLabel();
+
+                            if (label.startsWith("ole-")) {
+                                label = Integer.toString(count.intValue()) + '-' + label;
+                            }
+
+                            name = label;
+
+                            data = ole.getDataBuffer();
+                        }
+                    } catch (Ole10NativeException ex) {
+                        LOG.warn("Skipping invalid part", ex);
+                    }
+                } else {
+                    name += '.' + type.getExtension();
+                }
+            }
+
+            final String finalName = getFinalName(name, zout);
+
+            if (data.length > 0) {
+                zout.put(finalName, data);
+
+                count.increment();
+            } else {
+                if (inputStream instanceof TikaInputStream) {
+                    TikaInputStream tin = (TikaInputStream) inputStream;
+
+                    if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) {
+                        POIFSFileSystem fs = new POIFSFileSystem();
+                        copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
+                        ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
+                        fs.writeFilesystem(bos2);
+                        bos2.close();
+
+                        zout.put(finalName, bos2.toByteArray());
+                    }
+                }
+            }
+        }
+
+        private String getFinalName(String name, Map<String, byte[]> zout) {
+            name = name.replaceAll("\u0000", " ");
+            String normalizedName = FilenameUtils.normalize(name);
+
+            if (normalizedName == null) {
+                normalizedName = FilenameUtils.getName(name);
+            }
+
+            if (normalizedName == null) {
+                normalizedName = count.toString();
+            }
+            //strip off initial C:/ or ~/ or /
+            int prefixLength = FilenameUtils.getPrefixLength(normalizedName);
+            if (prefixLength > -1) {
+                normalizedName = normalizedName.substring(prefixLength);
+            }
+            if (zout.containsKey(normalizedName)) {
+                return UUID.randomUUID().toString()+"-"+normalizedName;
+            }
+            return normalizedName;
+        }
+
+        protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
+                throws IOException {
+            for (Entry entry : sourceDir) {
+                if (entry instanceof DirectoryEntry) {
+                    // Need to recurse
+                    DirectoryEntry newDir = destDir.createDirectory(entry.getName());
+                    copy((DirectoryEntry) entry, newDir);
+                } else {
+                    // Copy entry
+                    try (InputStream contents = new DocumentInputStream((DocumentEntry) entry)) {
+                        destDir.createDocument(entry.getName(), contents);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java b/tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java
index fcb14d3a0..e5f0b4c70 100644
--- a/tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java
+++ b/tika-server/src/main/java/org/apache/tika/server/writer/CSVMessageBodyWriter.java
@@ -32,7 +32,7 @@ import java.lang.reflect.Type;
 import java.util.ArrayList;
 import java.util.Arrays;
 
-import au.com.bytecode.opencsv.CSVWriter;
+import com.opencsv.CSVWriter;
 import org.apache.tika.metadata.Metadata;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
diff --git a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
index 7cd5f1d4d..cc5ec847d 100644
--- a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
@@ -32,7 +32,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import au.com.bytecode.opencsv.CSVReader;
+import com.opencsv.CSVReader;
 import org.apache.cxf.helpers.IOUtils;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;