You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by gr...@apache.org on 2018/09/18 15:06:20 UTC

[tika] branch branch_1x updated (7259325 -> a366813)

This is an automated email from the ASF dual-hosted git repository.

grossws pushed a change to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 7259325  [maven-release-plugin] prepare for next development iteration
     new 231fbb0  Fixed javadocs
     new a24976a  Cosmetics
     new a366813  Removed #getDetector from ImportContextImpl

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../java/org/apache/tika/batch/BatchProcess.java   |  2 +-
 .../org/apache/tika/batch/ConsumersManager.java    |  3 +-
 .../org/apache/tika/batch/FileResourceCrawler.java |  6 ++--
 .../tika/batch/builders/BatchProcessBuilder.java   |  2 +-
 .../org/apache/tika/embedder/ExternalEmbedder.java |  4 +--
 .../main/java/org/apache/tika/fork/ForkParser.java |  6 ++--
 .../apache/tika/language/LanguageIdentifier.java   |  2 +-
 .../main/java/org/apache/tika/parser/Parser.java   |  1 -
 .../org/apache/tika/sax/LinkContentHandler.java    |  2 +-
 .../tika/dl/imagerec/DL4JInceptionV3Net.java       |  2 +-
 .../org/apache/tika/eval/AbstractProfiler.java     |  2 +-
 .../org/apache/tika/example/ImportContextImpl.java | 10 -------
 .../tika/example/InterruptableParsingExample.java  |  2 +-
 .../java/org/apache/tika/example/MyFirstTika.java  |  2 +-
 .../tika/parser/chm/accessor/ChmItsfHeader.java    |  5 ----
 .../tika/parser/chm/accessor/ChmItspHeader.java    |  5 ----
 .../parser/chm/accessor/ChmLzxcControlData.java    | 11 ++------
 .../parser/chm/accessor/ChmLzxcResetTable.java     |  5 ----
 .../tika/parser/chm/accessor/ChmPmgiHeader.java    |  6 +---
 .../tika/parser/chm/accessor/ChmPmglHeader.java    |  9 +-----
 .../parser/chm/accessor/DirectoryListingEntry.java |  1 -
 .../tika/parser/chm/assertion/ChmAssert.java       |  6 ++--
 .../parser/ctakes/CTAKESAnnotationProperty.java    |  2 +-
 .../apache/tika/parser/ctakes/CTAKESConfig.java    | 32 +++++++++++-----------
 .../tika/parser/ctakes/CTAKESContentHandler.java   | 17 ++++++------
 .../apache/tika/parser/ctakes/CTAKESParser.java    |  4 +--
 .../org/apache/tika/parser/ctakes/CTAKESUtils.java |  8 +++---
 .../java/org/apache/tika/parser/mail/MailUtil.java |  4 +--
 .../tika/parser/microsoft/JackcessParser.java      |  2 +-
 .../parser/microsoft/xml/SpreadsheetMLParser.java  |  6 ++--
 .../tika/parser/microsoft/xml/WordMLParser.java    |  7 ++---
 .../java/org/apache/tika/parser/mp3/ID3Tags.java   |  4 +--
 .../org/apache/tika/parser/ner/NERecogniser.java   |  2 +-
 .../parser/ner/corenlp/CoreNLPNERecogniser.java    |  2 +-
 .../tika/parser/ner/grobid/GrobidNERecogniser.java |  2 +-
 .../tika/parser/ner/mitie/MITIENERecogniser.java   |  2 +-
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     |  2 +-
 .../parser/ner/opennlp/OpenNLPNERecogniser.java    |  2 +-
 .../tika/parser/ner/opennlp/OpenNLPNameFinder.java |  2 +-
 .../org/apache/tika/parser/pdf/AccessChecker.java  |  2 +-
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  6 ++--
 .../tika/parser/pkg/CompressorParserOptions.java   |  2 +-
 .../tika/parser/recognition/ObjectRecogniser.java  |  4 +--
 .../org/apache/tika/parser/strings/FileConfig.java |  3 +-
 .../apache/tika/parser/strings/StringsConfig.java  |  4 +--
 45 files changed, 81 insertions(+), 134 deletions(-)


[tika] 02/03: Cosmetics

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

grossws pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a24976a8dff12899fc46e41575b6b846a291c678
Author: Konstantin Gribov <gr...@gmail.com>
AuthorDate: Tue Sep 18 16:54:01 2018 +0300

    Cosmetics
---
 .../org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java    | 4 +---
 .../main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java | 5 +----
 tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java   | 2 +-
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
index ad8dd59..8674279 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
@@ -48,9 +48,7 @@ public class SpreadsheetMLParser extends AbstractXML2003Parser {
     final static String WORKSHEET = "worksheet";
 
     private static final MediaType MEDIA_TYPE = MediaType.application("vnd.ms-spreadsheetml");
-    private static final Set<MediaType> SUPPORTED_TYPES =
-            Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
-                    MEDIA_TYPE)));
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MEDIA_TYPE);
 
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
index 8c18d1c..6d1ea8e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
@@ -49,8 +49,6 @@ import org.xml.sax.helpers.DefaultHandler;
  * See <a href="https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats">https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats</a>
  */
 public class WordMLParser extends AbstractXML2003Parser {
-
-
     //map between wordml and xhtml entities
     private static final Map<String, String> WORDML_TO_XHTML;
     static {
@@ -72,8 +70,7 @@ public class WordMLParser extends AbstractXML2003Parser {
 
 
     private static final MediaType MEDIA_TYPE = MediaType.application("vnd.ms-wordml");
-    private static final Set<MediaType> SUPPORTED_TYPES =
-            Collections.singleton(MEDIA_TYPE); //immutable
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MEDIA_TYPE); //immutable
 
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
index 63577e6..b8d723f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
@@ -212,7 +212,7 @@ public interface ID3Tags {
      * Represents a comments in ID3 (especially ID3 v2), where are 
      *  made up of several parts
      */
-    public static class ID3Comment {
+    class ID3Comment {
         private String language;
         private String description;
         private String text;


[tika] 01/03: Fixed javadocs

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

grossws pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 231fbb0f79212de698316f7b89f37ebf7c85b78f
Author: Konstantin Gribov <gr...@gmail.com>
AuthorDate: Tue Sep 18 16:53:26 2018 +0300

    Fixed javadocs
---
 .../java/org/apache/tika/batch/BatchProcess.java   |  2 +-
 .../org/apache/tika/batch/ConsumersManager.java    |  3 +-
 .../org/apache/tika/batch/FileResourceCrawler.java |  6 ++--
 .../tika/batch/builders/BatchProcessBuilder.java   |  2 +-
 .../org/apache/tika/embedder/ExternalEmbedder.java |  4 +--
 .../main/java/org/apache/tika/fork/ForkParser.java |  6 ++--
 .../apache/tika/language/LanguageIdentifier.java   |  2 +-
 .../main/java/org/apache/tika/parser/Parser.java   |  1 -
 .../org/apache/tika/sax/LinkContentHandler.java    |  2 +-
 .../tika/dl/imagerec/DL4JInceptionV3Net.java       |  2 +-
 .../org/apache/tika/eval/AbstractProfiler.java     |  2 +-
 .../tika/example/InterruptableParsingExample.java  |  2 +-
 .../java/org/apache/tika/example/MyFirstTika.java  |  2 +-
 .../tika/parser/chm/accessor/ChmItsfHeader.java    |  5 ----
 .../tika/parser/chm/accessor/ChmItspHeader.java    |  5 ----
 .../parser/chm/accessor/ChmLzxcControlData.java    | 11 ++------
 .../parser/chm/accessor/ChmLzxcResetTable.java     |  5 ----
 .../tika/parser/chm/accessor/ChmPmgiHeader.java    |  6 +---
 .../tika/parser/chm/accessor/ChmPmglHeader.java    |  9 +-----
 .../parser/chm/accessor/DirectoryListingEntry.java |  1 -
 .../tika/parser/chm/assertion/ChmAssert.java       |  6 ++--
 .../parser/ctakes/CTAKESAnnotationProperty.java    |  2 +-
 .../apache/tika/parser/ctakes/CTAKESConfig.java    | 32 +++++++++++-----------
 .../tika/parser/ctakes/CTAKESContentHandler.java   | 17 ++++++------
 .../apache/tika/parser/ctakes/CTAKESParser.java    |  4 +--
 .../org/apache/tika/parser/ctakes/CTAKESUtils.java |  8 +++---
 .../java/org/apache/tika/parser/mail/MailUtil.java |  4 +--
 .../tika/parser/microsoft/JackcessParser.java      |  2 +-
 .../parser/microsoft/xml/SpreadsheetMLParser.java  |  2 +-
 .../tika/parser/microsoft/xml/WordMLParser.java    |  2 +-
 .../java/org/apache/tika/parser/mp3/ID3Tags.java   |  2 +-
 .../org/apache/tika/parser/ner/NERecogniser.java   |  2 +-
 .../parser/ner/corenlp/CoreNLPNERecogniser.java    |  2 +-
 .../tika/parser/ner/grobid/GrobidNERecogniser.java |  2 +-
 .../tika/parser/ner/mitie/MITIENERecogniser.java   |  2 +-
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     |  2 +-
 .../parser/ner/opennlp/OpenNLPNERecogniser.java    |  2 +-
 .../tika/parser/ner/opennlp/OpenNLPNameFinder.java |  2 +-
 .../org/apache/tika/parser/pdf/AccessChecker.java  |  2 +-
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  6 ++--
 .../tika/parser/pkg/CompressorParserOptions.java   |  2 +-
 .../tika/parser/recognition/ObjectRecogniser.java  |  4 +--
 .../org/apache/tika/parser/strings/FileConfig.java |  3 +-
 .../apache/tika/parser/strings/StringsConfig.java  |  4 +--
 44 files changed, 78 insertions(+), 116 deletions(-)

diff --git a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
index 7a8fcd4..23887af 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
@@ -530,7 +530,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
      * memory leaks, it is sometimes beneficial to shutdown (and restart) the
      * process periodically.
      * <p/>
-     * If the value is < 0, the process will run until completion, interruption or exception.
+     * If the value is &lt; 0, the process will run until completion, interruption or exception.
      *
      * @param maxAliveTimeSeconds maximum amount of time in seconds to remain alive
      */
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/ConsumersManager.java b/tika-batch/src/main/java/org/apache/tika/batch/ConsumersManager.java
index a4f3b82..5c83018 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/ConsumersManager.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/ConsumersManager.java
@@ -69,10 +69,9 @@ public abstract class ConsumersManager {
     }
 
     /**
-     * {@see #getConsumersManagerMaxMillis()}
-     *
      * @param consumersManagerMaxMillis maximum number of milliseconds
      *                                  to allow for init() or shutdown()
+     * @see #getConsumersManagerMaxMillis()
      */
     public void setConsumersManagerMaxMillis(long consumersManagerMaxMillis) {
         this.consumersManagerMaxMillis = consumersManagerMaxMillis;
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java b/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
index c02b053..fe1fd1a 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
@@ -195,7 +195,7 @@ public abstract class FileResourceCrawler implements Callable<IFileProcessorFutu
     }
 
     /**
-     * Maximum number of files to add.  If {@link #maxFilesToAdd} < 0 (default),
+     * Maximum number of files to add.  If {@link #maxFilesToAdd} &lt; 0 (default),
      * then this crawler will add all documents.
      *
      * @param maxFilesToAdd maximum number of files to add to the queue
@@ -209,7 +209,7 @@ public abstract class FileResourceCrawler implements Callable<IFileProcessorFutu
      * Maximum number of files to consider.  A file is considered
      * whether or not the DocumentSelector selects a document.
      * <p/>
-     * If {@link #maxFilesToConsider} < 0 (default), then this crawler
+     * If {@link #maxFilesToConsider} &lt; 0 (default), then this crawler
      * will add all documents.
      *
      * @param maxFilesToConsider maximum number of files to consider adding to the queue
@@ -260,8 +260,6 @@ public abstract class FileResourceCrawler implements Callable<IFileProcessorFutu
      * adding poison.  Do this only if you've already called another mechanism
      * to request that consumers shut down.  This prevents a potential deadlock issue
      * where the crawler is trying to add to the queue, but it is full.
-     *
-     * @return
      */
     public void shutDownNoPoison() {
         this.shutDownNoPoison = true;
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
index 314ea76..0ebfd15 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
@@ -183,7 +183,7 @@ public class BatchProcessBuilder {
      * numConsumers is needed by both the crawler and the consumers. This utility method
      * is to be used to extract the number of consumers from a map of String key value pairs.
      * <p>
-     * If the value is "default", not a parseable integer or has a value < 1,
+     * If the value is "default", not a parseable integer or has a value &lt; 1,
      * then <code>AbstractConsumersBuilder</code>'s <code>getDefaultNumConsumers()</code>
      * @param attrs attributes from which to select the NUM_CONSUMERS_KEY
      * @return number of consumers
diff --git a/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java b/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
index 84dc5da..25581be 100644
--- a/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
+++ b/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
@@ -107,7 +107,7 @@ public class ExternalEmbedder implements Embedder {
 
     /**
      * Gets the command to be run. This can include either of
-     * {@link #INPUT_FILE_TOKEN} or {@link #OUTPUT_FILE_TOKEN} if the command
+     * {@link ExternalParser#INPUT_FILE_TOKEN} or {@link ExternalParser#OUTPUT_FILE_TOKEN} if the command
      * needs filenames.
      *
      * @return
@@ -118,7 +118,7 @@ public class ExternalEmbedder implements Embedder {
 
     /**
      * Sets the command to be run. This can include either of
-     * {@link #INPUT_FILE_TOKEN} or {@link #OUTPUT_FILE_TOKEN} if the command
+     * {@link ExternalParser#INPUT_FILE_TOKEN} or {@link ExternalParser#OUTPUT_FILE_TOKEN} if the command
      * needs filenames.
      *
      * @see Runtime#exec(String[])
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
index c7b51ac..a37aed4 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
@@ -191,8 +191,8 @@ public class ForkParser extends AbstractParser {
     /**
      * Sets the command used to start the forked server process.
      * The given command line is split on whitespace and the arguments
-2    * "-jar" and "/path/to/bootstrap.jar" are appended to it when starting
-2    * the process. The default setting is "java -Xmx32m".
+     * "-jar" and "/path/to/bootstrap.jar" are appended to it when starting
+     * the process. The default setting is "java -Xmx32m".
      *
      * @param java java command line
      * @deprecated since 1.8
@@ -212,7 +212,7 @@ public class ForkParser extends AbstractParser {
      * This sends the objects to the server for parsing, and the server via
      * the proxies acts on the handler as if it were updating it directly.
      * <p>
-     * If using a RecursiveParserWrapper, there are two options:
+     * If using a {@link RecursiveParserWrapper}, there are two options:
      * </p>
      * <p>
      *     <ol>
diff --git a/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java b/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
index dbb05f9..17db9d3 100644
--- a/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
+++ b/tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
@@ -145,7 +145,7 @@ public class LanguageIdentifier {
      * Tries to judge whether the identification is certain enough
      * to be trusted.
      * WARNING: Will never return true for small amount of input texts. 
-     * @return <code>true</code> if the distance is smaller then {@value #CERTAINTY_LIMIT}, <code>false</code> otherwise
+     * @return <code>true</code> if the distance is smaller than {@value LanguageIdentifier#CERTAINTY_LIMIT}, <code>false</code> otherwise
      */
     public boolean isReasonablyCertain() {
         return distance < CERTAINTY_LIMIT;
diff --git a/tika-core/src/main/java/org/apache/tika/parser/Parser.java b/tika-core/src/main/java/org/apache/tika/parser/Parser.java
index 352b8d3..3ac2d1f 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/Parser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/Parser.java
@@ -29,7 +29,6 @@ import org.xml.sax.SAXException;
 
 /**
  * Tika parser interface.
- * @see ConfigurableParser for parsers which adopts to runtime params
  */
 public interface Parser extends Serializable {
 
diff --git a/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
index b560f9e..bb1081e 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/LinkContentHandler.java
@@ -56,7 +56,7 @@ public class LinkContentHandler extends DefaultHandler {
     /**
      * Default constructor
      *
-     * @boolean collapseWhitespaceInAnchor
+     * @param collapseWhitespaceInAnchor
      */
     public LinkContentHandler(boolean collapseWhitespaceInAnchor) {
       super();
diff --git a/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java b/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java
index abe65ff..c45f192 100644
--- a/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java
+++ b/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java
@@ -297,7 +297,7 @@ public class DL4JInceptionV3Net implements ObjectRecogniser {
      * Loads the class to
      *
      * @param stream label index stream
-     * @return Map of integer -> label name
+     * @return Map of integer -&gt; label name
      * @throws IOException    when the stream breaks unexpectedly
      * @throws ParseException when the input doesn't contain a valid JSON map
      */
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java b/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
index 0a67ad0..322a1f6 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/AbstractProfiler.java
@@ -208,7 +208,7 @@ public abstract class AbstractProfiler extends FileResourceConsumer {
     }
 
     /**
-     * Add a LimitTokenCountFilterFactory if > -1
+     * Add a LimitTokenCountFilterFactory if &gt; -1
      *
      * @param maxTokens
      */
diff --git a/tika-example/src/main/java/org/apache/tika/example/InterruptableParsingExample.java b/tika-example/src/main/java/org/apache/tika/example/InterruptableParsingExample.java
index 9aadf58..d134fde 100644
--- a/tika-example/src/main/java/org/apache/tika/example/InterruptableParsingExample.java
+++ b/tika-example/src/main/java/org/apache/tika/example/InterruptableParsingExample.java
@@ -36,7 +36,7 @@ import org.xml.sax.helpers.DefaultHandler;
  * This example demonstrates how to interrupt document parsing if
  * some condition is met.
  * <p>
- * {@link InterruptingContentHandler} throws special exception as soon as
+ * {@link InterruptableParsingExample.InterruptingContentHandler} throws special exception as soon as
  * find {@code query} string in parsed file.
  *
  * See also http://stackoverflow.com/questions/31939851
diff --git a/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java b/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
index fe0c8d9..554342c 100755
--- a/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
+++ b/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
@@ -41,7 +41,7 @@ import org.xml.sax.ContentHandler;
 /**
  * Demonstrates how to call the different components within Tika: its
  * {@link Detector} framework (aka MIME identification and repository), its
- * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
+ * {@link Parser} interface, its {@link org.apache.tika.language.LanguageIdentifier} and other goodies.
  * <p>
  * It also shows the "easy way" via {@link AutoDetectParser}
  */
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
index 2c4dc4e..e4f9ee5 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
@@ -35,11 +35,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * beginning of file 0008: QWORD Length of section Following the header section
  * table is 8 bytes of additional header data. In Version 2 files, this data is
  * not there and the content section starts immediately after the directory.
- * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1}
- * 
  */
 /* structure of ITSF headers */
 public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
index 10b00ae..28e2ff3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
@@ -38,11 +38,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * 0034: GUID {5D02926A-212E-11D0-9DF9-00A0C922E6EC} 0044: DWORD $54 (This is
  * the length again) 0048: DWORD -1 (unknown) 004C: DWORD -1 (unknown) 0050:
  * DWORD -1 (unknown)
- * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1}
- * 
  */
 public class ChmItspHeader implements ChmAccessor<ChmItspHeader> {
     // TODO: refactor all unmarshals
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
index 17a2e2f..1bd0c4f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
@@ -32,11 +32,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * $8000-byte blocks 0010: DWORD The window size in $8000-byte blocks 0014:
  * DWORD unknown (sometimes 2, sometimes 1, sometimes 0) 0018: DWORD 0 (unknown)
  * 001C: DWORD 0 (unknown)
- * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?page=2 }
- * 
  */
 public class ChmLzxcControlData implements ChmAccessor<ChmLzxcControlData> {
     private static final long serialVersionUID = -7897854774939631565L;
@@ -91,7 +86,7 @@ public class ChmLzxcControlData implements ChmAccessor<ChmLzxcControlData> {
     /**
      * Sets a place holder
      * 
-     * @param current_place
+     * @param currentPlace
      */
     private void setCurrentPlace(int currentPlace) {
         this.currentPlace = currentPlace;
@@ -181,7 +176,7 @@ public class ChmLzxcControlData implements ChmAccessor<ChmLzxcControlData> {
     /**
      * Sets a window size
      * 
-     * @param window_size
+     * @param windowSize
      */
     protected void setWindowSize(long windowSize) {
         this.windowSize = windowSize;
@@ -199,7 +194,7 @@ public class ChmLzxcControlData implements ChmAccessor<ChmLzxcControlData> {
     /**
      * Sets windows per reset
      * 
-     * @param windows_per_reset
+     * @param windowsPerReset
      */
     protected void setWindowsPerReset(long windowsPerReset) {
         this.windowsPerReset = windowsPerReset;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java
index 5823f67..be57c3d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java
@@ -28,11 +28,6 @@ import org.apache.tika.parser.chm.exception.ChmParsingException;
  * LZXC reset table For ensuring a decompression. Reads the block named
  * "::DataSpace/Storage/<SectionName>/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable"
  * .
- * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?page=2 }
- * 
  */
 public class ChmLzxcResetTable implements ChmAccessor<ChmLzxcResetTable> {
     private static final long serialVersionUID = -8209574429411707460L;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
index 97eaf46..a98ae67 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
@@ -35,15 +35,11 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * ENCINT: directory listing chunk which starts with name Encoded Integers aka
  * ENCINT An ENCINT is a variable-length integer. The high bit of each byte
  * indicates "continued to the next byte". Bytes are stored most significant to
- * least significant. So, for example, $EA $15 is (((0xEA&0x7F)<<7)|0x15) =
+ * least significant. So, for example, $EA $15 is (((0xEA&amp;0x7F)&lt;&lt;7)|0x15) =
  * 0x3515.
  * 
  * <p>
  * Note: This class is not in use
- * 
- * {@link http://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original/?show-translation-form=1 }
- * 
- * 
  */
 public class ChmPmgiHeader implements ChmAccessor<ChmPmgiHeader> {
     private static final long serialVersionUID = -2092282339894303701L;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
index abb7175..0bfb95a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
@@ -34,7 +34,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * (-1 if this is the last listing chunk) 0014: Directory listing entries (to
  * quickref area) Sorted by filename; the sort is case-insensitive The quickref
  * area is written backwards from the end of the chunk. One quickref entry
- * exists for every n entries in the file, where n is calculated as 1 + (1 <<
+ * exists for every n entries in the file, where n is calculated as 1 + (1 &lt;&lt;
  * quickref density). So for density = 2, n = 5 Chunklen-0002: WORD Number of
  * entries in the chunk Chunklen-0004: WORD Offset of entry n from entry 0
  * Chunklen-0008: WORD Offset of entry 2n from entry 0 Chunklen-000C: WORD
@@ -47,13 +47,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * in the directory: user data and format related files. The files which are
  * format-related have names which begin with '::', the user data files have
  * names which begin with "/".
- * 
- * {@link http
- * ://translated.by/you/microsoft-s-html-help-chm-format-incomplete/original
- * /?show-translation-form=1 }
- * 
- * @author olegt
- * 
  */
 public class ChmPmglHeader implements ChmAccessor<ChmPmglHeader> {
     private static final long serialVersionUID = -6139486487475923593L;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java
index c413e07..5f95675 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java
@@ -29,7 +29,6 @@ import org.apache.tika.parser.chm.core.ChmCommons;
  * two kinds of file represented in the directory: user data and format related
  * files. The files which are format-related have names which begin with '::',
  * the user data files have names which begin with "/".
- * 
  */
 public class DirectoryListingEntry {
     /* Length of the entry name */
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java b/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
index cdedc3e..0c736e8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
@@ -86,7 +86,7 @@ public class ChmAssert {
      * Checks validity of ChmAccessor parameters
      * 
      * @param data
-     * @param chmItsfHeader
+     * @param chmAccessor
      * @param count
      * @throws ChmParsingException 
      */
@@ -110,7 +110,7 @@ public class ChmAssert {
     /**
      * Checks if ChmAccessor is not null In case of null throws exception
      * 
-     * @param ChmAccessor
+     * @param chmAccessor
      * @throws ChmParsingException 
      */
     public static final void assertChmAccessorNotNull(ChmAccessor<?> chmAccessor) throws ChmParsingException {
@@ -156,7 +156,7 @@ public class ChmAssert {
     }
 
     /**
-     * Checks if int param is greater than zero In case param <=0 throws an
+     * Checks if int param is greater than zero In case param &lt;= 0 throws an
      * exception
      * 
      * @param param
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
index e6d261d..1c1be02 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
@@ -19,7 +19,7 @@ package org.apache.tika.parser.ctakes;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 
 /**
- * This enumeration includes the properties that an {@see IdentifiedAnnotation} object can provide.
+ * This enumeration includes the properties that an {@link IdentifiedAnnotation} object can provide.
  *
  */
 public enum CTAKESAnnotationProperty {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
index 67ba993..cc20273 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
@@ -25,7 +25,7 @@ import java.util.Properties;
 import static org.apache.commons.io.output.NullOutputStream.NULL_OUTPUT_STREAM;
 
 /**
- * Configuration for {@see CTAKESContentHandler}.
+ * Configuration for {@link CTAKESContentHandler}.
  * 
  * This class allows to enable cTAKES and set its parameters.
  */
@@ -77,7 +77,7 @@ public class CTAKESConfig implements Serializable {
 
     /**
      * Loads properties from InputStream and then tries to close InputStream.
-     * @param stream {@see InputStream} object used to read properties.
+     * @param stream {@link InputStream} object used to read properties.
      */
     public CTAKESConfig(InputStream stream) {
         init(stream);
@@ -153,8 +153,8 @@ public class CTAKESConfig implements Serializable {
     }
 
     /**
-     * Returns an {@see OutputStream} object used write the CAS.
-     * @return {@see OutputStream} object used write the CAS.
+     * Returns an {@link OutputStream} object used write the CAS.
+     * @return {@link OutputStream} object used write the CAS.
      */
     public OutputStream getOutputStream() {
         return stream;
@@ -203,15 +203,15 @@ public class CTAKESConfig implements Serializable {
     }
 
     /**
-     * Returns an array of {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
-     * @return an array of {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+     * Returns an array of {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+     * @return an array of {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
      */
     public CTAKESAnnotationProperty[] getAnnotationProps() {
         return annotationProps;
     }
 
     /**
-     * Returns a string containing a comma-separated list of {@see CTAKESAnnotationProperty} names that will be included into cTAKES metadata.
+     * Returns a string containing a comma-separated list of {@link CTAKESAnnotationProperty} names that will be included into cTAKES metadata.
      * @return
      */
     public String getAnnotationPropsAsString() {
@@ -260,7 +260,7 @@ public class CTAKESConfig implements Serializable {
 
     /**
      * Enables the formatted output for serializer.
-     * @param prettyPrint {@true} to enable formatted output, {@code false} otherwise.
+     * @param prettyPrint {@code true} to enable formatted output, {@code false} otherwise.
      */
     public void setPrettyPrint(boolean prettyPrint) {
         this.prettyPrint = prettyPrint;
@@ -275,8 +275,8 @@ public class CTAKESConfig implements Serializable {
     }
 
     /**
-     * Sets the {@see OutputStream} object used to write the CAS.
-     * @param stream the {@see OutputStream} object used to write the CAS.
+     * Sets the {@link OutputStream} object used to write the CAS.
+     * @param stream the {@link OutputStream} object used to write the CAS.
      */
     public void setOutputStream(OutputStream stream) {
         this.stream = stream;
@@ -284,7 +284,7 @@ public class CTAKESConfig implements Serializable {
 
     /**
      * Enables CAS serialization.
-     * @param serialize {@true} to enable CAS serialization, {@code false} otherwise.
+     * @param serialize {@code true} to enable CAS serialization, {@code false} otherwise.
      */
     public void setSerialize(boolean serialize) {
         this.serialize = serialize;
@@ -292,7 +292,7 @@ public class CTAKESConfig implements Serializable {
 
     /**
      * Enables content text analysis using cTAKES.
-     * @param text {@true} to enable content text analysis, {@code false} otherwise.
+     * @param text {@code true} to enable content text analysis, {@code false} otherwise.
      */
     public void setText(boolean text) {
         this.text = text;
@@ -307,16 +307,16 @@ public class CTAKESConfig implements Serializable {
     }
 
     /**
-     * Sets the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
-     * @param annotationProps the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+     * Sets the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+     * @param annotationProps the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
      */
     public void setAnnotationProps(CTAKESAnnotationProperty[] annotationProps) {
         this.annotationProps = annotationProps;
     }
 
     /**
-     * ets the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
-     * @param annotationProps the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+     * ets the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+     * @param annotationProps the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
      */
     public void setAnnotationProps(String[] annotationProps) {
         CTAKESAnnotationProperty[] properties = new CTAKESAnnotationProperty[annotationProps.length];
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java
index 38326e3..422b4b5 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java
@@ -58,16 +58,15 @@ public class CTAKESContentHandler extends ContentHandlerDecorator {
 	private JCas jcas = null;
 
 	/**
-	 * Creates a new {@see CTAKESContentHandler} for the given {@see
-	 * ContentHandler} and Metadata objects.
+	 * Creates a new {@link CTAKESContentHandler} for the given {@link ContentHandler} and Metadata objects.
 	 * 
 	 * @param handler
-	 *            the {@see ContentHandler} object to be decorated.
+	 *            the {@link ContentHandler} object to be decorated.
 	 * @param metadata
-	 *            the {@see Metadata} object that will be populated using
+	 *            the {@link Metadata} object that will be populated using
 	 *            biomedical information extracted by cTAKES.
 	 * @param config
-	 *            the {@see CTAKESConfig} object used to configure the handler.
+	 *            the {@link CTAKESConfig} object used to configure the handler.
 	 */
 	public CTAKESContentHandler(ContentHandler handler, Metadata metadata,
 			CTAKESConfig config) {
@@ -78,13 +77,13 @@ public class CTAKESContentHandler extends ContentHandlerDecorator {
 	}
 
 	/**
-	 * Creates a new {@see CTAKESContentHandler} for the given {@see
+	 * Creates a new {@link CTAKESContentHandler} for the given {@link
 	 * ContentHandler} and Metadata objects.
 	 * 
 	 * @param handler
-	 *            the {@see ContentHandler} object to be decorated.
+	 *            the {@link ContentHandler} object to be decorated.
 	 * @param metadata
-	 *            the {@see Metadata} object that will be populated using
+	 *            the {@link Metadata} object that will be populated using
 	 *            biomedical information extracted by cTAKES.
 	 */
 	public CTAKESContentHandler(ContentHandler handler, Metadata metadata) {
@@ -168,7 +167,7 @@ public class CTAKESContentHandler extends ContentHandlerDecorator {
 	/**
 	 * Returns metadata that includes cTAKES annotations.
 	 * 
-	 * @return {@Metadata} object that includes cTAKES annotations.
+	 * @return {@link Metadata} object that includes cTAKES annotations.
 	 */
 	public Metadata getMetadata() {
 		return metadata;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java
index acd1965..dc92502 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java
@@ -30,8 +30,8 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 /**
- * CTAKESParser decorates a {@see Parser} and leverages on 
- * {@see CTAKESContentHandler} to extract biomedical information from 
+ * CTAKESParser decorates a {@link Parser} and leverages on
+ * {@link CTAKESContentHandler} to extract biomedical information from
  * clinical text using Apache cTAKES.
  * <p>It is normally called by supplying an instance to 
  *  {@link AutoDetectParser}, such as:
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java
index c8cc8ce..94ebefa 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java
@@ -38,7 +38,7 @@ import org.xml.sax.SAXException;
 
 /**
  * This class provides methods to extract biomedical information from plain text
- * using {@see CTAKESContentHandler} that relies on Apache cTAKES.
+ * using {@link CTAKESContentHandler} that relies on Apache cTAKES.
  * 
  * <p>
  * Apache cTAKES is built on top of <a href="https://uima.apache.org/">Apache
@@ -152,7 +152,7 @@ public class CTAKESUtils {
 	 * @param prettyPrint
 	 *            {@code true} to do pretty printing of output.
 	 * @param stream
-	 *            {@see OutputStream} object used to print out information
+	 *            {@link OutputStream} object used to print out information
 	 *            extracted by using cTAKES.
 	 * @throws SAXException
 	 *             if there was a SAX exception.
@@ -176,9 +176,9 @@ public class CTAKESUtils {
 	 * Returns the annotation value based on the given annotation type.
 	 * 
 	 * @param annotation
-	 *            {@see IdentifiedAnnotation} object.
+	 *            {@link IdentifiedAnnotation} object.
 	 * @param property
-	 *            {@see CTAKESAnnotationProperty} enum used to identify the
+	 *            {@link CTAKESAnnotationProperty} enum used to identify the
 	 *            annotation type.
 	 * @return the annotation value.
 	 */
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailUtil.java b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailUtil.java
index 1cf2686..1fac159 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailUtil.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailUtil.java
@@ -100,8 +100,8 @@ public class MailUtil {
 
     /**
      * If the chunk looks like it contains an email
-     * @param chunk
-     * @return
+     * @param chunk to analyse
+     * @return {@code true} if chunk looks like it contains an email
      */
     public static boolean containsEmail(String chunk) {
         if (chunk == null) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
index f379491..784a244 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
@@ -43,7 +43,7 @@ import org.xml.sax.SAXException;
 
 /**
  * Parser that handles Microsoft Access files via
- * <a href="http://jackcess.sourceforge.net/>Jackcess</a>
+ * <a href="http://jackcess.sourceforge.net/">Jackcess</a>
  * <p>
  * Many, many thanks to LexisNexis®/Health Market Science (HMS), Brian O'Neill,
  * and James Ahlborn for relicensing Jackcess to Apache v2.0!
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
index c442453..ad8dd59 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
@@ -38,7 +38,7 @@ import org.xml.sax.helpers.DefaultHandler;
  * Parses wordml 2003 format Excel files.  These are single xml files
  * that predate ooxml.
  *
- * @see {@url https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats}
+ * See <a href="https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats">https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats</a>
  */
 public class SpreadsheetMLParser extends AbstractXML2003Parser {
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
index e9890c8..8c18d1c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
@@ -46,7 +46,7 @@ import org.xml.sax.helpers.DefaultHandler;
  * Parses wordml 2003 format word files.  These are single xml files
  * that predate ooxml.
  *
- * @see {@url https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats}
+ * See <a href="https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats">https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats</a>
  */
 public class WordMLParser extends AbstractXML2003Parser {
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
index 98ef504..63577e6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
@@ -32,7 +32,7 @@ public interface ID3Tags {
     /**
      * List of predefined genres.
      *
-     * @see http://www.id3.org/id3v2-00
+     * See <a href="http://www.id3.org/id3v2-00">http://www.id3.org/id3v2-00</a>
      */
     String[] GENRES = new String[] {
         /*  0 */ "Blues",
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
index c4693eb..27d58da 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java
@@ -52,7 +52,7 @@ public interface NERecogniser {
     /**
      * call for name recognition action from text
      * @param text text with possibly contains names
-     * @return map of entityType -> set of names
+     * @return map of entityType -&gt; set of names
      */
     Map<String, Set<String>> recognise(String text);
 }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
index 05ef89f..18b3d1a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/corenlp/CoreNLPNERecogniser.java
@@ -123,7 +123,7 @@ public class CoreNLPNERecogniser implements NERecogniser {
     /**
      * recognises names of entities in the text
      * @param text text which possibly contains names
-     * @return map of entity type -> set of names
+     * @return map of entity type -&gt; set of names
      */
     public Map<String, Set<String>> recognise(String text) {
         Map<String, Set<String>> names = new HashMap<>();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java
index cf97194..696836e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java
@@ -149,7 +149,7 @@ public class GrobidNERecogniser implements NERecogniser{
     /**
      * recognises names of entities in the text
      * @param text text which possibly contains names
-     * @return map of entity type -> set of names
+     * @return map of entity type -&gt; set of names
      */
     public Map<String, Set<String>> recognise(String text) {
        
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/mitie/MITIENERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/mitie/MITIENERecogniser.java
index 85935e3..9157aaa 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/mitie/MITIENERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/mitie/MITIENERecogniser.java
@@ -98,7 +98,7 @@ public class MITIENERecogniser implements NERecogniser {
     /**
      * recognises names of entities in the text
      * @param text text which possibly contains names
-     * @return map of entity type -> set of names
+     * @return map of entity type -&gt; set of names
      */
     public Map<String, Set<String>> recognise(String text) {
         Map<String, Set<String>> names = new HashMap<>();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
index e7b3638..0e3d2d7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -114,7 +114,7 @@ public class NLTKNERecogniser implements NERecogniser {
     /**
      * recognises names of entities in the text
      * @param text text which possibly contains names
-     * @return map of entity type -> set of names
+     * @return map of entity type -&gt; set of names
      */
     public Map<String, Set<String>> recognise(String text) {
         Map<String, Set<String>> entities = new HashMap<>();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNERecogniser.java
index 8d2d953..de93f8c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNERecogniser.java
@@ -111,7 +111,7 @@ public class OpenNLPNERecogniser implements NERecogniser {
 
     /**
      * Creates a chain of Named Entity recognisers
-     * @param models map of entityType -> model path
+     * @param models map of entityType -&gt; model path
      * NOTE: the model path should be known to class loader.
      */
     public OpenNLPNERecogniser(Map<String, String> models){
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNameFinder.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNameFinder.java
index a1e19ae..457aa00 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNameFinder.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/opennlp/OpenNLPNameFinder.java
@@ -97,7 +97,7 @@ public class OpenNLPNameFinder implements NERecogniser {
     /**
      * finds names from given array of tokens
      * @param tokens the tokens array
-     * @return map of EntityType -> set of entity names
+     * @return map of EntityType -&gt; set of entity names
      */
     public Map<String, Set<String>> findNames(String[] tokens) {
         Span[] nameSpans = nameFinder.find(tokens);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
index 0bb6590..b525d54 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
@@ -39,7 +39,7 @@ public class AccessChecker implements Serializable {
      * will not perform any checking and will always return without
      * throwing an exception.
      * <p/>
-     * This constructor is available to allow for Tika's legacy ( <= v1.7) behavior.
+     * This constructor is available to allow for Tika's legacy (&lt;= v1.7) behavior.
      */
     public AccessChecker() {
         needToCheck = false;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index a305c4c..b0327af 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -319,15 +319,16 @@ public class PDFParserConfig implements Serializable {
     /**
      * If true, extract inline embedded OBXImages.
      * <b>Beware:</b> some PDF documents of modest size (~4MB) can contain
-     * thousands of embedded images totaling > 2.5 GB.  Also, at least as of PDFBox 1.8.5,
+     * thousands of embedded images totaling &gt; 2.5 GB.  Also, at least as of PDFBox 1.8.5,
      * there can be surprisingly large memory consumption and/or out of memory errors.
      * Set to <code>true</code> with caution.
      * <p/>
      * The default is <code>false</code>.
      * <p/>
-     * See also: {@see #setExtractUniqueInlineImagesOnly(boolean)};
      *
      * @param extractInlineImages
+     *
+     * @see #setExtractUniqueInlineImagesOnly(boolean)
      */
     public void setExtractInlineImages(boolean extractInlineImages) {
         this.extractInlineImages = extractInlineImages;
@@ -639,7 +640,6 @@ public class PDFParserConfig implements Serializable {
     /**
      * Image quality used to render the page image for OCR.
      * This does not apply to all image formats
-     * @return
      */
     public void setOcrImageQuality(float ocrImageQuality) {
         this.ocrImageQuality = ocrImageQuality;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParserOptions.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParserOptions.java
index 16d9e53..f3ff966 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParserOptions.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParserOptions.java
@@ -20,7 +20,7 @@ import org.apache.tika.metadata.Metadata;
 
 /**
  * Interface for setting options for the {@link CompressorParser} by passing
- * via the {@link ParseContext}.
+ * via the {@link org.apache.tika.parser.ParseContext}.
  */
 public interface CompressorParserOptions {
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecogniser.java
index 65242ab..518e5ed 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecogniser.java
@@ -34,8 +34,8 @@ import java.util.Map;
 import java.util.Set;
 
 /**
- *  This is a contract for object recognisers used by {@link ObjectRecognitionParser}
- *  @see {@link TensorflowImageRecParser} for an example
+ * This is a contract for object recognisers used by {@link ObjectRecognitionParser}
+ * @see TensorflowImageRecParser
  */
 public interface ObjectRecogniser  extends Initializable {
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/strings/FileConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/strings/FileConfig.java
index da9deab..d3724a3 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/strings/FileConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/strings/FileConfig.java
@@ -48,8 +48,7 @@ public class FileConfig implements Serializable {
 	/**
 	 * Sets the "file" installation folder.
 	 * 
-	 * @param path
-	 *            the "file" installation folder.
+	 * @param filePath the "file" installation folder.
 	 */
 	public void setFilePath(String filePath) {
 		this.filePath = filePath;
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java
index 9183f2e..ec893a1 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java
@@ -115,7 +115,7 @@ public class StringsConfig implements Serializable {
 	/**
 	 * Returns the character encoding of the strings that are to be found.
 	 * 
-	 * @return {@see StringsEncoding} enum that represents the character
+	 * @return {@link StringsEncoding} enum that represents the character
 	 *         encoding of the strings that are to be found.
 	 */
 	public StringsEncoding getEncoding() {
@@ -163,7 +163,7 @@ public class StringsConfig implements Serializable {
 	 * Sets the character encoding of the strings that are to be found.
 	 * 
 	 * @param encoding
-	 *            {@see StringsEncoding} enum that represents the character
+	 *            {@link StringsEncoding} enum that represents the character
 	 *            encoding of the strings that are to be found.
 	 */
 	public void setEncoding(StringsEncoding encoding) {


[tika] 03/03: Removed #getDetector from ImportContextImpl

Posted by gr...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

grossws pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a366813685d322d86907395b60c4486d7a6dd26f
Author: Konstantin Gribov <gr...@gmail.com>
AuthorDate: Tue Sep 18 17:32:00 2018 +0300

    Removed #getDetector from ImportContextImpl
---
 .../main/java/org/apache/tika/example/ImportContextImpl.java   | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java b/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
index 514f05c..0cad990 100755
--- a/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
+++ b/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
@@ -50,8 +50,6 @@ public class ImportContextImpl implements ImportContext {
     private InputContext inputCtx;
     private boolean completed;
 
-    private final Detector detector;
-
     private final MediaType type;
 
     /**
@@ -78,7 +76,6 @@ public class ImportContextImpl implements ImportContext {
         this.inputCtx = ctx;
         this.ioListener = (ioListener != null) ? ioListener
                 : new DefaultIOListener(LOG);
-        this.detector = detector;
 
         Metadata metadata = new Metadata();
         if (ctx != null && ctx.getContentType() != null) {
@@ -110,13 +107,6 @@ public class ImportContextImpl implements ImportContext {
     }
 
     /**
-     * @see ImportContext#getDetector()
-     */
-    public Detector getDetector() {
-        return detector;
-    }
-
-    /**
      * @see ImportContext#hasStream()
      */
     public boolean hasStream() {