You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/09/27 12:44:10 UTC

[tika] branch TIKA-3948 updated (62950c5d9 -> fa65c11c3)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3948
in repository https://gitbox.apache.org/repos/asf/tika.git


    from 62950c5d9 simplify dependencies -- further cleanups
     add c08ebcc4d Bump aws.version from 1.12.549 to 1.12.550
     add 574a76d63 Merge pull request #1338 from apache/dependabot/maven/aws.version-1.12.550
     add 2178540af Bump com.azure:azure-storage-blob from 12.23.1 to 12.24.0
     add 1515b1116 Merge pull request #1340 from apache/dependabot/maven/com.azure-azure-storage-blob-12.24.0
     add 67f7b8e99 Bump com.google.cloud:google-cloud-storage from 2.26.1 to 2.27.0
     add 37b7892ac Merge pull request #1339 from apache/dependabot/maven/com.google.cloud-google-cloud-storage-2.27.0
     add 0c3eda925 Bump org.springframework:spring-context from 5.3.29 to 5.3.30
     add 45dff3e13 Merge pull request #1344 from apache/dependabot/maven/org.springframework-spring-context-5.3.30
     add 4b57cb850 Bump aws.version from 1.12.550 to 1.12.551
     add db89767b0 Merge pull request #1343 from apache/dependabot/maven/aws.version-1.12.551
     add 3adb2e2ad TIKA-4120 -- comment out test that breaks with recent version of gdalinfo
     add 3c8824608 TIKA-4123 -- general updates for 3.0.0-BETA -- upgrade commons-compress
     add aeb637b57 TIKA-4133 -- add a capture group metadatafilter (#1346)
     add 7da1d6b70 TIKA-4129: update tyrus
     add 3f7a89544 TIKA-4129: update javadoc, mockito, jwarc, enforcer
     add 86ec9ce43 TIKA-4129: update aws, plexus
     add 162f0cbbc TIKA-4129: update h2, plexus
     add d9f2f0840 Bump aws.version from 1.12.553 to 1.12.554
     add 46c1cae3c Merge pull request #1347 from apache/dependabot/maven/aws.version-1.12.554
     add 70a4481e0 Bump org.codehaus.mojo:versions-maven-plugin from 2.16.0 to 2.16.1
     add 72b1ad394 Merge pull request #1348 from apache/dependabot/maven/org.codehaus.mojo-versions-maven-plugin-2.16.1
     add c75460f68 Bump aws.version from 1.12.554 to 1.12.555
     add e7d296e2b Merge pull request #1349 from apache/dependabot/maven/aws.version-1.12.555
     add 55989e1f8 TIKA-4108 (#1351)
     add b2bdb5b83 Bump io.netty:netty-bom from 4.1.97.Final to 4.1.98.Final
     add d2adc3c94 Merge pull request #1352 from apache/dependabot/maven/io.netty-netty-bom-4.1.98.Final
     add 72a81a16e Tika 4137 (#1353)
     add 6871c9157 TIKA-4137 -- add a jdk21 build workflow
     add e04c47820 TIKA-4138 -- move BoilerpipeContentHandler (#1355)
     add db6f790f7 Bump org.xerial.snappy:snappy-java from 1.1.10.3 to 1.1.10.4
     add 315aec2f7 Merge pull request #1357 from apache/dependabot/maven/org.xerial.snappy-snappy-java-1.1.10.4
     add 967547418 Bump aws.version from 1.12.555 to 1.12.556
     add 4fd18a9f5 Merge pull request #1358 from apache/dependabot/maven/aws.version-1.12.556
     add 147a1682e Bump aws.version from 1.12.556 to 1.12.557
     add 960fba53d Merge pull request #1359 from apache/dependabot/maven/aws.version-1.12.557
     add caf312593 TIKA-4135 -- remove xerces2 as a dependency (#1360)
     add 5361b6d12 TIKA-1599 (#1356)
     add 79b1d9563 fix unit test that is failing in github actions' environment
     add 07198df1d Bump com.google.cloud:google-cloud-storage from 2.27.0 to 2.27.1
     add 3750ff054 Merge pull request #1361 from apache/dependabot/maven/com.google.cloud-google-cloud-storage-2.27.1
     add 4f0a0383a Bump aws.version from 1.12.557 to 1.12.558
     add e431b8ba0 Merge pull request #1362 from apache/dependabot/maven/aws.version-1.12.558
     add ea44239bd Merge remote-tracking branch 'origin/main' into TIKA-3948
     add fa65c11c3 merge upstream

No new revisions were added by this update.

Summary of changes:
 .../{main-jdk17-build.yml => main-jdk21-build.yml} |   4 +-
 CHANGES.txt                                        |  14 ++
 pom.xml                                            |   3 +
 tika-app/pom.xml                                   |   2 +-
 .../test/java/org/apache/tika/cli/TikaCLITest.java |   4 +-
 .../src/test/resources/test-data/tika-config1.xml  |   2 +-
 tika-bom/pom.xml                                   |  13 +-
 tika-bundles/tika-bundle-standard/pom.xml          |   8 +-
 tika-core/pom.xml                                  |  10 +-
 .../filter/CaptureGroupMetadataFilter.java         | 110 ++++++++++
 .../java/org/apache/tika/utils/XMLReaderUtils.java |  10 +
 .../tika/metadata/filter/TestMetadataFilter.java   |  53 +++++
 .../apache/tika/sax/CustomErrorHandlerTest.java    |   2 +
 .../tika/sax/ErrorResistantSAXParserFactory.java   |  39 ----
 ...e.xml => TIKA-4133-capture-group-overwrite.xml} |   9 +-
 ...137-exclude.xml => TIKA-4133-capture-group.xml} |   9 +-
 .../org/apache/tika/example/TIAParsingExample.java |   6 +-
 tika-handlers/README.md                            |   2 +
 .../tika-emitter-jdbc => tika-handlers}/pom.xml    |  24 +-
 .../tika-handler-boilerpipe}/pom.xml               |  21 +-
 .../sax/boilerpipe/BoilerpipeContentHandler.java   |   0
 tika-langdetect/tika-langdetect-optimaize/pom.xml  |  13 +-
 tika-parent/pom.xml                                |  97 ++++----
 .../apache/tika/parser/gdal/TestGDALParser.java    |   6 +-
 .../src/test/resources/2.4.0-no-tesseract.txt      |   8 +-
 .../src/test/resources/2.4.0-tesseract.txt         |   8 +-
 .../src/test/resources/2.4.1-no-tesseract.txt      |   8 +-
 .../src/test/resources/2.4.1-tesseract.txt         |   8 +-
 .../tika-parser-tagsoup-module/pom.xml             |  34 +++
 .../tika/parser/html/tagsoup}/DataURIScheme.java   |   2 +-
 .../html/tagsoup}/DataURISchemeParseException.java |   2 +-
 .../parser/html/tagsoup}/DataURISchemeUtil.java    |   2 +-
 .../parser/html/tagsoup}/DefaultHtmlMapper.java    |   2 +-
 .../parser/html/tagsoup}/HtmlEncodingDetector.java |   2 +-
 .../tika/parser/html/tagsoup}/HtmlHandler.java     |   2 +-
 .../tika/parser/html/tagsoup}/HtmlMapper.java      |   2 +-
 .../tika/parser/html/tagsoup}/HtmlParser.java      |   2 +-
 .../parser/html/tagsoup}/IdentityHtmlMapper.java   |   2 +-
 .../html/tagsoup}/XHTMLDowngradeHandler.java       |   2 +-
 .../tagsoup}/charsetdetector/CharsetAliases.java   |   6 +-
 .../charsetdetector/CharsetDetectionResult.java    |   2 +-
 .../tagsoup}/charsetdetector/MetaProcessor.java    |   6 +-
 .../html/tagsoup}/charsetdetector/PreScanner.java  |   2 +-
 .../StandardHtmlEncodingDetector.java              |   6 +-
 .../charsets/ReplacementCharset.java               |   2 +-
 .../charsets/XUserDefinedCharset.java              |   2 +-
 .../org.apache.tika.detect.EncodingDetector        |   2 +-
 .../services/org.apache.tika.parser.Parser         |   2 +-
 .../StandardCharsets_unsupported_by_IANA.txt       |   0
 .../html/tagsoup}/DataURISchemeParserTest.java     |   3 +-
 .../html/tagsoup}/HtmlEncodingDetectorTest.java    |   3 +-
 .../tika/parser/html/tagsoup}/HtmlParserTest.java  |   5 +-
 .../tika/parser/html/tagsoup}/SrcDocTest.java      |   2 +-
 .../tagsoup}/StandardHtmlEncodingDetectorTest.java |   6 +-
 .../org/apache/tika/parser/html/tika-config.xml    |   4 +-
 .../resources/test-documents/big-preamble.html     |   0
 .../test-documents/boilerplate-whitespace.html     |   0
 .../test/resources/test-documents/boilerplate.html |   0
 .../testBoilerplateMissingSpace.html               |   0
 .../test/resources/test-documents/testHTML.html    |   0
 .../test-documents/testHTMLBadScript.html          |   0
 .../test-documents/testHTMLGoodScript.html         |   0
 .../testHTMLNoisyMetaEncoding_1.html               |   0
 .../testHTMLNoisyMetaEncoding_2.html               |   0
 .../testHTMLNoisyMetaEncoding_3.html               |   0
 .../testHTMLNoisyMetaEncoding_4.html               |   0
 .../test-documents/testHTML_charset_utf16le.html   | Bin
 .../test-documents/testHTML_charset_utf8.html      |   0
 .../testHTML_embedded_data_uri_js.html             |   0
 .../test-documents/testHTML_embedded_img.html      |   0
 .../testHTML_embedded_img_in_js.html               |   0
 .../resources/test-documents/testHTML_head.html    |   0
 .../test-documents/testHTML_metadata.html          |   0
 .../testHTML_metadata_two_titles.html              |   0
 .../resources/test-documents/testHTML_utf8.html    |   0
 .../test/resources/test-documents/testSrcDoc.html  |   0
 .../test-documents/testUserDefinedCharset.mhtml    |   0
 .../test/resources/test-documents/testXHTML.html   |   0
 .../src/test/resources/test-documents/tika434.html |   0
 .../pom.xml                                        |  46 +---
 .../tika-parsers-ml/tika-age-recogniser/pom.xml    |   2 +-
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |   4 +
 .../tika-parsers-standard-modules/pom.xml          |   1 -
 .../tika-parser-html-commons/README.md             |  22 --
 .../tika-parser-html-commons/pom.xml               |  74 -------
 .../tika-parser-html-module/pom.xml                |   5 +-
 .../org/apache/tika/parser/html/JSoupParser.java   | 243 +++++++++++++++++++++
 .../services/org.apache.tika.parser.Parser         |   2 +-
 .../apache/tika/parser/html/HtmlParserTest.java    | 121 ++++------
 .../org/apache/tika/parser/html/tika-config.xml    |   4 +-
 .../tika/parser/mail/MailContentHandler.java       |   4 +-
 .../tika-parser-microsoft-module/pom.xml           |   2 -
 .../tika/parser/microsoft/JackcessExtractor.java   |   6 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |   6 +-
 .../tika/parser/microsoft/chm/ChmParser.java       |   6 +-
 .../tika-parser-xml-module/pom.xml                 |   4 -
 .../tika-parsers-standard-package/pom.xml          |   2 +-
 .../apache/tika/parser/TestXMLEntityExpansion.java |   4 +-
 .../java/org/apache/tika/parser/XMLTestBase.java   |   3 +-
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |   2 +-
 .../org/apache/tika/sax/BoilerpipeHandlerTest.java |  21 +-
 tika-server/tika-server-core/pom.xml               |   2 +-
 .../tika/server/core/resource/TikaResource.java    |   1 +
 tika-server/tika-server-standard/pom.xml           |   6 +-
 104 files changed, 744 insertions(+), 449 deletions(-)
 copy .github/workflows/{main-jdk17-build.yml => main-jdk21-build.yml} (96%)
 create mode 100644 tika-core/src/main/java/org/apache/tika/metadata/filter/CaptureGroupMetadataFilter.java
 delete mode 100644 tika-core/src/test/java/org/apache/tika/sax/ErrorResistantSAXParserFactory.java
 copy tika-core/src/test/resources/org/apache/tika/config/{TIKA-3137-exclude.xml => TIKA-4133-capture-group-overwrite.xml} (81%)
 copy tika-core/src/test/resources/org/apache/tika/config/{TIKA-3137-exclude.xml => TIKA-4133-capture-group.xml} (81%)
 create mode 100644 tika-handlers/README.md
 copy {tika-pipes/tika-emitters/tika-emitter-jdbc => tika-handlers}/pom.xml (70%)
 copy {tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-jdbc-commons => tika-handlers/tika-handler-boilerpipe}/pom.xml (66%)
 rename {tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-commons => tika-handlers/tika-handler-boilerpipe}/src/main/java/org/apache/tika/sax/boilerpipe/BoilerpipeContentHandler.java (100%)
 create mode 100644 tika-parsers/tika-parsers-extended/tika-parser-tagsoup-module/pom.xml
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURIScheme.java (98%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeParseException.java (95%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeUtil.java (98%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DefaultHtmlMapper.java (99%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlEncodingDetector.java (99%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlHandler.java (99%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlMapper.java (98%)
 rename tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlParser.java (99%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/IdentityHtmlMapper.java (96%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/XHTMLDowngradeHandler.java (98%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/CharsetAliases.java (97%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/CharsetDetectionResult.java (97%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/MetaProcessor.java (92%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/PreScanner.java (99%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/StandardHtmlEncodingDetector.java (95%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/charsets/ReplacementCharset.java (96%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/charsets/XUserDefinedCharset.java (96%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/main/resources/META-INF/services/org.apache.tika.detect.EncodingDetector (93%)
 copy {tika-core/src/test => tika-parsers/tika-parsers-extended/tika-parser-tagsoup-module/src/main}/resources/META-INF/services/org.apache.tika.parser.Parser (94%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/resources/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/main/resources/org/apache/tika/parser/html/tagsoup}/StandardCharsets_unsupported_by_IANA.txt (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeParserTest.java (96%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/HtmlEncodingDetectorTest.java (97%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/HtmlParserTest.java (99%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/SrcDocTest.java (97%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html => tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/StandardHtmlEncodingDetectorTest.java (98%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/org/apache/tika/parser/html/tika-config.xml (87%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/big-preamble.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/boilerplate-whitespace.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/boilerplate.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testBoilerplateMissingSpace.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLBadScript.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLGoodScript.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_1.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_2.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_3.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_4.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_charset_utf16le.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_charset_utf8.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_data_uri_js.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_img.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_img_in_js.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_head.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_metadata.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_metadata_two_titles.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_utf8.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testSrcDoc.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testUserDefinedCharset.mhtml (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testXHTML.html (100%)
 copy tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module => tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/tika434.html (100%)
 copy tika-parsers/tika-parsers-extended/{tika-parser-sqlite3-package => tika-parser-tagsoup-package}/pom.xml (62%)
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-commons/README.md
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-commons/pom.xml
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html/JSoupParser.java