You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/07/15 19:08:08 UTC
[tika] branch main updated: TIKA-3131 -- swap default values of averageCharTolerance and spacingTolerance to match PDFBox defaults (#325)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 5f747ac TIKA-3131 -- swap default values of averageCharTolerance and spacingTolerance to match PDFBox defaults (#325)
5f747ac is described below
commit 5f747ac3c7d19224cd9d9086346251096c1109fc
Author: Clark Perkins <cl...@users.noreply.github.com>
AuthorDate: Wed Jul 15 14:08:01 2020 -0500
TIKA-3131 -- swap default values of averageCharTolerance and spacingTolerance to match PDFBox defaults (#325)
---
.../src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index 81d7e0f..bb588df 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -119,11 +119,11 @@ public class PDFParserConfig implements Serializable {
//The character width-based tolerance value used to estimate where spaces in text should be added
//Default taken from PDFBox.
- private Float averageCharTolerance = 0.5f;
+ private Float averageCharTolerance = 0.3f;
//The space width-based tolerance value used to estimate where spaces in text should be added
//Default taken from PDFBox.
- private Float spacingTolerance = 0.3f;
+ private Float spacingTolerance = 0.5f;
// The multiplication factor for line height to decide when a new paragraph starts.
//Default taken from PDFBox.