You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/02/10 18:39:26 UTC
[tika] branch main updated: add writeContent option to RegexCaptureParser
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 91f7af73a add writeContent option to RegexCaptureParser
91f7af73a is described below
commit 91f7af73a61f098cb338c27f4e6cf1ab6541e470
Author: tballison <ta...@apache.org>
AuthorDate: Fri Feb 10 13:39:16 2023 -0500
add writeContent option to RegexCaptureParser
---
.../main/java/org/apache/tika/parser/RegexCaptureParser.java | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RegexCaptureParser.java b/tika-core/src/main/java/org/apache/tika/parser/RegexCaptureParser.java
index beb3d9d82..5a07f3c5b 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RegexCaptureParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RegexCaptureParser.java
@@ -65,6 +65,8 @@ public class RegexCaptureParser extends AbstractParser implements Initializable
return SUPPORTED_TYPES;
}
+ private boolean writeContent = false;
+
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
@@ -99,6 +101,10 @@ public class RegexCaptureParser extends AbstractParser implements Initializable
metadata.set(e.getKey(), "true");
}
}
+ if (writeContent) {
+ char[] chars = line.toCharArray();
+ handler.characters(chars, 0, chars.length);
+ }
line = reader.readLine();
}
for (Map.Entry<String, Set<String>> e : keyVals.entrySet()) {
@@ -135,4 +141,9 @@ public class RegexCaptureParser extends AbstractParser implements Initializable
matchMap.put(field, pattern);
}
}
+
+ @Field
+ public void setWriteContent(boolean writeContent) {
+ this.writeContent = writeContent;
+ }
}