You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2023/06/08 21:11:12 UTC

[tika] branch TIKA-4060 updated: Hex values in a match regex need escaping to be treated as hex

This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch TIKA-4060
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/TIKA-4060 by this push:
     new 04021e427 Hex values in a match regex need escaping to be treated as hex
04021e427 is described below

commit 04021e4276606bb2ca8837444651da049f21c222
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu Jun 8 21:55:49 2023 +0100

    Hex values in a match regex need escaping to be treated as hex
---
 tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 461ad6128..5c8cbbcb1 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5627,12 +5627,12 @@
     <glob pattern="*.aac"/>
     <magic priority="30">
       <!-- Without ID3 tags -->
-      <match type="regex" value="FF(F0|F1|F8|F9)(40|41|44|45|48|49|4C|4D|50|51|54|55|58|59|5C|5D|60|61|64|65|68|69|6C|6D|70|71|80|81|84|85|88|89|8C|8D|90|91|94|95|98|99|9C|9D|A0|A1|A4|A5|A8|A9|AC|AD|B0|B1)(00|01|20|40|41|60|80|81|60|A0|C0|C1|E0)" offset="0" />
+      <match type="regex" value="\\xFF(\\xF0|\\xF1|\\xF8|\\xF9)(\\x40|\\x41|\\x44|\\x45|\\x48|\\x49|\\x4C|\\x4D|\\x50|\\x51|\\x54|\\x55|\\x58|\\x59|\\x5C|\\x5D|\\x60|\\x61|\\x64|\\x65|\\x68|\\x69|\\x6C|\\x6D|\\x70|\\x71|\\x80|\\x81|\\x84|\\x85|\\x88|\\x89|\\x8C|\\x8D|\\x90|\\x91|\\x94|\\x95|\\x98|\\x99|\\x9C|\\x9D|\\xA0|\\xA1|\\xA4|\\xA5|\\xA8|\\xA9|\\xAC|\\xAD|\\xB0|\\xB1)(\\x00|\\x01|\\x20|\\x40|\\x41|\\x60|\\x80|\\x81|\\x60|\\xA0|\\xC0|\\xC1|\\xE0)" offset="0" />
     </magic>
     <magic priority="40">
       <!-- With ID3 tags at the start -->
       <match value="ID3" type="string" offset="0">
-         <match type="regex" value="FF(F0|F1|F8|F9)(40|41|44|45|48|49|4C|4D|50|51|54|55|58|59|5C|5D|60|61|64|65|68|69|6C|6D|70|71|80|81|84|85|88|89|8C|8D|90|91|94|95|98|99|9C|9D|A0|A1|A4|A5|A8|A9|AC|AD|B0|B1)(00|01|20|40|41|60|80|81|60|A0|C0|C1|E0)" offset="512:2048" />
+         <match type="regex" value="\\xFF(\\xF0|\\xF1|\\xF8|\\xF9)(\\x40|\\x41|\\x44|\\x45|\\x48|\\x49|\\x4C|\\x4D|\\x50|\\x51|\\x54|\\x55|\\x58|\\x59|\\x5C|\\x5D|\\x60|\\x61|\\x64|\\x65|\\x68|\\x69|\\x6C|\\x6D|\\x70|\\x71|\\x80|\\x81|\\x84|\\x85|\\x88|\\x89|\\x8C|\\x8D|\\x90|\\x91|\\x94|\\x95|\\x98|\\x99|\\x9C|\\x9D|\\xA0|\\xA1|\\xA4|\\xA5|\\xA8|\\xA9|\\xAC|\\xAD|\\xB0|\\xB1)(\\x00|\\x01|\\x20|\\x40|\\x41|\\x60|\\x80|\\x81|\\x60|\\xA0|\\xC0|\\xC1|\\xE0)" offset="512:2048" />
       </match>
     </magic>
   </mime-type>