You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2019/03/31 19:29:42 UTC
svn commit: r1856689 - in /poi/trunk/src:
java/org/apache/poi/poifs/filesystem/FileMagic.java
testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java
Author: centic
Date: Sun Mar 31 19:29:42 2019
New Revision: 1856689
URL: http://svn.apache.org/viewvc?rev=1856689&view=rev
Log:
Add some more variants of HTML with a preceding newline, which we see frequently
in the large regression test corpus.
Modified:
poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java
poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java
Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java?rev=1856689&r1=1856688&r2=1856689&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java Sun Mar 31 19:29:42 2019
@@ -78,7 +78,17 @@ public enum FileMagic {
/** PDF document */
PDF("%PDF"),
/** Some different HTML documents */
- HTML("<!DOCTYP".getBytes(UTF_8), "<html".getBytes(UTF_8), "<HTML".getBytes(UTF_8)),
+ HTML("<!DOCTYP".getBytes(UTF_8),
+ "<html".getBytes(UTF_8),
+ "\n\r<html".getBytes(UTF_8),
+ "\r\n<html".getBytes(UTF_8),
+ "\r<html".getBytes(UTF_8),
+ "\n<html".getBytes(UTF_8),
+ "<HTML".getBytes(UTF_8),
+ "\r\n<HTML".getBytes(UTF_8),
+ "\n\r<HTML".getBytes(UTF_8),
+ "\r<HTML".getBytes(UTF_8),
+ "\n<HTML".getBytes(UTF_8)),
WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}),
// keep UNKNOWN always as last enum!
/** UNKNOWN magic */
@@ -110,11 +120,12 @@ public enum FileMagic {
return UNKNOWN;
}
- private static boolean findMagic(byte[] cmp, byte[] actual) {
+ private static boolean findMagic(byte[] expected, byte[] actual) {
int i=0;
- for (byte m : cmp) {
- byte d = actual[i++];
- if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
+ for (byte expectedByte : expected) {
+ byte actualByte = actual[i++];
+ if ((actualByte != expectedByte &&
+ (expectedByte != 0x70 || (actualByte != 0x10 && actualByte != 0x20 && actualByte != 0x40)))) {
return false;
}
}
Modified: poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java?rev=1856689&r1=1856688&r2=1856689&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java Sun Mar 31 19:29:42 2019
@@ -38,6 +38,10 @@ public class TestFileMagic {
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYPE".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(Charsets.UTF_8)));
+ assertEquals(FileMagic.HTML, FileMagic.valueOf("\n\r<html".getBytes(Charsets.UTF_8)));
+ assertEquals(FileMagic.HTML, FileMagic.valueOf("\n<html".getBytes(Charsets.UTF_8)));
+ assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8)));
+ assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8)));
try {
FileMagic.valueOf("some string");
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org