You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/11/16 17:18:46 UTC
svn commit: r880827 - /lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java
Author: jukka
Date: Mon Nov 16 16:18:46 2009
New Revision: 880827
URL: http://svn.apache.org/viewvc?rev=880827&view=rev
Log:
TIKA-321: Optimize type detection speed
Use the new MagicDetector class in MagicMatch to avoid costly BigInteger calculations.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java?rev=880827&r1=880826&r2=880827&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java Mon Nov 16 16:18:46 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -16,43 +16,56 @@
*/
package org.apache.tika.mime;
-// JDK imports
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.math.BigInteger;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.tika.detect.MagicDetector;
+import org.apache.tika.metadata.Metadata;
/**
* Defines a magic match.
- *
- *
*/
class MagicMatch implements Clause {
- private int offsetStart;
-
- private int offsetEnd;
-
- private String type;
+ private static final MediaType MATCH =
+ new MediaType("x-tika", "magic-match");
- private BigInteger mask;
+ private final int length;
- private BigInteger value;
-
- private int length;
+ private final MagicDetector detector;
MagicMatch(int offsetStart, int offsetEnd, String type, String mask,
String value) throws MimeTypeException {
- this.offsetStart = offsetStart;
- this.offsetEnd = offsetEnd;
- this.type = type;
-
- byte[] decoded = decodeValue(type, value);
- this.length = decoded.length;
- this.value = new BigInteger(decoded);
+ byte[] patternBytes = decodeValue(type, value);
+ byte[] maskBytes;
if (mask != null) {
- this.mask = new BigInteger(decodeValue(type, mask));
- this.value = this.value.and(this.mask);
+ maskBytes = decodeValue(type, mask);
+ } else {
+ maskBytes = new byte[patternBytes.length];
+ Arrays.fill(maskBytes, (byte) 0xff);
+ }
+ this.length = Math.max(patternBytes.length, maskBytes.length);
+
+ if (patternBytes.length < length) {
+ byte[] buffer = new byte[length];
+ System.arraycopy(patternBytes, 0, buffer, 0, patternBytes.length);
+ patternBytes = buffer;
+ } else if (maskBytes.length < length) {
+ byte[] buffer = new byte[length];
+ Arrays.fill(buffer, (byte) 0xff);
+ System.arraycopy(maskBytes, 0, buffer, 0, maskBytes.length);
+ maskBytes = buffer;
}
+
+ for (int i = 0; i < length; i++) {
+ patternBytes[i] &= maskBytes[i];
+ }
+
+ this.detector = new MagicDetector(
+ MATCH, patternBytes, maskBytes, offsetStart, offsetEnd);
}
private byte[] decodeValue(String type, String value)
@@ -148,23 +161,13 @@
}
public boolean eval(byte[] data) {
- for (int i = offsetStart; i <= offsetEnd; i++) {
- if (data.length < (this.length + i)) {
- // Not enough data...
- return false;
- }
- byte[] array = new byte[this.length];
- System.arraycopy(data, i, array, 0, this.length);
- BigInteger content = new BigInteger(array);
- // System.out.println("Evaluating " + content);
- if (mask != null) {
- content = content.and(mask);
- }
- if (value.equals(content)) {
- return true;
- }
+ try {
+ return detector.detect(
+ new ByteArrayInputStream(data), new Metadata()) == MATCH;
+ } catch (IOException e) {
+ // Should never happen with a ByteArrayInputStream
+ return false;
}
- return false;
}
public int size() {
@@ -172,8 +175,7 @@
}
public String toString() {
- return "[" + offsetStart + ":" + offsetEnd
- + "(" + type + ")-" + mask + "#" + value + "]";
+ return detector.toString();
}
}