You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commons-dev@ws.apache.org by sc...@apache.org on 2007/04/14 00:01:35 UTC

svn commit: r528680 - in /webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments: ./ utils/

Author: scheu
Date: Fri Apr 13 15:01:34 2007
New Revision: 528680

URL: http://svn.apache.org/viewvc?view=rev&rev=528680
Log:
WSCOMMONS-179
Contributor:Rich Scheuerle
Addition of ByteSearch utility which is a performant byte searching algorithm.  Upgraded several of the Axiom attachment classes to use
buffering and the ByteSearch utility to improve speed.

I have run all axis2 and axiom tests with this change.  Please let me know if other improvements are needed.

Thanks!

Added:
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/BoundaryPushbackInputStream.java
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/utils/ByteSearch.java
Modified:
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/Attachments.java
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/MIMEBodyPartInputStream.java
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/PushbackFilePartInputStream.java

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/Attachments.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/Attachments.java?view=diff&rev=528680&r1=528679&r2=528680
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/Attachments.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/Attachments.java Fri Apr 13 15:01:34 2007
@@ -53,6 +53,7 @@
      * has the ability to "push back" or "unread" one byte.
      */
     PushbackInputStream pushbackInStream;
+    int PUSHBACK_SIZE = 1000;
 
     /**
      * <code>attachmentsMap</code> stores the Data Handlers of the already parsed Mime Body Parts.
@@ -141,7 +142,7 @@
         // do we need to wrap InputStream from a BufferedInputStream before
         // wrapping from PushbackStream
         pushbackInStream = new PushbackInputStream(inStream,
-                                                   (this.boundary.length + 2));
+                                                   PUSHBACK_SIZE);
 
         // Move the read pointer to the beginning of the first part
         // read till the end of first boundary
@@ -479,7 +480,7 @@
                     MIMEBodyPartInputStream partStream;
                     byte[] buffer = new byte[fileStorageThreshold];
                     partStream = new MIMEBodyPartInputStream(pushbackInStream,
-                                                             boundary, this);
+                                                             boundary, this, PUSHBACK_SIZE);
                     int count = 0;
                     do {
                         int len;
@@ -508,7 +509,7 @@
             } else {
                 MIMEBodyPartInputStream partStream;
                 partStream = new MIMEBodyPartInputStream(pushbackInStream,
-                                                         boundary, this);
+                                                         boundary, this, PUSHBACK_SIZE);
                 part = new PartOnMemory(partStream);
             }
 

Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/BoundaryPushbackInputStream.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/BoundaryPushbackInputStream.java?view=auto&rev=528680
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/BoundaryPushbackInputStream.java (added)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/BoundaryPushbackInputStream.java Fri Apr 13 15:01:34 2007
@@ -0,0 +1,315 @@
+/*
+ * Copyright 2004,2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.axiom.attachments;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+import org.apache.axiom.attachments.utils.ByteSearch;
+
+/**
+ * An InputStream that reads bytes up to a boundary.
+ * The boundary is not logically part of the bytes to read.
+ * The wrapped PushbackInputStream is set to to the byte after
+ * the boundary once the bytes are read.
+ * The boundary is not logically returned.
+ * 
+ * There are two forms that are supported, where . is a byte 
+ * 
+ * .......................boundary
+ * 
+ * and
+ * 
+ * ..................../r/nboundary
+ * 
+ * In both cases, only the bytes (.) are returned.
+ *
+ */
+public class BoundaryPushbackInputStream extends InputStream {
+    PushbackInputStream is;
+
+    boolean boundaryFound;
+    byte[] boundary;
+    int rnBoundaryLen;  // '/r/nboundary' length
+    
+    byte[] buffer;
+    int bufferSize;     // BufferSize
+    int numBytes;       // Number of bytes in the buffer
+    int index = -1;     // Current index in buffer
+    int bIndex = -1;    // Index of boundary or /r/nboundary
+    final int MIN_BUF_SIZE = 32;
+    protected static final int BOUNDARY_NT_FOUND = -1;
+    
+    // Skip search array
+    private short[] skip = null;
+
+    /**
+     * @param inStream
+     * @param boundary
+     * @param pushBackSize
+     */
+    public BoundaryPushbackInputStream(PushbackInputStream inStream, byte[] boundary, int pushBackSize) {
+        super();
+        this.is = inStream;
+        this.boundary = boundary;
+        this.rnBoundaryLen = boundary.length + 2;
+        this.bufferSize = Math.max(rnBoundaryLen * 2, pushBackSize);
+    }
+    
+    /**
+     * Method readFromStream
+     *
+     * @param b
+     * @param start
+     * @param length
+     *
+     * @return
+     *
+     * @throws java.io.IOException
+     */
+    private final int readFromStream(
+            final byte[] b, final int start, final int length)
+            throws java.io.IOException {
+
+        // We need to make sure to capture enough data to 
+        // actually search for the rn + boundary
+        int minRead = Math.max(rnBoundaryLen * 2, length);
+        minRead = Math.min(minRead, length - start);
+
+        int br = 0;
+        int brTotal = 0;
+
+        do {
+            // Read data into the buffer
+            br = is.read(b, brTotal + start, length - brTotal);
+
+            if (br > 0) {
+                brTotal += br;
+            }
+        } while ((br > -1) && (brTotal < minRead));
+
+        return (brTotal != 0)
+                ? brTotal
+                : br;
+    }
+    
+    
+    /**
+     * @param b
+     * @return
+     * @throws java.io.IOException
+     */
+    private final int readFromStream(final byte[] b)
+            throws java.io.IOException {
+        return readFromStream(b, 0, b.length);
+    }
+
+    /* (non-Javadoc)
+     * @see java.io.InputStream#read(byte[])
+     */
+    public int read(byte[] b) throws java.io.IOException {
+        return read(b, 0, b.length);
+    }
+    
+    /**
+     * Read from the boundary delimited stream.
+     * Generally, this won't be called...callers will
+     * most likely call the read(byte[]..) methods
+     * @return The byte read, or -1 if endof stream.
+     *
+     * @throws java.io.IOException
+     */
+    public int read() throws java.io.IOException {
+
+        byte[] b = new byte[1];    
+        int read = read(b);
+
+        if (read < 0) {
+            return -1;
+        } else {
+            return b[0];
+        }
+    }
+
+    /**
+     * Read from the boundary delimited stream.
+     * @param b is the array to read into.
+     * @param off is the offset
+     * @param len
+     * @return the number of bytes read. -1 if endof stream.
+     *
+     * @throws java.io.IOException
+     */
+    public synchronized int read(byte[] b, final int off, final int len)
+            throws java.io.IOException {
+
+        // If already found the buffer, then we are done
+        if (boundaryFound) {
+            return -1;
+        }
+
+        // The first time called, read a chunk of data
+        if (buffer == null) {    // Allocate the buffer.
+            buffer = new byte[bufferSize];
+            numBytes = readFromStream(buffer);
+
+            if (numBytes < 0) {
+                buffer = null;
+                boundaryFound = true;
+            }
+
+            index = 0;
+
+            // Finds the boundary pos.
+            bIndex = boundaryPosition(buffer, index, numBytes);
+            if (bIndex >=0) {
+                unread();  // Unread pushback inputstream
+            }
+        }
+
+        int bwritten = 0;    // Number of bytes written to b
+
+        
+        do {
+            
+            // Never read to the of the buffer because
+            // the boundary may span buffers.
+            int bcopy = Math.min((numBytes - rnBoundaryLen) - index,
+                    len - bwritten);
+
+            // Never read past the boundary
+            if (bIndex >= 0) {
+                bcopy = Math.min(bcopy, bIndex - index);
+            }
+
+            // Copy the bytes 
+            if (bcopy > 0) {
+                System.arraycopy(buffer, index, b, off + bwritten, bcopy);
+
+                bwritten += bcopy;
+                index += bcopy;
+            }
+
+            if (index == bIndex) {
+                boundaryFound = true;  
+
+            } else if (bwritten < len) {    
+                
+                // If more data is needed,
+                // create a temporary buffer to span 
+                // the straggling bytes in the current buffer
+                // and the new yet unread bytes
+                byte[] dstbuf = buffer;
+
+                // Move straggling bytes from the current buffer
+                int movecnt = numBytes - index;
+                System.arraycopy(buffer, index, dstbuf, 0, movecnt);
+
+                // Read in the new data.
+                int readcnt = readFromStream(dstbuf, movecnt,
+                        dstbuf.length - movecnt);
+
+                if (readcnt < 0) {
+                    buffer = null;
+                    boundaryFound = true;
+                    throw new java.io.IOException("End of Stream, but boundary not found");
+                }
+
+                numBytes = readcnt + movecnt;
+                buffer = dstbuf;
+                index = 0;             // start at the begining.
+
+                // just move the boundary by what we moved
+                if (bIndex >=0) {
+                    bIndex -= movecnt;
+                } else {
+                    bIndex = boundaryPosition(
+                            buffer, index,
+                            numBytes);       
+                    if (bIndex >= 0) {
+                        unread();  // Unread pushback inputstream
+                    }
+                }
+            }
+        }
+        // read till we get the amount or the stream is finished.
+        while (!boundaryFound && (bwritten < len));
+
+        if (boundaryFound) {
+            buffer = null;    // GC the buffer
+        }
+
+        return bwritten;
+    }
+    
+    /**
+     * Unread the bytes past the buffer
+     */
+    private void unread() throws IOException {
+        
+        int i = bIndex;
+        if (buffer[i] == 13) { // If /r, must be /r/nboundary
+            i = i + this.rnBoundaryLen;
+        } else { 
+            i = i + boundary.length;
+        }
+        if (numBytes - i > 0) {
+            is.unread(buffer, i, numBytes - i);
+        }
+    }
+    
+    /**
+     * Read from the boundary delimited stream.
+     *
+     * @param searchbuf
+     * @param start
+     * @param end
+     * @return The position of the boundary.
+     *
+     */
+    protected int boundaryPosition(byte[] searchbuf, int start, int end) throws java.io.IOException  {
+
+        if (skip == null) {
+            skip = ByteSearch.getSkipArray(boundary, true);
+        }
+        int foundAt = ByteSearch.skipSearch(boundary, true,searchbuf, start, end, skip);
+
+        // First find the boundary marker
+        if (foundAt >= 0) {    // Something was found.
+            if (foundAt + rnBoundaryLen > end) {
+                foundAt = -1;  // Not sure why this check is done
+            }
+        }
+        
+        // Backup 2 if the boundary is preceeded by /r/n
+        // The /r/n are treated as part of the boundary
+        if (foundAt >=2) {
+            if (searchbuf[foundAt-2] == 13 &&
+                searchbuf[foundAt-1] == 10) {
+                foundAt = foundAt -2;
+            }
+        }
+
+        return foundAt;
+    }
+    
+    public boolean getBoundaryStatus()
+    {
+        return boundaryFound;
+    }
+}
\ No newline at end of file

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/MIMEBodyPartInputStream.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/MIMEBodyPartInputStream.java?view=diff&rev=528680&r1=528679&r2=528680
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/MIMEBodyPartInputStream.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/MIMEBodyPartInputStream.java Fri Apr 13 15:01:34 2007
@@ -20,88 +20,138 @@
 import java.io.InputStream;
 import java.io.PushbackInputStream;
 
+/**
+ * @author scheu
+ *
+ */
 public class MIMEBodyPartInputStream extends InputStream {
+    BoundaryPushbackInputStream bpis;
     PushbackInputStream inStream;
+    Attachments parent = null;
+    boolean done = false;
 
-    boolean boundaryFound;
-
-    Attachments parent;
-
-    byte[] boundary;
-
+    /**
+     * @param inStream
+     * @param boundary
+     */
     public MIMEBodyPartInputStream(PushbackInputStream inStream, byte[] boundary) {
-        super();
-        this.inStream = inStream;
-        this.boundary = boundary;
+        this (inStream, boundary, null, boundary.length + 2);
     }
 
+    /**
+     * @param inStream
+     * @param boundary
+     * @param parent
+     */
+    public MIMEBodyPartInputStream(PushbackInputStream inStream, byte[] boundary, Attachments parent) {
+        this (inStream, boundary, parent, boundary.length + 2);
+    }
+    
+    /**
+     * @param inStream
+     * @param boundary
+     * @param parent
+     * @param pushbacksize <= size of pushback buffer on inStream
+     */
     public MIMEBodyPartInputStream(PushbackInputStream inStream,
-                                   byte[] boundary, Attachments parent) {
-        this(inStream, boundary);
+            byte[] boundary, Attachments parent, int pushbacksize) {
+        bpis = new BoundaryPushbackInputStream(inStream, boundary, pushbacksize);
+        this.inStream = inStream;
         this.parent = parent;
     }
 
+    /* (non-Javadoc)
+     * @see java.io.InputStream#read()
+     */
     public int read() throws IOException {
-        if (boundaryFound) {
+        if (done) {
             return -1;
         }
-        // read the next value from stream
-        int value = inStream.read();
-        // A problem occured because all the mime parts tends to have a /r/n at
-        // the end. Making it hard to transform them to correct DataSources.
-        // This logic introduced to handle it
-        //TODO look more in to this && for a better way to do this
-        if (value == 13) {
-            value = inStream.read();
-            if (value != 10) {
-                inStream.unread(value);
-                return 13;
-            } else {
-                value = inStream.read();
-                if ((byte) value != boundary[0]) {
-                    inStream.unread(value);
-                    inStream.unread(10);
-                    return 13;
-                }
-            }
-        } else {
-            return value;
+        int rc = bpis.read();
+        if (getBoundaryStatus()) {
+            finish();
         }
+        return rc;
+    }
 
-        // read value is the first byte of the boundary. Start matching the
-        // next characters to find a boundary
-        int boundaryIndex = 0;
-        while ((boundaryIndex < (boundary.length - 1))
-                && ((byte) value == boundary[boundaryIndex])) {
-            value = inStream.read();
-            boundaryIndex++;
+    /* (non-Javadoc)
+     * @see java.io.InputStream#read(byte[], int, int)
+     */
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (done) {
+            return 0;
+        } 
+        int rc = bpis.read(b, off, len);
+        if (getBoundaryStatus()) {
+            finish();
         }
+        return rc;
+    }
 
-        if (boundaryIndex == (boundary.length - 1)) { // boundary found
-            boundaryFound = true;
-            // read the end of line character
-            if ((value = inStream.read()) == 45) {
-                //check whether end of stream
-                //Last mime boundary should have a succeeding "--"
-                if ((value = inStream.read()) == 45 && parent != null) {
-                    parent.setEndOfStream(true);
-                }
-            } else {
-                inStream.read();
-            }
-
-            return -1;
+    /* (non-Javadoc)
+     * @see java.io.InputStream#read(byte[])
+     */
+    public int read(byte[] b) throws IOException {
+        if (done) {
+            return 0;
+        } 
+        int rc = bpis.read(b);
+        if (getBoundaryStatus()) {
+            finish();
         }
+        return rc;
+    }
 
-        // Boundary not found. Restoring bytes skipped.
-        // write first skipped byte, push back the rest
-
-        if (value != -1) { // Stream might have ended
-            inStream.unread(value);
+    /**
+     * Called when done reading
+     * This method detects trailing -- and alerts the parent
+     * @throws IOException
+     */
+    private void finish() throws IOException {
+        if (!done) {
+            int one = inStream.read();
+            
+            // Accept --
+            if (one != -1) {
+                int two = inStream.read();
+                if (two != -1) {
+                    if (one == 45 && two == 45) {
+                        // Accept --
+                        if (parent != null) {
+                            parent.setEndOfStream(true);
+                        }
+                    } else {
+                        inStream.unread(two);
+                        inStream.unread(one);
+                    }
+                } else {
+                    inStream.unread(one);
+                }
+            }
+            
+            one = inStream.read();
+            
+            // Accept /r/n
+            if (one != -1) {
+                int two = inStream.read();
+                if (two != -1) {
+                    if (one == 13 && two == 10) {
+                        // Accept /r/n and continue
+                    } else {
+                        inStream.unread(two);
+                        inStream.unread(one);
+                    }
+                } else {
+                    inStream.unread(one);
+                }
+            }
         }
-
-        inStream.unread(boundary, 0, boundaryIndex);
-        inStream.unread(10);
-        return 13;
+        done = true;
+    }
+    
+    
+    public boolean getBoundaryStatus()
+    {
+        return bpis.getBoundaryStatus();
     }
 }

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/PushbackFilePartInputStream.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/PushbackFilePartInputStream.java?view=diff&rev=528680&r1=528679&r2=528680
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/PushbackFilePartInputStream.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/PushbackFilePartInputStream.java Fri Apr 13 15:01:34 2007
@@ -52,30 +52,31 @@
         return data;
     }
 
-    public int read(byte b[], int off, int len) throws IOException {
+    public int read(byte[] b, int off, int len) throws IOException {
         if (count > 0) {
-            if (b == null) {
-                throw new NullPointerException();
-            } else if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length)
-                    || ((off + len) < 0)) {
-                throw new IndexOutOfBoundsException();
-            } else if (len == 0) {
-                return 0;
+            // Get the start of the internal buffer and the length to copy
+            int start = buffer.length - count;
+            int copyLen = Math.min(len, count); 
+            
+            // Copy the bytes to b
+            System.arraycopy(buffer, start, b, off, copyLen);
+            count = count - copyLen;
+            
+            // If more bytes are needed, read them from the inStream
+            if (len > copyLen) {
+                return inStream.read(b, off + copyLen, len - copyLen) + copyLen; 
+            } else {
+                return len;
             }
-            int bytesCopied;
-            if (count < len) {
-                System.arraycopy(buffer, (buffer.length - count), b, off, count);
-                bytesCopied = count;
-                count = 0;
-                return bytesCopied;
-            }
-            System.arraycopy(buffer, (buffer.length - count), b, off, len);
-            count -= len;
-            return len;
+        } else {
+            return inStream.read(b, off, len);
         }
-        return inStream.read(b, off, len);
     }
 
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+    
     public int available() throws IOException {
         return count + inStream.available();
     }

Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/utils/ByteSearch.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/utils/ByteSearch.java?view=auto&rev=528680
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/utils/ByteSearch.java (added)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/attachments/utils/ByteSearch.java Fri Apr 13 15:01:34 2007
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2004,2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.axiom.attachments.utils;
+
+/**
+ * ByteSearch
+ *
+ * Various byte array searching utilities.
+ * This includes a "skip search", which is a 
+ * an optimized search for finding a byte pattern in 
+ * a large byte array.
+ *
+ * @author Richard Scheuerle (scheu@us.ibm.com)
+ *
+ */
+public class ByteSearch {
+
+    
+    /**
+     * skipSearch
+     * Searches the com.ibm.env.source byte stream for the pattern
+     * in the indicated direction.
+     * @param pattern  byte[] 
+     * @param direction true if forward, false if backward
+     * @param buffer byte[] to search
+     * @param start index to start search
+     * @param end index to end search (end index is not within the search)
+     * @param skipArray short[256]  A skipArray generated from a call to 
+     * generateSkipArray. 
+     * @return index or -1 if not found
+     */
+    public static int skipSearch(byte[] pattern,
+                                 boolean direction,
+                                 byte[] buffer,
+                                 int start,
+                                 int end,
+                                 short[] skip) {
+        
+        int patternLength = pattern.length;
+        
+        // If patternLength is larger than buffer,
+        // return not found
+        if (patternLength > (end - start)) {
+            return -1;
+        }
+        
+        if (direction) {
+            int k = 0;
+            for (k = start + patternLength - 1; 
+                k < end; // end is exclusive
+                k += skip[buffer[k] & (0xff)])   // SKIP NOTE below
+            {
+                try {
+                    // k is the location in the buffer
+                    // that may match the last byte in the pattern.
+                    if (isEqual(pattern, buffer, (k-patternLength)+1, end)) {
+                        return (k-patternLength)+1;
+                    }
+                    // SKIP NOTE: The next k index is calculated from 
+                    // the skip array.  Basically if the k byte is not
+                    // within the pattern, we skip ahead the length of the
+                    // pattern.  Otherwise we skip ahead a distance less
+                    // than the length.
+                } catch (ArrayIndexOutOfBoundsException e) {
+                    throw e;
+                }
+            }
+        } else {
+            for (int k = end - patternLength; 
+                k <= start;
+                k -= skip[buffer[k] & (0xff)]) 
+            {
+                try {
+                    // k is the location in the buffer
+                    // that may match the first byte in the pattern.
+                    if (isEqual(pattern, buffer, k, end)) {
+                        return k;
+                    }
+                } catch (ArrayIndexOutOfBoundsException e) {
+                    throw e;
+                }
+            }
+        }
+        return -1;            
+    }
+    
+    /**
+     * skipArray
+     * Builds a skip array for this pattern and direction.
+     * The skipArray is used in the optimized skipSearch
+     * @param pattern
+     * @param direction
+     * @return short[256]
+     */
+    public static short[] getSkipArray(byte[] pattern,
+                    boolean direction) {
+        
+        // The index key is a byte.
+        // The short[key] is the number of bytes that can
+        // be skipped that won't match the pattern
+        short[] skip = new short[256];
+
+        // If a byte is not within pattern, then we can
+        // skip ahead the entire length of the pattern.
+        // So fill the skip array with the pattern length
+        java.util.Arrays.fill(skip, (short) pattern.length);
+
+        if (direction) {
+            // If the byte is found in the pattern,
+            // this affects how far we can skip.
+            // The skip distance is the distance of the
+            // character from the end of the pattern.
+            // The last character in the pattern is excluded.
+            for (int k = 0; k < pattern.length -1; k++) {
+                skip[pattern[k] & (0xff)] = (short)(pattern.length - k - 1);
+            }
+        } else {
+            for (int k = pattern.length-2; k >= 0; k--) {
+                skip[pattern[k] &(0xff)] = (short)(pattern.length - k - 1);
+            }
+        }
+        return skip;
+    }
+    
+    /**
+     * 
+     * isEqual
+     * @param pattern
+     * @param buffer
+     * @param start index
+     * @param end index
+     * @return true if the bytes in buffer[start] equal pattern
+     * 
+     */
+    public static boolean isEqual(byte[] pattern, byte[] buffer, int start, int end) {
+//        if (pattern.length >= end-start) {
+        if (pattern.length > end-start) {
+            return false;
+        }
+        for (int j=0; j<pattern.length; j++) {
+            if (pattern[j] != buffer[start+j]) {
+                return false;
+            }
+        }
+        return true;
+    }
+    
+    /**
+     * search
+     * Look for the search bytes in the bytes array using a straight search.
+     * Use the skip search if the number of bytes to search is large or
+     * if this search is performed frequently.
+     * @param search byte[]
+     * @param bytes byte[] to search
+     * @param start starting index
+     * @param end end index (exclusive
+     * @param direction boolean (true indicates forward search, false is backwards search
+     * @return index or -1
+     */
+    public static int search(byte[] search, 
+            byte[] bytes, int start, int end, 
+            boolean direction) {
+        
+        int idx = -1;
+        if (search == null || search.length == 0 ||
+            bytes == null || bytes.length == 0 ||
+            start < 0 || end <= 0) {
+            return idx;
+        }
+        
+        // Search byte bytes array
+        if (direction) {
+            for (int i=start; idx < 0 && i< end; i++) {
+                if (bytes[i] == search[0]) {
+                    // Potential match..check remaining bytes
+                    boolean found = true;  // assume found
+                    for (int i2=1; found && i2<search.length; i2++) {
+                        if (i+i2 >= end) {
+                            found = false;
+                        } else {
+                            found = (bytes[i+i2] == search[i2]); 
+                        }
+                    }
+                    // If found match, set return idx
+                    if (found) {
+                        idx = i;
+                    }
+                }
+            }
+        } else {
+            for (int i=end-1; idx < 0 && i>=start; i--) {
+                if (bytes[i] == search[0]) {
+                    // Potential match..check remaining bytes
+                    boolean found = true;  // assume found
+                    for (int i2=1; found && i2<search.length; i2++) {
+                        if (i+i2 >= end) {
+                            found = false;
+                        } else {
+                            found = (bytes[i+i2] == search[i2]); 
+                        }
+                    }
+                    // If found match, set return idx
+                    if (found) {
+                        idx = i;
+                    }
+                }
+            }
+        }
+
+        return idx;
+    }
+}
+
+ 



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@ws.apache.org
For additional commands, e-mail: commons-dev-help@ws.apache.org