You are viewing a plain text version of this content. The canonical link for it is here.
Posted to derby-commits@db.apache.org by kr...@apache.org on 2007/03/26 08:48:38 UTC

svn commit: r522440 - /db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/EmbedClob.java

Author: kristwaa
Date: Sun Mar 25 23:48:37 2007
New Revision: 522440

URL: http://svn.apache.org/viewvc?view=rev&rev=522440
Log:
DERBY-2450: Clob.Position returning wrong value when operating on Reader. Pattern-search algorithm rewritten.

Patch contributed by Anurag Shekhar.

Modified:
    db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/EmbedClob.java

Modified: db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/EmbedClob.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/EmbedClob.java?view=diff&rev=522440&r1=522439&r2=522440
==============================================================================
--- db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/EmbedClob.java (original)
+++ db/derby/code/trunk/java/engine/org/apache/derby/impl/jdbc/EmbedClob.java Sun Mar 25 23:48:37 2007
@@ -371,57 +371,22 @@
   /**
    * Determines the character position at which the specified substring
    * <code>searchstr</code> appears in the <code>CLOB</code>.  The search
-	* begins at position <code>start</code>. The method uses the following
-	* algorithm for the search
-	*
-	*
-	* 1)Is the length of the current pattern string to be matched greater than 256 ?
-	*
-	*	1.1)If "YES"
-	*		Extract the first 256 bytes as the current pattern to be matched
-	*
-	*		If "NO"
-	*		Make the pattern string itself as the current pattern to be matched
-	*
-	*	1.2)Initialize a variable that will indicate the character in the pattern
-	*		String being matched to zero. (say currPatternPos)
-	*
-	* 2)Read the 256 bytes of the Clob from the database
-	*
-	*	2.1)Initialize a variable that will indicate the current index in this array
-	*		to zero. (say currClobPos)
-	*	2.2)Exit if there are no more characters to be read in the Clob
-	*
-	* 3)Initialize a bestMatchPosition that will keep storing the next occurence of the 
-	*	first character in the pattern.This will be useful when we want to go back and 
-	*	start searching in the Clob array when a mismatch occurs.
-	*
-	* 4)Do the characters in currPatternPos and currClobPos match ?
-	*	4.1)If "YES" 
-	*
-	*		Increment currPatternPos and currClobPos. 
-	*
-	*		If currPatternPos is not 0 and the character in the 
-	*		currentClobPos is the same as the first character in the
-	*		pattern set bestMatchPosition = currentClobPos
-	*
-	*	4.2)If "No" 
-	*
-	*		set currClobPos = bestMatchPosition
-	*		set currPatternPos = 0
-	*
-	*	4.3)If currPatternPos > 256 
-	*		4.3.1)If "YES" 
-	*			  Return the current position in the Clob if all characters 
-	*			  have been matched otherwise perform step 1 to fetch the
-	*			  next 256 characters and increment matchCount
-	*		4.3.2)If "NO" repeat Step 4
-	*
-	*	4.4)If currClobPos > 256
-	*		4.4.1)If "YES"
-	*			  Repeat step 2 to fetch next 256 characters
-	*		4.4.2)If "NO"
-	*			  Repeat step 4
+   * begins at position <code>start</code>. The method uses the following
+   * algorithm for the search
+   * If the clob is materialized in string use String.indexOf
+   * else
+   * Read a block of 256 chars from start position
+   * compare the chars with the searchString
+   * If a match is found
+   * increment the matchCount
+   * if the matchCount is equal to length of searchString return 
+   * Remember the position where the stream has a char equal to the first char
+   * of the searchString. This position we will use to start next try for match
+   * if the current match fails.
+   * if a mismatch is found 
+   * start fresh match from the position remembered; if there is no position 
+   * found for next match, start with current position + 1
+   * 
    * @param searchStr the substring for which to search
    * @param start the position at which to begin searching; the first position
    *              is 1
@@ -460,200 +425,71 @@
             }
             else // we have a stream
             {
-				Object synchronization = getConnectionSynchronization();
+                Object synchronization = getConnectionSynchronization();
                 synchronized (synchronization)
                 {
                     pushStack = !getEmbedConnection().isClosed();
                     if (pushStack)
                         setupContextStack();
+                    int matchCount = 0;
+                    long pos = start - 1;
+                    long newStart = -1;
+                    Reader reader = getCharacterStreamAtPos (start, this);
+                    char [] tmpClob = new char [256];
+                    boolean reset;
+                    for (;;) {
+                        reset = false;
+                        int readCount = reader.read (tmpClob);
+                        if (readCount == -1)
+                            return -1;
+                        if (readCount == 0)
+                            continue;            
+                        for (int clobOffset = 0; 
+                                    clobOffset < readCount; clobOffset++) {
+                            if (tmpClob [clobOffset] 
+                                            == searchStr.charAt (matchCount)) {
+                                //find the new starting position in 
+                                // case this match is unsuccessful
+                                if (matchCount != 0 && newStart == -1 
+                                        && tmpClob [clobOffset] 
+                                        == searchStr.charAt (0)) {
+                                    newStart = pos + clobOffset + 1;
+                                }
+                                matchCount ++;
+                                if (matchCount == searchStr.length()) {
+                                    //return after converting the position 
+                                    //to 1 based index
+                                    return pos + clobOffset 
+                                            - searchStr.length() + 1 + 1;
+                                }
+                            }
+                            else {
+                                if (matchCount > 0) {
+                                    matchCount = 0;
+                                    if (newStart == -1) {
+                                        continue;
+                                    }
+                                    if (newStart < pos) {
+                                        pos = newStart;
+                                        reader.close();
+                                        reader = getCharacterStreamAtPos 
+                                                    (newStart + 1, this);
+                                        newStart = -1;
+                                        reset = true;
+                                        break;
+                                    }                        
+                                    clobOffset = (int) (newStart - pos) - 1;
+                                    newStart = -1;
+                                    continue;
+                                }
+                            }
+                        }
+                        if (!reset) {
+                            pos += readCount;
+                        }
+                    }
 
-					char[] tmpClob = new char[256];
-					int patternLength = searchStr.length();
-
-restartPattern:
-					for (;;) {
-
-					//System.out.println("RESET " + start);
-						UTF8Reader clobReader = getCharacterStreamAtPos(start, synchronization);
-						if (clobReader == null)
-							return -1;
-
-
-
-						// start of any match of the complete pattern.
-
-						int patternIndex = 0;
-						char[] tmpPattern = null;
-						boolean needPattern = true;
-
-						// how many characters of the patter segment we have matched
-						int matchCount = 0;
-
-						long currentPosition = start;
-						int clobOffset = -1;
-						int read = -1;
-
-						// absolute position of a possible match
-						long matchPosition = -1;
-
-
-						// absolute position of the next possible match
-						long nextBestMatchPosition = -1;
-						//System.out.println("restartPattern: " + start);
-
-
-search:
-						for (;;)
-						{
-							//System.out.println("search: " + needPattern + " -- " + clobOffset);
-							if (needPattern) {
-
-								String tmpPatternS;
-								//Keep extracting substrings of length 256 from the pattern string
-								//and use these substrings for comparison with the data from the Clob
-								//if the subString remaining has a length > 256 then extract 256 bytes
-								//and return it
-								//otherwise return the remaining string 
-								if ((patternLength - patternIndex) > 256)
-									tmpPatternS = searchStr.substring(patternIndex , patternIndex + 256);
-								else
-									tmpPatternS = searchStr.substring(patternIndex , patternLength);
-
-								tmpPattern = tmpPatternS.toCharArray();
-								needPattern = false;
-								matchCount = 0;
-
-							}
-
-							if (clobOffset == -1) {
-								
-								read = clobReader.read(tmpClob, 0, tmpClob.length);
-							//System.out.println("MORE DATA " + read);
-								if (read == -1)
-									return -1;
-
-								if (read == 0)
-									continue search;
-
-								clobOffset = 0;
-							}
-
-
-							// find matches within our two temp arrays.
-compareArrays:
-							for (; clobOffset < read; clobOffset++) {
-
-								//System.out.println("compareArrays " + clobOffset);
-
-								char clobC = tmpClob[clobOffset];
-
-
-								if (clobC == tmpPattern[matchCount])
-								{
-									if (matchPosition == -1) {
-										matchPosition = currentPosition + clobOffset;
-									}
-
-									matchCount++;
-
-									// have we matched the entire pattern segment
-									if (matchCount == tmpPattern.length)
-									{
-										// move onto the next segment.
-										patternIndex += tmpPattern.length;
-										if (patternIndex == patternLength) {
-											// complete match !!
-											clobReader.close();
-											//System.out.println("COMPLETE@" + matchPosition);
-											return matchPosition;
-										}
-
-										needPattern = true;
-										//We need to increment clobOffset
-										//to start comparison from the 
-										//next character since the current
-										//character has already been compared
-										clobOffset++;
-										continue search;
-
-									}
-
-									if (clobC == tmpPattern[0]) {
-
-										// save the next best start position.
-
-										// must be the first character of the actual pattern
-										if (patternIndex == 0) {
-
-											// must not be just a repeat of the match of the first character
-											if (matchCount != 1) {
-
-												// must not have a previous next best.
-
-												if (nextBestMatchPosition == -1) {
-													nextBestMatchPosition = currentPosition + clobOffset;
-												}
-
-											}
-
-										}
-									}
-
-									continue compareArrays;
-								}
-								else
-								{
-									// not a match
-									//
-									// 
-									if (matchPosition != -1) {
-										// failed after we matched some amount of the pattern
-										matchPosition = -1;
-
-										// See if we found a next best match
-										if (nextBestMatchPosition == -1)
-										{
-											// NO - just continue on, re-starting at this character
-
-											if (patternIndex != 0) {
-												needPattern = true;
-												continue search;
-											}
-										}
-										else if (nextBestMatchPosition >= currentPosition)
-										{
-											// restart in the current array
-											clobOffset = (int) (nextBestMatchPosition - currentPosition);
-											nextBestMatchPosition = -1;
-									
-											if (patternIndex != 0) {
-												needPattern = true;
-												continue search;
-											}
-										}
-										else
-										{
-											clobReader.close();
-											start = nextBestMatchPosition;
-											continue restartPattern;
-										}
-
-										clobOffset--; // since the continue will increment it
-										matchCount = 0;
-										continue compareArrays;
-									}
-									
-									// no current match, just continue
-								}
-							}
-
-							currentPosition += read;
-
-							// indicates we need to read more data
-							clobOffset = -1;
-						}
-					}
-				}
+                }
             }
         }
         catch (Throwable t)