You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by zo...@apache.org on 2002/10/04 01:25:15 UTC

cvs commit: xml-xalan/java/src/org/apache/xalan/xsltc/dom DupFilterIterator.java

zongaro     2002/10/03 16:25:14

  Modified:    java/src/org/apache/xalan/xsltc/dom Tag: XSLTC_DTM
                        DupFilterIterator.java
  Log:
  Changed code to use a merge sort with a sweep through after to eliminate
  duplicates rather than using insertion sort.  The insertion sort performed
  poorly when there were large numbers of unique nodes.
  Might want to look at a synthesis of the two.
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.8.10.2  +118 -32   xml-xalan/java/src/org/apache/xalan/xsltc/dom/DupFilterIterator.java
  
  Index: DupFilterIterator.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/dom/DupFilterIterator.java,v
  retrieving revision 1.8.10.1
  retrieving revision 1.8.10.2
  diff -u -r1.8.10.1 -r1.8.10.2
  --- DupFilterIterator.java	17 Apr 2002 18:13:44 -0000	1.8.10.1
  +++ DupFilterIterator.java	3 Oct 2002 23:25:14 -0000	1.8.10.2
  @@ -116,45 +116,131 @@
   
   	// KeyIndex iterators are always relative to the root node, so there
   	// is never any point in re-reading the iterator (and we SHOULD NOT).
  -	if ((_source instanceof KeyIndex) && (_data != null)) return this;
  +	if ((_source instanceof KeyIndex) && (_data != null)) {
  +            return this;
  +        }
   
   	// If the _data array is populated, and the current start node is
   	// equal to the new start node, we know we already have what we need.
   	if ((_data == null) || (node != _startNode)) {
  +            _startNode = node;
  +            _source.setStartNode(node);
  +            int[] data = new int[INIT_DATA_SIZE];
  +            int sourceNodeCount = 0;
   
  -	    _startNode = node;
  -	    _last = 0;
  -	    _source.setStartNode(node);
  -	    _data = new int[INIT_DATA_SIZE];
  -
  -	    // Gather all nodes from the source iterator, eliminate dups
  -	    while ((node = _source.next()) != END) {
  -		if (_last == _data.length) {
  -		    int[] newArray = new int[_data.length * 2];
  -		    System.arraycopy(_data, 0, newArray, 0, _last);
  -		    _data = newArray;
  -		}
  -
  -		// Go through all nodes in turn
  -		for (i=0; i<_last; i++) {
  -		    // Is this a duplicate of the new node
  -		    if (_data[i] == node) {
  -			break;
  -		    }
  -		    // Should the new node be inserted at this position?
  -		    else if (_data[i] > node) {
  -			for (j = _last++; j>i; j--)
  -			    _data[j] = _data[j-1];
  -			_data[i] = node;
  -			break;
  -		    }
  -		}
  -		if (i == _last) _data[_last++] = node;
  -	    }
  +	    // Gather all nodes from the source iterator
  +            while ((node = _source.next()) != END) {
  +                if (sourceNodeCount == data.length) {
  +                    int[] newArray = new int[data.length * 2];
  +                    System.arraycopy(data, 0, newArray, 0, sourceNodeCount);
  +                    data = newArray;
  +                }
  +                data[sourceNodeCount++] = node;
  +            }
  +
  +            // %REVIEW%:  Is this the best approach?  Code used to keep nodes
  +            // in sorted order as they were retrieved from _source.next(),
  +            // inserting at appropriate point at each step, eliminating
  +            // duplicates.  That was a win when there were relatively few
  +            // unique nodes.  When there were very many, the insertions
  +            // became very expensive.  Perhaps we could use that approach
  +            // while the number of nodes is small, and then switch to sorting
  +            // after the fact when it reaches a threshold.
  +
  +            // Factor out the trivial case to avoid overhead of sort
  +            if (sourceNodeCount > 1) {
  +                // Sort source nodes using merge sort:  Merge two sorted
  +                // subranges of the array into new sorted ranges, beginning
  +                // with trivially sorted subranges of size 1.
  +                int[] mergeArray = new int[sourceNodeCount];
  +
  +                int doubleMergeSize = 1;
  +                for (int mergeSize = 1;
  +                     mergeSize < sourceNodeCount;
  +                     mergeSize = doubleMergeSize) {
  +                    int mLow = 0;
  +                    doubleMergeSize = mergeSize + mergeSize;
  +
  +                    // Merge adjacent subranges of the array of appropriate size
  +                    for (int r1Low = 0;
  +                         r1Low < sourceNodeCount;
  +                         r1Low = r1Low + doubleMergeSize) {
  +                        final int r2Low = r1Low + mergeSize;
  +                        final int numElemsInSecondSet =
  +                                    Math.min(mergeSize, sourceNodeCount-r2Low);
  +                        merge(data, r1Low, mergeSize,
  +                              data, r2Low, numElemsInSecondSet,
  +                              mergeArray, mLow);
  +                        mLow = mLow + mergeSize + numElemsInSecondSet;
  +                    }
  +
  +                    // Now switch the arrays to double the merger ranges
  +                    int[] tempArr = mergeArray;
  +                    mergeArray = data;
  +                    data = tempArr;
  +                }
  +
  +                // Sweep through to see whether there are any duplicates
  +                for (i = 0;
  +                     i < sourceNodeCount-1 && data[i] != data[i+1];
  +                     i++);
  +
  +                // If any duplicates were found, start compacting them out
  +                if (i < sourceNodeCount-1) {
  +                    int nextUniqueIdx = i+1;
  +                    for (j = i+2; j < sourceNodeCount; j++) {
  +                        if (data[nextUniqueIdx-1] != data[j]) {
  +                            data[nextUniqueIdx++] = data[j];
  +                        }
  +                    }
  +                    sourceNodeCount = nextUniqueIdx;
  +                }
  +            }
  +            _last = sourceNodeCount;
  +            _data = data;
   	}
   
   	_current = 0;  // set to beginning 
   	return this;
  +    }
  +
  +    /**
  +     *
  +     * Merge two sorted subranges of arrays into a target array.  The resulting
  +     * elements in the target array will be in sorted order.
  +     *
  +     * @param a An array containing the first sorted subrange
  +     * @param aLow The starting index for the first array's subrange
  +     * @param aCount The number of elements in the first array's subrange
  +     * @param b An array containing the second sorted subrange
  +     * @param bLow The starting index for the second array's subrange
  +     * @param bCount The number of elements in the second array's subrange
  +     * @param t The target array which will contain the two merged subranges
  +     * @param tLow The starting index in the target array for the merged result
  +     *
  +     */
  +    private static void merge(int[] a, int aLow, int aCount,
  +                              int[] b, int bLow, int bCount,
  +                              int[] t, int tLow) {
  +        int aHigh = aLow + aCount - 1;
  +        int bHigh = bLow + bCount - 1;
  +        int tHigh = tLow + aCount + bCount - 1;
  +
  +        for (int i = tLow; i <= tHigh; i++) {
  +            if (aLow > aHigh) {
  +                for (int j = i; j <= tHigh; j++) {
  +                    t[j] = b[bLow++];
  +                }
  +                break;
  +            } else if (bLow > bHigh) {
  +                for (int j = i; j <= tHigh; j++) {
  +                    t[j] = a[aLow++];
  +                }
  +                break;
  +            }
  +
  +            t[i] = (a[aLow] < b[bLow]) ? a[aLow++] : b[bLow++];
  +        }
       }
   
       /**
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org