You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2018/01/24 21:45:52 UTC

svn commit: r1822166 - /uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java

Author: schor
Date: Wed Jan 24 21:45:52 2018
New Revision: 1822166

URL: http://svn.apache.org/viewvc?rev=1822166&view=rev
Log:
[UIMA-5707] convert lists to arrays

Modified:
    uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java

Modified: uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java
URL: http://svn.apache.org/viewvc/uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java?rev=1822166&r1=1822165&r2=1822166&view=diff
==============================================================================
--- uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java (original)
+++ uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java Wed Jan 24 21:45:52 2018
@@ -22,27 +22,44 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.function.IntUnaryOperator;
 import java.util.function.Predicate;
 
+import org.apache.uima.List_of_ints;
 import org.apache.uima.cas.CommonArrayFS;
 import org.apache.uima.cas.Feature;
 import org.apache.uima.cas.impl.SlotKinds.SlotKind;
+import org.apache.uima.internal.util.Int2ObjHashMap;
 import org.apache.uima.internal.util.Misc;
+import org.apache.uima.internal.util.Obj2IntIdentityHashMap;
 import org.apache.uima.internal.util.Pair;
+import org.apache.uima.internal.util.PositiveIntSet;
+import org.apache.uima.internal.util.PositiveIntSet_impl;
 import org.apache.uima.jcas.cas.BooleanArray;
 import org.apache.uima.jcas.cas.ByteArray;
+import org.apache.uima.jcas.cas.CommonList;
 import org.apache.uima.jcas.cas.DoubleArray;
 import org.apache.uima.jcas.cas.EmptyList;
 import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.FSList;
 import org.apache.uima.jcas.cas.FloatArray;
+import org.apache.uima.jcas.cas.FloatList;
 import org.apache.uima.jcas.cas.IntegerArray;
+import org.apache.uima.jcas.cas.IntegerList;
 import org.apache.uima.jcas.cas.LongArray;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.cas.NonEmptyFloatList;
+import org.apache.uima.jcas.cas.NonEmptyIntegerList;
+import org.apache.uima.jcas.cas.NonEmptyStringList;
 import org.apache.uima.jcas.cas.ShortArray;
 import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.jcas.cas.StringList;
 import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.util.IntEntry;
 
 /**
  * Used by tests for Binary Compressed de/serialization code.
@@ -135,6 +152,7 @@ import org.apache.uima.jcas.cas.TOP;
 public class CasCompare {
   
   private final static boolean IS_DEBUG_STOP_ON_MISCOMPARE = true;
+  private final static boolean IS_MEAS_LIST_2_ARRAY = false;
 
   /**
    * Compare 2 CASes, with perhaps different type systems.
@@ -322,25 +340,36 @@ public class CasCompare {
       featsByEase[3] = (FeatureImpl[]) refArrays.toArray(new FeatureImpl[refArrays.size()]);
     }
   }
-  
-//  /**
-//   * Information about list feature structures
-//   */
-//  static class ListFs {
-//    final int fss_index;     // index in c1FoundFss, c2FoundFss
-//    TOP prev;    // index in listFss (list of this structure) of first found predecessor
-//    boolean has_multiple_predecessors;  // true if more than one predecessor
-//    ListFs(int i) {
-//      fss_index = i;
-//    }
-//  }
+    
+  // must always be true, need to rework convert lists to arrays if not
+  private static final boolean IS_CANONICAL_EMPTY_LISTS = true;
 
-  final private Map<TypeImpl, FeatLists> type2featLists = new HashMap<>();
+  /* ****************************************************
+   * Data Structures for converting lists to arrays
+   * ****************************************************/
+  private static final CommonList removed_list_marker = new NonEmptyFSList<TOP>();
   
-//  /**
-//   * key = fs.id  value = ListFs instance
-//   */
-//  final private Int2ObjHashMap<ListFs, ListFs> listFss = new Int2ObjHashMap<>(ListFs.class);
+  /** 
+   * key = _id, value = arraylist holding well-formed list with this node in it 
+   */
+  final private Int2ObjHashMap<ArrayList<CommonList>, ArrayList<CommonList>> map_e_to_a_list = 
+            new Int2ObjHashMap(ArrayList.class);
+  
+  /** 
+   * set of list elements determined to be members of a non-linear structure 
+   */
+  final private PositiveIntSet non_linear_list_elements = new PositiveIntSet_impl();
+  
+  /**
+   * a map from list nodes which might be removed, to their place in the fss array list
+   *   The index is 1 more, to avoid colliding with the 0 value, used for missing value
+   */
+  final private Obj2IntIdentityHashMap<CommonList> node_indexes = 
+      new Obj2IntIdentityHashMap<CommonList>(CommonList.class, removed_list_marker);
+  
+  final private PositiveIntSet list_successor_seen = new PositiveIntSet_impl();
+  
+  final private Map<TypeImpl, FeatLists> type2featLists = new HashMap<>();
   
   final private CASImpl c1;
   final private CASImpl c2;
@@ -352,7 +381,6 @@ public class CasCompare {
     /** if true, continues comparison and reporting after finding the first miscompare */
   private boolean isCompareAll = false;
   private boolean isCompareIds = false;
-  private boolean isCanonicalEmptyLists = true;
 //    private boolean compareFSArraysAsSets = false;
     
 //    /** true when that FS._id (an Array of some kind) has been sorted */
@@ -387,6 +415,8 @@ public class CasCompare {
   private boolean isUsingStringCongruenceSets = false;
   private final TypeImpl fsArrayType;
     
+  private int maxId1;
+  private int maxId2;
 
           
   public CasCompare(CASImpl c1, CASImpl c2) {
@@ -423,9 +453,9 @@ public class CasCompare {
     isCompareIds = v;
   }
   
-  public void compareCanonicalEmptyLists(boolean v) {
-    isCanonicalEmptyLists = v; 
-  }
+//  public void compareCanonicalEmptyLists(boolean v) {
+//    isCanonicalEmptyLists = v; 
+//  }
   
   /**
    * Add a set of strings that should be considered equal when doing string comparisons.
@@ -476,12 +506,18 @@ public class CasCompare {
           
       int i1 = 0;
       int i2 = 0;
-      final int sz1 = c1FoundFSs.size();
-      final int sz2 = c2FoundFSs.size();
+
+      final int maxId1 = c1.peekNextFsId();
+      final int maxId2 = c2.peekNextFsId();
+          
+      // convert_linear_lists_to_arrays may add more items
       
-//      lists2arrays(c1FoundFSs);
-//      lists2arrays(c2FoundFSs);
+      convert_linear_lists_to_arrays(c1FoundFSs);
+      convert_linear_lists_to_arrays(c2FoundFSs);
  
+      final int sz1 = c1FoundFSs.size();  // max size for comparing, includes added arrays converted from lists
+      final int sz2 = c2FoundFSs.size();
+
       isSrcCas = true;   // avoids sorting on types/features not present in ts2
       sort(c1FoundFSs);
       
@@ -494,7 +530,23 @@ public class CasCompare {
         TOP fs1 = c1FoundFSs.get(i1);  // assumes the elements are in same order??
         TOP fs2 = c2FoundFSs.get(i2);
 
-        if (isCanonicalEmptyLists) {
+        if (null == fs1) {  // nulls at end indicate list elements converted to arrays
+          if (null != fs2) {
+            System.err.format("%,d Feature Structures in CAS2 with no matches in CAS2, e.g. %s%n",
+                sz2 - i2, fs2.toString(2));
+            return ! allOk;
+          } else {           
+            return allOk;
+          }
+        }
+        if (null == fs2) {  // nulls at end indicate list elements converted to arrays
+          System.err.format("%,d Feature Structures in CAS1 with no matches in CAS2, e.g. %s%n",
+              sz1 - i1, fs1.toString(2));
+          return ! allOk;
+        }
+        
+        
+        if (IS_CANONICAL_EMPTY_LISTS) {
           if (fs1 instanceof EmptyList && !(fs2 instanceof EmptyList)) {
             int start = i1;
             while (true) {
@@ -669,65 +721,197 @@ public class CasCompare {
     return () -> System.arraycopy(a, 0, fsArray._getTheArray(), 0, fsArray.size());
   }
   
-//  private lists2arrays(ArrayList<TOP> fss) {
-//    listFss.clear();
-//    populate_lists(listFss, fss); 
-//    
-//    TOP[] head = new TOP[1];  // place to store head
-//   
-//    for (IntEntry<ListFs> item : listFss) {
-//      boolean hasNoLoop = searchForLoops_forward(item);
-//      hasNoLoop = searchForLoops_backwards(item, hasNoLoop, head); // sets head
-//      if (hasNoLoop) {
-//        convertList2Array(head, fss);  // removes from listFss and from fss, adds to fss
-//      } 
-//      remove_list_from_listFss(head); // has been processed, remove all elements  
-//        
-//        
-//      }
-//    }
-//    
-//  }
-//  
-//  private void populate_lists(ArrayList<TOP> fss) {
-//    int sz = fss.size();
-//    for (int i = 0; i < sz; i++) {
-//      TOP item = fss.get(i);
-//      if (item instanceof CommonList) {
-//        ListFs existing = listFss.get(item._id);
-//        if (existing == null) {
-//          listFss.put(i, new ListFs(item._id));
-//          addBackChain(item);
-//        } 
-//      }
-//    }     
-//  }
-//  
-//  private void addBackChain(TOP item) {
-//    
-//    TOP current = item;
-//    while (true) {
-//      CommonList next = (current instanceof NonEmptyList) 
-//                          ? ((NonEmptyList)item).getCommonTail()
-//                          : null;
-//      if (next == null) return;
-//      if (!(next instanceof CommonList)) {
-//        // CAS Compare found UIMA list with list element item whose next element was next,
-//        // which was invalid.
-//      }
-//      assert next instanceof CommonList;
-//      
-//      ListFs existing = listFss.get(next._id());
-//      if (existing != null) {
-//        if (existing.prev != null && existing.prev != current) {
-//          existing.has_multiple_predecessors = true;
-//          
-//        
-//        
-//      
-//      
-//    }
-//  }
+  /* ******************************************************************************* 
+   *     Convert UIMA Lists to arrays, to make the compare go faster
+   * *******************************************************************************/
+  
+  private void convert_linear_lists_to_arrays(ArrayList<TOP> fss) {
+    map_e_to_a_list.clear();
+    non_linear_list_elements.clear();
+    node_indexes.clear();
+    
+    int sz = fss.size();
+    for (int i = 0; i < sz; i++) {
+      TOP fs = fss.get(i);
+    
+      if (! (fs instanceof CommonList)) continue;                   // skip: not list node
+      CommonList node = (CommonList) fs;
+      if (node.isEmpty()) {
+        fss.set(i, null); // clear it, empty list elements don't need to be compared
+        continue;
+      }
+
+      if (non_linear_list_elements.contains(node._id())) continue;  // skip: in non-linear list
+      if (null !=  map_e_to_a_list.get(node._id())) {
+        node_indexes.put(node,  i + 1);                             // case: added as a successor
+        continue;                                                   // skip: already seen/processed
+      }
+ 
+      node_indexes.put(node, i + 1);                   // in case we have to remove this later
+      if (!node.isEmpty()) {
+        ArrayList<CommonList> al = new ArrayList<>();    // start a new arraylist
+        al.add(node);                                    // add this node
+        map_e_to_a_list.put(node._id(), al);
+
+        if (addSuccessors(node, al)) continue;  
+        
+        // some successor was in a non-linear situation        
+        move_to_non_linear(al);
+      }      
+    }
+    
+    if (IS_MEAS_LIST_2_ARRAY) {
+      System.out.format("CasCompare converting lists to Arrays, "
+          + "nbr of list elements considered: %,d, number of looped lists skipped: %,d%n",  
+          node_indexes.size(), non_linear_list_elements.size());
+    }
+    CASImpl view = fss.get(0)._casView;
+    TypeSystemImpl tsi = view.getTypeSystemImpl();
+    
+    Set<ArrayList<CommonList>> processed = Collections.newSetFromMap(new IdentityHashMap<>());
+    
+    for (IntEntry<ArrayList<CommonList>> ie : map_e_to_a_list) {
+      ArrayList<CommonList> e = ie.getValue();
+      if (processed.add(e)) {
+        convert_to_array(e, fss, view, tsi); // changes list element to highest pseudo fs, adds array elements
+      }
+    }
+    if (IS_MEAS_LIST_2_ARRAY) {
+      System.out.format("CasCompare converted %,d lists to Arrays%n", processed.size());            
+    }
+
+    // allow gc
+    map_e_to_a_list.clear();
+    non_linear_list_elements.clear();    
+    node_indexes.clear();
+  }
+  
+  /**
+   * Convert an array list to a uima array  (int, float, fs, string) 
+   *   - add to fss
+   *   - go thru fss and null out list elements
+   *   
+   * @param al -
+   * @param fss -
+   */
+  private void convert_to_array(
+      ArrayList<CommonList> al, 
+      ArrayList<TOP> fss,
+      CASImpl view,
+      TypeSystemImpl tsi) {
+    
+    CommonList e = al.get(0);  
+    if (e instanceof FSList) {
+      assert al.size() > 0;
+      FSArray<TOP> fsa = new FSArray<>(tsi.fsArrayType, view, al.size());
+      int i = 0;
+      for (CommonList n : al) {
+        assert !n.isEmpty();
+        fsa.set(i++, ((NonEmptyFSList)n).getHead());
+        fss.set(node_indexes.get(n) - 1, null);
+      }
+      fss.add(fsa);
+    } else if (e instanceof IntegerList) {
+      IntegerArray a = new IntegerArray(tsi.intArrayType, view, al.size());
+      int i = 0;
+      for (CommonList n : al) {
+        a.set(i++, (n instanceof EmptyList) 
+                       ? Integer.MIN_VALUE 
+                       : ((NonEmptyIntegerList)n).getHead());
+        fss.set(node_indexes.get(n) - 1, null);
+      }
+      fss.add(a);
+    } else if (e instanceof FloatList) {
+      FloatArray a = new FloatArray(tsi.floatArrayType, view, al.size());
+      int i = 0;
+      for (CommonList n : al) {
+        a.set(i++, (n instanceof EmptyList) 
+                       ? Float.MIN_VALUE 
+                       : ((NonEmptyFloatList)n).getHead());
+        fss.set(node_indexes.get(n) - 1, null);
+      }
+      fss.add(a);
+    } else if (e instanceof StringList) {
+      StringArray a = new StringArray(tsi.stringArrayType, view, al.size());
+      int i = 0;
+      for (CommonList n : al) {
+        a.set(i++, (n instanceof EmptyList) 
+                       ? null 
+                       : ((NonEmptyStringList)n).getHead());
+        fss.set(node_indexes.get(n) - 1, null);
+      }
+      fss.add(a);
+    } else Misc.internalError();
+    
+  }
+  
+  /**
+   * walk down list, adding successors, looking for loops
+   *   - each element is added to the array list, and also to the map from id -> array list
+   *   - if loop found, stop and return false
+   *   
+   *   - before adding element, see if already in map from id -> array list
+   *     -- if so, couple the array lists
+   * @param node -
+   * @param al -
+   * @return false if loop found
+   */
+  private boolean addSuccessors(CommonList node, ArrayList al) {
+    try {
+      list_successor_seen.add(node._id());  // starts reset, reset at end
+      
+      while (!node.isEmpty()) {
+        node = node.getCommonTail();
+        if (node == null || node.isEmpty()) break;
+        
+        if (!list_successor_seen.add(node._id())) return false;  // stop if loop is found
+  
+        ArrayList<CommonList> other = map_e_to_a_list.get(node._id());
+        if (null != other) { 
+          couple_array_lists(al, other, node);
+          return true;  // rest of list already walked
+        } else {
+          al.add(node);
+          map_e_to_a_list.put(node._id(), al);   // every element maps to containing al
+        }
+      }
+      return true;
+    } finally {
+      list_successor_seen.clear();
+    }
+  }
+  
+  /**
+   * merge a2 to follow a1, starting from position where commonNode is in a2
+   * @param a1 -
+   * @param a2 -
+   * @param commonNode -
+   */
+  private void couple_array_lists(ArrayList<CommonList> a1, ArrayList<CommonList> a2, CommonList commonNode) {
+    int i = 0;
+    int sz2 = a2.size();
+    for (; i < sz2; i++) {
+      if (commonNode == a2.get(i)) break;
+    }
+    
+    if (i == sz2) Misc.internalError();
+    
+    for (; i < sz2; i++) {
+      CommonList node = a2.get(i);
+      map_e_to_a_list.put(node._id(), a1);  // remove a2 value, put in a1 value
+      a1.add(node);
+    }    
+  }
+  
+  private void move_to_non_linear(ArrayList<CommonList> al) {
+    for (CommonList e : al) {
+      map_e_to_a_list.remove(e._id());
+      non_linear_list_elements.add(e._id());
+    }
+  }
+
+  
+  
   
   private void clearPrevFss() {
     prevCompare.clear();
@@ -777,7 +961,7 @@ public class CasCompare {
     }
  
     if (isCompareIds && !inSortContext) {
-      if (fs1._id != fs2._id) {
+      if (fs1._id < maxId1 && fs2._id < maxId2 && fs1._id != fs2._id) {
         mismatchFs(fs1, fs2, "IDs miscompare");        
         return Integer.compare(fs1._id, fs2._id);
       }
@@ -1160,7 +1344,7 @@ public class CasCompare {
   private int compareRefResult(TOP rfs1, TOP rfs2) {
     
     // exception: treat canonical empty lists
-    if (!inSortContext && isCanonicalEmptyLists && rfs1 instanceof EmptyList) {
+    if (!inSortContext && IS_CANONICAL_EMPTY_LISTS && rfs1 instanceof EmptyList) {
 //      if (prev1.size() <= 0 || prev2.size() <= 0) {
         return 0;
 //      }
@@ -1348,6 +1532,13 @@ public class CasCompare {
    */
   private int sortCompare(TOP scFs1, TOP scFs2) {  
     
+    if (scFs1 == null) {
+      return (scFs2 == null) ? 0 : 1;
+    } 
+    if (scFs2 == null) {
+      return -1;
+    }
+    
 //    miscompares.clear();
     prev1.clear();
     prev2.clear();