You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2019/05/09 21:09:41 UTC
svn commit: r1859041 -
/uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java
Author: schor
Date: Thu May 9 21:09:41 2019
New Revision: 1859041
URL: http://svn.apache.org/viewvc?rev=1859041&view=rev
Log:
[UIMA-6042] add progress indication mode to cas compare. add sort + dedup.
Modified:
uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java
Modified: uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java
URL: http://svn.apache.org/viewvc/uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java?rev=1859041&r1=1859040&r2=1859041&view=diff
==============================================================================
--- uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java (original)
+++ uima/uv3/uimaj-v3/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasCompare.java Thu May 9 21:09:41 2019
@@ -165,6 +165,7 @@ public class CasCompare {
private final static boolean IS_MEAS_LIST_2_ARRAY = false;
private static final String BLANKS_89 = Misc.blanks.substring(0, 89);
+ private static boolean IS_SHOW_PROGRESS = false; // mutable despite the constant-style name: showProgress() sets it to true; when true, progress messages are printed to System.out
/**
* Compare 2 CASes, with perhaps different type systems.
@@ -447,6 +448,7 @@ public class CasCompare {
private int maxId2;
private int miscompare_index; // used to pass back additional value from compareAllArrayElements
private int s1maxLen = 0;
+ private static int working_on; // 1 or 2: set by type_feature_to_runnable before it processes c1 / c2; read by the dedup progress message. Static, so shared by all CasCompare instances
/**
@@ -540,7 +542,9 @@ public class CasCompare {
*/
public List<Runnable> type_feature_to_runnable(String typeName, String featureBaseName, BiFunction<TOP, Feature, Runnable> c) { // collects the runnables produced for c1 and then for c2 into one list
List<Runnable> r = new ArrayList<>();
+ working_on = 1; // remember which CAS is being processed, so the dedup progress message can say "cas 1"
r.addAll(type_feature_to_runnable(c1, typeName, featureBaseName, c));
+ working_on = 2; // same, for the second CAS
r.addAll(type_feature_to_runnable(c2, typeName, featureBaseName, c));
return r;
}
@@ -582,6 +586,10 @@ public class CasCompare {
sortFSArray((FSArray<?>)fs.getFeatureValue(feat)));
}
+ public List<Runnable> sort_dedup_FSArray(String typeName, String featureBaseName) { // for the named type/feature, returns runnables that sort and de-duplicate the FSArray values
+ return type_feature_to_runnable(typeName, featureBaseName, (fs, feat) -> // applied via the two-CAS overload, i.e. to both c1 and c2
+ sort_dedup_FSArray(fs, feat)); // NOTE(review): this per-FS overload can return null; presumably the per-CAS type_feature_to_runnable filters nulls - confirm
+ }
public List<Runnable> sortStringArray(String typeName, String featureBaseName) {
// stringArraysToSort.add(typeName + ":" + featureBaseName);
return type_feature_to_runnable(typeName, featureBaseName, (fs, feat) ->
@@ -712,6 +720,13 @@ public class CasCompare {
}
/**
+ * Call this to show progress of the compare - useful for long compares.
+ * Progress messages are written to System.out.
+ * The setting is a static flag, so it applies to every CasCompare instance;
+ * no way to turn it off again is visible in this change.
+ */
+ public static void showProgress() {
+ IS_SHOW_PROGRESS = true;
+ }
+
+ /**
* This does the actual comparison operation of the previously specified CASes
* @return true if compare is OK
*/
@@ -726,6 +741,7 @@ public class CasCompare {
// processIndexedFeatureStructures(c1, false);
Predicate<TOP> includeFilter = isTypeMapping ? (fs -> isTypeInTgt(fs)) : null;
+ if (IS_SHOW_PROGRESS) System.out.println("Finding all FSs in cas 1");
// this next call doesn't get just the indexed ones, it includes the "reachable" ones too
c1FoundFSs = new AllFSs(c1, null, includeFilter, isTypeMapping ? typeMapper : null)
.getAllFSsAllViews_sofas_reachable()
@@ -734,6 +750,7 @@ public class CasCompare {
// c1FoundFSs = fssToSerialize; // all reachable FSs, filtered by CAS1 -> CAS2 type systems.
// processIndexedFeatureStructures(c2, false);
+ if (IS_SHOW_PROGRESS) System.out.println("Finding all FSs in cas 2");
c2FoundFSs = new AllFSs(c2, null, null, null)
.getAllFSsAllViews_sofas_reachable()
.getAllFSs(); // get just the indexed ones.
@@ -781,15 +798,31 @@ public class CasCompare {
final int sz2 = c2FoundFSs.size();
isSrcCas = true; // avoids sorting on types/features not present in ts2
+ if (IS_SHOW_PROGRESS) System.out.println("Sorting FSs in cas 1");
sort(c1FoundFSs);
isSrcCas = false; // avoids sorting on types/features not present in ts1
+ if (IS_SHOW_PROGRESS) System.out.println("Sorting FSs in cas 2");
sort(c2FoundFSs);
// miscompares.clear();
prevReport.clear();
+ int fsz = Math.max(sz1, sz2);
+ int fsz100 = Math.max(1, fsz/100);
+ int prev_done = 0;
+ if (IS_SHOW_PROGRESS) {
+ System.out.format("Starting compare loop, for %,d FSs%n", Math.max(sz1, sz2));
+ }
+
while (i1 < sz1 && i2 < sz2) {
+ if (IS_SHOW_PROGRESS) {
+ int done = Math.max(i1, i2);
+ if (done - prev_done >= fsz100) {
+ System.out.format("percent done: %d%n", (int) Math.round((done * 100F)/fsz));
+ prev_done = done;
+ }
+ }
TOP fs1 = c1FoundFSs.get(i1); // assumes the elements are in same order??
TOP fs2 = c2FoundFSs.get(i2);
@@ -1011,6 +1044,50 @@ public class CasCompare {
/**
* This is an optional pre-compare operation.
*
+ * It is identical to the method above, except that
+ * after sorting, it removes duplicates.
+ * Duplicates are detected by reference identity between adjacent elements of the sorted copy.
+ *
+ * @param fs the feature structure holding the FSArray-valued feature
+ * @param feat the feature of fs whose FSArray value is to be sorted and de-duplicated
+ * @return a runnable, which (when invoked) either updates the original array with the sorted result
+ *         (when no duplicates were found), or sets the feature to a new, shorter, sorted array;
+ *         null if the feature value is null or has fewer than 2 elements.
+ */
+ public Runnable sort_dedup_FSArray(TOP fs, Feature feat) {
+ FSArray<?> fsArray = (FSArray<?>)(fs.getFeatureValue(feat));
+ if (fsArray == null || fsArray.size() < 2) {
+ return null; // nothing to sort or dedup
+ }
+ TOP[] a = fsArray._getTheArray().clone(); // work on a copy; the CAS is only modified when the returned runnable is run
+ clearPrevFss();
+ inSortContext = true; // NOTE(review): not reset within this method - presumably handled elsewhere; confirm
+ Arrays.sort(a, (TOP afs1, TOP afs2) -> {
+ return compareRefs(afs1, afs2, null, null);
+ });
+ ArrayList<TOP> dedup = new ArrayList<>(a.length);
+ TOP prev = null; // NOTE(review): because prev starts as null, any null elements sorted to the front are dropped too - confirm this is intended
+ for (TOP top : a) {
+ if (top == prev) { // identity check: only an adjacent reference to the same object counts as a duplicate
+ continue;
+ }
+ prev = top;
+ dedup.add(top);
+ }
+ TOP[] r = dedup.toArray(new TOP[dedup.size()]);
+ if (r.length == a.length) { // nothing was removed: same length, so the sorted elements can be copied back in place
+ return () -> System.arraycopy(a, 0, fsArray._getTheArray(), 0, fsArray.size());
+ } else {
+ CASImpl cas = fs.getCASImpl(); // elements were removed: a new, shorter array of the same type is needed
+ FSArray<?> fsa = (FSArray<?>) cas.createArray(fsArray._getTypeImpl(), r.length);
+// FSArray<?> fsa = new FSArray<>(fs.getJCas(), r.length);
+ if (IS_SHOW_PROGRESS) { // reported when the runnable is created, not when it is run
+ System.out.format("Dedup found dup in cas %d for type/feature %s, removed %d%n", working_on, feat.getName(), a.length - r.length);
+ }
+ fsa.copyFromArray(r, 0, 0, r.length);
+ return () -> fs.setFeatureValue(feat, fsa); // deferred: the feature is re-pointed to the new array only when the runnable runs
+ }
+ }
+
+ /**
+ * This is an optional pre-compare operation.
+ *
* Sometimes, when comparing StringArrays, the order of the elements is not significant,
* and the compare should be done ignoring order differences.
*
@@ -2036,6 +2113,9 @@ public class CasCompare {
* @return a StringBuilder with a report
*/
public static StringBuilder compareNumberOfFSsByType(CAS cas1, CAS cas2) {
+ if (IS_SHOW_PROGRESS) {
+ System.out.println("comparing the number of FSs by type");
+ }
CASImpl ci1 = (CASImpl)cas1;
CASImpl ci2 = (CASImpl)cas2;
Iterator<FsIndex_singletype<TOP>> il1 = ci1.indexRepository.streamNonEmptyIndexes(TOP.class).collect(Collectors.toList()).iterator();