You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2021/08/12 09:38:05 UTC

[uima-uimaj] branch origin/feature/UIMA-6374-Create-CAS-de--serialization-test-suite created (now db276e3)

This is an automated email from the ASF dual-hosted git repository.

rec pushed a change to branch origin/feature/UIMA-6374-Create-CAS-de--serialization-test-suite
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git.


      at db276e3  [UIMA-6374] Create CAS (de)serialization test suite

This branch includes the following new commits:

     new db276e3  [UIMA-6374] Create CAS (de)serialization test suite

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[uima-uimaj] 01/01: [UIMA-6374] Create CAS (de)serialization test suite

Posted by re...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch origin/feature/UIMA-6374-Create-CAS-de--serialization-test-suite
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git

commit db276e30c5590a41eaa1cdc11ff7333fccd4479e
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Thu Aug 12 11:37:05 2021 +0200

    [UIMA-6374] Create CAS (de)serialization test suite
    
    - Allow configuration of empty/null elements in StringArrayFS when the MultiFeatureRandomCasDataSuite is used
    - Fix XMI and XCAS tests against the MultiFeatureRandomCasDataSuite
    - Improve stability of order of feature structures in CasToComparableText by introducing a non-recursive "featureHash"
---
 ...rializationDeserialization_BINARY_TSI_Test.java |   8 +-
 ...asSerializationDeserialization_BINARY_Test.java |   8 +-
 ...serialization_COMPRESSED_FILTERED_TSI_Test.java |   8 +-
 ...izationDeserialization_SERIALIZED_TSI_Test.java |   8 +-
 .../CasSerializationDeserialization_XCAS_Test.java |  14 +-
 ...izationDeserialization_XMI_1_0_PRETTY_Test.java |  14 +-
 ...izationDeserialization_XMI_1_1_PRETTY_Test.java |  14 +-
 .../uima/cas/serdes/CasToComparableText.java       | 161 ++++++++++++++++++++-
 .../uima/cas/serdes/SerDesCasIOTestUtils.java      |  42 +++---
 .../uima/cas/serdes/datasuites/CasDataSuite.java   |  26 ++++
 .../datasuites/MultiFeatureRandomCasDataSuite.java |  71 ++++++++-
 .../datasuites/MultiTypeRandomCasDataSuite.java    |  89 ++++++++++--
 .../ProgrammaticallyCreatedCasDataSuite.java       |  42 +++++-
 .../generators/MultiFeatureRandomCasGenerator.java |  72 +++++++--
 14 files changed, 495 insertions(+), 82 deletions(-)

diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_TSI_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_TSI_Test.java
index 2ef4c45..303ada1 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_TSI_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_TSI_Test.java
@@ -29,6 +29,8 @@ import static org.apache.uima.util.CasLoadMode.REINIT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -68,11 +70,13 @@ public class CasSerializationDeserialization_BINARY_TSI_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build(),
+            MultiTypeRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_Test.java
index d28a3bd..3da78df 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_BINARY_Test.java
@@ -30,6 +30,8 @@ import static org.apache.uima.util.CasLoadMode.REINIT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -69,11 +71,13 @@ public class CasSerializationDeserialization_BINARY_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build(),
+            MultiTypeRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_COMPRESSED_FILTERED_TSI_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_COMPRESSED_FILTERED_TSI_Test.java
index 2662d9a..f7d487d 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_COMPRESSED_FILTERED_TSI_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_COMPRESSED_FILTERED_TSI_Test.java
@@ -28,6 +28,8 @@ import static org.apache.uima.util.CasLoadMode.REINIT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -67,11 +69,13 @@ public class CasSerializationDeserialization_COMPRESSED_FILTERED_TSI_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build(),
+            MultiTypeRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_SERIALIZED_TSI_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_SERIALIZED_TSI_Test.java
index 4072e3e..e1664fe 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_SERIALIZED_TSI_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_SERIALIZED_TSI_Test.java
@@ -28,6 +28,8 @@ import static org.apache.uima.util.CasLoadMode.REINIT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -67,11 +69,13 @@ public class CasSerializationDeserialization_SERIALIZED_TSI_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build(),
+            MultiTypeRandomCasDataSuite.builder().withIterations(RANDOM_CAS_ITERATIONS).build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XCAS_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XCAS_Test.java
index bcf3c30..a8051f7 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XCAS_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XCAS_Test.java
@@ -24,6 +24,7 @@ import static org.apache.uima.cas.serdes.SerDesAssuptions.assumeNotKnownToFail;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.createCasMaybeWithTypesystem;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.desser;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.serdes;
+import static org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator.StringArrayMode.EMPTY_STRINGS_AS_NULL;
 import static org.apache.uima.util.CasCreationUtils.createCas;
 import static org.apache.uima.util.CasLoadMode.DEFAULT;
 import static org.apache.uima.util.CasLoadMode.LENIENT;
@@ -31,6 +32,8 @@ import static org.apache.uima.util.CasLoadMode.LENIENT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -70,11 +73,18 @@ public class CasSerializationDeserialization_XCAS_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder() //
+                    .withIterations(RANDOM_CAS_ITERATIONS) //
+                    .withStringArrayMode(EMPTY_STRINGS_AS_NULL) //
+                    .build(),
+            MultiTypeRandomCasDataSuite.builder() //
+                    .withIterations(RANDOM_CAS_ITERATIONS) //
+                    .build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_0_PRETTY_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_0_PRETTY_Test.java
index 298c8d9..d5231e5 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_0_PRETTY_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_0_PRETTY_Test.java
@@ -24,6 +24,7 @@ import static org.apache.uima.cas.serdes.SerDesAssuptions.assumeNotKnownToFail;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.desser;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.serdes;
 import static org.apache.uima.cas.serdes.datasuites.XmiFileDataSuite.DATA_XMI;
+import static org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator.StringArrayMode.NULL_STRINGS_AS_EMPTY;
 import static org.apache.uima.util.CasCreationUtils.createCas;
 import static org.apache.uima.util.CasLoadMode.DEFAULT;
 import static org.apache.uima.util.CasLoadMode.LENIENT;
@@ -31,6 +32,8 @@ import static org.apache.uima.util.CasLoadMode.LENIENT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -71,7 +74,7 @@ public class CasSerializationDeserialization_XMI_1_0_PRETTY_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   // private static List<DesSerTestScenario> desSerScenarios() {
@@ -102,7 +105,14 @@ public class CasSerializationDeserialization_XMI_1_0_PRETTY_Test {
   // }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder() //
+                    .withIterations(RANDOM_CAS_ITERATIONS) //
+                    .withStringArrayMode(NULL_STRINGS_AS_EMPTY) //
+                    .build(),
+            MultiTypeRandomCasDataSuite.builder() //
+                    .withIterations(RANDOM_CAS_ITERATIONS) //
+                    .build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_1_PRETTY_Test.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_1_PRETTY_Test.java
index 763c4b0..c277b52 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_1_PRETTY_Test.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasSerializationDeserialization_XMI_1_1_PRETTY_Test.java
@@ -24,6 +24,7 @@ import static org.apache.uima.cas.serdes.SerDesAssuptions.assumeNotKnownToFail;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.desser;
 import static org.apache.uima.cas.serdes.SerDesCasIOTestUtils.serdes;
 import static org.apache.uima.cas.serdes.datasuites.XmiFileDataSuite.DATA_XMI;
+import static org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator.StringArrayMode.NULL_STRINGS_AS_EMPTY;
 import static org.apache.uima.util.CasCreationUtils.createCas;
 import static org.apache.uima.util.CasLoadMode.DEFAULT;
 import static org.apache.uima.util.CasLoadMode.LENIENT;
@@ -31,6 +32,8 @@ import static org.apache.uima.util.CasLoadMode.LENIENT;
 import java.util.List;
 
 import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
@@ -70,11 +73,18 @@ public class CasSerializationDeserialization_XMI_1_1_PRETTY_Test {
   }
 
   private static List<SerDesTestScenario> serDesScenarios() {
-    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles);
+    return SerDesCasIOTestUtils.programmaticSerDesScenarios(serDesCycles);
   }
 
   private static List<SerDesTestScenario> randomSerDesScenarios() {
-    return SerDesCasIOTestUtils.randomSerDesScenarios(serDesCycles, RANDOM_CAS_ITERATIONS);
+    return SerDesCasIOTestUtils.serDesScenarios(serDesCycles,
+            MultiFeatureRandomCasDataSuite.builder() //
+                    .withIterations(RANDOM_CAS_ITERATIONS) //
+                    .withStringArrayMode(NULL_STRINGS_AS_EMPTY) //
+                    .build(),
+            MultiTypeRandomCasDataSuite.builder() //
+                    .withIterations(RANDOM_CAS_ITERATIONS) //
+                    .build());
   }
 
   @ParameterizedTest
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasToComparableText.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasToComparableText.java
index d9e4447..f478432 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasToComparableText.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/CasToComparableText.java
@@ -63,6 +63,7 @@ import org.apache.uima.cas.ShortArrayFS;
 import org.apache.uima.cas.StringArrayFS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.CASImpl;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.AnnotationBase;
@@ -77,6 +78,7 @@ public class CasToComparableText {
   private boolean markView = true;
   private boolean coveredTextColumnEnabled = true;
   private boolean indexedColumnEnabled = false;
+  private boolean treatEmptyStringsAsNull = false;
   private int maxLengthCoveredText = 30;
   private boolean sortAnnotationsInMultiValuedFeatures = true;
   private boolean uniqueAnchors = true;
@@ -231,6 +233,14 @@ public class CasToComparableText {
     nullValue = aNullValue;
   }
 
+  public void setTreatEmptyStringsAsNull(boolean aTreatEmptyStringsAsNull) {
+    treatEmptyStringsAsNull = aTreatEmptyStringsAsNull;
+  }
+
+  public boolean isTreatEmptyStringsAsNull() {
+    return treatEmptyStringsAsNull;
+  }
+
   private Pattern pattern(String aRegex) {
 
     return regexCache.computeIfAbsent(aRegex, ex -> Pattern.compile(ex));
@@ -281,8 +291,10 @@ public class CasToComparableText {
       return;
     }
 
-    if (aSeeds.stream().anyMatch(fs -> fs.getCAS() != cas)) {
-      throw new IllegalArgumentException("FeatureStructure does not belong to CAS");
+    for (FeatureStructure fs : aSeeds) {
+      if (fs.getCAS() != cas && fs.getCAS() != ((CASImpl) cas).getBaseCAS()) {
+        throw new IllegalArgumentException("FeatureStructure does not belong to CAS");
+      }
     }
 
     Set<FeatureStructure> reachableFses = findReachableFeatureStructures(aSeeds);
@@ -427,6 +439,11 @@ public class CasToComparableText {
       }
 
       // Primitive features can be rendered as strings
+      if (feature.getRange().isStringOrStringSubtype()) {
+        data.add(escape(renderStringValue(aFS.getFeatureValueAsString(feature))));
+        continue;
+      }
+
       if (feature.getRange().isPrimitive()) {
         data.add(escape(aFS.getFeatureValueAsString(feature)));
         continue;
@@ -513,8 +530,13 @@ public class CasToComparableText {
         continue nextItem;
       }
 
-      if (item instanceof String || item.getClass().isPrimitive()) {
-        items.add(String.valueOf(item));
+      if (item instanceof String) {
+        items.add(escape(renderStringValue((String) item)));
+        continue nextItem;
+      }
+
+      if (item.getClass().isPrimitive()) {
+        items.add(escape(String.valueOf(item)));
         continue nextItem;
       }
 
@@ -536,6 +558,18 @@ public class CasToComparableText {
     return items.stream().collect(joining(",", "[", "]"));
   }
 
+  private String renderStringValue(String aString) {
+    if (aString == null) {
+      return nullValue;
+    }
+
+    if (treatEmptyStringsAsNull && aString.isEmpty()) {
+      return nullValue;
+    }
+
+    return aString;
+  }
+
   // This method was derived from uimaFIT FSUtil.getFeature()
   private List<Object> multiValuedFeatureStructureToList(FeatureStructure aValue) {
 
@@ -730,6 +764,108 @@ public class CasToComparableText {
     return seen;
   }
 
+  private int featureHash(FeatureStructure aFS) {
+    int hash = 0;
+    for (Feature f : aFS.getType().getFeatures()) {
+      if (f.getRange().isStringOrStringSubtype() || f.getRange().isPrimitive()) {
+        String value = renderStringValue(aFS.getFeatureValueAsString(f));
+        hash += value != null ? value.hashCode() : 0;
+        continue;
+      }
+
+      if (f.getRange().isArray()) {
+        if (f.getRange().getComponentType().isStringOrStringSubtype()) {
+          StringArrayFS array = ((StringArrayFS) aFS.getFeatureValue(f));
+          if (array != null) {
+            for (int i = 0; i < array.size(); i++) {
+              String v = renderStringValue(array.get(i));
+              hash += v != null ? v.hashCode() : 0;
+            }
+          }
+          continue;
+        }
+
+        switch (f.getRange().getComponentType().getName()) {
+          case CAS.TYPE_NAME_BOOLEAN: {
+            BooleanArrayFS array = ((BooleanArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += array.get(i) ? -(i + 1) : (i + 1);
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_BYTE: {
+            ByteArrayFS array = ((ByteArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += array.get(i);
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_DOUBLE: {
+            DoubleArrayFS array = ((DoubleArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += Double.hashCode(array.get(i));
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_FLOAT: {
+            FloatArrayFS array = ((FloatArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += Float.hashCode(array.get(i));
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_INTEGER: {
+            IntArrayFS array = ((IntArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += array.get(i);
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_LONG: {
+            LongArrayFS array = ((LongArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += Long.hashCode(array.get(i));
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_SHORT: {
+            ShortArrayFS array = ((ShortArrayFS) aFS.getFeatureValue(f));
+            if (array != null) {
+              for (int i = 0; i < array.size(); i++) {
+                hash += array.get(i);
+              }
+            }
+            break;
+          }
+          case CAS.TYPE_NAME_FS_ARRAY:
+            // We cannot really recursively calculate the hash... let's just use the array length
+            if (aFS.getFeatureValue(f) != null) {
+              hash *= ((CommonArrayFS) aFS.getFeatureValue(f)).size() + 1;
+            }
+            break;
+        }
+      }
+
+      // If we get here, it is a feature structure reference... we cannot really recursively
+      // go into it to calculate a recursive hash... so we just check if the value is non-null
+      hash *= aFS.getFeatureValue(f) != null ? 1 : -1;
+    }
+
+    return hash;
+  }
+
   private static class CloseShieldAppendable implements Appendable, Closeable {
 
     private final Appendable delegate;
@@ -764,11 +900,15 @@ public class CasToComparableText {
     }
   }
 
-  private static class FSComparator implements Comparator<FeatureStructure> {
+  private class FSComparator implements Comparator<FeatureStructure> {
 
     @Override
     public int compare(FeatureStructure aFS1, FeatureStructure aFS2) {
 
+      if (aFS1 == aFS2) {
+        return 0;
+      }
+
       // Same name?
       int nameCmp = aFS2.getType().getName().compareTo(aFS2.getType().getName());
       if (nameCmp != 0) {
@@ -793,8 +933,15 @@ public class CasToComparableText {
         }
       }
 
-      // Giving up here. Hopefully the caller has a good idea to further sort FSes which
-      // have the same type and location
+      // Ok, so let's calculate a hash over the features then...
+      int fh1 = featureHash(aFS1);
+      int fh2 = featureHash(aFS2);
+      if (fh1 < fh2) {
+        return -1;
+      }
+      if (fh1 > fh2) {
+        return 1;
+      }
       return 0;
     }
   }
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/SerDesCasIOTestUtils.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/SerDesCasIOTestUtils.java
index 827ad0b..a40a29b 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/SerDesCasIOTestUtils.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/SerDesCasIOTestUtils.java
@@ -24,6 +24,7 @@ import static java.util.stream.Collectors.toList;
 import static org.apache.uima.cas.SerialFormat.XMI_PRETTY;
 import static org.apache.uima.cas.serdes.TestType.ONE_WAY;
 import static org.apache.uima.cas.serdes.TestType.ROUND_TRIP;
+import static org.apache.uima.cas.serdes.TestType.SER_DES;
 import static org.apache.uima.cas.serdes.TestType.SER_REF;
 import static org.apache.uima.util.CasCreationUtils.createCas;
 import static org.apache.uima.util.TypeSystemUtil.typeSystem2TypeSystemDescription;
@@ -44,8 +45,7 @@ import org.apache.commons.lang3.NotImplementedException;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.SerialFormat;
-import org.apache.uima.cas.serdes.datasuites.MultiFeatureRandomCasDataSuite;
-import org.apache.uima.cas.serdes.datasuites.MultiTypeRandomCasDataSuite;
+import org.apache.uima.cas.serdes.datasuites.CasDataSuite;
 import org.apache.uima.cas.serdes.datasuites.ProgrammaticallyCreatedCasDataSuite;
 import org.apache.uima.cas.serdes.datasuites.XmiFileDataSuite;
 import org.apache.uima.cas.serdes.scenario.DesSerTestScenario;
@@ -53,7 +53,6 @@ import org.apache.uima.cas.serdes.scenario.SerDesTestScenario;
 import org.apache.uima.cas.serdes.scenario.SerRefTestScenario;
 import org.apache.uima.cas.serdes.transitions.CasDesSerCycleConfiguration;
 import org.apache.uima.cas.serdes.transitions.CasSerDesCycleConfiguration;
-import org.apache.uima.cas.serdes.transitions.CasSourceTargetConfiguration;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.CasCreationUtils;
@@ -79,7 +78,7 @@ public class SerDesCasIOTestUtils {
   public static List<SerRefTestScenario> serRefScenarios(SerialFormat aFormat,
           String aCasFileName) {
     Class<?> caller = getCallerClass();
-    return ProgrammaticallyCreatedCasDataSuite.configurations().stream()
+    return ProgrammaticallyCreatedCasDataSuite.builder().build().stream()
             .map(conf -> SerRefTestScenario.builder(caller, conf, SER_REF, aCasFileName)
                     .withSerializer((cas, path) -> ser(cas, path, aFormat)) //
                     .build())
@@ -128,37 +127,23 @@ public class SerDesCasIOTestUtils {
    * {@link ProgrammaticallyCreatedCasDataSuite} and applying them to each of the configured
    * serialization/deserialization cycles.
    */
-  public static List<SerDesTestScenario> serDesScenarios(
+  public static List<SerDesTestScenario> programmaticSerDesScenarios(
           Collection<CasSerDesCycleConfiguration> aSerDesCycles) {
-    List<SerDesTestScenario> confs = new ArrayList<>();
-
-    for (CasSerDesCycleConfiguration cycle : aSerDesCycles) {
-      for (CasSourceTargetConfiguration data : ProgrammaticallyCreatedCasDataSuite
-              .configurations()) {
-        confs.add(new SerDesTestScenario(data, cycle));
-      }
-    }
 
-    return confs;
+    return serDesScenarios(aSerDesCycles, ProgrammaticallyCreatedCasDataSuite.builder().build());
   }
 
   /**
-   * SERIALIZE -> DESERIALIZE scenarios using randomized CASes
+   * SERIALIZE -> DESERIALIZE scenarios using the given data suites (typically randomized suites)
    */
-  public static List<SerDesTestScenario> randomSerDesScenarios(
-          Collection<CasSerDesCycleConfiguration> aSerDesCycles, int aIterations) {
+  public static List<SerDesTestScenario> serDesScenarios(
+          Collection<CasSerDesCycleConfiguration> aSerDesCycles, CasDataSuite... aDataSuites) {
 
     List<SerDesTestScenario> confs = new ArrayList<>();
 
     for (CasSerDesCycleConfiguration cycle : aSerDesCycles) {
-      for (CasSourceTargetConfiguration data : MultiTypeRandomCasDataSuite
-              .configurations(aIterations / 2)) {
-        confs.add(new SerDesTestScenario(data, cycle));
-      }
-
-      for (CasSourceTargetConfiguration data : MultiFeatureRandomCasDataSuite
-              .configurations(aIterations / 2)) {
-        confs.add(new SerDesTestScenario(data, cycle));
+      for (CasDataSuite suite : aDataSuites) {
+        suite.forEach(data -> confs.add(new SerDesTestScenario(data, cycle)));
       }
     }
 
@@ -206,6 +191,13 @@ public class SerDesCasIOTestUtils {
       tsiBuffer = tsiTarget.toByteArray();
     }
 
+    Path targetFile = SER_DES.getTargetFolder(getCallerClass())
+            .resolve("data." + aFormat.getDefaultFileExtension());
+    Files.createDirectories(targetFile.getParent());
+    try (OutputStream os = Files.newOutputStream(targetFile)) {
+      os.write(casBuffer);
+    }
+
     // Deserialize the CAS
     try (ByteArrayInputStream casSource = new ByteArrayInputStream(casBuffer);
             ByteArrayInputStream tsiSource = new ByteArrayInputStream(tsiBuffer)) {
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/CasDataSuite.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/CasDataSuite.java
new file mode 100644
index 0000000..bc805ad
--- /dev/null
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/CasDataSuite.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.cas.serdes.datasuites;
+
+import java.util.Collection;
+
+import org.apache.uima.cas.serdes.transitions.CasSourceTargetConfiguration;
+
+public interface CasDataSuite extends Collection<CasSourceTargetConfiguration> {
+}
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiFeatureRandomCasDataSuite.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiFeatureRandomCasDataSuite.java
index 8dc09fc..e2dccb4 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiFeatureRandomCasDataSuite.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiFeatureRandomCasDataSuite.java
@@ -18,21 +18,37 @@
  */
 package org.apache.uima.cas.serdes.datasuites;
 
+import static org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator.StringArrayMode.ALLOW_NULL_AND_EMPTY_STRINGS;
+
+import java.util.AbstractCollection;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 import org.apache.uima.cas.serdes.generators.CasConfiguration;
 import org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator;
+import org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator.StringArrayMode;
 import org.apache.uima.cas.serdes.transitions.CasSourceTargetConfiguration;
 
-public class MultiFeatureRandomCasDataSuite {
+public class MultiFeatureRandomCasDataSuite extends AbstractCollection<CasSourceTargetConfiguration>
+        implements CasDataSuite {
+  private final int sizeFactor;
+  private final StringArrayMode stringArrayMode;
+  private final int iterations;
+
+  private MultiFeatureRandomCasDataSuite(Builder builder) {
+    sizeFactor = builder.sizeFactor;
+    stringArrayMode = builder.stringArrayMode;
+    iterations = builder.iterations;
+  }
 
-  public static List<CasSourceTargetConfiguration> configurations(int aCount) {
+  @Override
+  public Iterator<CasSourceTargetConfiguration> iterator() {
     List<CasSourceTargetConfiguration> confs = new ArrayList<>();
 
-    for (int n = 0; n < aCount; n++) {
+    for (int n = 0; n < iterations; n++) {
       MultiFeatureRandomCasGenerator randomizer = MultiFeatureRandomCasGenerator.builder() //
-              .withSize((aCount + 1) * 10) //
+              .withStringArrayMode(stringArrayMode).withSize((n + 1) * sizeFactor) //
               .build();
 
       CasConfiguration cfg = new CasConfiguration(randomizer);
@@ -44,6 +60,51 @@ public class MultiFeatureRandomCasDataSuite {
               .build());
     }
 
-    return confs;
+    return confs.iterator();
+  }
+
+  @Override
+  public int size() {
+    return iterations;
+  }
+
+  /**
+   * Creates builder to build {@link MultiFeatureRandomCasDataSuite}.
+   * 
+   * @return created builder
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Builder to build {@link MultiFeatureRandomCasDataSuite}.
+   */
+  public static final class Builder {
+    private int iterations = 10; // number of configurations the suite produces (also its size())
+    private int sizeFactor = 10; // the n-th (0-based) generator is built with size (n + 1) * sizeFactor
+    private StringArrayMode stringArrayMode = ALLOW_NULL_AND_EMPTY_STRINGS; // handed to the generator
+
+    private Builder() {
+    }
+
+    public Builder withSizeFactory(int aSizeFactory) { // NOTE(review): "Factory" looks like a typo for "Factor" (field is sizeFactor; MultiTypeRandomCasDataSuite.Builder uses withSizeFactor)
+      sizeFactor = aSizeFactory;
+      return this;
+    }
+
+    public Builder withStringArrayMode(StringArrayMode aStringArrayMode) {
+      stringArrayMode = aStringArrayMode;
+      return this;
+    }
+
+    public Builder withIterations(int aIterations) {
+      iterations = aIterations;
+      return this;
+    }
+
+    public MultiFeatureRandomCasDataSuite build() {
+      return new MultiFeatureRandomCasDataSuite(this);
+    }
   }
 }
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiTypeRandomCasDataSuite.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiTypeRandomCasDataSuite.java
index afcfff9..4981898 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiTypeRandomCasDataSuite.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/MultiTypeRandomCasDataSuite.java
@@ -18,28 +18,42 @@
  */
 package org.apache.uima.cas.serdes.datasuites;
 
+import java.util.AbstractCollection;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 import org.apache.uima.cas.serdes.generators.CasConfiguration;
 import org.apache.uima.cas.serdes.generators.MultiTypeRandomCasGenerator;
 import org.apache.uima.cas.serdes.transitions.CasSourceTargetConfiguration;
 
-public class MultiTypeRandomCasDataSuite {
+public class MultiTypeRandomCasDataSuite extends AbstractCollection<CasSourceTargetConfiguration>
+        implements CasDataSuite {
 
-  private static final int SIZE_FACTOR = 10;
+  private final int iterations;
+  private final int sizeFactor;
+  private final int minimumAnnotationLength;
+  private final Long randomSeed;
 
-  public static List<CasSourceTargetConfiguration> configurations(int aCount) {
+  private MultiTypeRandomCasDataSuite(Builder builder) {
+    sizeFactor = builder.sizeFactor;
+    minimumAnnotationLength = builder.minimumAnnotationLength;
+    randomSeed = builder.randomSeed;
+    iterations = builder.iterations;
+  }
+
+  @Override
+  public Iterator<CasSourceTargetConfiguration> iterator() {
     List<CasSourceTargetConfiguration> confs = new ArrayList<>();
 
-    for (int n = 0; n < aCount; n++) {
-      MultiTypeRandomCasGenerator randomizer = MultiTypeRandomCasGenerator.builder() //
-              // NOTE: When you need to debug a certain configuration, comment out and set the
-              // random seed for the broken configuration. Do not commit with this line active!
-              // .withRandomSeed(-5987713889340419492l) //
-              .withTypeCount(n + 1) //
-              .withMinimumAnnotationLength(0) //
-              .withSize((n + 1) * SIZE_FACTOR) //
+    for (int n = 0; n < iterations; n++) {
+      MultiTypeRandomCasGenerator.Builder randomizerBuilder = MultiTypeRandomCasGenerator.builder();
+      if (randomSeed != null) {
+        randomizerBuilder.withRandomSeed(randomSeed);
+      }
+      MultiTypeRandomCasGenerator randomizer = randomizerBuilder.withTypeCount(n + 1) //
+              .withMinimumAnnotationLength(minimumAnnotationLength) //
+              .withSize((n + 1) * sizeFactor) //
               .build();
 
       CasConfiguration cfg = new CasConfiguration(randomizer);
@@ -52,6 +66,57 @@ public class MultiTypeRandomCasDataSuite {
               .build());
     }
 
-    return confs;
+    return confs.iterator();
+  }
+
+  @Override
+  public int size() {
+    return iterations;
+  }
+
+  /**
+   * Creates builder to build {@link MultiTypeRandomCasDataSuite}.
+   * 
+   * @return created builder
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Builder to build {@link MultiTypeRandomCasDataSuite}.
+   */
+  public static final class Builder {
+    private int iterations = 10; // number of configurations the suite produces (also its size())
+    private int sizeFactor = 10; // n-th generator gets size (n + 1) * sizeFactor; without a default every size would be 0
+    private int minimumAnnotationLength; // defaults to 0; forwarded to the generator unchanged
+    private Long randomSeed; // null means no explicit seed is passed to the generator
+
+    private Builder() {
+    }
+
+    public Builder withSizeFactor(int aSizeFactor) {
+      this.sizeFactor = aSizeFactor;
+      return this;
+    }
+
+    public Builder withMinimumAnnotationLength(int aMinimumAnnotationLength) {
+      this.minimumAnnotationLength = aMinimumAnnotationLength;
+      return this;
+    }
+
+    public Builder withRandomSeed(long aRandomSeed) {
+      this.randomSeed = aRandomSeed;
+      return this;
+    }
+
+    public Builder withIterations(int aIterations) {
+      iterations = aIterations;
+      return this;
+    }
+
+    public MultiTypeRandomCasDataSuite build() {
+      return new MultiTypeRandomCasDataSuite(this);
+    }
   }
 }
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/ProgrammaticallyCreatedCasDataSuite.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/ProgrammaticallyCreatedCasDataSuite.java
index 432b1d4..b66b8cf 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/ProgrammaticallyCreatedCasDataSuite.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/datasuites/ProgrammaticallyCreatedCasDataSuite.java
@@ -21,6 +21,8 @@ package org.apache.uima.cas.serdes.datasuites;
 import static java.util.Arrays.asList;
 
 import java.nio.charset.StandardCharsets;
+import java.util.AbstractCollection;
+import java.util.Iterator;
 import java.util.List;
 
 import org.apache.uima.cas.ByteArrayFS;
@@ -29,10 +31,13 @@ import org.apache.uima.cas.serdes.transitions.CasSourceTargetConfiguration;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.util.CasCreationUtils;
 
-public class ProgrammaticallyCreatedCasDataSuite {
+public class ProgrammaticallyCreatedCasDataSuite
+        extends AbstractCollection<CasSourceTargetConfiguration> implements CasDataSuite {
 
-  public static List<CasSourceTargetConfiguration> configurations() {
-    return asList( //
+  private final List<CasSourceTargetConfiguration> confs;
+
+  private ProgrammaticallyCreatedCasDataSuite(Builder builder) {
+    confs = asList( //
             CasSourceTargetConfiguration.builder() //
                     .withTitle("casWithText") //
                     .withSourceCasSupplier(ProgrammaticallyCreatedCasDataSuite::casWithText) //
@@ -57,6 +62,16 @@ public class ProgrammaticallyCreatedCasDataSuite {
                     .build());
   }
 
+  @Override
+  public Iterator<CasSourceTargetConfiguration> iterator() {
+    return confs.iterator();
+  }
+
+  @Override
+  public int size() {
+    return confs.size();
+  }
+
   public static CAS emptyCas() throws Exception {
     return CasCreationUtils.createCas();
   }
@@ -98,4 +113,25 @@ public class ProgrammaticallyCreatedCasDataSuite {
 
     return cas;
   }
+
+  /**
+   * Creates builder to build {@link ProgrammaticallyCreatedCasDataSuite}.
+   * 
+   * @return created builder
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Builder to build {@link ProgrammaticallyCreatedCasDataSuite}.
+   */
+  public static final class Builder {
+    private Builder() {
+    }
+
+    public ProgrammaticallyCreatedCasDataSuite build() {
+      return new ProgrammaticallyCreatedCasDataSuite(this);
+    }
+  }
 }
diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/generators/MultiFeatureRandomCasGenerator.java b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/generators/MultiFeatureRandomCasGenerator.java
index 9efb718..3e51d1e 100644
--- a/uimaj-core/src/test/java/org/apache/uima/cas/serdes/generators/MultiFeatureRandomCasGenerator.java
+++ b/uimaj-core/src/test/java/org/apache/uima/cas/serdes/generators/MultiFeatureRandomCasGenerator.java
@@ -19,12 +19,14 @@
 package org.apache.uima.cas.serdes.generators;
 
 import static org.apache.uima.UIMAFramework.getResourceSpecifierFactory;
+import static org.apache.uima.cas.serdes.generators.MultiFeatureRandomCasGenerator.StringArrayMode.ALLOW_NULL_AND_EMPTY_STRINGS;
 
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import org.apache.uima.cas.ArrayFS;
 import org.apache.uima.cas.ByteArrayFS;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.DoubleArrayFS;
@@ -90,7 +92,9 @@ public class MultiFeatureRandomCasGenerator implements CasGenerator {
   private final boolean includeUid;
   private final Random rnd;
   private final int size;
+  private final StringArrayMode stringArrayMode;
 
+  // akof = all kinds of features
   private Type akof;
   private Feature akofUid;
   private Feature akofInt;
@@ -116,10 +120,11 @@ public class MultiFeatureRandomCasGenerator implements CasGenerator {
   private AtomicInteger aint;
 
   private MultiFeatureRandomCasGenerator(Builder builder) {
-    this.isKeep = builder.isKeep;
-    this.includeUid = builder.includeUid;
-    this.rnd = builder.randomGenerator;
-    this.size = builder.size;
+    isKeep = builder.isKeep;
+    includeUid = builder.includeUid;
+    rnd = builder.randomGenerator;
+    size = builder.size;
+    stringArrayMode = builder.stringArrayMode;
     aint = includeUid ? new AtomicInteger(0) : null;
   }
 
@@ -203,6 +208,7 @@ public class MultiFeatureRandomCasGenerator implements CasGenerator {
     // Randomly link feature structures to each other
     for (FeatureStructure fs : lfss) {
       fs.setFeatureValue(akofFs, lfss.get(rnd.nextInt(lfss.size())));
+      ((ArrayFS) fs.getFeatureValue(akofAfs)).set(0, lfss.get(rnd.nextInt(lfss.size())));
     }
   }
 
@@ -243,15 +249,25 @@ public class MultiFeatureRandomCasGenerator implements CasGenerator {
   }
 
   private String randomString(Random r) {
-    int i = r.nextInt(7);
-    return STRING_VALUES[i];
+    String v = STRING_VALUES[r.nextInt(STRING_VALUES.length)];
+
+    switch (stringArrayMode) {
+      case ALLOW_NULL_AND_EMPTY_STRINGS:
+        return v;
+      case EMPTY_STRINGS_AS_NULL:
+        return v != null && v.isEmpty() ? null : v;
+      case NULL_STRINGS_AS_EMPTY:
+        return v == null ? "" : v;
+      default:
+        throw new IllegalArgumentException("Unsupported string array mode: " + stringArrayMode);
+    }
   }
 
   private StringArrayFS randomStringA(Random r) {
     int length = r.nextInt(2) + 1;
     StringArrayFS fs = maybeKeep(cas.createStringArrayFS(length));
     for (int i = 0; i < length; i++) {
-      fs.set(i, STRING_VALUES[r.nextInt(STRING_VALUES.length)]);
+      fs.set(i, randomString(r));
     }
     return fs;
   }
@@ -334,31 +350,37 @@ public class MultiFeatureRandomCasGenerator implements CasGenerator {
    * Builder to build {@link MultiFeatureRandomCasGenerator}.
    */
   public static final class Builder {
-    private boolean isKeep;
+    private boolean isKeep = true;
     private boolean includeUid;
     private Random randomGenerator;
     private int size;
+    private StringArrayMode stringArrayMode = ALLOW_NULL_AND_EMPTY_STRINGS;
 
     private Builder() {
     }
 
-    public Builder withReferenceKeeping(boolean isKeep) {
-      this.isKeep = isKeep;
+    public Builder withReferenceKeeping(boolean aIsKeep) {
+      isKeep = aIsKeep;
+      return this;
+    }
+
+    public Builder withUid(boolean aIncludeUid) {
+      includeUid = aIncludeUid;
       return this;
     }
 
-    public Builder withUid(boolean includeUid) {
-      this.includeUid = includeUid;
+    public Builder withRandomGenerator(Random aRandom) {
+      randomGenerator = aRandom;
       return this;
     }
 
-    public Builder withRandomGenerator(Random rnd) {
-      this.randomGenerator = rnd;
+    public Builder withSize(int aSize) {
+      size = aSize;
       return this;
     }
 
-    public Builder withSize(int size) {
-      this.size = size;
+    public Builder withStringArrayMode(StringArrayMode aStringArrayMode) {
+      stringArrayMode = aStringArrayMode;
       return this;
     }
 
@@ -370,4 +392,22 @@ public class MultiFeatureRandomCasGenerator implements CasGenerator {
       return new MultiFeatureRandomCasGenerator(this);
     }
   }
+
+  public enum StringArrayMode {
+    /**
+     * Instead of generating an empty string, generate a {@code null} value (mainly for XCAS).
+     */
+    EMPTY_STRINGS_AS_NULL,
+
+    /**
+     * Instead of generating a {@code null} value, generate an empty string (mainly for XMI).
+     */
+    NULL_STRINGS_AS_EMPTY,
+
+    /**
+     * Generate both {@code null} values and empty strings (this is what (de)serializers should
+     * normally support and be tested with).
+     */
+    ALLOW_NULL_AND_EMPTY_STRINGS;
+  }
 }