You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/05/27 18:44:34 UTC

[incubator-annotator] 03/06: Some fixes to satisfy tests

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 380fbe4bde714a0ae723a41b4a06e64ffeced901
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 19:48:06 2020 +0200

    Some fixes to satisfy tests
    
    Always give a prefix and suffix, as recommended in the spec:
      “Each TextQuoteSelector SHOULD have exactly 1 prefix property”
    …so now it is an empty string when no prefix/suffix is needed.
    I am not sure if this is valuable or a waste a bytes. I’d be equally
    happy to revert this behaviour.
    
    Fix mistakes leading to needless prefix or suffix in some cases, and
    a lack of them when selecting the first or last characters (partly
    regressions from c94ccda and 8b29fec, or already faulty before that).
---
 packages/dom/src/text-quote/describe.ts | 76 ++++++++++++---------------------
 1 file changed, 28 insertions(+), 48 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 57a587c..819fa12 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -87,66 +87,46 @@ async function calculateContextForDisambiguation(
       continue;
     }
 
-    // Determine how many prefix characters are shared.
-    const prefixLength = overlapRight(
-      text.substring(0, matchStartIndex),
+    // Count how many characters before & after them the false match and target have in common.
+    const sufficientPrefixLength = charactersNeededToBeUnique(
       text.substring(0, startIndex),
+      text.substring(0, matchStartIndex),
+      true,
     );
-
-    // Determine how many suffix characters are shared.
-    const suffixLength = overlap(
-      text.substring(matchEndIndex),
+    const sufficientSuffixLength = charactersNeededToBeUnique(
       text.substring(endIndex),
+      text.substring(matchEndIndex),
+      false,
     );
-
-    // Record the affix lengths that would have precluded this match.
-    affixLengthPairs.push([prefixLength + 1, suffixLength + 1]);
-  }
-
-  // Construct and return an unambiguous selector.
-  let prefix, suffix;
-  if (affixLengthPairs.length) {
-    const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
-
-    if (prefixLength > 0 && startIndex > 0) {
-      prefix = text.substring(startIndex - prefixLength, startIndex);
-    }
-
-    if (suffixLength > 0 && endIndex < text.length) {
-      suffix = text.substring(endIndex, endIndex + suffixLength);
-    }
+    affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
   }
 
+  // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
+  // for prefix and suffix combined.
+  const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
+  const prefix = text.substring(startIndex - prefixLength, startIndex);
+  const suffix = text.substring(endIndex, endIndex + suffixLength);
   return { prefix, suffix };
 }
 
-function overlap(text1: string, text2: string) {
-  let count = 0;
-
-  while (count < text1.length && count < text2.length) {
-    const c1 = text1[count];
-    const c2 = text2[count];
-    if (c1 !== c2) break;
-    count++;
-  }
-
-  return count;
-}
-
-function overlapRight(text1: string, text2: string) {
-  let count = 0;
-
-  while (count < text1.length && count < text2.length) {
-    const c1 = text1[text1.length - 1 - count];
-    const c2 = text2[text2.length - 1 - count];
-    if (c1 !== c2) break;
-    count++;
-  }
-
-  return count;
+function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) {
+  // Count how many characters the two strings have in common.
+  let overlap = 0;
+  while (reverse
+    ? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap]
+    : target[overlap] === impostor[overlap]
+  )
+    overlap++;
+  if (overlap === target.length)
+    return Infinity; // (no substring of target can make it distinguishable from its impostor)
+  else
+    return overlap + 1;
 }
 
 function minimalSolution(requirements: Array<[number, number]>): [number, number] {
+  // Ensure we try solutions with an empty prefix or suffix.
+  requirements.push([0, 0]);
+
   // Build all the pairs and order them by their sums.
   const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]]));
   pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1]));