You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/05/27 18:44:36 UTC

[incubator-annotator] 05/06: Make match & describe handle empty quotes ‘correctly’

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit da1981addaa5596cdf16ac268d8ff9665bfffb4b
Author: Gerben <ge...@treora.com>
AuthorDate: Wed May 27 17:32:30 2020 +0200

    Make match & describe handle empty quotes ‘correctly’
    
    What constitutes correct behaviour could be debated, but it seems
    reasonable to match at every possible position.
---
 packages/dom/src/text-quote/describe.ts        |  2 +-
 packages/dom/src/text-quote/match.ts           |  2 +-
 packages/dom/test/text-quote-describe-cases.ts | 16 ++++++-
 packages/dom/test/text-quote-match-cases.ts    | 66 ++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 572e218..784881c 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -52,7 +52,7 @@ function calculateContextForDisambiguation(
   // Find all matches of the text in the scope.
   const stringMatches: number[] = [];
   let fromIndex = 0;
-  while (fromIndex < scopeText.length) {
+  while (fromIndex <= scopeText.length) {
     const matchIndex = scopeText.indexOf(exactText, fromIndex);
     if (matchIndex === -1) break;
     stringMatches.push(matchIndex);
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index f32afce..18b077e 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -54,7 +54,7 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
       : 0;
 
     let fromIndex = 0;
-    while (fromIndex < scopeText.length) {
+    while (fromIndex <= scopeText.length) {
       // Find the quote with its prefix and suffix in the string.
       const patternStartIndex = scopeText.indexOf(searchPattern, fromIndex);
       if (patternStartIndex === -1) return;
diff --git a/packages/dom/test/text-quote-describe-cases.ts b/packages/dom/test/text-quote-describe-cases.ts
index cfc1435..4f34c92 100644
--- a/packages/dom/test/text-quote-describe-cases.ts
+++ b/packages/dom/test/text-quote-describe-cases.ts
@@ -83,9 +83,23 @@ const testCases: {
       suffix: '',
     },
   },
+  'empty quote': {
+    html: '<b>To annotate or not to annotate</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 11,
+      endContainerXPath: '//b/text()',
+      endOffset: 11,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'e',
+      suffix: ' ',
+    },
+  },
 
   // TODO test for:
-  // emtpy range
   // empty scope
   // custom scope
   // element edges, across elements, etc.
diff --git a/packages/dom/test/text-quote-match-cases.ts b/packages/dom/test/text-quote-match-cases.ts
index 33d66de..0fd757a 100644
--- a/packages/dom/test/text-quote-match-cases.ts
+++ b/packages/dom/test/text-quote-match-cases.ts
@@ -264,6 +264,72 @@ const testCases: {
       },
     ],
   },
+  'empty quote': {
+    html: '<b>lorem</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+    },
+    // A five-character string has six positions (offsets 0 through 5) where an empty string matches
+    expected: Array(6).fill(null).map((_, i) => ({
+      startContainerXPath: '//b/text()',
+      startOffset: i,
+      endContainerXPath: '//b/text()',
+      endOffset: i,
+    }))
+  },
+  'empty quote, with prefix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'dolor',
+    },
+    expected: [{
+      startContainerXPath: '//b/text()',
+      startOffset: 17,
+      endContainerXPath: '//b/text()',
+      endOffset: 17,
+    }]
+  },
+  'empty quote, with suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      suffix: 'i',
+    },
+    expected: [{
+      startContainerXPath: '//b/text()',
+      startOffset: 6,
+      endContainerXPath: '//b/text()',
+      endOffset: 6,
+    }]
+  },
+  'empty quote, with prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'lorem ',
+      suffix: 'ipsum',
+    },
+    expected: [{
+      startContainerXPath: '//b/text()',
+      startOffset: 6,
+      endContainerXPath: '//b/text()',
+      endOffset: 6,
+    }]
+  },
+  'empty quote, no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'X',
+    },
+    expected: [],
+  }
 };
 
 export default testCases;