You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/05/25 18:03:24 UTC

[incubator-annotator] branch dom-tests updated (e3efb3b -> 8f2f4ea)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


 discard e3efb3b  Remove superfluous seek, tidy up code
     new 6a09edf  Remove superfluous seek, tidy up code
     new c9643a9  Add tests with prefix, suffix, no matches
     new d4d5602  Move test cases into own file
     new e1ac57a  Test if describe inverts test cases for match
     new d5bd5cb  Simple (but failing!) tests for minimal prefix+suffix
     new 035eeb7  f describe tests name
     new b3734c0  Some fixes to satisfy tests
     new 8f2f4ea  WIP

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (e3efb3b)
            \
             N -- N -- N   refs/heads/dom-tests (8f2f4ea)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 8 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 packages/dom/src/text-quote/describe.ts        | 179 ++++++++--------
 packages/dom/src/text-quote/match.ts           |  25 ++-
 packages/dom/test/text-quote-describe-cases.ts |  95 +++++++++
 packages/dom/test/text-quote-describe.ts       |  58 ++++++
 packages/dom/test/text-quote-match-cases.ts    | 269 +++++++++++++++++++++++++
 packages/dom/test/text-quote-match.ts          | 164 +--------------
 packages/dom/test/utils.ts                     |  25 +++
 7 files changed, 552 insertions(+), 263 deletions(-)
 create mode 100644 packages/dom/test/text-quote-describe-cases.ts
 create mode 100644 packages/dom/test/text-quote-describe.ts
 create mode 100644 packages/dom/test/text-quote-match-cases.ts
 create mode 100644 packages/dom/test/utils.ts


[incubator-annotator] 07/08: Some fixes to satisfy tests

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit b3734c0d7b532fb3ba58b31d048a75c23b8dc39e
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 19:48:06 2020 +0200

    Some fixes to satisfy tests
    
    Always give a prefix and suffix, as recommended in the spec:
      “Each TextQuoteSelector SHOULD have exactly 1 prefix property”
    …so now it is an empty string when no prefix/suffix is needed.
    I am not sure if this is valuable or a waste of bytes. I’d be equally
    happy to revert this behaviour.
    
    Fix mistakes leading to needless prefix or suffix in some cases, and
    a lack of them when selecting the first or last characters (partly
    regressions from c94ccda and 8b29fec, or already faulty before that).
---
 packages/dom/src/text-quote/describe.ts | 76 ++++++++++++---------------------
 1 file changed, 28 insertions(+), 48 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 57a587c..819fa12 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -87,66 +87,46 @@ async function calculateContextForDisambiguation(
       continue;
     }
 
-    // Determine how many prefix characters are shared.
-    const prefixLength = overlapRight(
-      text.substring(0, matchStartIndex),
+    // Count how many characters before & after them the false match and target have in common.
+    const sufficientPrefixLength = charactersNeededToBeUnique(
       text.substring(0, startIndex),
+      text.substring(0, matchStartIndex),
+      true,
     );
-
-    // Determine how many suffix characters are shared.
-    const suffixLength = overlap(
-      text.substring(matchEndIndex),
+    const sufficientSuffixLength = charactersNeededToBeUnique(
       text.substring(endIndex),
+      text.substring(matchEndIndex),
+      false,
     );
-
-    // Record the affix lengths that would have precluded this match.
-    affixLengthPairs.push([prefixLength + 1, suffixLength + 1]);
-  }
-
-  // Construct and return an unambiguous selector.
-  let prefix, suffix;
-  if (affixLengthPairs.length) {
-    const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
-
-    if (prefixLength > 0 && startIndex > 0) {
-      prefix = text.substring(startIndex - prefixLength, startIndex);
-    }
-
-    if (suffixLength > 0 && endIndex < text.length) {
-      suffix = text.substring(endIndex, endIndex + suffixLength);
-    }
+    affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
   }
 
+  // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
+  // for prefix and suffix combined.
+  const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
+  const prefix = text.substring(startIndex - prefixLength, startIndex);
+  const suffix = text.substring(endIndex, endIndex + suffixLength);
   return { prefix, suffix };
 }
 
-function overlap(text1: string, text2: string) {
-  let count = 0;
-
-  while (count < text1.length && count < text2.length) {
-    const c1 = text1[count];
-    const c2 = text2[count];
-    if (c1 !== c2) break;
-    count++;
-  }
-
-  return count;
-}
-
-function overlapRight(text1: string, text2: string) {
-  let count = 0;
-
-  while (count < text1.length && count < text2.length) {
-    const c1 = text1[text1.length - 1 - count];
-    const c2 = text2[text2.length - 1 - count];
-    if (c1 !== c2) break;
-    count++;
-  }
-
-  return count;
+function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) {
+  // Count how many characters the two strings have in common.
+  let overlap = 0;
+  while (reverse
+    ? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap]
+    : target[overlap] === impostor[overlap]
+  )
+    overlap++;
+  if (overlap === target.length)
+    return Infinity; // (no substring of target can make it distinguishable from its impostor)
+  else
+    return overlap + 1;
 }
 
 function minimalSolution(requirements: Array<[number, number]>): [number, number] {
+  // Ensure we try solutions with an empty prefix or suffix.
+  requirements.push([0, 0]);
+
   // Build all the pairs and order them by their sums.
   const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]]));
   pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1]));


[incubator-annotator] 04/08: Test if describe inverts test cases for match

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit e1ac57a96fb828056de0332ab2cf6fcb83d67af4
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 14:05:03 2020 +0200

    Test if describe inverts test cases for match
---
 packages/dom/test/text-quote-describe.ts | 49 ++++++++++++++++++++++++++++++++
 packages/dom/test/utils.ts               |  7 +++++
 2 files changed, 56 insertions(+)

diff --git a/packages/dom/test/text-quote-describe.ts b/packages/dom/test/text-quote-describe.ts
new file mode 100644
index 0000000..8d9f74c
--- /dev/null
+++ b/packages/dom/test/text-quote-describe.ts
@@ -0,0 +1,49 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { assert } from 'chai';
+import { describeTextQuote } from '../src/text-quote/describe';
+import testMatchCases from './text-quote-match-cases';
+import { hydrateRange } from './utils';
+
+const domParser = new window.DOMParser();
+
+describe('createTextQuoteSelectorMatcher', () => {
+  describe('inverts test cases of text quote matcher', () => {
+    const applicableTestCases = Object.entries(testMatchCases)
+      .filter(([_, { expected }]) => expected.length > 0);
+
+    for (const [name, { html, selector, expected }] of applicableTestCases) {
+      it(`case: '${name}'`, async () => {
+        const doc = domParser.parseFromString(html, 'text/html');
+        for (const rangeInfo of expected) {
+          const range = hydrateRange(rangeInfo, doc);
+          const result = await describeTextQuote(range, doc);
+          assert.equal(result.exact, selector.exact);
+          // Our result may have a different combination of prefix/suffix; only check for obvious inconsistency.
+          if (selector.prefix && result.prefix)
+            assert(selector.prefix.endsWith(result.prefix.substring(result.prefix.length - selector.prefix.length)), 'Inconsistent prefixes');
+          if (selector.suffix && result.suffix)
+            assert(selector.suffix.startsWith(result.suffix.substring(0, selector.suffix.length)), 'Inconsistent suffixes');
+        }
+      });
+    }
+  });
+});
diff --git a/packages/dom/test/utils.ts b/packages/dom/test/utils.ts
index 7aaa9c9..889059c 100644
--- a/packages/dom/test/utils.ts
+++ b/packages/dom/test/utils.ts
@@ -16,3 +16,10 @@ export function evaluateXPath(doc: Document, xpath: string): Node {
   );
   return nodes[0];
 }
+
+export function hydrateRange(rangeInfo: RangeInfo, doc: Document): Range {
+  const range = doc.createRange();
+  range.setStart(evaluateXPath(doc, rangeInfo.startContainerXPath), rangeInfo.startOffset);
+  range.setEnd(evaluateXPath(doc, rangeInfo.endContainerXPath), rangeInfo.endOffset);
+  return range;
+}


[incubator-annotator] 02/08: Add tests with prefix, suffix, no matches

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit c9643a93924f5223158763172a84b4ee022bce10
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 13:01:33 2020 +0200

    Add tests with prefix, suffix, no matches
---
 packages/dom/test/text-quote-match.ts | 124 ++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index ac5b13a..77f8bd4 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -172,6 +172,130 @@ const testCases: {
       },
     ]
   },
+  'no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'holy grail',
+    },
+    expected: []
+  },
+  'with prefix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'yada',
+      prefix: 't ',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 23,
+        endContainerXPath: '//b/text()',
+        endOffset: 27,
+      },
+    ]
+  },
+  'with suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'o',
+      suffix: 'l',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 13,
+        endContainerXPath: '//b/text()',
+        endOffset: 14,
+      },
+    ]
+  },
+  'with prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'o',
+      prefix: 'l',
+      suffix: 're',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 1,
+        endContainerXPath: '//b/text()',
+        endOffset: 2,
+      },
+    ]
+  },
+  'with prefix and suffix, two matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'o',
+      prefix: 'l',
+      suffix: 'r',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 1,
+        endContainerXPath: '//b/text()',
+        endOffset: 2,
+      },
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 15,
+        endContainerXPath: '//b/text()',
+        endOffset: 16,
+      },
+    ]
+  },
+  'with prefix, no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      prefix: 'oopsum ',
+    },
+    expected: []
+  },
+  'with suffix, no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      suffix: ' amot',
+    },
+    expected: []
+  },
+  'with suffix, no matches due to whitespace': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      suffix: 'a',
+    },
+    expected: []
+  },
+  'with empty prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+      prefix: '',
+      suffix: '',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 12,
+        endContainerXPath: '//b/text()',
+        endOffset: 20,
+      },
+    ]
+  },
 };
 
 describe('createTextQuoteSelectorMatcher', () => {


[incubator-annotator] 08/08: WIP

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 8f2f4ea8efb1a66041da960078a13fec134c16f8
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 20:02:41 2020 +0200

    WIP
---
 packages/dom/src/text-quote/describe.ts | 111 +++++++++++++++++++-------------
 1 file changed, 65 insertions(+), 46 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 819fa12..2b85560 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -23,16 +23,24 @@ import seek from 'dom-seek';
 import { TextQuoteSelector } from '../../../selector/src';
 import { DomScope } from '../types';
 import { ownerDocument, rangeFromScope } from '../scope';
-import { createTextQuoteSelectorMatcher } from './match';
 
 function firstTextNodeInRange(range: Range): Text {
-  const { startContainer } = range;
-
-  if (isTextNode(startContainer)) return startContainer;
-
-  const root = range.commonAncestorContainer;
-  const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
-  return iter.nextNode() as Text;
+  // Find the first text node inside the range.
+  const iter = document.createNodeIterator(
+    range.commonAncestorContainer,
+    NodeFilter.SHOW_TEXT,
+    {
+      acceptNode(node: Text) {
+        // Only reveal nodes within the range; and skip any empty text nodes.
+        return range.intersectsNode(node) && node.length > 0
+          ? NodeFilter.FILTER_ACCEPT
+          : NodeFilter.FILTER_REJECT
+      },
+    },
+  );
+  const node = iter.nextNode() as Text | null;
+  if (node === null) throw new Error('Range contains no text nodes');
+  return node;
 }
 
 export async function describeTextQuote(
@@ -43,59 +51,50 @@ export async function describeTextQuote(
 
   const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact };
 
-  const { prefix, suffix } = await calculateContextForDisambiguation(range, result, scope);
+  const { prefix, suffix } = calculateContextForDisambiguation(range, result, scope);
   result.prefix = prefix;
   result.suffix = suffix;
 
   return result
 }
 
-async function calculateContextForDisambiguation(
+function calculateContextForDisambiguation(
   range: Range,
   selector: TextQuoteSelector,
   scope: DomScope,
-): Promise<{ prefix?: string, suffix?: string }> {
-  const scopeAsRange = rangeFromScope(scope);
-  const root = scopeAsRange.commonAncestorContainer;
-  const text = scopeAsRange.toString();
-
-  const matcher = createTextQuoteSelectorMatcher(selector);
-
-  const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
-
-  const startNode = firstTextNodeInRange(range);
-  const startIndex =
-    isTextNode(range.startContainer)
-      ? seek(iter, startNode) + range.startOffset
-      : seek(iter, startNode);
-  const endIndex = startIndex + selector.exact.length;
+): { prefix?: string, suffix?: string } {
+  const exactText = selector.exact;
+  const scopeText = rangeFromScope(scope).toString();
+  const targetStartIndex = getRangeTextPosition(range, scope);
+  const targetEndIndex = targetStartIndex + exactText.length;
+
+  // Find all matches of the text in the scope.
+  const stringMatches: number[] = [];
+  let fromIndex = 0;
+  while (fromIndex < scopeText.length) {
+    const matchIndex = scopeText.indexOf(exactText, fromIndex);
+    if (matchIndex === -1) break;
+    stringMatches.push(matchIndex);
+    fromIndex = matchIndex + 1;
+  }
 
+  // Count for each undesired match the required prefix and suffix lengths, such that either of them
+  // would have invalidated the match.
   const affixLengthPairs: Array<[number, number]> = [];
-
-  for await (const match of matcher(scopeAsRange)) {
-    const matchIter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
-
-    const matchStartNode = firstTextNodeInRange(match);
-    const matchStartIndex =
-      isTextNode(match.startContainer)
-        ? seek(matchIter, matchStartNode) + match.startOffset
-        : seek(matchIter, matchStartNode);
-    const matchEndIndex = matchStartIndex + match.toString().length;
-
-    // If the match is the same as the input range, continue.
-    if (matchStartIndex === startIndex || matchEndIndex === endIndex) {
+  for (const matchStartIndex of stringMatches) {
+    // Skip the found match if it is the actual target.
+    if (matchStartIndex === targetStartIndex)
       continue;
-    }
 
     // Count how many characters before & after them the false match and target have in common.
     const sufficientPrefixLength = charactersNeededToBeUnique(
-      text.substring(0, startIndex),
-      text.substring(0, matchStartIndex),
+      scopeText.substring(0, targetStartIndex),
+      scopeText.substring(0, matchStartIndex),
       true,
     );
     const sufficientSuffixLength = charactersNeededToBeUnique(
-      text.substring(endIndex),
-      text.substring(matchEndIndex),
+      scopeText.substring(targetStartIndex + exactText.length),
+      scopeText.substring(matchStartIndex + exactText.length),
       false,
     );
     affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
@@ -104,8 +103,8 @@ async function calculateContextForDisambiguation(
   // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
   // for prefix and suffix combined.
   const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
-  const prefix = text.substring(startIndex - prefixLength, startIndex);
-  const suffix = text.substring(endIndex, endIndex + suffixLength);
+  const prefix = scopeText.substring(targetStartIndex - prefixLength, targetStartIndex);
+  const suffix = scopeText.substring(targetEndIndex, targetEndIndex + suffixLength);
   return { prefix, suffix };
 }
 
@@ -143,6 +142,26 @@ function minimalSolution(requirements: Array<[number, number]>): [number, number
   return pairs[pairs.length - 1];
 }
 
+function getRangeTextPosition(range: Range, scope: DomScope): number {
+  const scopeAsRange = rangeFromScope(scope);
+  const iter = document.createNodeIterator(
+    scopeAsRange.commonAncestorContainer,
+    NodeFilter.SHOW_TEXT,
+    {
+      acceptNode(node: Text) {
+        // Only reveal nodes within the range
+        return scopeAsRange.intersectsNode(node)
+          ? NodeFilter.FILTER_ACCEPT
+          : NodeFilter.FILTER_REJECT
+      },
+    },
+  );
+  if (isTextNode(range.startContainer))
+    return seek(iter, range.startContainer) + range.startOffset;
+  else
+    return seek(iter, firstTextNodeInRange(range));
+}
+
 function isTextNode(node: Node): node is Text {
-  return node.nodeType === Node.TEXT_NODE
+  return node.nodeType === Node.TEXT_NODE;
 }


[incubator-annotator] 03/08: Move test cases into own file

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit d4d560226248f686c56c87c1bf407015d2e1c190
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 13:30:21 2020 +0200

    Move test cases into own file
---
 packages/dom/test/text-quote-match-cases.ts | 269 ++++++++++++++++++++++++++
 packages/dom/test/text-quote-match.ts       | 288 +---------------------------
 packages/dom/test/utils.ts                  |  18 ++
 3 files changed, 291 insertions(+), 284 deletions(-)

diff --git a/packages/dom/test/text-quote-match-cases.ts b/packages/dom/test/text-quote-match-cases.ts
new file mode 100644
index 0000000..33d66de
--- /dev/null
+++ b/packages/dom/test/text-quote-match-cases.ts
@@ -0,0 +1,269 @@
+import { TextQuoteSelector } from "../../selector/src";
+import { RangeInfo } from "./utils";
+
+const testCases: {
+  [name: string]: {
+    html: string,
+    selector: TextQuoteSelector,
+    expected: RangeInfo[],
+  }
+} = {
+  'simple': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 12,
+        endContainerXPath: '//b/text()',
+        endOffset: 20,
+      },
+    ],
+  },
+  'first characters': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'lorem ipsum',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 0,
+        endContainerXPath: '//b/text()',
+        endOffset: 11,
+      },
+    ],
+  },
+  'last characters': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'yada yada',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 23,
+        endContainerXPath: '//b/text()',
+        endOffset: 32,
+      },
+    ],
+  },
+  'across elements': {
+    html: '<b>lorem <i>ipsum</i> dolor <u>amet</u> yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()[2]',
+        startOffset: 1,
+        endContainerXPath: '//u/text()',
+        endOffset: 2,
+      },
+    ],
+  },
+  'exact element contents': {
+    html: '<b>lorem <i>ipsum dolor</i> amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'ipsum dolor',
+    },
+    expected: [
+      {
+        startContainerXPath: '//i/text()',
+        startOffset: 0,
+        endContainerXPath: '//b/text()[2]',
+        endOffset: 0,
+      },
+    ],
+  },
+  'text inside <head>': {
+    html: '<head><title>The title</title></head><b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'title',
+    },
+    expected: [
+      {
+        startContainerXPath: '//title/text()',
+        startOffset: 4,
+        endContainerXPath: '//b/text()[1]',
+        endOffset: 0,
+      },
+    ],
+  },
+  'two matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'yada',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 23,
+        endContainerXPath: '//b/text()',
+        endOffset: 27,
+      },
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 28,
+        endContainerXPath: '//b/text()',
+        endOffset: 32,
+      },
+    ],
+  },
+  'overlapping matches': {
+    html: '<b>bananas</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'ana',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 1,
+        endContainerXPath: '//b/text()',
+        endOffset: 4,
+      },
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 3,
+        endContainerXPath: '//b/text()',
+        endOffset: 6,
+      },
+    ],
+  },
+  'no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'holy grail',
+    },
+    expected: [],
+  },
+  'with prefix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'yada',
+      prefix: 't ',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 23,
+        endContainerXPath: '//b/text()',
+        endOffset: 27,
+      },
+    ],
+  },
+  'with suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'o',
+      suffix: 'l',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 13,
+        endContainerXPath: '//b/text()',
+        endOffset: 14,
+      },
+    ],
+  },
+  'with prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'o',
+      prefix: 'l',
+      suffix: 're',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 1,
+        endContainerXPath: '//b/text()',
+        endOffset: 2,
+      },
+    ],
+  },
+  'with prefix and suffix, two matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'o',
+      prefix: 'l',
+      suffix: 'r',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 1,
+        endContainerXPath: '//b/text()',
+        endOffset: 2,
+      },
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 15,
+        endContainerXPath: '//b/text()',
+        endOffset: 16,
+      },
+    ],
+  },
+  'with prefix, no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      prefix: 'oopsum ',
+    },
+    expected: [],
+  },
+  'with suffix, no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      suffix: ' amot',
+    },
+    expected: [],
+  },
+  'with suffix, no matches due to whitespace': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      suffix: 'a',
+    },
+    expected: [],
+  },
+  'with empty prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+      prefix: '',
+      suffix: '',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 12,
+        endContainerXPath: '//b/text()',
+        endOffset: 20,
+      },
+    ],
+  },
+};
+
+export default testCases;
diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index 77f8bd4..1144bc3 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -20,284 +20,13 @@
 
 import { assert } from 'chai';
 import { createTextQuoteSelectorMatcher } from '../src/text-quote/match';
-import { TextQuoteSelector } from '../../selector/src';
+import { TextQuoteSelector } from '../../selector/src/types';
 import { DomScope } from '../src/types';
+import testCases from './text-quote-match-cases';
+import { evaluateXPath, RangeInfo } from './utils';
 
 const domParser = new window.DOMParser();
 
-// RangeInfo serialises a Range’s start and end containers as XPaths.
-type RangeInfo = {
-  startContainerXPath: string,
-  startOffset: number,
-  endContainerXPath: string,
-  endOffset: number,
-};
-
-const testCases: {
-  [name: string]: {
-    html: string,
-    selector: TextQuoteSelector,
-    expected: RangeInfo[],
-  }
-} = {
-  'simple': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'dolor am',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 12,
-        endContainerXPath: '//b/text()',
-        endOffset: 20,
-      },
-    ]
-  },
-  'first characters': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'lorem ipsum',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 0,
-        endContainerXPath: '//b/text()',
-        endOffset: 11,
-      },
-    ]
-  },
-  'last characters': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'yada yada',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 23,
-        endContainerXPath: '//b/text()',
-        endOffset: 32,
-      },
-    ]
-  },
-  'across elements': {
-    html: '<b>lorem <i>ipsum</i> dolor <u>amet</u> yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'dolor am',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()[2]',
-        startOffset: 1,
-        endContainerXPath: '//u/text()',
-        endOffset: 2,
-      },
-    ]
-  },
-  'exact element contents': {
-    html: '<b>lorem <i>ipsum dolor</i> amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'ipsum dolor',
-    },
-    expected: [
-      {
-        startContainerXPath: '//i/text()',
-        startOffset: 0,
-        endContainerXPath: '//b/text()[2]',
-        endOffset: 0,
-      },
-    ]
-  },
-  'text inside <head>': {
-    html: '<head><title>The title</title></head><b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'title',
-    },
-    expected: [
-      {
-        startContainerXPath: '//title/text()',
-        startOffset: 4,
-        endContainerXPath: '//b/text()[1]',
-        endOffset: 0,
-      },
-    ]
-  },
-  'two matches': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'yada',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 23,
-        endContainerXPath: '//b/text()',
-        endOffset: 27,
-      },
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 28,
-        endContainerXPath: '//b/text()',
-        endOffset: 32,
-      },
-    ]
-  },
-  'overlapping matches': {
-    html: '<b>bananas</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'ana',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 1,
-        endContainerXPath: '//b/text()',
-        endOffset: 4,
-      },
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 3,
-        endContainerXPath: '//b/text()',
-        endOffset: 6,
-      },
-    ]
-  },
-  'no matches': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'holy grail',
-    },
-    expected: []
-  },
-  'with prefix': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'yada',
-      prefix: 't ',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 23,
-        endContainerXPath: '//b/text()',
-        endOffset: 27,
-      },
-    ]
-  },
-  'with suffix': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'o',
-      suffix: 'l',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 13,
-        endContainerXPath: '//b/text()',
-        endOffset: 14,
-      },
-    ]
-  },
-  'with prefix and suffix': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'o',
-      prefix: 'l',
-      suffix: 're',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 1,
-        endContainerXPath: '//b/text()',
-        endOffset: 2,
-      },
-    ]
-  },
-  'with prefix and suffix, two matches': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'o',
-      prefix: 'l',
-      suffix: 'r',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 1,
-        endContainerXPath: '//b/text()',
-        endOffset: 2,
-      },
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 15,
-        endContainerXPath: '//b/text()',
-        endOffset: 16,
-      },
-    ]
-  },
-  'with prefix, no matches': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'dolor',
-      prefix: 'oopsum ',
-    },
-    expected: []
-  },
-  'with suffix, no matches': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'dolor',
-      suffix: ' amot',
-    },
-    expected: []
-  },
-  'with suffix, no matches due to whitespace': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'dolor',
-      suffix: 'a',
-    },
-    expected: []
-  },
-  'with empty prefix and suffix': {
-    html: '<b>lorem ipsum dolor amet yada yada</b>',
-    selector: {
-      type: 'TextQuoteSelector',
-      exact: 'dolor am',
-      prefix: '',
-      suffix: '',
-    },
-    expected: [
-      {
-        startContainerXPath: '//b/text()',
-        startOffset: 12,
-        endContainerXPath: '//b/text()',
-        endOffset: 20,
-      },
-    ]
-  },
-};
-
 describe('createTextQuoteSelectorMatcher', () => {
   for (const [name, { html, selector, expected }] of Object.entries(testCases)) {
     it(`works for case: '${name}'`, async () => {
@@ -420,7 +149,7 @@ async function testMatcher(
   doc: Document,
   scope: DomScope,
   selector: TextQuoteSelector,
-  expected: RangeInfo[]
+  expected: RangeInfo[],
 ) {
   const matcher = createTextQuoteSelectorMatcher(selector);
   const matches = [];
@@ -444,15 +173,6 @@ async function testMatcher(
   });
 }
 
-function evaluateXPath(doc: Document, xpath: string): Node {
-  const result = doc.evaluate(xpath, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);
-  const nodes = new Array(result.snapshotLength).fill(undefined).map((_, i) => result.snapshotItem(i));
-  assert.equal(nodes.length, 1,
-    `Test suite contains XPath with ${nodes.length} results instead of 1: '${xpath}'`
-  );
-  return nodes[0];
-}
-
 function prettyNodeName(node: Node) {
   switch (node.nodeType) {
     case Node.TEXT_NODE:
diff --git a/packages/dom/test/utils.ts b/packages/dom/test/utils.ts
new file mode 100644
index 0000000..7aaa9c9
--- /dev/null
+++ b/packages/dom/test/utils.ts
@@ -0,0 +1,18 @@
+import { assert } from "chai";
+
+// RangeInfo serialises a Range’s start and end containers as XPaths.
+export type RangeInfo = {
+  startContainerXPath: string,
+  startOffset: number,
+  endContainerXPath: string,
+  endOffset: number,
+};
+
+export function evaluateXPath(doc: Document, xpath: string): Node {
+  const result = doc.evaluate(xpath, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);
+  const nodes = new Array(result.snapshotLength).fill(undefined).map((_, i) => result.snapshotItem(i));
+  assert.equal(nodes.length, 1,
+    `Test suite contains XPath with ${nodes.length} results instead of 1: '${xpath}'`
+  );
+  return nodes[0];
+}


[incubator-annotator] 05/08: Simple (but failing!) tests for minimal prefix+suffix

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit d5bd5cb0fefb8fdee8f6900b00c13f7796550eb5
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 15:52:37 2020 +0200

    Simple (but failing!) tests for minimal prefix+suffix
---
 packages/dom/test/text-quote-describe-cases.ts | 95 ++++++++++++++++++++++++++
 packages/dom/test/text-quote-describe.ts       |  9 +++
 2 files changed, 104 insertions(+)

diff --git a/packages/dom/test/text-quote-describe-cases.ts b/packages/dom/test/text-quote-describe-cases.ts
new file mode 100644
index 0000000..29c0d83
--- /dev/null
+++ b/packages/dom/test/text-quote-describe-cases.ts
@@ -0,0 +1,95 @@
+import { TextQuoteSelector } from "../../selector/src";
+import { RangeInfo } from "./utils";
+
+const testCases: {
+  [name: string]: {
+    html: string,
+    range: RangeInfo,
+    expected: TextQuoteSelector,
+  }
+} = {
+  'simple': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 12,
+      endContainerXPath: '//b/text()',
+      endOffset: 20,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+      prefix: '',
+      suffix: '',
+    },
+  },
+  'minimal prefix': {
+    html: '<b>To annotate or not to annotate.</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 22,
+      endContainerXPath: '//b/text()',
+      endOffset: 26,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'anno',
+      prefix: 'to ',
+      suffix: '',
+    },
+  },
+  'minimal suffix': {
+    html: '<b>To annotate or not to annotate.</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 7,
+      endContainerXPath: '//b/text()',
+      endOffset: 11,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'tate',
+      prefix: '',
+      suffix: ' ',
+    },
+  },
+  'use suffix for start of text': {
+    html: '<b>to annotate or not to annotate.</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 0,
+      endContainerXPath: '//b/text()',
+      endOffset: 2,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'to',
+      prefix: '',
+      suffix: ' annotate ',
+    },
+  },
+  'use prefix for end of text': {
+    html: '<b>To annotate or not to annotate</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 26,
+      endContainerXPath: '//b/text()',
+      endOffset: 30,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'tate',
+      prefix: 'to anno',
+      suffix: '',
+    },
+  },
+
+  // TODO test for:
+  // empty range
+  // empty scope
+  // custom scope
+  // string edges
+  // element edges, across elements, etc.
+};
+
+export default testCases;
diff --git a/packages/dom/test/text-quote-describe.ts b/packages/dom/test/text-quote-describe.ts
index 8d9f74c..15447b7 100644
--- a/packages/dom/test/text-quote-describe.ts
+++ b/packages/dom/test/text-quote-describe.ts
@@ -20,12 +20,21 @@
 
 import { assert } from 'chai';
 import { describeTextQuote } from '../src/text-quote/describe';
+import testCases from './text-quote-describe-cases';
 import testMatchCases from './text-quote-match-cases';
 import { hydrateRange } from './utils';
 
 const domParser = new window.DOMParser();
 
 describe('createTextQuoteSelectorMatcher', () => {
+  for (const [name, { html, range, expected }] of Object.entries(testCases)) {
+    it(`works for case: ${name}`, async () => {
+      const doc = domParser.parseFromString(html, 'text/html');
+      const result = await describeTextQuote(hydrateRange(range, doc), doc);
+      assert.deepEqual(result, expected);
+    })
+  }
+
   describe('inverts test cases of text quote matcher', () => {
     const applicableTestCases = Object.entries(testMatchCases)
       .filter(([_, { expected }]) => expected.length > 0);


[incubator-annotator] 06/08: f describe tests name

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 035eeb750585ea2f751d05cdfa4d61b57aabdf26
Author: Gerben <ge...@treora.com>
AuthorDate: Mon May 25 18:25:20 2020 +0200

    f describe tests name
---
 packages/dom/test/text-quote-describe.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/dom/test/text-quote-describe.ts b/packages/dom/test/text-quote-describe.ts
index 15447b7..749c083 100644
--- a/packages/dom/test/text-quote-describe.ts
+++ b/packages/dom/test/text-quote-describe.ts
@@ -26,7 +26,7 @@ import { hydrateRange } from './utils';
 
 const domParser = new window.DOMParser();
 
-describe('createTextQuoteSelectorMatcher', () => {
+describe('describeTextQuote', () => {
   for (const [name, { html, range, expected }] of Object.entries(testCases)) {
     it(`works for case: ${name}`, async () => {
       const doc = domParser.parseFromString(html, 'text/html');


[incubator-annotator] 01/08: Remove superfluous seek, tidy up code

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 6a09edf485d198c62807eb890284705fb075c466
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 19:34:20 2020 +0200

    Remove superfluous seek, tidy up code
---
 packages/dom/src/text-quote/match.ts | 46 ++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 3104ad3..f32afce 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -27,66 +27,62 @@ import { ownerDocument, rangeFromScope } from '../scope';
 export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): DomMatcher {
   return async function* matchAll(scope: DomScope) {
     const document = ownerDocument(scope);
-    const range = rangeFromScope(scope);
-    const root = range.commonAncestorContainer;
-    const text = range.toString();
+    const scopeAsRange = rangeFromScope(scope);
+    const scopeText = scopeAsRange.toString();
 
     const exact = selector.exact;
     const prefix = selector.prefix || '';
     const suffix = selector.suffix || '';
-    const pattern = prefix + exact + suffix;
+    const searchPattern = prefix + exact + suffix;
 
     const iter = document.createNodeIterator(
-      root,
+      scopeAsRange.commonAncestorContainer,
       NodeFilter.SHOW_TEXT,
       {
         acceptNode(node: Text) {
           // Only reveal nodes within the range; and skip any empty text nodes.
-          return range.intersectsNode(node) && node.length > 0
+          return scopeAsRange.intersectsNode(node) && node.length > 0
             ? NodeFilter.FILTER_ACCEPT
             : NodeFilter.FILTER_REJECT
         },
       },
     );
 
-    let fromIndex = 0;
-    let referenceNodeIndex = 0;
-
-    if (isTextNode(range.startContainer)) {
-      referenceNodeIndex -= range.startOffset;
-    }
+    // The index of the first character of iter.referenceNode inside the text.
+    let referenceNodeIndex = isTextNode(scopeAsRange.startContainer)
+      ? -scopeAsRange.startOffset
+      : 0;
 
-    while (fromIndex < text.length) {
-      const patternStartIndex = text.indexOf(pattern, fromIndex);
+    let fromIndex = 0;
+    while (fromIndex < scopeText.length) {
+      // Find the quote with its prefix and suffix in the string.
+      const patternStartIndex = scopeText.indexOf(searchPattern, fromIndex);
       if (patternStartIndex === -1) return;
 
-      const match = document.createRange();
-
+      // Correct for the prefix and suffix lengths.
       const matchStartIndex = patternStartIndex + prefix.length;
       const matchEndIndex = matchStartIndex + exact.length;
 
-      // Seek to the start of the match.
-      referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
+      // Create a range to represent this exact quote in the dom.
+      const match = document.createRange();
 
-      // Record the start container and offset.
+      // Seek to the start of the match, make the range start there.
+      referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
       match.setStart(iter.referenceNode, matchStartIndex - referenceNodeIndex);
 
-      // Seek to the end of the match.
+      // Seek to the end of the match, make the range end there.
       referenceNodeIndex += seek(iter, matchEndIndex - referenceNodeIndex);
-
-      // Record the end container and offset.
       match.setEnd(iter.referenceNode, matchEndIndex - referenceNodeIndex);
 
       // Yield the match.
       yield match;
 
-      // Advance the search forward.
+      // Advance the search forward to detect multiple occurrences.
       fromIndex = matchStartIndex + 1;
-      referenceNodeIndex += seek(iter, fromIndex - referenceNodeIndex);
     }
   };
 }
 
 function isTextNode(node: Node): node is Text {
-  return node.nodeType === Node.TEXT_NODE
+  return node.nodeType === Node.TEXT_NODE;
 }