You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/05/22 17:39:55 UTC

[incubator-annotator] branch dom-tests updated (2b26f66 -> e3efb3b)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


 discard 2b26f66  Add a few more cases.
     new a956d31  Add a few more cases.
     new e8f2ad9  Refactor helper functions
     new 68e16db  More tests — some failing
     new f9008da  Make reporting of failed tests more readable.
     new 76cc9c8  Update to dom-seek v5, improve its type declaration
     new a07e791  Remove dom-node-iterator polyfill
     new be49aa2  Fix failing test case
     new f57efc7  Simplify skipping of empty text nodes
     new e3efb3b  Remove superfluous seek, tidy up code

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (2b26f66)
            \
             N -- N -- N   refs/heads/dom-tests (e3efb3b)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 9 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 @types/dom-node-iterator/index.d.ts     |   4 -
 @types/dom-seek/index.d.ts              |   2 +-
 packages/dom/package.json               |   3 +-
 packages/dom/src/text-quote/describe.ts |   7 +-
 packages/dom/src/text-quote/match.ts    |  68 +++-----
 packages/dom/test/text-quote-match.ts   | 274 +++++++++++++++++++++++++++-----
 yarn.lock                               |  26 +--
 7 files changed, 268 insertions(+), 116 deletions(-)
 delete mode 100644 @types/dom-node-iterator/index.d.ts


[incubator-annotator] 03/09: More tests — some failing

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 68e16db42de9e70619ab596c515d34a939cd2b49
Author: Gerben <ge...@treora.com>
AuthorDate: Thu May 21 16:07:16 2020 +0200

    More tests — some failing
    
    Three tests fail because of an opinionated expectation: I expect a match
    at the start of a text node to have that text node as start container
    rather than its parent element (although technically such a Range should
    be equivalent).
    
    Two tests fail because we really mess up when the ancestorContainer of
    the scope contains text before its startContainer. Ouch.
---
 packages/dom/test/text-quote-match.ts | 149 ++++++++++++++++++++++++++++++++--
 1 file changed, 144 insertions(+), 5 deletions(-)

diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index 835a3dd..e348dbe 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -55,18 +55,48 @@ const testCases: {
       },
     ]
   },
+  'first characters': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'lorem ipsum',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 0,
+        endContainerXPath: '//b/text()',
+        endOffset: 11,
+      },
+    ]
+  },
+  'last characters': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'yada yada',
+    },
+    expected: [
+      {
+        startContainerXPath: '//b/text()',
+        startOffset: 23,
+        endContainerXPath: '//b/text()',
+        endOffset: 32,
+      },
+    ]
+  },
   'across elements': {
-    html: '<b>lorem <i>ipsum dolor</i> amet yada yada</b>',
+    html: '<b>lorem <i>ipsum</i> dolor <u>amet</u> yada yada</b>',
     selector: {
       type: 'TextQuoteSelector',
       exact: 'dolor am',
     },
     expected: [
       {
-        startContainerXPath: '//i/text()',
-        startOffset: 6,
-        endContainerXPath: '//b/text()[2]',
-        endOffset: 3,
+        startContainerXPath: '//b/text()[2]',
+        startOffset: 1,
+        endContainerXPath: '//u/text()',
+        endOffset: 2,
       },
     ]
   },
@@ -151,6 +181,115 @@ describe('createTextQuoteSelectorMatcher', () => {
       await testMatcher(doc, doc, selector, expected);
     });
   }
+
+  it('handles adjacent text nodes', async () => {
+    const { html, selector } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+    const textNode = evaluateXPath(doc, '//b/text()') as Text;
+
+    for (let index = textNode.length - 1; index > 0; index--)
+      textNode.splitText(index);
+    // console.log([...textNode.parentNode.childNodes].map(node => node.textContent))
+    // → 'l',  'o', 'r', 'e', 'm', …
+
+    await testMatcher(doc, doc, selector, [
+      {
+        startContainerXPath: '//b/text()[13]',
+        startOffset: 0,
+        endContainerXPath: '//b/text()[21]',
+        endOffset: 0,
+      },
+    ]);
+  });
+
+  it('handles empty text nodes', async () => {
+    const { html, selector } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    const textNode = evaluateXPath(doc, '//b/text()') as Text;
+    textNode.splitText(textNode.length);
+    textNode.splitText(20);
+    textNode.splitText(20);
+    textNode.splitText(17);
+    textNode.splitText(17);
+    textNode.splitText(12);
+    textNode.splitText(12);
+    textNode.splitText(0);
+    // console.log([...textNode.parentNode.childNodes].map(node => node.textContent))
+    // → '', 'lorem ipsum ', '', 'dolor', '', ' am', '', 'et yada yada', ''
+
+    await testMatcher(doc, doc, selector, [
+      {
+        startContainerXPath: '//b/text()[4]', // "dolor"
+        startOffset: 0,
+        endContainerXPath: '//b/text()[8]', // "et yada yada"
+        endOffset: 0,
+      },
+    ]);
+  });
+
+  it('works with parent of text as scope', async () => {
+    const { html, selector, expected } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    await testMatcher(doc, evaluateXPath(doc, '//b'), selector, expected);
+  });
+
+  it('works with parent of text as scope, when matching its first characters', async () => {
+    const { html, selector, expected } = testCases['first characters'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    await testMatcher(doc, evaluateXPath(doc, '//b'), selector, expected);
+  });
+
+  it('works with parent of text as scope, when matching its first characters, with an empty text node', async () => {
+    const { html, selector } = testCases['first characters'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    const textNode = evaluateXPath(doc, '//b/text()') as Text;
+    textNode.splitText(0);
+
+    await testMatcher(doc, evaluateXPath(doc, '//b'), selector, [
+      {
+        startContainerXPath: '//b/text()[2]',
+        startOffset: 0,
+        endContainerXPath: '//b/text()[2]',
+        endOffset: 11,
+      },
+    ]);
+  });
+
+  it('works when scope is a Range within one text node', async () => {
+    const { html, selector, expected } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    // Use the substring ‘ipsum dolor amet’ as scope.
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//b/text()'), 6);
+    scope.setEnd(evaluateXPath(doc, '//b/text()'), 22);
+    await testMatcher(doc, scope, selector, expected);
+  });
+
+  it('works when scope is a Range with both ends inside text nodes', async () => {
+    const { html, selector, expected } = testCases['across elements'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    // Use the substring ‘sum dolor am’ as scope.
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//i/text()'), 2);
+    scope.setEnd(evaluateXPath(doc, '//u/text()'), 2);
+    await testMatcher(doc, scope, selector, expected);
+  });
+
+  it('works when scope is a Range with both ends inside elements', async () => {
+    const { html, selector, expected } = testCases['across elements'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//b'), 1); // before the <i>
+    scope.setEnd(evaluateXPath(doc, '//b'), 4); // before the " yada yada"
+    await testMatcher(doc, scope, selector, expected);
+  });
 });
 
 async function testMatcher(


[incubator-annotator] 01/09: Add a few more cases.

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit a956d31fd1f88e9cd4581625588e347750114bc4
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 15 19:15:59 2020 +0200

    Add a few more cases.
---
 packages/dom/test/text-quote-match.ts | 93 +++++++++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 3 deletions(-)

diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index 4e9eb57..65d5c15 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -39,8 +39,8 @@ const testCases: {
     expected: RangeInfo[],
   }
 } = {
-  "simple": {
-    html: `<b>lorem ipsum dolor amet yada yada</b>`,
+  'simple': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
     selector: {
       type: 'TextQuoteSelector',
       exact: 'dolor am',
@@ -54,6 +54,93 @@ const testCases: {
       },
     ]
   },
+  'across elements': {
+    html: '<b>lorem <i>ipsum dolor</i> amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+    },
+    expected: [
+      {
+        startContainer: '//i/text()',
+        startOffset: 6,
+        endContainer: '//b/text()[2]',
+        endOffset: 3,
+      },
+    ]
+  },
+  'exact element contents': {
+    html: '<b>lorem <i>ipsum dolor</i> amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'ipsum dolor',
+    },
+    expected: [
+      {
+        startContainer: '//i/text()',
+        startOffset: 0,
+        endContainer: '//b/text()[2]',
+        endOffset: 0,
+      },
+    ]
+  },
+  'text inside <head>': {
+    html: '<head><title>The title</title></head><b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'title',
+    },
+    expected: [
+      {
+        startContainer: '//title/text()',
+        startOffset: 4,
+        endContainer: '//b/text()[1]',
+        endOffset: 0,
+      },
+    ]
+  },
+  'two matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'yada',
+    },
+    expected: [
+      {
+        startContainer: '//b/text()',
+        startOffset: 23,
+        endContainer: '//b/text()',
+        endOffset: 27,
+      },
+      {
+        startContainer: '//b/text()',
+        startOffset: 28,
+        endContainer: '//b/text()',
+        endOffset: 32,
+      },
+    ]
+  },
+  'overlapping matches': {
+    html: '<b>bananas</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'ana',
+    },
+    expected: [
+      {
+        startContainer: '//b/text()',
+        startOffset: 1,
+        endContainer: '//b/text()',
+        endOffset: 4,
+      },
+      {
+        startContainer: '//b/text()',
+        startOffset: 3,
+        endContainer: '//b/text()',
+        endOffset: 6,
+      },
+    ]
+  },
 };
 
 describe('createTextQuoteSelectorMatcher', () => {
@@ -61,7 +148,7 @@ describe('createTextQuoteSelectorMatcher', () => {
     it(`works for case: '${name}'`, async () => {
       const doc = domParser.parseFromString(html, 'text/html');
       const matcher = createTextQuoteSelectorMatcher(selector);
-      const matches = await asyncIterableToArray(matcher(doc.body));
+      const matches = await asyncIterableToArray(matcher(doc));
       assert.equal(matches.length, expected.length);
       matches.forEach((match, i) => {
         assert.include(match, hydrateRange(expected[i], doc));


[incubator-annotator] 02/09: Refactor helper functions

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit e8f2ad9577e4b8293cb4284f8e7b542c3612af7f
Author: Gerben <ge...@treora.com>
AuthorDate: Thu May 21 14:58:05 2020 +0200

    Refactor helper functions
---
 packages/dom/test/text-quote-match.ts | 79 ++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 39 deletions(-)

diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index 65d5c15..835a3dd 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -21,14 +21,15 @@
 import { assert } from 'chai';
 import { createTextQuoteSelectorMatcher } from '../src/text-quote/match';
 import { TextQuoteSelector } from '../../selector/src';
+import { DomScope } from '../src/types';
 
 const domParser = new window.DOMParser();
 
 // RangeInfo serialises a Range’s start and end containers as XPaths.
 type RangeInfo = {
-  startContainer: string,
+  startContainerXPath: string,
   startOffset: number,
-  endContainer: string,
+  endContainerXPath: string,
   endOffset: number,
 };
 
@@ -47,9 +48,9 @@ const testCases: {
     },
     expected: [
       {
-        startContainer: '//b/text()',
+        startContainerXPath: '//b/text()',
         startOffset: 12,
-        endContainer: '//b/text()',
+        endContainerXPath: '//b/text()',
         endOffset: 20,
       },
     ]
@@ -62,9 +63,9 @@ const testCases: {
     },
     expected: [
       {
-        startContainer: '//i/text()',
+        startContainerXPath: '//i/text()',
         startOffset: 6,
-        endContainer: '//b/text()[2]',
+        endContainerXPath: '//b/text()[2]',
         endOffset: 3,
       },
     ]
@@ -77,9 +78,9 @@ const testCases: {
     },
     expected: [
       {
-        startContainer: '//i/text()',
+        startContainerXPath: '//i/text()',
         startOffset: 0,
-        endContainer: '//b/text()[2]',
+        endContainerXPath: '//b/text()[2]',
         endOffset: 0,
       },
     ]
@@ -92,9 +93,9 @@ const testCases: {
     },
     expected: [
       {
-        startContainer: '//title/text()',
+        startContainerXPath: '//title/text()',
         startOffset: 4,
-        endContainer: '//b/text()[1]',
+        endContainerXPath: '//b/text()[1]',
         endOffset: 0,
       },
     ]
@@ -107,15 +108,15 @@ const testCases: {
     },
     expected: [
       {
-        startContainer: '//b/text()',
+        startContainerXPath: '//b/text()',
         startOffset: 23,
-        endContainer: '//b/text()',
+        endContainerXPath: '//b/text()',
         endOffset: 27,
       },
       {
-        startContainer: '//b/text()',
+        startContainerXPath: '//b/text()',
         startOffset: 28,
-        endContainer: '//b/text()',
+        endContainerXPath: '//b/text()',
         endOffset: 32,
       },
     ]
@@ -128,15 +129,15 @@ const testCases: {
     },
     expected: [
       {
-        startContainer: '//b/text()',
+        startContainerXPath: '//b/text()',
         startOffset: 1,
-        endContainer: '//b/text()',
+        endContainerXPath: '//b/text()',
         endOffset: 4,
       },
       {
-        startContainer: '//b/text()',
+        startContainerXPath: '//b/text()',
         startOffset: 3,
-        endContainer: '//b/text()',
+        endContainerXPath: '//b/text()',
         endOffset: 6,
       },
     ]
@@ -147,31 +148,31 @@ describe('createTextQuoteSelectorMatcher', () => {
   for (const [name, { html, selector, expected }] of Object.entries(testCases)) {
     it(`works for case: '${name}'`, async () => {
       const doc = domParser.parseFromString(html, 'text/html');
-      const matcher = createTextQuoteSelectorMatcher(selector);
-      const matches = await asyncIterableToArray(matcher(doc));
-      assert.equal(matches.length, expected.length);
-      matches.forEach((match, i) => {
-        assert.include(match, hydrateRange(expected[i], doc));
-      });
+      await testMatcher(doc, doc, selector, expected);
     });
   }
 });
 
-async function asyncIterableToArray<T>(source: AsyncIterable<T>): Promise<T[]> {
-  const values = [];
-  for await (const value of source) {
-    values.push(value);
-  };
-  return values;
-}
-
-// Evaluate the XPath expressions to the corresponding Nodes in the DOM.
-function hydrateRange(rangeInfo: RangeInfo, doc: Document): Partial<Range> {
-  return {
-    ...rangeInfo,
-    startContainer: evaluateXPath(doc, rangeInfo.startContainer),
-    endContainer: evaluateXPath(doc, rangeInfo.endContainer),
-  }
+async function testMatcher(
+  doc: Document,
+  scope: DomScope,
+  selector: TextQuoteSelector,
+  expected: RangeInfo[]
+) {
+  const matcher = createTextQuoteSelectorMatcher(selector);
+  const matches = [];
+  for await (const value of matcher(scope))
+    matches.push(value);
+  assert.equal(matches.length, expected.length);
+  matches.forEach((match, i) => {
+    const expectedRange = expected[i];
+    assert.include(match, {
+      startContainer: evaluateXPath(doc, expectedRange.startContainerXPath),
+      startOffset: expectedRange.startOffset,
+      endContainer: evaluateXPath(doc, expectedRange.endContainerXPath),
+      endOffset: expectedRange.endOffset,
+    });
+  });
 }
 
 function evaluateXPath(doc: Document, xpath: string): Node {


[incubator-annotator] 07/09: Fix failing test case

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit be49aa238fc313141873df3367004dfd5bdb48e9
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 17:59:20 2020 +0200

    Fix failing test case
    
    Our iterator would include text outside the scope’s range but within
    its commonAncestorContainer.
    
    The same mistake is also present in describe.ts; this calls for a better approach.
---
 packages/dom/src/text-quote/match.ts | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index d5ded88..cc1de71 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -36,7 +36,16 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
     const suffix = selector.suffix || '';
     const pattern = prefix + exact + suffix;
 
-    const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
+    const iter = document.createNodeIterator(
+      root,
+      NodeFilter.SHOW_TEXT,
+      {
+        acceptNode: node =>
+          range.intersectsNode(node)
+            ? NodeFilter.FILTER_ACCEPT
+            : NodeFilter.FILTER_REJECT
+      },
+    );
 
     let fromIndex = 0;
     let referenceNodeIndex = 0;


[incubator-annotator] 08/09: Simplify skipping of empty text nodes

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit f57efc7a68bd35ca6d39d6435bb41e3e8638eb7e
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 19:04:34 2020 +0200

    Simplify skipping of empty text nodes
---
 packages/dom/src/text-quote/match.ts | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index cc1de71..3104ad3 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -40,10 +40,12 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
       root,
       NodeFilter.SHOW_TEXT,
       {
-        acceptNode: node =>
-          range.intersectsNode(node)
+        acceptNode(node: Text) {
+          // Only reveal nodes within the range; and skip any empty text nodes.
+          return range.intersectsNode(node) && node.length > 0
             ? NodeFilter.FILTER_ACCEPT
             : NodeFilter.FILTER_REJECT
+        },
       },
     );
 
@@ -66,34 +68,12 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
       // Seek to the start of the match.
       referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
 
-      // Peek forward and skip over any empty nodes.
-      if (iter.nextNode()) {
-        while ((iter.referenceNode.nodeValue as String).length === 0) {
-          iter.nextNode();
-        }
-
-        // The iterator now points to the end of the reference node.
-        // Move the iterator back to the start of the reference node.
-        iter.previousNode();
-      }
-
       // Record the start container and offset.
       match.setStart(iter.referenceNode, matchStartIndex - referenceNodeIndex);
 
       // Seek to the end of the match.
       referenceNodeIndex += seek(iter, matchEndIndex - referenceNodeIndex);
 
-      // Peek forward and skip over any empty nodes.
-      if (iter.nextNode()) {
-        while ((iter.referenceNode.nodeValue as String).length === 0) {
-          iter.nextNode();
-        }
-
-        // The iterator now points to the end of the reference node.
-        // Move the iterator back to the start of the reference node.
-        iter.previousNode();
-      }
-
       // Record the end container and offset.
       match.setEnd(iter.referenceNode, matchEndIndex - referenceNodeIndex);
 


[incubator-annotator] 05/09: Update to dom-seek v5, improve its type declaration

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 76cc9c80accdeddf150d5a5504f717b7fee1400c
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 15:54:27 2020 +0200

    Update to dom-seek v5, improve its type declaration
    
    Fixes failing tests due to not seeking at all. Although perhaps we
    should not rely on dom-seek to keep its exact behaviour as is?
---
 @types/dom-seek/index.d.ts           |  2 +-
 packages/dom/package.json            |  2 +-
 packages/dom/src/text-quote/match.ts | 39 ++++++++++++++----------------------
 yarn.lock                            | 21 ++++---------------
 4 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/@types/dom-seek/index.d.ts b/@types/dom-seek/index.d.ts
index 0ba0753..5bc1bc2 100644
--- a/@types/dom-seek/index.d.ts
+++ b/@types/dom-seek/index.d.ts
@@ -1,3 +1,3 @@
 declare module 'dom-seek' {
-  export default function seek(iter: NodeIterator, where: number | Node): number;
+  export default function seek(iter: NodeIterator, where: number | Text): number;
 }
diff --git a/packages/dom/package.json b/packages/dom/package.json
index fb1f5a6..fb1aede 100644
--- a/packages/dom/package.json
+++ b/packages/dom/package.json
@@ -16,7 +16,7 @@
     "cartesian": "^1.0.1",
     "core-js": "^3.6.4",
     "dom-node-iterator": "^3.5.3",
-    "dom-seek": "^4.0.3"
+    "dom-seek": "^5.1.0"
   },
   "engines": {
     "node": ">=10.0.0"
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 2657a6f..4118fd0 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -58,18 +58,15 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
       // Seek to the start of the match.
       referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
 
-      // Normalize the reference to the start of the match.
-      if (!iter.pointerBeforeReferenceNode) {
-        // Peek forward and skip over any empty nodes.
-        if (iter.nextNode()) {
-          while ((iter.referenceNode.nodeValue as String).length === 0) {
-            iter.nextNode();
-          }
-
-          // The iterator now points to the end of the reference node.
-          // Move the iterator back to the start of the reference node.
-          iter.previousNode();
+      // Peek forward and skip over any empty nodes.
+      if (iter.nextNode()) {
+        while ((iter.referenceNode.nodeValue as String).length === 0) {
+          iter.nextNode();
         }
+
+        // The iterator now points to the end of the reference node.
+        // Move the iterator back to the start of the reference node.
+        iter.previousNode();
       }
 
       // Record the start container and offset.
@@ -78,21 +75,15 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
       // Seek to the end of the match.
       referenceNodeIndex += seek(iter, matchEndIndex - referenceNodeIndex);
 
-      // Normalize the reference to the end of the match.
-      if (!iter.pointerBeforeReferenceNode) {
-        // Peek forward and skip over any empty nodes.
-        if (iter.nextNode()) {
-          while ((iter.referenceNode.nodeValue as String).length === 0) {
-            iter.nextNode();
-          }
-
-          // The iterator now points to the end of the reference node.
-          // Move the iterator back to the start of the reference node.
-          iter.previousNode();
+      // Peek forward and skip over any empty nodes.
+      if (iter.nextNode()) {
+        while ((iter.referenceNode.nodeValue as String).length === 0) {
+          iter.nextNode();
         }
 
-        // Maybe seek backwards to the start of the node.
-        referenceNodeIndex += seek(iter, iter.referenceNode);
+        // The iterator now points to the end of the reference node.
+        // Move the iterator back to the start of the reference node.
+        iter.previousNode();
       }
 
       // Record the end container and offset.
diff --git a/yarn.lock b/yarn.lock
index 7a68012..c20b2b2 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2212,11 +2212,6 @@ ajv@^6.12.0:
     json-schema-traverse "^0.4.1"
     uri-js "^4.2.2"
 
-ancestors@0.0.3:
-  version "0.0.3"
-  resolved "https://registry.yarnpkg.com/ancestors/-/ancestors-0.0.3.tgz#124eb944447d68b302057047d15d077a9da5179d"
-  integrity sha1-Ek65RER9aLMCBXBH0V0Hep2lF50=
-
 ansi-colors@3.2.3:
   version "3.2.3"
   resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-3.2.3.tgz#57d35b8686e851e2cc04c403f1c00203976a1813"
@@ -3959,13 +3954,10 @@ dom-node-iterator@^3.5.3:
   resolved "https://registry.yarnpkg.com/dom-node-iterator/-/dom-node-iterator-3.5.3.tgz#32b68aa440962f1734487029f544a3db704637b7"
   integrity sha1-MraKpECWLxc0SHAp9USj23BGN7c=
 
-dom-seek@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/dom-seek/-/dom-seek-4.0.3.tgz#f14dddf04b3fb062d901c7b00b0c142a06e0a94b"
-  integrity sha1-8U3d8Es/sGLZAcewCwwUKgbgqUs=
-  dependencies:
-    ancestors "0.0.3"
-    index-of "^0.2.0"
+dom-seek@^5.1.0:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/dom-seek/-/dom-seek-5.1.1.tgz#4e35bee763b6ba082f372345823ec9665d1fbf26"
+  integrity sha512-1strSwd201Gfhfkfsk77SX9xyJGzu12gqUo5Q0W3Njtj2QxcfQTwCDOynZ6npZ4ASUFRQq0asjYDRlFxYPKwTA==
 
 domain-browser@^1.1.1:
   version "1.2.0"
@@ -5657,11 +5649,6 @@ indent-string@^4.0.0:
   resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
   integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==
 
-index-of@^0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/index-of/-/index-of-0.2.0.tgz#38c1e2367ea55dffad3b6eb592ec1cc3090d7d65"
-  integrity sha1-OMHiNn6lXf+tO261kuwcwwkNfWU=
-
 indexes-of@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/indexes-of/-/indexes-of-1.0.1.tgz#f30f716c8e2bd346c7b67d3df3915566a7c05607"


[incubator-annotator] 04/09: Make reporting of failed tests more readable.

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit f9008da04636a54d95120f9f498a43e5c2bfcb20
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 14:21:21 2020 +0200

    Make reporting of failed tests more readable.
---
 packages/dom/test/text-quote-match.ts | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index e348dbe..ac5b13a 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -305,12 +305,18 @@ async function testMatcher(
   assert.equal(matches.length, expected.length);
   matches.forEach((match, i) => {
     const expectedRange = expected[i];
-    assert.include(match, {
-      startContainer: evaluateXPath(doc, expectedRange.startContainerXPath),
-      startOffset: expectedRange.startOffset,
-      endContainer: evaluateXPath(doc, expectedRange.endContainerXPath),
-      endOffset: expectedRange.endOffset,
-    });
+    const expectedStartContainer = evaluateXPath(doc, expectedRange.startContainerXPath);
+    const expectedEndContainer = evaluateXPath(doc, expectedRange.endContainerXPath);
+    assert(match.startContainer === expectedStartContainer,
+      `unexpected start container: ${prettyNodeName(match.startContainer)}; `
+      + `expected ${prettyNodeName(expectedStartContainer)}`
+    );
+    assert.equal(match.startOffset, expectedRange.startOffset);
+    assert(match.endContainer === evaluateXPath(doc, expectedRange.endContainerXPath),
+      `unexpected end container: ${prettyNodeName(match.endContainer)}; `
+      + `expected ${prettyNodeName(expectedEndContainer)}`
+    );
+    assert.equal(match.endOffset, expectedRange.endOffset);
   });
 }
 
@@ -322,3 +328,15 @@ function evaluateXPath(doc: Document, xpath: string): Node {
   );
   return nodes[0];
 }
+
+function prettyNodeName(node: Node) {
+  switch (node.nodeType) {
+    case Node.TEXT_NODE:
+      const text = (node as Text).nodeValue;
+      return `#text "${text.length > 50 ? text.substring(0, 50) + '…' : text}"`;
+    case Node.ELEMENT_NODE:
+      return `<${(node as Element).tagName.toLowerCase()}>`;
+    default:
+      return node.nodeName.toLowerCase();
+  }
+}


[incubator-annotator] 09/09: Remove superfluous seek, tidy up code

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit e3efb3b9aefd82c55a01fd5cf8810b2f35207c01
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 19:34:20 2020 +0200

    Remove superfluous seek, tidy up code
---
 packages/dom/src/text-quote/match.ts | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 3104ad3..7e869f0 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -27,9 +27,8 @@ import { ownerDocument, rangeFromScope } from '../scope';
 export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): DomMatcher {
   return async function* matchAll(scope: DomScope) {
     const document = ownerDocument(scope);
-    const range = rangeFromScope(scope);
-    const root = range.commonAncestorContainer;
-    const text = range.toString();
+    const scopeAsRange = rangeFromScope(scope);
+    const fullText = scopeAsRange.toString();
 
     const exact = selector.exact;
     const prefix = selector.prefix || '';
@@ -37,27 +36,26 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
     const pattern = prefix + exact + suffix;
 
     const iter = document.createNodeIterator(
-      root,
+      scopeAsRange.commonAncestorContainer,
       NodeFilter.SHOW_TEXT,
       {
         acceptNode(node: Text) {
           // Only reveal nodes within the range; and skip any empty text nodes.
-          return range.intersectsNode(node) && node.length > 0
+          return scopeAsRange.intersectsNode(node) && node.length > 0
             ? NodeFilter.FILTER_ACCEPT
             : NodeFilter.FILTER_REJECT
         },
       },
     );
 
-    let fromIndex = 0;
-    let referenceNodeIndex = 0;
-
-    if (isTextNode(range.startContainer)) {
-      referenceNodeIndex -= range.startOffset;
-    }
+    // The index of the first character of iter.referenceNode inside the text.
+    let referenceNodeIndex = isTextNode(scopeAsRange.startContainer)
+      ? -scopeAsRange.startOffset
+      : 0;
 
-    while (fromIndex < text.length) {
-      const patternStartIndex = text.indexOf(pattern, fromIndex);
+    let fromIndex = 0;
+    while (fromIndex < fullText.length) {
+      const patternStartIndex = fullText.indexOf(pattern, fromIndex);
       if (patternStartIndex === -1) return;
 
       const match = document.createRange();
@@ -82,11 +80,10 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
 
       // Advance the search forward.
       fromIndex = matchStartIndex + 1;
-      referenceNodeIndex += seek(iter, fromIndex - referenceNodeIndex);
     }
   };
 }
 
 function isTextNode(node: Node): node is Text {
-  return node.nodeType === Node.TEXT_NODE
+  return node.nodeType === Node.TEXT_NODE;
 }


[incubator-annotator] 06/09: Remove dom-node-iterator polyfill

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit a07e7912470f60fe1c16bc08248d42a771438f7a
Author: Gerben <ge...@treora.com>
AuthorDate: Fri May 22 17:31:30 2020 +0200

    Remove dom-node-iterator polyfill
---
 @types/dom-node-iterator/index.d.ts     | 4 ----
 packages/dom/package.json               | 1 -
 packages/dom/src/text-quote/describe.ts | 7 +++----
 packages/dom/src/text-quote/match.ts    | 3 +--
 yarn.lock                               | 5 -----
 5 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/@types/dom-node-iterator/index.d.ts b/@types/dom-node-iterator/index.d.ts
deleted file mode 100644
index 0e10887..0000000
--- a/@types/dom-node-iterator/index.d.ts
+++ /dev/null
@@ -1,4 +0,0 @@
-declare module 'dom-node-iterator' {
-  let createNodeIterator: Document['createNodeIterator'];
-  export default createNodeIterator;
-}
diff --git a/packages/dom/package.json b/packages/dom/package.json
index fb1aede..4f0baed 100644
--- a/packages/dom/package.json
+++ b/packages/dom/package.json
@@ -15,7 +15,6 @@
     "@babel/runtime-corejs3": "^7.8.7",
     "cartesian": "^1.0.1",
     "core-js": "^3.6.4",
-    "dom-node-iterator": "^3.5.3",
     "dom-seek": "^5.1.0"
   },
   "engines": {
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index ef7eb53..57a587c 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -18,7 +18,6 @@
  * under the License.
  */
 
-import createNodeIterator from 'dom-node-iterator';
 import seek from 'dom-seek';
 
 import { TextQuoteSelector } from '../../../selector/src';
@@ -32,7 +31,7 @@ function firstTextNodeInRange(range: Range): Text {
   if (isTextNode(startContainer)) return startContainer;
 
   const root = range.commonAncestorContainer;
-  const iter = createNodeIterator(root, NodeFilter.SHOW_TEXT);
+  const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
   return iter.nextNode() as Text;
 }
 
@@ -62,7 +61,7 @@ async function calculateContextForDisambiguation(
 
   const matcher = createTextQuoteSelectorMatcher(selector);
 
-  const iter = createNodeIterator(root, NodeFilter.SHOW_TEXT);
+  const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
 
   const startNode = firstTextNodeInRange(range);
   const startIndex =
@@ -74,7 +73,7 @@ async function calculateContextForDisambiguation(
   const affixLengthPairs: Array<[number, number]> = [];
 
   for await (const match of matcher(scopeAsRange)) {
-    const matchIter = createNodeIterator(root, NodeFilter.SHOW_TEXT);
+    const matchIter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
 
     const matchStartNode = firstTextNodeInRange(match);
     const matchStartIndex =
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 4118fd0..d5ded88 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -18,7 +18,6 @@
  * under the License.
  */
 
-import createNodeIterator from 'dom-node-iterator';
 import seek from 'dom-seek';
 
 import { TextQuoteSelector } from '../../../selector/src';
@@ -37,7 +36,7 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
     const suffix = selector.suffix || '';
     const pattern = prefix + exact + suffix;
 
-    const iter = createNodeIterator(root, NodeFilter.SHOW_TEXT);
+    const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
 
     let fromIndex = 0;
     let referenceNodeIndex = 0;
diff --git a/yarn.lock b/yarn.lock
index c20b2b2..c7cf27a 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -3949,11 +3949,6 @@ doctrine@^3.0.0:
   dependencies:
     esutils "^2.0.2"
 
-dom-node-iterator@^3.5.3:
-  version "3.5.3"
-  resolved "https://registry.yarnpkg.com/dom-node-iterator/-/dom-node-iterator-3.5.3.tgz#32b68aa440962f1734487029f544a3db704637b7"
-  integrity sha1-MraKpECWLxc0SHAp9USj23BGN7c=
-
 dom-seek@^5.1.0:
   version "5.1.1"
   resolved "https://registry.yarnpkg.com/dom-seek/-/dom-seek-5.1.1.tgz#4e35bee763b6ba082f372345823ec9665d1fbf26"