You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ra...@apache.org on 2019/06/30 16:45:20 UTC

[incubator-annotator] 03/03: Refactor @annotator/dom to be DOM-centric

This is an automated email from the ASF dual-hosted git repository.

randall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit c8597333ec34c19d34c065504ecaae727fa6086f
Author: Randall Leeds <ra...@apache.org>
AuthorDate: Sun Jun 30 09:40:35 2019 -0700

    Refactor @annotator/dom to be DOM-centric
    
    Refactor @annotator/dom to accept Node and Range as the selector scope
    and to yield Range matches.
    
    Rather than try to fit the DOM selectors into the shape of a RegExp
    match, focus on the DOM as the way to model selector scopes and
    matches. Refinement becomes straightforward because it can now expect
    that any refinable selector can accept its matches as scopes. The range
    selector implementation gets simpler, too.
    
    Yielding Range objects directly from the DOM selectors means that
    callers can more easily work with the matches without dealing with DOM
    text traversal.
---
 demo/search.js                 |  25 +----
 package.json                   |   2 -
 packages/dom/src/text-quote.js | 214 +++++++++++++++++++++++++++++------------
 packages/range/src/index.js    |  21 ++--
 packages/selector/src/index.js |   9 +-
 yarn.lock                      |   5 -
 6 files changed, 164 insertions(+), 112 deletions(-)

diff --git a/demo/search.js b/demo/search.js
index f66fb25..7760088 100644
--- a/demo/search.js
+++ b/demo/search.js
@@ -16,8 +16,6 @@
 import { makeRefinable } from '@annotator/selector';
 import { createRangeSelectorCreator } from '@annotator/range';
 import { createTextQuoteSelector } from '@annotator/dom';
-import createNodeIterator from 'dom-node-iterator';
-import seek from 'dom-seek';
 
 const createSelector = makeRefinable(selector => {
   const selectorCreator = {
@@ -39,26 +37,5 @@ const createSelector = makeRefinable(selector => {
  * @return {Range}
  */
 export async function* search(root, selector) {
-  const matches = createSelector(selector)(root);
-
-  for await (let match of matches) {
-    const matchIndex = match.index;
-    const matchLength = match[0].length;
-
-    const iter = createNodeIterator(root, NodeFilter.SHOW_TEXT);
-
-    const startIndex = seek(iter, matchIndex);
-    const startContainer = iter.referenceNode;
-    const startOffset = match.index - startIndex;
-
-    const endIndex = startIndex + seek(iter, startOffset + matchLength);
-    const endContainer = iter.referenceNode;
-    const endOffset = matchIndex + matchLength - endIndex;
-
-    const range = document.createRange();
-    range.setStart(startContainer, startOffset);
-    range.setEnd(endContainer, endOffset);
-
-    yield range;
-  }
+  yield* createSelector(selector)(root);
 }
diff --git a/package.json b/package.json
index c52bf48..514191f 100644
--- a/package.json
+++ b/package.json
@@ -49,8 +49,6 @@
     "core-js": "3",
     "cross-env": "^5.2.0",
     "dom-highlight-range": "^1.0.1",
-    "dom-node-iterator": "^3.5.3",
-    "dom-seek": "^4.0.3",
     "eslint": "^5.16.0",
     "eslint-config-prettier": "^4.1.0",
     "eslint-import-resolver-babel-module": "^5.0.1",
diff --git a/packages/dom/src/text-quote.js b/packages/dom/src/text-quote.js
index 7484f65..12e5a4a 100644
--- a/packages/dom/src/text-quote.js
+++ b/packages/dom/src/text-quote.js
@@ -22,99 +22,187 @@ const TEXT_NODE = 3;
 // NodeFilter constants
 const SHOW_TEXT = 4;
 
-// Range constants
-const START_TO_START = 0;
-const END_TO_END = 2;
-
-function textContent(scope) {
-  return scope instanceof Object && 'textContent' in scope
-    ? scope.textContent
-    : String(scope);
+function firstTextNodeInRange(range) {
+  const { startContainer } = range;
+
+  if (startContainer.nodeType === TEXT_NODE) return startContainer;
+
+  const root = range.commonAncestorContainer;
+  const iter = createNodeIterator(root, SHOW_TEXT);
+  return iter.nextNode();
+}
+
+function ownerDocument(scope) {
+  if ('commonAncestorContainer' in scope) {
+    return scope.commonAncestorContainer.ownerDocument;
+  }
+
+  return scope.ownerDocument;
+}
+
+function rangeFromScope(scope) {
+  if ('commonAncestorContainer' in scope) {
+    return scope;
+  }
+
+  const document = scope.ownerDocument;
+  const range = document.createRange();
+
+  range.selectNodeContents(scope);
+
+  return range;
 }
 
 export function createTextQuoteSelector(selector) {
   return async function* matchAll(scope) {
-    const text = textContent(scope);
+    const document = ownerDocument(scope);
+    const range = rangeFromScope(scope);
+    const root = range.commonAncestorContainer;
+    const text = range.toString();
 
+    const exact = selector.exact;
     const prefix = selector.prefix || '';
     const suffix = selector.suffix || '';
-    const pattern = prefix + selector.exact + suffix;
+    const pattern = prefix + exact + suffix;
+
+    const iter = createNodeIterator(root, SHOW_TEXT);
+
+    let fromIndex = 0;
+    let referenceNodeIndex = 0;
+
+    if (range.startContainer.nodeType === TEXT_NODE) {
+      referenceNodeIndex -= range.startOffset;
+    }
+
+    while (fromIndex < text.length) {
+      const patternStartIndex = text.indexOf(pattern, fromIndex);
+      if (patternStartIndex === -1) return;
+
+      const match = document.createRange();
+
+      const matchStartIndex = patternStartIndex + prefix.length;
+      const matchEndIndex = matchStartIndex + exact.length;
+
+      // Seek to the start of the match.
+      referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
+
+      // Normalize the reference to the start of the match.
+      if (!iter.pointerBeforeReferenceNode) {
+        // Peek forward and skip over any empty nodes.
+        if (iter.nextNode()) {
+          while (iter.referenceNode.nodeValue.length === 0) {
+            iter.nextNode();
+          }
+
+          // The iterator now points to the end of the reference node.
+          // Move the iterator back to the start of the reference node.
+          iter.previousNode();
+        }
+      }
+
+      // Record the start container and offset.
+      match.setStart(iter.referenceNode, matchStartIndex - referenceNodeIndex);
+
+      // Seek to the end of the match.
+      referenceNodeIndex += seek(iter, matchEndIndex - referenceNodeIndex);
 
-    let fromIndex = -1;
+      // Normalize the reference to the end of the match.
+      if (!iter.pointerBeforeReferenceNode) {
+        // Peek forward and skip over any empty nodes.
+        if (iter.nextNode()) {
+          while (iter.referenceNode.nodeValue.length === 0) {
+            iter.nextNode();
+          }
 
-    while (true) {
-      const matchIndex = text.indexOf(pattern, fromIndex + 1);
-      if (matchIndex == -1) return;
+          // The iterator now points to the end of the reference node.
+          // Move the iterator back to the start of the reference node.
+          iter.previousNode();
+        }
 
-      const result = [selector.exact];
-      result.index = matchIndex + prefix.length;
-      result.input = text;
+        // Maybe seek backwards to the start of the node.
+        referenceNodeIndex += seek(iter, iter.referenceNode);
+      }
 
-      yield result;
+      // Record the end container and offset.
+      match.setEnd(iter.referenceNode, matchEndIndex - referenceNodeIndex);
 
-      fromIndex = matchIndex;
+      // Yield the match.
+      yield match;
+
+      // Advance the search forward.
+      fromIndex = matchStartIndex + 1;
+      referenceNodeIndex += seek(iter, fromIndex - referenceNodeIndex);
     }
   };
 }
 
 export async function describeTextQuoteByRange({ range, context }) {
-  if (context.compareBoundaryPoints(START_TO_START, range) > 0) {
-    range.setStart(context.startContainer, context.startOffset);
-  }
-
-  if (context.compareBoundaryPoints(END_TO_END, range) < 0) {
-    range.setEnd(context.endContainer, context.endOffset);
-  }
+  const root = context.commonAncestorContainer;
+  const text = context.toString();
 
-  const contextText = context.toString();
   const exact = range.toString();
+  const selector = createTextQuoteSelector({ exact });
 
-  const selector = {
-    type: 'TextQuoteSelector',
-    exact,
-  };
-
-  const root = context.commonAncestorContainer;
   const iter = createNodeIterator(root, SHOW_TEXT);
 
-  const rangeIndex =
+  const startNode = firstTextNodeInRange(range);
+  const startIndex =
     range.startContainer.nodeType === TEXT_NODE
-      ? seek(iter, range.startContainer) + range.startOffset
-      : seek(iter, range.startContainer);
-
-  const rangeEndIndex = rangeIndex + exact.length;
+      ? seek(iter, startNode) + range.startOffset
+      : seek(iter, startNode);
+  const endIndex = startIndex + exact.length;
 
-  const matches = createTextQuoteSelector(selector)(context);
   const minSuffixes = [];
   const minPrefixes = [];
-  for await (let match of matches) {
-    // For every match that is not our range, we look how many characters we
-    // have to add as prefix or suffix to disambiguate.
-    if (match.index !== rangeIndex) {
-      const matchEndIndex = match.index + match[0].length;
-      const suffixOverlap = overlap(
-        contextText.substring(matchEndIndex),
-        contextText.substring(rangeEndIndex),
-      );
-      minSuffixes.push(suffixOverlap + 1);
-      const prefixOverlap = overlapRight(
-        contextText.substring(0, match.index),
-        contextText.substring(0, rangeIndex),
-      );
-      minPrefixes.push(prefixOverlap + 1);
+
+  for await (const match of selector(context)) {
+    const matchIter = createNodeIterator(root, SHOW_TEXT);
+
+    const matchStartNode = firstTextNodeInRange(match);
+    const matchStartIndex =
+      match.startContainer.nodeType === TEXT_NODE
+        ? seek(matchIter, matchStartNode) + match.startOffset
+        : seek(matchIter, matchStartNode);
+    const matchEndIndex = matchStartIndex + match.toString().length;
+
+    // If the match is the same as the input range, continue.
+    if (matchStartIndex === startIndex || matchEndIndex === endIndex) {
+      continue;
     }
-  }
-  const [minSuffix, minPrefix] = minimalSolution(minSuffixes, minPrefixes);
-  if (minSuffix > 0) {
-    selector.suffix = contextText.substring(
-      rangeEndIndex,
-      rangeEndIndex + minSuffix,
+
+    // Determine how many prefix characters are shared.
+    const prefixOverlap = overlapRight(
+      text.substring(0, matchStartIndex),
+      text.substring(0, startIndex),
+    );
+
+    // Determine how many suffix characters are shared.
+    const suffixOverlap = overlap(
+      text.substring(matchEndIndex),
+      text.substring(endIndex),
     );
+
+    // Record the prefix or suffix lengths that would not have matched.
+    minPrefixes.push(prefixOverlap + 1);
+    minSuffixes.push(suffixOverlap + 1);
   }
-  if (minPrefix > 0) {
-    selector.prefix = contextText.substring(rangeIndex - minPrefix, rangeIndex);
+
+  // Construct and return an unambiguous selector.
+  const result = { type: 'TextQuoteSelector', exact };
+
+  if (minPrefixes.length > 0 || minSuffixes.length > 0) {
+    const [minPrefix, minSuffix] = minimalSolution(minPrefixes, minSuffixes);
+
+    if (minPrefix > 0) {
+      result.prefix = text.substring(startIndex - minPrefix, startIndex);
+    }
+
+    if (minSuffix > 0) {
+      result.suffix = text.substring(endIndex, endIndex + minSuffix);
+    }
   }
-  return selector;
+
+  return result;
 }
 
 function overlap(text1, text2) {
diff --git a/packages/range/src/index.js b/packages/range/src/index.js
index e67aa7d..2adb680 100644
--- a/packages/range/src/index.js
+++ b/packages/range/src/index.js
@@ -15,10 +15,12 @@
 
 import { product } from './cartesian.js';
 
-function textContent(scope) {
-  return scope instanceof Object && 'textContent' in scope
-    ? scope.textContent
-    : String(scope);
+function ownerDocument(scope) {
+  if ('commonAncestorContainer' in scope) {
+    return scope.commonAncestorContainer.ownerDocument;
+  }
+
+  return scope.ownerDocument;
 }
 
 export function createRangeSelectorCreator(createSelector) {
@@ -27,7 +29,7 @@ export function createRangeSelectorCreator(createSelector) {
     const endSelector = createSelector(selector.endSelector);
 
     return async function* matchAll(scope) {
-      const text = textContent(scope);
+      const document = ownerDocument(scope);
 
       const startMatches = startSelector(scope);
       const endMatches = endSelector(scope);
@@ -35,13 +37,12 @@ export function createRangeSelectorCreator(createSelector) {
       const pairs = product(startMatches, endMatches);
 
       for await (let [start, end] of pairs) {
-        if (start.index > end.index) continue;
+        const result = document.createRange();
 
-        const result = [text.substring(start.index, end.index)];
-        result.index = start.index;
-        result.input = text;
+        result.setStart(start.endContainer, start.endOffset);
+        result.setEnd(end.startContainer, end.startOffset);
 
-        yield result;
+        if (!result.collapsed) yield result;
       }
     };
   };
diff --git a/packages/selector/src/index.js b/packages/selector/src/index.js
index 1017419..698159a 100644
--- a/packages/selector/src/index.js
+++ b/packages/selector/src/index.js
@@ -22,14 +22,7 @@ export function makeRefinable(selectorCreator) {
 
       return async function* matchAll(scope) {
         for await (const match of selector(scope)) {
-          const start = match.index;
-          const end = start + match[0].length;
-
-          for await (const refiningMatch of refiningSelector(scope)) {
-            if (refiningMatch.index < start) continue;
-            if (refiningMatch.index + refiningMatch[0].length > end) continue;
-            yield refiningMatch;
-          }
+          yield* refiningSelector(match);
         }
       };
     }
diff --git a/yarn.lock b/yarn.lock
index f7f81c5..28b30d3 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -4770,11 +4770,6 @@ index-of@^0.2.0:
   resolved "https://registry.yarnpkg.com/index-of/-/index-of-0.2.0.tgz#38c1e2367ea55dffad3b6eb592ec1cc3090d7d65"
   integrity sha1-OMHiNn6lXf+tO261kuwcwwkNfWU=
 
-index-of@^0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/index-of/-/index-of-0.2.0.tgz#38c1e2367ea55dffad3b6eb592ec1cc3090d7d65"
-  integrity sha1-OMHiNn6lXf+tO261kuwcwwkNfWU=
-
 indexes-of@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/indexes-of/-/indexes-of-1.0.1.tgz#f30f716c8e2bd346c7b67d3df3915566a7c05607"