You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@annotator.apache.org by ge...@apache.org on 2020/11/11 17:38:01 UTC

[incubator-annotator] branch import-dom-seek updated (e1df8ed -> 0f13c97)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


 discard e1df8ed  Change approach, (re)implement normalizeRange
     new 0f13c97  Change approach, (re)implement normalizeRange

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (e1df8ed)
            \
             N -- N -- N   refs/heads/import-dom-seek (0f13c97)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 packages/dom/src/chunker.ts             | 30 +++++++++++++++++++-----------
 packages/dom/src/normalize-range.ts     |  8 ++++----
 packages/dom/src/seek.ts                |  8 +++-----
 packages/dom/src/text-quote/describe.ts |  4 ++--
 4 files changed, 28 insertions(+), 22 deletions(-)

[incubator-annotator] 01/01: Change approach, (re)implement normalizeRange

Posted by ge...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 0f13c976cb7779e317cd9076425a2259d2b8bafb
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 11 16:54:10 2020 +0100

    Change approach, (re)implement normalizeRange
---
 packages/dom/src/chunker.ts             |  25 ++++++
 packages/dom/src/normalize-range.ts     | 135 ++++++++++++++++++++++++++++++++
 packages/dom/src/seek.ts                |  21 +----
 packages/dom/src/text-quote/describe.ts |  20 +----
 4 files changed, 166 insertions(+), 35 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index c8e3015..7209d7a 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -18,6 +18,7 @@
  * under the License.
  */
 
+import { normalizeRange } from "./normalize-range";
 import { ownerDocument } from "./owner-document";
 
 // A Chunk represents a fragment (typically a string) of some document.
@@ -78,6 +79,12 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
     const node = this.iter.referenceNode;
     if (!isText(node))
       return null;
+    return this.nodeToChunk(node);
+  }
+
+  nodeToChunk(node: Text): PartialTextNode {
+    if (!this.scope.intersectsNode(node))
+      throw new Error('Cannot convert node to chunk, as it falls outside of chunker’s scope.');
     const startOffset = (node === this.scope.startContainer) ? this.scope.startOffset : 0;
     const endOffset = (node === this.scope.endContainer) ? this.scope.endOffset : node.length;
     return {
@@ -85,9 +92,27 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
       startOffset,
       endOffset,
       data: node.data.substring(startOffset, endOffset),
+      equals(other) {
+        return (
+          other.node === this.node
+          && other.startOffset === this.startOffset
+          && other.endOffset === this.endOffset
+        );
+      },
     }
   }
 
+  rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> {
+    const textRange = normalizeRange(range);
+
+    const startChunk = this.nodeToChunk(textRange.startContainer);
+    const startIndex = textRange.startOffset - startChunk.startOffset;
+    const endChunk = this.nodeToChunk(textRange.endContainer);
+    const endIndex = textRange.endOffset - endChunk.endOffset;
+
+    return { startChunk, startIndex, endChunk, endIndex };
+  }
+
   constructor(private scope: Range) {
     this.iter = ownerDocument(scope).createNodeIterator(
       scope.commonAncestorContainer,
diff --git a/packages/dom/src/normalize-range.ts b/packages/dom/src/normalize-range.ts
new file mode 100644
index 0000000..8616bce
--- /dev/null
+++ b/packages/dom/src/normalize-range.ts
@@ -0,0 +1,135 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { ownerDocument } from "./owner-document";
+
+// TextRange is a Range that guarantees to always have Text nodes as its start
+// and end nodes. To ensure the type remains correct, it also restricts usage
+// of methods that would modify these nodes (note that a user can simply cast
+// the TextRange back to a Range to remove these restrictions).
+export interface TextRange extends Range {
+  readonly startContainer: Text;
+  readonly endContainer: Text;
+  cloneRange(): TextRange;
+
+  // Allow only Text nodes to be passed to these methods.
+  insertNode(node: Text): void;
+  selectNodeContents(node: Text): void;
+  setEnd(node: Text, offset: number): void;
+  setStart(node: Text, offset: number): void;
+
+  // Do not allow these methods to be used at all.
+  selectNode(node: never): void;
+  setEndAfter(node: never): void;
+  setEndBefore(node: never): void;
+  setStartAfter(node: never): void;
+  setStartBefore(node: never): void;
+  surroundContents(newParent: never): void;
+}
+
+// Normalise a range such that both its start and end are text nodes, and that
+// if there are equivalent text selections it takes the narrowest option (i.e.
+// it prefers the start not to be at the end of a text node, and vice versa).
+//
+// Note that if the given range does not contain non-empty text nodes, it will
+// end up pointing at a text node outside of it (after it if possible, else
+// before). If the document does not contain any text nodes, an error is thrown.
+export function normalizeRange(range: Range): TextRange {
+  const document = ownerDocument(range);
+  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
+
+  let [ startContainer, startOffset ] = snapBoundaryPointToTextNode(range.startContainer, range.startOffset);
+
+  // If we point at the end of a text node, move to the start of the next one.
+  // The step is repeated to skip over empty text nodes.
+  walker.currentNode = startContainer;
+  while (startOffset === startContainer.length && walker.nextNode()) {
+    startContainer = walker.currentNode as Text;
+    startOffset = 0;
+  }
+
+  range.setStart(startContainer, startOffset);
+
+  let [ endContainer, endOffset ] = snapBoundaryPointToTextNode(range.endContainer, range.endOffset);
+
+  // If we point at the start of a text node, move to the end of the previous one.
+  // The step is repeated to skip over empty text nodes.
+  walker.currentNode = endContainer;
+  while (endOffset === 0 && walker.previousNode()) {
+    endContainer = walker.currentNode as Text;
+    endOffset = endContainer.length;
+  }
+
+  range.setEnd(endContainer, endOffset);
+
+  return range as TextRange;
+}
+
+// Given an arbitrary boundary point, this returns either:
+// - that same boundary point, if its node is a text node;
+// - otherwise the first boundary point after it whose node is a text node, if any;
+// - otherwise, the last boundary point before it whose node is a text node.
+// If the document has no text nodes, it throws an error.
+function snapBoundaryPointToTextNode(node: Node, offset: number): [Text, number] {
+  if (isText(node))
+    return [node, offset];
+
+  // Find the node at or right after the boundary point.
+  let curNode: Node;
+  if (isCharacterData(node)) {
+    curNode = node;
+  } else if (offset < node.childNodes.length) {
+    curNode = node.childNodes[offset];
+  } else {
+    curNode = node;
+    while (curNode.nextSibling === null) {
+      if (curNode.parentNode === null) // Boundary point is at end of document
+        throw new Error('not implemented'); // TODO
+      curNode = curNode.parentNode;
+    }
+    curNode = curNode.nextSibling;
+  }
+
+  if (isText(curNode))
+    return [curNode, 0];
+
+  // Walk to the next text node, or the last if there is none.
+  const document = node.ownerDocument ?? node as Document;
+  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
+  walker.currentNode = curNode;
+  if (walker.nextNode() !== null)
+    return [walker.currentNode as Text, 0];
+  else if (walker.previousNode() !== null)
+    return [walker.currentNode as Text, (walker.currentNode as Text).length];
+  else
+    throw new Error('Document contains no text nodes.');
+}
+
+function isText(node: Node): node is Text {
+  return node.nodeType === Node.TEXT_NODE;
+}
+
+function isCharacterData(node: Node): node is CharacterData {
+  return (
+    node.nodeType === Node.PROCESSING_INSTRUCTION_NODE
+    || node.nodeType === Node.COMMENT_NODE
+    || node.nodeType === Node.TEXT_NODE
+  );
+}
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 7d7c107..cd314b6 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -18,7 +18,7 @@
  * under the License.
  */
 
-import { Chunk, Chunker, TextNodeChunker, PartialTextNode, chunkEquals } from "./chunker";
+import { Chunk, TextNodeChunker, PartialTextNode, chunkEquals } from "./chunker";
 
 const E_END = 'Iterator exhausted before seek ended.';
 
@@ -165,10 +165,8 @@ export class TextSeeker<TChunk extends Chunk<string>> implements Seeker<string>
 }
 
 export class DomSeeker extends TextSeeker<PartialTextNode> implements BoundaryPointer<Text> {
-  constructor(chunkerOrScope: Chunker<PartialTextNode> | Range) {
-    const chunker = 'currentChunk' in chunkerOrScope
-      ? chunkerOrScope
-      : new TextNodeChunker(chunkerOrScope);
+  constructor(scope: Range) {
+    const chunker = new TextNodeChunker(scope);
     if (chunker.currentChunk === null)
       throw new RangeError('Range does not contain any Text nodes.');
     super(chunker as NonEmptyChunker<PartialTextNode>);
@@ -181,17 +179,4 @@ export class DomSeeker extends TextSeeker<PartialTextNode> implements BoundaryPo
   get offsetInReferenceNode() {
     return this.offsetInChunk + this.currentChunk.startOffset;
   }
-
-  seekToBoundaryPoint(node: Node, offset: number) {
-    const document = (node.ownerDocument ?? node as Document);
-    const target = document.createRange();
-    target.setStart(node, offset);
-    // target.setEnd(node, offset); // (implied by setting the start)
-
-    // Seek step by step until we are at, or crossed, the target point.
-    const reverse = !!(node.compareDocumentPosition(this.referenceNode) & Node.DOCUMENT_POSITION_PRECEDING);
-    while (target.comparePoint(this.referenceNode, this.offsetInReferenceNode) === (reverse ? 1 : -1)) {
-      this.seekBy(reverse ? -1 : 1);
-    }
-  }
 }
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 8ccf47e..cbad0c3 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -20,9 +20,9 @@
 
 import type { TextQuoteSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
-import { Chunk, Chunker, ChunkRange, PartialTextNode, TextNodeChunker, chunkRangeEquals } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
 import { abstractTextQuoteSelectorMatcher } from '.';
-import { DomSeeker, TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker, NonEmptyChunker } from '../seek';
 
 export async function describeTextQuote(
   range: Range,
@@ -45,7 +45,7 @@ export async function describeTextQuote(
     range.setEnd(scope.endContainer, scope.endOffset);
 
   return await abstractDescribeTextQuote(
-    convertRangeToChunkRange(chunker, range),
+    chunker.rangeToChunkRange(range),
     chunker,
   );
 }
@@ -114,17 +114,3 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
       throw new Error('Target cannot be disambiguated; how could that have happened‽');
   }
 }
-
-function convertRangeToChunkRange(chunker: Chunker<PartialTextNode>, range: Range): ChunkRange<PartialTextNode> {
-  const domSeeker = new DomSeeker(chunker);
-
-  domSeeker.seekToBoundaryPoint(range.startContainer, range.startOffset);
-  const startChunk = domSeeker.currentChunk;
-  const startIndex = domSeeker.offsetInChunk;
-
-  domSeeker.seekToBoundaryPoint(range.endContainer, range.endOffset);
-  const endChunk = domSeeker.currentChunk;
-  const endIndex = domSeeker.offsetInChunk;
-
-  return { startChunk, startIndex, endChunk, endIndex };
-}