You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/11/11 17:38:02 UTC
[incubator-annotator] 01/01: Change approach,
(re)implement normalizeRange
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit 0f13c976cb7779e317cd9076425a2259d2b8bafb
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 11 16:54:10 2020 +0100
Change approach, (re)implement normalizeRange
---
packages/dom/src/chunker.ts | 25 ++++++
packages/dom/src/normalize-range.ts | 135 ++++++++++++++++++++++++++++++++
packages/dom/src/seek.ts | 21 +----
packages/dom/src/text-quote/describe.ts | 20 +----
4 files changed, 166 insertions(+), 35 deletions(-)
diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index c8e3015..7209d7a 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -18,6 +18,7 @@
* under the License.
*/
+import { normalizeRange } from "./normalize-range";
import { ownerDocument } from "./owner-document";
// A Chunk represents a fragment (typically a string) of some document.
@@ -78,6 +79,12 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
const node = this.iter.referenceNode;
if (!isText(node))
return null;
+ return this.nodeToChunk(node);
+ }
+
+ nodeToChunk(node: Text): PartialTextNode {
+ if (!this.scope.intersectsNode(node))
+ throw new Error('Cannot convert node to chunk, as it falls outside of chunker’s scope.');
const startOffset = (node === this.scope.startContainer) ? this.scope.startOffset : 0;
const endOffset = (node === this.scope.endContainer) ? this.scope.endOffset : node.length;
return {
@@ -85,9 +92,27 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
startOffset,
endOffset,
data: node.data.substring(startOffset, endOffset),
+ equals(other) {
+ return (
+ other.node === this.node
+ && other.startOffset === this.startOffset
+ && other.endOffset === this.endOffset
+ );
+ },
}
}
+ // Convert a DOM Range into a ChunkRange expressed in this chunker’s chunks.
+ //
+ // The returned startIndex and endIndex are positions within the data of
+ // startChunk and endChunk respectively. A chunk’s data starts at the chunk’s
+ // startOffset in its text node, so both are measured from that startOffset.
+ //
+ // Throws (from nodeToChunk) if a boundary of the normalised range lies in a
+ // text node outside of the chunker’s scope.
+ rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> {
+   // normalizeRange() moves the boundary points of the range it is given, so
+   // work on a clone to leave the caller’s range untouched.
+   const textRange = normalizeRange(range.cloneRange());
+
+   const startChunk = this.nodeToChunk(textRange.startContainer);
+   const startIndex = textRange.startOffset - startChunk.startOffset;
+   const endChunk = this.nodeToChunk(textRange.endContainer);
+   // Like startIndex, this is relative to where the chunk’s data begins in the
+   // node (its startOffset), not to where it ends.
+   const endIndex = textRange.endOffset - endChunk.startOffset;
+
+   return { startChunk, startIndex, endChunk, endIndex };
+ }
+
constructor(private scope: Range) {
this.iter = ownerDocument(scope).createNodeIterator(
scope.commonAncestorContainer,
diff --git a/packages/dom/src/normalize-range.ts b/packages/dom/src/normalize-range.ts
new file mode 100644
index 0000000..8616bce
--- /dev/null
+++ b/packages/dom/src/normalize-range.ts
@@ -0,0 +1,135 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { ownerDocument } from "./owner-document";
+
+// A TextRange is a Range whose start and end containers are typed as Text
+// nodes. To keep that typing accurate, the methods that could move a boundary
+// into another kind of node are either narrowed to accept Text nodes only, or
+// made uncallable by typing their parameter as `never`. (A user can still cast
+// a TextRange back to a plain Range to lift these restrictions.)
+export interface TextRange extends Range {
+  // Both boundary containers are Text nodes.
+  readonly startContainer: Text;
+  readonly endContainer: Text;
+
+  // Cloning yields another TextRange.
+  cloneRange(): TextRange;
+
+  // Narrowed: only Text nodes may be passed to these methods.
+  setStart(node: Text, offset: number): void;
+  setEnd(node: Text, offset: number): void;
+  selectNodeContents(node: Text): void;
+  insertNode(node: Text): void;
+
+  // Disabled: a parameter of type `never` accepts no argument at all.
+  setStartBefore(node: never): void;
+  setStartAfter(node: never): void;
+  setEndBefore(node: never): void;
+  setEndAfter(node: never): void;
+  selectNode(node: never): void;
+  surroundContents(newParent: never): void;
+}
+
+// Normalise the given range, in place, so that both its start and its end lie
+// in text nodes. Where several boundary points denote the same text selection,
+// the narrowest one is chosen: the start is moved off the end of a text node
+// onto the start of the next one, and the end is moved off the start of a text
+// node onto the end of the previous one.
+//
+// The range that was passed in is modified and returned, typed as a TextRange.
+//
+// Note that if the given range does not contain non-empty text nodes, it will
+// end up pointing at a text node outside of it (after it if possible, else
+// before). If the document does not contain any text nodes, an error is thrown.
+export function normalizeRange(range: Range): TextRange {
+  const doc = ownerDocument(range);
+  const textWalker = doc.createTreeWalker(doc, NodeFilter.SHOW_TEXT);
+
+  // --- Start boundary ---
+  const snappedStart = snapBoundaryPointToTextNode(range.startContainer, range.startOffset);
+  let startNode = snappedStart[0];
+  let startPos = snappedStart[1];
+
+  // As long as the start sits at the very end of its text node, hop to the
+  // beginning of the following text node. Looping skips empty text nodes.
+  textWalker.currentNode = startNode;
+  while (startPos === startNode.length) {
+    if (textWalker.nextNode() === null)
+      break;
+    startNode = textWalker.currentNode as Text;
+    startPos = 0;
+  }
+
+  // The start is applied before the end boundary is read below; keep this
+  // order, as the end is taken from the range as it is after this call.
+  range.setStart(startNode, startPos);
+
+  // --- End boundary ---
+  const snappedEnd = snapBoundaryPointToTextNode(range.endContainer, range.endOffset);
+  let endNode = snappedEnd[0];
+  let endPos = snappedEnd[1];
+
+  // As long as the end sits at the very start of its text node, hop to the
+  // end of the preceding text node. Looping skips empty text nodes.
+  textWalker.currentNode = endNode;
+  while (endPos === 0) {
+    if (textWalker.previousNode() === null)
+      break;
+    endNode = textWalker.currentNode as Text;
+    endPos = endNode.length;
+  }
+
+  range.setEnd(endNode, endPos);
+
+  return range as TextRange;
+}
+
+// Given an arbitrary boundary point, this returns either:
+// - that same boundary point, if its node is a text node;
+// - otherwise the first boundary point after it whose node is a text node, if any;
+// - otherwise, the last boundary point before it whose node is a text node.
+// If the document has no text nodes, it throws an error.
+//
+// The result is a [text node, offset] pair. When snapping forward the offset is
+// 0 (start of the text node); when falling back to a preceding text node the
+// offset is that node’s length (its end).
+function snapBoundaryPointToTextNode(node: Node, offset: number): [Text, number] {
+  if (isText(node))
+    return [node, offset];
+
+  // Find the node at or right after the boundary point. It is null if nothing
+  // follows the boundary point, i.e. the point is at the end of the document.
+  let curNode: Node | null;
+  if (isCharacterData(node)) {
+    curNode = node;
+  } else if (offset < node.childNodes.length) {
+    curNode = node.childNodes[offset];
+  } else {
+    // The point is after the last child of node: climb up until an ancestor
+    // (or node itself) has a next sibling, and take that sibling.
+    curNode = node;
+    while (curNode !== null && curNode.nextSibling === null)
+      curNode = curNode.parentNode;
+    if (curNode !== null)
+      curNode = curNode.nextSibling;
+  }
+
+  if (curNode !== null && isText(curNode))
+    return [curNode, 0];
+
+  const document = node.ownerDocument ?? node as Document;
+  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
+
+  if (curNode !== null) {
+    // Walk to the next text node, if there is one.
+    walker.currentNode = curNode;
+    if (walker.nextNode() !== null)
+      return [walker.currentNode as Text, 0];
+  } else {
+    // Boundary point is at the end of the document: no node follows `node`, so
+    // any text node found by walking forward from it is one of its descendants
+    // and thus precedes the boundary point. Advance to the last such node.
+    walker.currentNode = node;
+    while (walker.nextNode() !== null) {
+      // Keep walking; we only want the final position.
+    }
+  }
+
+  // No text node follows the boundary point; fall back to the last one before
+  // it. The walker may already rest on it (end-of-document case above).
+  const lastReached = walker.currentNode;
+  if (isText(lastReached))
+    return [lastReached, lastReached.length];
+  if (walker.previousNode() !== null)
+    return [walker.currentNode as Text, (walker.currentNode as Text).length];
+
+  throw new Error('Document contains no text nodes.');
+}
+
+// Type guard: tells whether the given node is a Text node, judged by comparing
+// its nodeType to Node.TEXT_NODE.
+function isText(node: Node): node is Text {
+  const { nodeType } = node;
+  return nodeType === Node.TEXT_NODE;
+}
+
+// Type guard: tells whether the given node is treated as CharacterData here,
+// judged by its nodeType. Exactly three node types are accepted: processing
+// instructions, comments, and text nodes.
+function isCharacterData(node: Node): node is CharacterData {
+  switch (node.nodeType) {
+    case Node.PROCESSING_INSTRUCTION_NODE:
+    case Node.COMMENT_NODE:
+    case Node.TEXT_NODE:
+      return true;
+    default:
+      return false;
+  }
+}
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 7d7c107..cd314b6 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -18,7 +18,7 @@
* under the License.
*/
-import { Chunk, Chunker, TextNodeChunker, PartialTextNode, chunkEquals } from "./chunker";
+import { Chunk, TextNodeChunker, PartialTextNode, chunkEquals } from "./chunker";
const E_END = 'Iterator exhausted before seek ended.';
@@ -165,10 +165,8 @@ export class TextSeeker<TChunk extends Chunk<string>> implements Seeker<string>
}
export class DomSeeker extends TextSeeker<PartialTextNode> implements BoundaryPointer<Text> {
- constructor(chunkerOrScope: Chunker<PartialTextNode> | Range) {
- const chunker = 'currentChunk' in chunkerOrScope
- ? chunkerOrScope
- : new TextNodeChunker(chunkerOrScope);
+ constructor(scope: Range) {
+ const chunker = new TextNodeChunker(scope);
if (chunker.currentChunk === null)
throw new RangeError('Range does not contain any Text nodes.');
super(chunker as NonEmptyChunker<PartialTextNode>);
@@ -181,17 +179,4 @@ export class DomSeeker extends TextSeeker<PartialTextNode> implements BoundaryPo
get offsetInReferenceNode() {
return this.offsetInChunk + this.currentChunk.startOffset;
}
-
- seekToBoundaryPoint(node: Node, offset: number) {
- const document = (node.ownerDocument ?? node as Document);
- const target = document.createRange();
- target.setStart(node, offset);
- // target.setEnd(node, offset); // (implied by setting the start)
-
- // Seek step by step until we are at, or crossed, the target point.
- const reverse = !!(node.compareDocumentPosition(this.referenceNode) & Node.DOCUMENT_POSITION_PRECEDING);
- while (target.comparePoint(this.referenceNode, this.offsetInReferenceNode) === (reverse ? 1 : -1)) {
- this.seekBy(reverse ? -1 : 1);
- }
- }
}
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 8ccf47e..cbad0c3 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -20,9 +20,9 @@
import type { TextQuoteSelector } from '@annotator/selector';
import { ownerDocument } from '../owner-document';
-import { Chunk, Chunker, ChunkRange, PartialTextNode, TextNodeChunker, chunkRangeEquals } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
import { abstractTextQuoteSelectorMatcher } from '.';
-import { DomSeeker, TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker, NonEmptyChunker } from '../seek';
export async function describeTextQuote(
range: Range,
@@ -45,7 +45,7 @@ export async function describeTextQuote(
range.setEnd(scope.endContainer, scope.endOffset);
return await abstractDescribeTextQuote(
- convertRangeToChunkRange(chunker, range),
+ chunker.rangeToChunkRange(range),
chunker,
);
}
@@ -114,17 +114,3 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
throw new Error('Target cannot be disambiguated; how could that have happened‽');
}
}
-
-function convertRangeToChunkRange(chunker: Chunker<PartialTextNode>, range: Range): ChunkRange<PartialTextNode> {
- const domSeeker = new DomSeeker(chunker);
-
- domSeeker.seekToBoundaryPoint(range.startContainer, range.startOffset);
- const startChunk = domSeeker.currentChunk;
- const startIndex = domSeeker.offsetInChunk;
-
- domSeeker.seekToBoundaryPoint(range.endContainer, range.endOffset);
- const endChunk = domSeeker.currentChunk;
- const endIndex = domSeeker.offsetInChunk;
-
- return { startChunk, startIndex, endChunk, endIndex };
-}