You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/11/20 21:21:03 UTC

[incubator-annotator] 05/14: Require all Chunkers to be non-empty

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit e8500bb000e262e1bf24a4828b8d9c07834d2478
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 11:55:57 2020 +0100

    Require all Chunkers to be non-empty
---
 packages/dom/src/chunker.ts                | 27 ++++++++++++++++++++-------
 packages/dom/src/seek.ts                   |  6 +-----
 packages/dom/src/text-position/describe.ts |  8 ++++----
 packages/dom/src/text-position/match.ts    | 12 +++++-------
 packages/dom/src/text-quote/describe.ts    |  8 ++++----
 packages/dom/src/text-quote/match.ts       | 12 ++++++++++--
 6 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index f671b5e..bb71857 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -53,8 +53,8 @@ export function chunkRangeEquals(range1: ChunkRange<any>, range2: ChunkRange<any
 // It is inspired by, and similar to, the DOM’s NodeIterator. (but unlike
 // NodeIterator, it has no concept of being ‘before’ or ‘after’ a chunk)
 export interface Chunker<TChunk extends Chunk<any>> {
-  // currentChunk is null only if it contains no chunks at all.
-  readonly currentChunk: TChunk | null;
+  // The chunk currently being pointed at.
+  readonly currentChunk: TChunk;
 
   // Move currentChunk to the chunk following it, and return that chunk.
   // If there are no chunks following it, keep currentChunk unchanged and return null.
@@ -74,14 +74,22 @@ export interface PartialTextNode extends Chunk<string> {
   readonly endOffset: number;
 }
 
+export class EmptyScopeError extends TypeError {
+  constructor(message?: string) {
+    super(message || 'Scope contains no text nodes.');
+  }
+}
+
 export class TextNodeChunker implements Chunker<PartialTextNode> {
 
   private iter: NodeIterator;
 
   get currentChunk() {
     const node = this.iter.referenceNode;
-    if (!isText(node))
-      return null;
+
+    // This test should not actually be needed, but it keeps TypeScript happy.
+    if (!isText(node)) throw new EmptyScopeError();
+
     return this.nodeToChunk(node);
   }
 
@@ -131,6 +139,9 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
     return range;
   }
 
+  /**
+   * @param scope A Range that overlaps with at least one text node.
+   */
   constructor(private scope: Range) {
     this.iter = ownerDocument(scope).createNodeIterator(
       scope.commonAncestorContainer,
@@ -146,9 +157,11 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
 
     // Move the iterator to after the start (= root) node.
     this.iter.nextNode();
-    // If the start node is not a text node, move it to the first text node (if any).
-    if (!isText(this.iter.referenceNode))
-      this.iter.nextNode();
+    // If the start node is not a text node, move it to the first text node.
+    if (!isText(this.iter.referenceNode)) {
+      const nextNode = this.iter.nextNode();
+      if (nextNode === null) throw new EmptyScopeError();
+    }
   }
 
   nextChunk() {
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 3832b07..75627e9 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -22,10 +22,6 @@ import { Chunk, Chunker, chunkEquals } from "./chunker";
 
 const E_END = 'Iterator exhausted before seek ended.';
 
-export interface NonEmptyChunker<TChunk extends Chunk<any>> extends Chunker<TChunk> {
-  readonly currentChunk: TChunk;
-}
-
 export interface Seeker<T extends Iterable<any> = string> {
   readonly position: number;
   read(length?: number, roundUp?: boolean): T;
@@ -56,7 +52,7 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh
   // The current text position (measured in code units)
   get position() { return this.currentChunkPosition + this.offsetInChunk; }
 
-  constructor(protected chunker: NonEmptyChunker<TChunk>) {
+  constructor(protected chunker: Chunker<TChunk>) {
     // Walk to the start of the first non-empty chunk inside the scope.
     this.seekTo(0);
   }
diff --git a/packages/dom/src/text-position/describe.ts b/packages/dom/src/text-position/describe.ts
index 8baff8d..a711410 100644
--- a/packages/dom/src/text-position/describe.ts
+++ b/packages/dom/src/text-position/describe.ts
@@ -20,9 +20,9 @@
 
 import type { TextPositionSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
-import { Chunk, Chunker, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker } from '../chunker';
 import { CodePointSeeker } from '../code-point-seeker';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
 
 export async function describeTextPosition(
   range: Range,
@@ -51,13 +51,13 @@ export async function describeTextPosition(
 
   return await abstractDescribeTextPosition(
     textChunks.rangeToChunkRange(range),
-    textChunks as NonEmptyChunker<PartialTextNode>,
+    textChunks,
   );
 }
 
 async function abstractDescribeTextPosition<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
-  scope: NonEmptyChunker<TChunk>,
+  scope: Chunker<TChunk>,
 ): Promise<TextPositionSelector> {
   const codeUnitSeeker = new TextSeeker(scope);
   const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts
index 53bfae3..cc8044e 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -19,9 +19,9 @@
  */
 
 import type { Matcher, TextPositionSelector } from '@annotator/selector';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
 import { CodePointSeeker } from '../code-point-seeker';
-import { Chunk, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { Chunk, ChunkRange, TextNodeChunker, Chunker } from '../chunker';
 
 export function createTextPositionSelectorMatcher(
   selector: TextPositionSelector,
@@ -31,9 +31,7 @@ export function createTextPositionSelectorMatcher(
   return async function* matchAll(scope) {
     const textChunks = new TextNodeChunker(scope);
 
-    if (textChunks.currentChunk === null)
-      throw new RangeError('Range does not contain any Text nodes.');
-    const matches = abstractMatcher(textChunks as NonEmptyChunker<PartialTextNode>);
+    const matches = abstractMatcher(textChunks);
 
     for await (const abstractMatch of matches) {
       yield textChunks.chunkRangeToRange(abstractMatch);
@@ -43,10 +41,10 @@ export function createTextPositionSelectorMatcher(
 
 export function abstractTextPositionSelectorMatcher(
   selector: TextPositionSelector,
-): <TChunk extends Chunk<any>>(scope: NonEmptyChunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
+): <TChunk extends Chunk<any>>(scope: Chunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
   const { start, end } = selector;
 
-  return async function* matchAll<TChunk extends Chunk<string>>(textChunks: NonEmptyChunker<TChunk>) {
+  return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) {
     const codeUnitSeeker = new TextSeeker(textChunks);
     const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
 
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 4f0c70f..2e4693e 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -22,7 +22,7 @@ import type { TextQuoteSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
 import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
 import { abstractTextQuoteSelectorMatcher } from '.';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
 
 export async function describeTextQuote(
   range: Range,
@@ -57,7 +57,7 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
   scope: () => Chunker<TChunk>,
 ): Promise<TextQuoteSelector> {
-  const seeker = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
+  const seeker = new TextSeeker(scope());
 
   // Read the target’s exact text.
   seeker.seekToChunk(target.startChunk, target.startIndex);
@@ -95,8 +95,8 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
 
     // We’ll have to add more prefix/suffix to disqualify this unintended match.
     const unintendedMatch = nextMatch.value;
-    const seeker1 = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
-    const seeker2 = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
+    const seeker1 = new TextSeeker(scope());
+    const seeker2 = new TextSeeker(scope());
 
     // Count how many characters we’d need as a prefix to disqualify this match.
     seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 38e09d5..dea1f68 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -19,7 +19,7 @@
  */
 
 import type { Matcher, TextQuoteSelector } from '@annotator/selector';
-import { TextNodeChunker, Chunk, Chunker, ChunkRange, PartialTextNode } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker, EmptyScopeError } from '../chunker';
 
 export function createTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
@@ -27,7 +27,15 @@ export function createTextQuoteSelectorMatcher(
   const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
 
   return async function* matchAll(scope) {
-    const textChunks = new TextNodeChunker(scope);
+    let textChunks;
+    try {
+      textChunks = new TextNodeChunker(scope);
+    } catch (err) {
+      if (err instanceof EmptyScopeError)
+        return; // An empty range contains no matches.
+      else
+        throw err;
+    }
 
     for await (const abstractMatch of abstractMatcher(textChunks)) {
       yield textChunks.chunkRangeToRange(abstractMatch);