You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/11/20 21:21:03 UTC
[incubator-annotator] 05/14: Require all Chunkers to be non-empty
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit e8500bb000e262e1bf24a4828b8d9c07834d2478
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 11:55:57 2020 +0100
Require all Chunkers to be non-empty
---
packages/dom/src/chunker.ts | 27 ++++++++++++++++++++-------
packages/dom/src/seek.ts | 6 +-----
packages/dom/src/text-position/describe.ts | 8 ++++----
packages/dom/src/text-position/match.ts | 12 +++++-------
packages/dom/src/text-quote/describe.ts | 8 ++++----
packages/dom/src/text-quote/match.ts | 12 ++++++++++--
6 files changed, 44 insertions(+), 29 deletions(-)
diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index f671b5e..bb71857 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -53,8 +53,8 @@ export function chunkRangeEquals(range1: ChunkRange<any>, range2: ChunkRange<any
// It is inspired by, and similar to, the DOM’s NodeIterator. (but unlike
// NodeIterator, it has no concept of being ‘before’ or ‘after’ a chunk)
export interface Chunker<TChunk extends Chunk<any>> {
- // currentChunk is null only if it contains no chunks at all.
- readonly currentChunk: TChunk | null;
+ // The chunk currently being pointed at.
+ readonly currentChunk: TChunk;
// Move currentChunk to the chunk following it, and return that chunk.
// If there are no chunks following it, keep currentChunk unchanged and return null.
@@ -74,14 +74,22 @@ export interface PartialTextNode extends Chunk<string> {
readonly endOffset: number;
}
+export class EmptyScopeError extends TypeError {
+ constructor(message?: string) {
+ super(message || 'Scope contains no text nodes.');
+ }
+}
+
export class TextNodeChunker implements Chunker<PartialTextNode> {
private iter: NodeIterator;
get currentChunk() {
const node = this.iter.referenceNode;
- if (!isText(node))
- return null;
+
+ // This test should not actually be needed, but it keeps TypeScript happy.
+ if (!isText(node)) throw new EmptyScopeError();
+
return this.nodeToChunk(node);
}
@@ -131,6 +139,9 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
return range;
}
+ /**
+ * @param scope A Range that overlaps with at least one text node.
+ */
constructor(private scope: Range) {
this.iter = ownerDocument(scope).createNodeIterator(
scope.commonAncestorContainer,
@@ -146,9 +157,11 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
// Move the iterator to after the start (= root) node.
this.iter.nextNode();
- // If the start node is not a text node, move it to the first text node (if any).
- if (!isText(this.iter.referenceNode))
- this.iter.nextNode();
+ // If the start node is not a text node, move it to the first text node.
+ if (!isText(this.iter.referenceNode)) {
+ const nextNode = this.iter.nextNode();
+ if (nextNode === null) throw new EmptyScopeError();
+ }
}
nextChunk() {
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 3832b07..75627e9 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -22,10 +22,6 @@ import { Chunk, Chunker, chunkEquals } from "./chunker";
const E_END = 'Iterator exhausted before seek ended.';
-export interface NonEmptyChunker<TChunk extends Chunk<any>> extends Chunker<TChunk> {
- readonly currentChunk: TChunk;
-}
-
export interface Seeker<T extends Iterable<any> = string> {
readonly position: number;
read(length?: number, roundUp?: boolean): T;
@@ -56,7 +52,7 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh
// The current text position (measured in code units)
get position() { return this.currentChunkPosition + this.offsetInChunk; }
- constructor(protected chunker: NonEmptyChunker<TChunk>) {
+ constructor(protected chunker: Chunker<TChunk>) {
// Walk to the start of the first non-empty chunk inside the scope.
this.seekTo(0);
}
diff --git a/packages/dom/src/text-position/describe.ts b/packages/dom/src/text-position/describe.ts
index 8baff8d..a711410 100644
--- a/packages/dom/src/text-position/describe.ts
+++ b/packages/dom/src/text-position/describe.ts
@@ -20,9 +20,9 @@
import type { TextPositionSelector } from '@annotator/selector';
import { ownerDocument } from '../owner-document';
-import { Chunk, Chunker, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker } from '../chunker';
import { CodePointSeeker } from '../code-point-seeker';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
export async function describeTextPosition(
range: Range,
@@ -51,13 +51,13 @@ export async function describeTextPosition(
return await abstractDescribeTextPosition(
textChunks.rangeToChunkRange(range),
- textChunks as NonEmptyChunker<PartialTextNode>,
+ textChunks,
);
}
async function abstractDescribeTextPosition<TChunk extends Chunk<string>>(
target: ChunkRange<TChunk>,
- scope: NonEmptyChunker<TChunk>,
+ scope: Chunker<TChunk>,
): Promise<TextPositionSelector> {
const codeUnitSeeker = new TextSeeker(scope);
const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts
index 53bfae3..cc8044e 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -19,9 +19,9 @@
*/
import type { Matcher, TextPositionSelector } from '@annotator/selector';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
import { CodePointSeeker } from '../code-point-seeker';
-import { Chunk, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { Chunk, ChunkRange, TextNodeChunker, Chunker } from '../chunker';
export function createTextPositionSelectorMatcher(
selector: TextPositionSelector,
@@ -31,9 +31,7 @@ export function createTextPositionSelectorMatcher(
return async function* matchAll(scope) {
const textChunks = new TextNodeChunker(scope);
- if (textChunks.currentChunk === null)
- throw new RangeError('Range does not contain any Text nodes.');
- const matches = abstractMatcher(textChunks as NonEmptyChunker<PartialTextNode>);
+ const matches = abstractMatcher(textChunks);
for await (const abstractMatch of matches) {
yield textChunks.chunkRangeToRange(abstractMatch);
@@ -43,10 +41,10 @@ export function createTextPositionSelectorMatcher(
export function abstractTextPositionSelectorMatcher(
selector: TextPositionSelector,
-): <TChunk extends Chunk<any>>(scope: NonEmptyChunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
+): <TChunk extends Chunk<any>>(scope: Chunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
const { start, end } = selector;
- return async function* matchAll<TChunk extends Chunk<string>>(textChunks: NonEmptyChunker<TChunk>) {
+ return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) {
const codeUnitSeeker = new TextSeeker(textChunks);
const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 4f0c70f..2e4693e 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -22,7 +22,7 @@ import type { TextQuoteSelector } from '@annotator/selector';
import { ownerDocument } from '../owner-document';
import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
import { abstractTextQuoteSelectorMatcher } from '.';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
export async function describeTextQuote(
range: Range,
@@ -57,7 +57,7 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
target: ChunkRange<TChunk>,
scope: () => Chunker<TChunk>,
): Promise<TextQuoteSelector> {
- const seeker = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
+ const seeker = new TextSeeker(scope());
// Read the target’s exact text.
seeker.seekToChunk(target.startChunk, target.startIndex);
@@ -95,8 +95,8 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
// We’ll have to add more prefix/suffix to disqualify this unintended match.
const unintendedMatch = nextMatch.value;
- const seeker1 = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
- const seeker2 = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
+ const seeker1 = new TextSeeker(scope());
+ const seeker2 = new TextSeeker(scope());
// Count how many characters we’d need as a prefix to disqualify this match.
seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 38e09d5..dea1f68 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -19,7 +19,7 @@
*/
import type { Matcher, TextQuoteSelector } from '@annotator/selector';
-import { TextNodeChunker, Chunk, Chunker, ChunkRange, PartialTextNode } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker, EmptyScopeError } from '../chunker';
export function createTextQuoteSelectorMatcher(
selector: TextQuoteSelector,
@@ -27,7 +27,15 @@ export function createTextQuoteSelectorMatcher(
const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
return async function* matchAll(scope) {
- const textChunks = new TextNodeChunker(scope);
+ let textChunks;
+ try {
+ textChunks = new TextNodeChunker(scope);
+ } catch (err) {
+ if (err instanceof EmptyScopeError)
+ return; // An empty range contains no matches.
+ else
+ throw err;
+ }
for await (const abstractMatch of abstractMatcher(textChunks)) {
yield textChunks.chunkRangeToRange(abstractMatch);