You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/11/20 12:26:23 UTC

[incubator-annotator] branch import-dom-seek updated (2ec7100 -> 436b3a0)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


 discard 2ec7100  Export describeTextPosition & use it in demo
 discard ec53350  Create basic tests for describe text position
 discard d6a0be0  Make CodePointSeeker.seekToChunk count units, not points
 discard 84ae601  Handle half-characters in CodePointSeeker.seekToChunk
 discard 645eb77  Factor out chunkRangeToRange
 discard 5b4debb  Implement describe text position
 discard 36f1631  Make ChunkSeeker interface, drop BoundaryPointer/DomSeeker
 discard 23bb877  Make abstract TextPosition matcher
 discard 4c7fdcd  Fix normalizeRange edge case
     new b972ddc  Fix normalizeRange edge case
     new 25d753a  Make abstract TextPosition matcher
     new 15aec21  Make ChunkSeeker interface, drop BoundaryPointer/DomSeeker
     new 5706821  Implement describe text position
     new 610ba79  Factor out chunkRangeToRange
     new 9c1ef9a  Handle half-characters in CodePointSeeker.seekToChunk
     new 06ea5d5  Make CodePointSeeker.seekToChunk count units, not points
     new 114b98c  Create basic tests for describe text position
     new 213c49e  Export describeTextPosition & use it in demo
     new 953f0d4  Make demo more challenging.
     new 2fbc7bf  fix type of matchers
     new 36bb0b8  Add note on fragility. May need to rethink approach.
     new 2da2f50  Require all Chunkers to be non-empty
     new f37e397  This is what do–while was invented for :)
     new a174e4c  Refactor clip range to scope
     new 8b7d302  Refactor pre/suffix disambiguation
     new 9ccb88c  Compare *extra* pre/suffix lengths (ignore sunk costs)
     new d27ba4e  tweak comments
     new 436b3a0  Tweak seeker

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (2ec7100)
            \
             N -- N -- N   refs/heads/import-dom-seek (436b3a0)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 19 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 packages/dom/src/chunker.ts                |  38 +++++++---
 packages/dom/src/seek.ts                   |  10 +--
 packages/dom/src/text-position/describe.ts |  15 ++--
 packages/dom/src/text-position/match.ts    |  22 ++----
 packages/dom/src/text-quote/describe.ts    | 112 ++++++++++++-----------------
 packages/dom/src/text-quote/match.ts       |  32 ++++-----
 web/demo/index.html                        |   4 +-
 7 files changed, 108 insertions(+), 125 deletions(-)


[incubator-annotator] 07/19: Make CodePointSeeker.seekToChunk count units, not points

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 06ea5d5adc5031ffe1b70b88c9630ecc159774ac
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 18 18:57:38 2020 +0100

    Make CodePointSeeker.seekToChunk count units, not points
    
    We don’t actually ever want to count chunk offsets as code points. Note
    its (unused) offsetInChunk method also returns the number in code units.
    
    Ideally the caller would invoke the TextSeeker’s (= codeUnitSeeker’s)
    seekToChunk method. But, currently, doing so would not update the
    codePointSeeker’s position.
---
 packages/dom/src/code-point-seeker.ts | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/packages/dom/src/code-point-seeker.ts b/packages/dom/src/code-point-seeker.ts
index 10cb1b1..b97089e 100644
--- a/packages/dom/src/code-point-seeker.ts
+++ b/packages/dom/src/code-point-seeker.ts
@@ -61,10 +61,9 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke
   private _readOrSeekToChunk(read: true, target: TChunk, offset?: number): string[]
   private _readOrSeekToChunk(read: false, target: TChunk, offset?: number): void
   private _readOrSeekToChunk(read: boolean, target: TChunk, offset: number = 0) {
-    const oldPosition = this.position;
     const oldRawPosition = this.raw.position;
 
-    let s = this.raw.readToChunk(target, 0);
+    let s = this.raw.readToChunk(target, offset);
 
     const movedForward = this.raw.position >= oldRawPosition;
 
@@ -82,25 +81,7 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke
       ? this.position + result.length
       : this.position - result.length;
 
-    const targetPosition = this.position + offset;
-    if (!read) {
-      this.seekTo(targetPosition);
-    } else {
-      if (targetPosition >= this.position) {
-        // Read further until the target.
-        result = result.concat(this.readTo(targetPosition));
-      }
-      else if (targetPosition >= oldPosition) {
-        // We passed by our target position: step back.
-        this.seekTo(targetPosition);
-        result = result.slice(0, targetPosition - oldPosition);
-      } else {
-        // The target precedes our starting position: read backwards from there.
-        this.seekTo(oldPosition);
-        result = this.readTo(targetPosition);
-      }
-      return result;
-    }
+    if (read) return result;
   }
 
   private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string[];


[incubator-annotator] 09/19: Export describeTextPosition & use it in demo

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 213c49e3a5259836a4aa01a7c683270ebc8e03c4
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 18 19:46:19 2020 +0100

    Export describeTextPosition & use it in demo
---
 packages/dom/src/index.ts |  1 +
 web/demo/index.html       |  9 +++++++++
 web/demo/index.js         | 14 +++++++++++---
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/packages/dom/src/index.ts b/packages/dom/src/index.ts
index 3d7ca58..cd7d2ea 100644
--- a/packages/dom/src/index.ts
+++ b/packages/dom/src/index.ts
@@ -21,4 +21,5 @@
 export * from './css';
 export * from './range';
 export * from './text-quote';
+export * from './text-position';
 export * from './highlight-range';
diff --git a/web/demo/index.html b/web/demo/index.html
index ad2d0a2..3ed0961 100644
--- a/web/demo/index.html
+++ b/web/demo/index.html
@@ -72,6 +72,15 @@ under the License.
           Upon a change of selection, a
           <a rel="external" href="https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-quote-selector" target="_blank">TextQuoteSelector</a>
           will be created, that describes what was selected.</p>
+          <form id="form">
+            The selector can work either
+            <br/>
+            <input type="radio" name="describeMode" value="TextQuote" id="describeModeTextQuote" checked>
+            <label for="describeModeTextQuote">by quoting the selected text</label>; or
+            </br>
+            <input type="radio" name="describeMode" value="TextPosition" id="describeModeTextPosition">
+            <label for="describeModeTextPosition">by counting the selected characters’ position in the text</label>.
+          </form>
       </div>
       <div class="column">
         <h2>Text is found here</h2>
diff --git a/web/demo/index.js b/web/demo/index.js
index cb96a40..842e074 100644
--- a/web/demo/index.js
+++ b/web/demo/index.js
@@ -18,12 +18,14 @@
  * under the License.
  */
 
-/* global info, module, source, target */
+/* global info, module, source, target, form */
 
 import {
   makeCreateRangeSelectorMatcher,
   createTextQuoteSelectorMatcher,
   describeTextQuote,
+  createTextPositionSelectorMatcher,
+  describeTextPosition,
   highlightRange,
 } from '@annotator/dom';
 import { makeRefinable } from '@annotator/selector';
@@ -88,13 +90,14 @@ function cleanup() {
   while ((removeHighlight = cleanupFunctions.shift())) {
     removeHighlight();
   }
-  target.normalize();
+  // target.normalize();
   info.innerText = '';
 }
 
 const createMatcher = makeRefinable((selector) => {
   const innerCreateMatcher = {
     TextQuoteSelector: createTextQuoteSelectorMatcher,
+    TextPositionSelector: createTextPositionSelectorMatcher,
     RangeSelector: makeCreateRangeSelectorMatcher(createMatcher),
   }[selector.type];
 
@@ -126,12 +129,16 @@ async function anchor(selector) {
 
 async function onSelectionChange() {
   cleanup();
+  const describeMode = form.describeMode.value;
   const scope = document.createRange();
   scope.selectNodeContents(source);
   const selection = document.getSelection();
   for (let i = 0; i < selection.rangeCount; i++) {
     const range = selection.getRangeAt(i);
-    const selector = await describeTextQuote(range, scope);
+    const selector =
+      describeMode === 'TextPosition'
+        ? await describeTextPosition(range, scope)
+        : await describeTextQuote(range, scope);
     await anchor(selector);
   }
 }
@@ -146,6 +153,7 @@ function onSelectorExampleClick(event) {
 }
 
 document.addEventListener('selectionchange', onSelectionChange);
+form.addEventListener('change', onSelectionChange);
 document.addEventListener('click', onSelectorExampleClick);
 
 if (module.hot) {


[incubator-annotator] 11/19: fix type of matchers

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 2fbc7bfa7723e462af3986b4b1adcd049397c331
Author: Gerben <ge...@treora.com>
AuthorDate: Thu Nov 19 17:48:40 2020 +0100

    fix type of matchers
---
 packages/dom/src/text-position/match.ts | 12 +++---------
 packages/dom/src/text-quote/match.ts    | 11 +++--------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts
index aa5fe49..53bfae3 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -26,9 +26,7 @@ import { Chunk, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker'
 export function createTextPositionSelectorMatcher(
   selector: TextPositionSelector,
 ): Matcher<Range, Range> {
-
-  const abstractMatcher: AbstractMatcher<PartialTextNode> =
-    abstractTextPositionSelectorMatcher(selector);
+  const abstractMatcher = abstractTextPositionSelectorMatcher(selector);
 
   return async function* matchAll(scope) {
     const textChunks = new TextNodeChunker(scope);
@@ -43,13 +41,9 @@ export function createTextPositionSelectorMatcher(
   };
 }
 
-type AbstractMatcher<TChunk extends Chunk<any>> =
-  Matcher<NonEmptyChunker<TChunk>, ChunkRange<TChunk>>
-
-export function abstractTextPositionSelectorMatcher<TChunk extends Chunk<string>>(
+export function abstractTextPositionSelectorMatcher(
   selector: TextPositionSelector,
-): AbstractMatcher<TChunk> {
-
+): <TChunk extends Chunk<any>>(scope: NonEmptyChunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
   const { start, end } = selector;
 
   return async function* matchAll<TChunk extends Chunk<string>>(textChunks: NonEmptyChunker<TChunk>) {
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 37a75ba..38e09d5 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -24,9 +24,7 @@ import { TextNodeChunker, Chunk, Chunker, ChunkRange, PartialTextNode } from '..
 export function createTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
 ): Matcher<Range, Range> {
-
-  const abstractMatcher: AbstractMatcher<PartialTextNode> =
-    abstractTextQuoteSelectorMatcher(selector);
+  const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
 
   return async function* matchAll(scope) {
     const textChunks = new TextNodeChunker(scope);
@@ -37,12 +35,9 @@ export function createTextQuoteSelectorMatcher(
   }
 }
 
-type AbstractMatcher<TChunk extends Chunk<any>> =
-  Matcher<Chunker<TChunk>, ChunkRange<TChunk>>
-
-export function abstractTextQuoteSelectorMatcher<TChunk extends Chunk<string>>(
+export function abstractTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
-): AbstractMatcher<TChunk> {
+): <TChunk extends Chunk<any>>(scope: Chunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
   return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) {
     const exact = selector.exact;
     const prefix = selector.prefix || '';


[incubator-annotator] 19/19: Tweak seeker

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 436b3a09f168c05f22bdfcf9bed004dd81690a15
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 13:07:16 2020 +0100

    Tweak seeker
---
 packages/dom/src/seek.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 75627e9..a1f52c8 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -83,7 +83,7 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh
 
   private _readOrSeekToChunk(read: true, target: TChunk, offset?: number): string
   private _readOrSeekToChunk(read: false, target: TChunk, offset?: number): void
-  private _readOrSeekToChunk(read: boolean, target: TChunk, offset: number = 0): string {
+  private _readOrSeekToChunk(read: boolean, target: TChunk, offset: number = 0): string | void {
     const oldPosition = this.position;
     let result = '';
 
@@ -124,8 +124,8 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh
         this.seekTo(oldPosition);
         result = this.readTo(targetPosition);
       }
+      return result;
     }
-    return result;
   }
 
   private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string


[incubator-annotator] 03/19: Make ChunkSeeker interface, drop BoundaryPointer/DomSeeker

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 15aec219ed6e489a8e6d405dcf1b524ef6cb523f
Author: Gerben <ge...@treora.com>
AuthorDate: Mon Nov 16 23:00:13 2020 +0100

    Make ChunkSeeker interface, drop BoundaryPointer/DomSeeker
---
 packages/dom/src/chunker.ts           |  2 +-
 packages/dom/src/code-point-seeker.ts | 67 +++++++++++++++++++++++++++--------
 packages/dom/src/seek.ts              | 33 +++++------------
 3 files changed, 62 insertions(+), 40 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index e636509..78b8646 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -24,7 +24,7 @@ import { ownerDocument } from "./owner-document";
 // A Chunk represents a fragment (typically a string) of some document.
 // Subclasses can add further attributes to map the chunk to its position in the
 // data structure it came from (e.g. a DOM node).
-export interface Chunk<TData extends any> {
+export interface Chunk<TData> {
   readonly data: TData;
   equals?(otherChunk: this): boolean;
 }
diff --git a/packages/dom/src/code-point-seeker.ts b/packages/dom/src/code-point-seeker.ts
index 9adc89c..b0a95cd 100644
--- a/packages/dom/src/code-point-seeker.ts
+++ b/packages/dom/src/code-point-seeker.ts
@@ -18,12 +18,13 @@
  * under the License.
  */
 
-import { Seeker, BoundaryPointer } from "./seek";
+import { ChunkSeeker } from "./seek";
+import { Chunk } from "./chunker";
 
-class _CodePointSeeker implements Seeker<string[]> {
+export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TChunk, string[]> {
   position = 0;
 
-  constructor(public readonly raw: Seeker<string>) {}
+  constructor(public readonly raw: ChunkSeeker<TChunk>) {}
 
   seekBy(length: number) {
     this.seekTo(this.position + length);
@@ -41,6 +42,54 @@ class _CodePointSeeker implements Seeker<string[]> {
     return this._readOrSeekTo(true, target, roundUp);
   }
 
+  get currentChunk() {
+    return this.raw.currentChunk;
+  }
+
+  get offsetInChunk() {
+    return this.raw.offsetInChunk;
+  }
+
+  seekToChunk(target: TChunk, offset: number = 0) {
+    this._readOrSeekToChunk(false, target, offset);
+  }
+
+  readToChunk(target: TChunk, offset: number = 0) {
+    return this._readOrSeekToChunk(true, target, offset);
+  }
+
+  private _readOrSeekToChunk(read: true, target: TChunk, offset?: number): string[]
+  private _readOrSeekToChunk(read: false, target: TChunk, offset?: number): void
+  private _readOrSeekToChunk(read: boolean, target: TChunk, offset: number = 0) {
+    const oldPosition = this.position;
+    const oldRawPosition = this.raw.position;
+
+    let result = [...this.raw.readToChunk(target, 0)];
+    this.position = this.raw.position >= oldRawPosition
+      ? this.position + result.length
+      : this.position - result.length;
+
+    const targetPosition = this.position + offset;
+    if (!read) {
+      this.seekTo(targetPosition);
+    } else {
+      if (targetPosition >= this.position) {
+        // Read further until the target.
+        result = result.concat(this.readTo(targetPosition));
+      }
+      else if (targetPosition >= oldPosition) {
+        // We passed by our target position: step back.
+        this.seekTo(targetPosition);
+        result = result.slice(0, targetPosition - oldPosition);
+      } else {
+        // The target precedes our starting position: read backwards from there.
+        this.seekTo(oldPosition);
+        result = this.readTo(targetPosition);
+      }
+    }
+    return result;
+  }
+
   private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string[];
   private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void;
   private _readOrSeekTo(read: boolean, target: number, roundUp: boolean = false): string[] | void {
@@ -96,18 +145,6 @@ class _CodePointSeeker implements Seeker<string[]> {
   }
 }
 
-export class CodePointSeeker extends _CodePointSeeker implements Seeker<string[]>, BoundaryPointer<string[]> {
-  constructor(public readonly raw: Seeker<string> & BoundaryPointer<Text>) {
-    super(raw);
-  }
-
-  get referenceNode() { return [...this.raw.referenceNode.data] };
-  get offsetInReferenceNode() {
-    const substring = this.raw.referenceNode.data.substring(0, this.raw.offsetInReferenceNode);
-    return [...substring].length;
-  };
-}
-
 function endsWithinCharacter(s: string) {
   const codeUnit = s.charCodeAt(s.length - 1);
   return (0xD800 <= codeUnit && codeUnit <= 0xDBFF)
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 00feaee..3832b07 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -18,7 +18,7 @@
  * under the License.
  */
 
-import { Chunk, Chunker, TextNodeChunker, PartialTextNode, chunkEquals } from "./chunker";
+import { Chunk, Chunker, chunkEquals } from "./chunker";
 
 const E_END = 'Iterator exhausted before seek ended.';
 
@@ -26,11 +26,6 @@ export interface NonEmptyChunker<TChunk extends Chunk<any>> extends Chunker<TChu
   readonly currentChunk: TChunk;
 }
 
-export interface BoundaryPointer<T extends any> {
-  readonly referenceNode: T;
-  readonly offsetInReferenceNode: number;
-}
-
 export interface Seeker<T extends Iterable<any> = string> {
   readonly position: number;
   read(length?: number, roundUp?: boolean): T;
@@ -39,7 +34,14 @@ export interface Seeker<T extends Iterable<any> = string> {
   seekTo(target: number): void;
 }
 
-export class TextSeeker<TChunk extends Chunk<string>> implements Seeker<string> {
+export interface ChunkSeeker<TChunk extends Chunk<any>, T extends Iterable<any> = string> extends Seeker<T> {
+  readonly currentChunk: TChunk;
+  readonly offsetInChunk: number;
+  seekToChunk(chunk: TChunk, offset?: number): void;
+  readToChunk(chunk: TChunk, offset?: number): T;
+}
+
+export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TChunk> {
   // The chunk containing our current text position.
   get currentChunk() {
     return this.chunker.currentChunk;
@@ -210,20 +212,3 @@ export class TextSeeker<TChunk extends Chunk<string>> implements Seeker<string>
     return [data, previousChunk];
   }
 }
-
-export class DomSeeker extends TextSeeker<PartialTextNode> implements BoundaryPointer<Text> {
-  constructor(scope: Range) {
-    const chunker = new TextNodeChunker(scope);
-    if (chunker.currentChunk === null)
-      throw new RangeError('Range does not contain any Text nodes.');
-    super(chunker as NonEmptyChunker<PartialTextNode>);
-  }
-
-  get referenceNode() {
-    return this.currentChunk.node;
-  }
-
-  get offsetInReferenceNode() {
-    return this.offsetInChunk + this.currentChunk.startOffset;
-  }
-}


[incubator-annotator] 02/19: Make abstract TextPosition matcher

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 25d753a973aae0566c4bb6a3e47de521a379744e
Author: Gerben <ge...@treora.com>
AuthorDate: Mon Nov 16 22:55:15 2020 +0100

    Make abstract TextPosition matcher
---
 packages/dom/src/text-position/match.ts | 51 +++++++++++++++++++++++----------
 packages/dom/src/text-quote/match.ts    | 13 +++++----
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts
index 7b9f1c5..985e278 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -20,32 +20,53 @@
 
 import type { Matcher, TextPositionSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
-import { DomSeeker } from '../seek';
+import { TextSeeker, NonEmptyChunker } from '../seek';
 import { CodePointSeeker } from '../code-point-seeker';
+import { Chunk, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
 
 export function createTextPositionSelectorMatcher(
   selector: TextPositionSelector,
 ): Matcher<Range, Range> {
+
+  const abstractMatcher: AbstractMatcher<PartialTextNode> =
+    abstractTextPositionSelectorMatcher(selector);
+
   return async function* matchAll(scope) {
-    const document = ownerDocument(scope);
+    const textChunks = new TextNodeChunker(scope);
 
-    const { start, end } = selector;
+    if (textChunks.currentChunk === null)
+      throw new RangeError('Range does not contain any Text nodes.');
+    const matches = abstractMatcher(textChunks as NonEmptyChunker<PartialTextNode>);
 
-    const codeUnitSeeker = new DomSeeker(scope);
-    const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
+    for await (const abstractMatch of matches) {
+      const match = ownerDocument(scope).createRange();
+      match.setStart(abstractMatch.startChunk.node, abstractMatch.startChunk.startOffset + abstractMatch.startIndex);
+      match.setEnd(abstractMatch.endChunk.node, abstractMatch.endChunk.startOffset + abstractMatch.endIndex);
+      yield match;
+    }
+  };
+}
 
-    // Create a range to represent the described text in the dom.
-    const match = document.createRange();
+type AbstractMatcher<TChunk extends Chunk<any>> =
+  Matcher<NonEmptyChunker<TChunk>, ChunkRange<TChunk>>
 
-    // Seek to the start of the match, make the range start there.
-    codePointSeeker.seekTo(start);
-    match.setStart(codeUnitSeeker.referenceNode, codeUnitSeeker.offsetInReferenceNode);
+export function abstractTextPositionSelectorMatcher<TChunk extends Chunk<string>>(
+  selector: TextPositionSelector,
+): AbstractMatcher<TChunk> {
+
+  const { start, end } = selector;
+
+  return async function* matchAll<TChunk extends Chunk<string>>(textChunks: NonEmptyChunker<TChunk>) {
+    const codeUnitSeeker = new TextSeeker(textChunks);
+    const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
 
-    // Seek to the end of the match, make the range end there.
+    codePointSeeker.seekTo(start);
+    const startChunk = codeUnitSeeker.currentChunk;
+    const startIndex = codeUnitSeeker.offsetInChunk;
     codePointSeeker.seekTo(end);
-    match.setEnd(codeUnitSeeker.referenceNode, codeUnitSeeker.offsetInReferenceNode);
+    const endChunk = codeUnitSeeker.currentChunk;
+    const endIndex = codeUnitSeeker.offsetInChunk;
 
-    // Yield the match.
-    yield match;
-  };
+    yield { startChunk, startIndex, endChunk, endIndex };
+  }
 }
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 5a4df52..f7c8da3 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -19,12 +19,15 @@
  */
 
 import type { Matcher, TextQuoteSelector } from '@annotator/selector';
-import { TextNodeChunker, Chunk, Chunker, ChunkRange } from '../chunker';
+import { TextNodeChunker, Chunk, Chunker, ChunkRange, PartialTextNode } from '../chunker';
 
 export function createTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
 ): Matcher<Range, Range> {
-  const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
+
+  const abstractMatcher: AbstractMatcher<PartialTextNode> =
+    abstractTextQuoteSelectorMatcher(selector);
+
   return async function* matchAll(scope) {
     const textChunks = new TextNodeChunker(scope);
 
@@ -41,12 +44,12 @@ export function createTextQuoteSelectorMatcher(
   }
 }
 
-type AbstractMatcher<TChunk extends Chunk<string>> =
+type AbstractMatcher<TChunk extends Chunk<any>> =
   Matcher<Chunker<TChunk>, ChunkRange<TChunk>>
 
-export function abstractTextQuoteSelectorMatcher(
+export function abstractTextQuoteSelectorMatcher<TChunk extends Chunk<string>>(
   selector: TextQuoteSelector,
-): AbstractMatcher<any> {
+): AbstractMatcher<TChunk> {
   return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) {
     const exact = selector.exact;
     const prefix = selector.prefix || '';


[incubator-annotator] 18/19: tweak comments

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit d27ba4e2e17796ebe95a8bb9baafe3347708b11d
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 13:06:30 2020 +0100

    tweak comments
---
 packages/dom/src/text-quote/describe.ts | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 81cc4fa..756df1e 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -72,19 +72,21 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
     const matches = abstractTextQuoteSelectorMatcher(tentativeSelector)(scope());
     let nextMatch = await matches.next();
 
+    // If this match is the intended one, no need to act.
     // XXX This test is fragile: nextMatch and target are assumed to be normalised.
     if (!nextMatch.done && chunkRangeEquals(nextMatch.value, target)) {
-      // This match is the intended one, ignore it.
       nextMatch = await matches.next();
     }
 
     // If there are no more unintended matches, our selector is unambiguous!
     if (nextMatch.done) return tentativeSelector;
 
-    // A subsequent search could safely skip the part we already processed,
-    // we’d need the matcher to start at the seeker’s position, instead of
-    // searching in the whole current chunk.
-    // seeker.seekBy(-prefix.length + 1);
+    // Possible optimisation: A subsequent search could safely skip the part we
+    // already processed, instead of starting from the beginning again. But we’d
+    // need the matcher to start at the seeker’s position, instead of searching
+    // in the whole current chunk. Then we could just seek back to just after
+    // the start of the prefix: seeker.seekBy(-prefix.length + 1); (don’t forget
+    // to also correct for any changes in the prefix we will make below)
 
     // We’ll have to add more prefix/suffix to disqualify this unintended match.
     const unintendedMatch = nextMatch.value;


[incubator-annotator] 12/19: Add note on fragility. May need to rethink approach.

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 36bb0b83dbfca3d78e9b2156a57b9d7e91ff504f
Author: Gerben <ge...@treora.com>
AuthorDate: Thu Nov 19 18:16:10 2020 +0100

    Add note on fragility. May need to rethink approach.
---
 packages/dom/src/text-quote/describe.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 1a7941d..4f0c70f 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -79,6 +79,7 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
     const matches = abstractTextQuoteSelectorMatcher(tentativeSelector)(scope());
     let nextMatch = await matches.next();
 
+    // XXX This test is fragile: nextMatch and target are assumed to be normalised.
     if (!nextMatch.done && chunkRangeEquals(nextMatch.value, target)) {
       // This match is the intended one, ignore it.
       nextMatch = await matches.next();


[incubator-annotator] 01/19: Fix normalizeRange edge case

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit b972ddc5afee7bd021c066c79e7b17b6f215625f
Author: Gerben <ge...@treora.com>
AuthorDate: Mon Nov 16 20:38:01 2020 +0100

    Fix normalizeRange edge case
    
    A collapsed range at the start of the scope would end up, after being
    normalised, at the end of the text node just before the scope; which
    would not be shown by the chunker. The only solution I see is to tell
    the normalisation function about the scope.
---
 packages/dom/src/chunker.ts         |  4 +---
 packages/dom/src/normalize-range.ts | 22 +++++++++++++++++-----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index 574b627..e636509 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -106,9 +106,7 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
   }
 
   rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> {
-    const textRange = normalizeRange(range);
-    // FIXME: normalizeRange can mess up: a collapsed range at the very end of
-    // the chunker’s scope might move to the next text node outside the scope.
+    const textRange = normalizeRange(range, this.scope);
 
     const startChunk = this.nodeToChunk(textRange.startContainer);
     const startIndex = textRange.startOffset - startChunk.startOffset;
diff --git a/packages/dom/src/normalize-range.ts b/packages/dom/src/normalize-range.ts
index 0fece41..a4a758e 100644
--- a/packages/dom/src/normalize-range.ts
+++ b/packages/dom/src/normalize-range.ts
@@ -50,14 +50,24 @@ export interface TextRange extends Range {
 //
 // If there is no text between the start and end, they thus collapse onto one a
 // single position; and if there are multiple equivalent positions, it takes the
-// first one.
+// first one; or, if scope is passed, the first equivalent falling within scope.
 //
 // Note that if the given range does not contain non-empty text nodes, it will
-// end up pointing at a text node outside of it (after it if possible, else
-// before). If the document does not contain any text nodes, an error is thrown.
-export function normalizeRange(range: Range): TextRange {
+// end up pointing at a text node outside of it (before it if possible, else
+// after). If the document does not contain any text nodes, an error is thrown.
+export function normalizeRange(range: Range, scope?: Range): TextRange {
   const document = ownerDocument(range);
-  const walker = document.createTreeWalker(document, NodeFilter.SHOW_TEXT);
+  const walker = document.createTreeWalker(
+    document,
+    NodeFilter.SHOW_TEXT,
+    {
+      acceptNode(node: Text) {
+        return (!scope || scope.intersectsNode(node))
+          ? NodeFilter.FILTER_ACCEPT
+          : NodeFilter.FILTER_REJECT;
+      },
+    },
+  );
 
   let [ startContainer, startOffset ] = snapBoundaryPointToTextNode(range.startContainer, range.startOffset);
 
@@ -69,6 +79,7 @@ export function normalizeRange(range: Range): TextRange {
     startOffset = 0;
   }
 
+  // Set the range’s start; note this might move its end too.
   range.setStart(startContainer, startOffset);
 
   let [ endContainer, endOffset ] = snapBoundaryPointToTextNode(range.endContainer, range.endOffset);
@@ -81,6 +92,7 @@ export function normalizeRange(range: Range): TextRange {
     endOffset = endContainer.length;
   }
 
+  // Set the range’s end; note this might move its start too.
   range.setEnd(endContainer, endOffset);
 
   return range as TextRange;


[incubator-annotator] 15/19: Refactor clip range to scope

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit a174e4cfd3baa759e358927df7597e3c92c7358d
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 12:09:15 2020 +0100

    Refactor clip range to scope
---
 packages/dom/src/chunker.ts                | 9 +++++++++
 packages/dom/src/text-position/describe.ts | 7 -------
 packages/dom/src/text-quote/describe.ts    | 7 -------
 3 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index bb71857..8c28f78 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -114,6 +114,15 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
   }
 
   rangeToChunkRange(range: Range): ChunkRange<PartialTextNode> {
+    range = range.cloneRange();
+
+    // Take the part of the range that falls within the scope.
+    if (range.compareBoundaryPoints(Range.START_TO_START, this.scope) === -1)
+      range.setStart(this.scope.startContainer, this.scope.startOffset);
+    if (range.compareBoundaryPoints(Range.END_TO_END, this.scope) === 1)
+      range.setEnd(this.scope.endContainer, this.scope.endOffset);
+
+    // Ensure it starts and ends at text nodes.
     const textRange = normalizeRange(range, this.scope);
 
     const startChunk = this.nodeToChunk(textRange.startContainer);
diff --git a/packages/dom/src/text-position/describe.ts b/packages/dom/src/text-position/describe.ts
index a711410..d4099a9 100644
--- a/packages/dom/src/text-position/describe.ts
+++ b/packages/dom/src/text-position/describe.ts
@@ -38,13 +38,6 @@ export async function describeTextPosition(
     scope.selectNodeContents(document);
   }
 
-  // Take the part of the range that falls within the scope.
-  range = range.cloneRange();
-  if (range.compareBoundaryPoints(Range.START_TO_START, scope) === -1)
-    range.setStart(scope.startContainer, scope.startOffset);
-  if (range.compareBoundaryPoints(Range.END_TO_END, scope) === 1)
-    range.setEnd(scope.endContainer, scope.endOffset);
-
   const textChunks = new TextNodeChunker(scope);
   if (textChunks.currentChunk === null)
     throw new RangeError('Range does not contain any Text nodes.');
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 2e4693e..688089f 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -38,13 +38,6 @@ export async function describeTextQuote(
     scope.selectNodeContents(document);
   }
 
-  // Take the part of the range that falls within the scope.
-  range = range.cloneRange();
-  if (range.compareBoundaryPoints(Range.START_TO_START, scope) === -1)
-    range.setStart(scope.startContainer, scope.startOffset);
-  if (range.compareBoundaryPoints(Range.END_TO_END, scope) === 1)
-    range.setEnd(scope.endContainer, scope.endOffset);
-
   const chunker = new TextNodeChunker(scope);
 
   return await abstractDescribeTextQuote(


[incubator-annotator] 17/19: Compare *extra* pre/suffix lengths (ignore sunk costs)

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 9ccb88c4a198c41104bf3839146eff8bdf55fd6d
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 13:02:16 2020 +0100

    Compare *extra* pre/suffix lengths (ignore sunk costs)
---
 packages/dom/src/text-quote/describe.ts | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index ae79ad0..81cc4fa 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -95,20 +95,17 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
     seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
     seeker2.seekToChunk(unintendedMatch.startChunk, unintendedMatch.startIndex - prefix.length);
     const extraPrefix = readUntilDifferent(seeker1, seeker2, true);
-    let sufficientPrefix = extraPrefix !== undefined ? extraPrefix + prefix : undefined;
 
     // Count how many characters we’d need as a suffix to disqualify this match.
     seeker1.seekToChunk(target.endChunk, target.endIndex + suffix.length);
     seeker2.seekToChunk(unintendedMatch.endChunk, unintendedMatch.endIndex + suffix.length);
     const extraSuffix = readUntilDifferent(seeker1, seeker2, false);
-    let sufficientSuffix = extraSuffix !== undefined ? suffix + extraSuffix : undefined;
 
     // Use either the prefix or suffix, whichever is shortest.
-    if (sufficientPrefix !== undefined && (sufficientSuffix === undefined || sufficientPrefix.length <= sufficientSuffix.length)) {
-      prefix = sufficientPrefix;
-      // seeker.seekBy(sufficientPrefix.length - prefix.length) // Would be required if we’d skip the processed part.
-    } else if (sufficientSuffix !== undefined) {
-      suffix = sufficientSuffix;
+    if (extraPrefix !== undefined && (extraSuffix === undefined || extraPrefix.length <= extraSuffix.length)) {
+      prefix = extraPrefix + prefix;
+    } else if (extraSuffix !== undefined) {
+      suffix = suffix + extraSuffix;
     } else {
       throw new Error('Target cannot be disambiguated; how could that have happened‽');
     }


[incubator-annotator] 16/19: Refactor pre/suffix disambiguation

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 8b7d302405df3095dbcb05cfec6adb791e0bf1e1
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 12:59:29 2020 +0100

    Refactor pre/suffix disambiguation
---
 packages/dom/src/text-quote/describe.ts | 79 ++++++++++++++-------------------
 1 file changed, 34 insertions(+), 45 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 688089f..ae79ad0 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -22,7 +22,7 @@ import type { TextQuoteSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
 import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
 import { abstractTextQuoteSelectorMatcher } from '.';
-import { TextSeeker } from '../seek';
+import { TextSeeker, Seeker } from '../seek';
 
 export async function describeTextQuote(
   range: Range,
@@ -94,54 +94,14 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
     // Count how many characters we’d need as a prefix to disqualify this match.
     seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
     seeker2.seekToChunk(unintendedMatch.startChunk, unintendedMatch.startIndex - prefix.length);
-    let sufficientPrefix: string | undefined = prefix;
-    while (true) {
-      let previousCharacter: string;
-      try {
-        previousCharacter = seeker1.read(-1);
-      } catch (err) {
-        sufficientPrefix = undefined; // Start of text reached.
-        break;
-      }
-      sufficientPrefix = previousCharacter + sufficientPrefix;
-
-      // Break if the newly added character makes the prefix unambiguous.
-      try {
-        const unintendedMatchPreviousCharacter = seeker2.read(-1);
-        if (previousCharacter !== unintendedMatchPreviousCharacter) break;
-      } catch (err) {
-        if (err instanceof RangeError)
-          break;
-        else
-          throw err;
-      }
-    }
+    const extraPrefix = readUntilDifferent(seeker1, seeker2, true);
+    let sufficientPrefix = extraPrefix !== undefined ? extraPrefix + prefix : undefined;
 
     // Count how many characters we’d need as a suffix to disqualify this match.
     seeker1.seekToChunk(target.endChunk, target.endIndex + suffix.length);
     seeker2.seekToChunk(unintendedMatch.endChunk, unintendedMatch.endIndex + suffix.length);
-    let sufficientSuffix: string | undefined = suffix;
-    while (true) {
-      let nextCharacter: string;
-      try {
-        nextCharacter = seeker1.read(1);
-      } catch (err) {
-        sufficientSuffix = undefined; // End of text reached.
-        break;
-      }
-      sufficientSuffix += nextCharacter;
-
-      // Break if the newly added character makes the suffix unambiguous.
-      try {
-        const unintendedMatchNextCharacter = seeker2.read(1);
-        if (nextCharacter !== unintendedMatchNextCharacter) break;
-      } catch (err) {
-        if (err instanceof RangeError)
-          break;
-        else
-          throw err;
-      }
-    }
+    const extraSuffix = readUntilDifferent(seeker1, seeker2, false);
+    let sufficientSuffix = extraSuffix !== undefined ? suffix + extraSuffix : undefined;
 
     // Use either the prefix or suffix, whichever is shortest.
     if (sufficientPrefix !== undefined && (sufficientSuffix === undefined || sufficientPrefix.length <= sufficientSuffix.length)) {
@@ -154,3 +114,32 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
     }
   }
 }
+
+function readUntilDifferent(
+  seeker1: Seeker,
+  seeker2: Seeker,
+  reverse: boolean,
+): string | undefined {
+  let result = '';
+  while (true) {
+    let nextCharacter: string;
+    try {
+      nextCharacter = seeker1.read(reverse ? -1 : 1);
+    } catch (err) {
+      return undefined; // Start/end of text reached: cannot expand result.
+    }
+    result = reverse
+      ? nextCharacter + result
+      : result + nextCharacter;
+
+    // Check if the newly added character makes the result differ from the second seeker.
+    let comparisonCharacter: string | undefined;
+    try {
+      comparisonCharacter = seeker2.read(reverse ? -1 : 1);
+    } catch (err) { // A RangeError would merely mean seeker2 is exhausted.
+      if (!(err instanceof RangeError)) throw err;
+    }
+    if (nextCharacter !== comparisonCharacter)
+      return result;
+  }
+}


[incubator-annotator] 05/19: Factor out chunkRangeToRange

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 610ba79c9aa3e91e4217ef7e8a56bfba19da34de
Author: Gerben <ge...@treora.com>
AuthorDate: Mon Nov 16 23:13:59 2020 +0100

    Factor out chunkRangeToRange
---
 packages/dom/src/chunker.ts             | 15 +++++++++++++++
 packages/dom/src/text-position/match.ts |  6 +-----
 packages/dom/src/text-quote/match.ts    |  9 +--------
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index 78b8646..f671b5e 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -116,6 +116,21 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
     return { startChunk, startIndex, endChunk, endIndex };
   }
 
+  chunkRangeToRange(chunkRange: ChunkRange<PartialTextNode>): Range {
+    const range = ownerDocument(this.scope).createRange();
+    // The `+…startOffset` parts are only relevant for the first chunk, as it
+    // might start within a text node.
+    range.setStart(
+      chunkRange.startChunk.node,
+      chunkRange.startIndex + chunkRange.startChunk.startOffset,
+    );
+    range.setEnd(
+      chunkRange.endChunk.node,
+      chunkRange.endIndex + chunkRange.endChunk.startOffset,
+    );
+    return range;
+  }
+
   constructor(private scope: Range) {
     this.iter = ownerDocument(scope).createNodeIterator(
       scope.commonAncestorContainer,
diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts
index 985e278..aa5fe49 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -19,7 +19,6 @@
  */
 
 import type { Matcher, TextPositionSelector } from '@annotator/selector';
-import { ownerDocument } from '../owner-document';
 import { TextSeeker, NonEmptyChunker } from '../seek';
 import { CodePointSeeker } from '../code-point-seeker';
 import { Chunk, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
@@ -39,10 +38,7 @@ export function createTextPositionSelectorMatcher(
     const matches = abstractMatcher(textChunks as NonEmptyChunker<PartialTextNode>);
 
     for await (const abstractMatch of matches) {
-      const match = ownerDocument(scope).createRange();
-      match.setStart(abstractMatch.startChunk.node, abstractMatch.startChunk.startOffset + abstractMatch.startIndex);
-      match.setEnd(abstractMatch.endChunk.node, abstractMatch.endChunk.startOffset + abstractMatch.endIndex);
-      yield match;
+      yield textChunks.chunkRangeToRange(abstractMatch);
     }
   };
 }
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index f7c8da3..37a75ba 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -32,14 +32,7 @@ export function createTextQuoteSelectorMatcher(
     const textChunks = new TextNodeChunker(scope);
 
     for await (const abstractMatch of abstractMatcher(textChunks)) {
-      const match = document.createRange();
-      // The `+…startOffset` parts are only relevant for the first chunk, as it
-      // might start within a text node.
-      match.setStart(abstractMatch.startChunk.node,
-        abstractMatch.startIndex + abstractMatch.startChunk.startOffset);
-      match.setEnd(abstractMatch.endChunk.node,
-        abstractMatch.endIndex + abstractMatch.endChunk.startOffset);
-      yield match;
+      yield textChunks.chunkRangeToRange(abstractMatch);
     }
   }
 }


[incubator-annotator] 10/19: Make demo more challenging.

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 953f0d47737e3f5b6ff1e5c735adbb9f344ae13f
Author: Gerben <ge...@treora.com>
AuthorDate: Thu Nov 19 16:46:24 2020 +0100

    Make demo more challenging.
---
 web/demo/index.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/web/demo/index.html b/web/demo/index.html
index 3ed0961..6c86b4b 100644
--- a/web/demo/index.html
+++ b/web/demo/index.html
@@ -67,7 +67,7 @@ under the License.
     <div class="columns full-width">
       <div class="column">
         <h2>Select text here</h2>
-        <p id="source">Hello, annotated world! To annotate, or not to annotate, that is the question.</p>
+        <p id="source" contenteditable>Hello, <em>annotated world!</em> 🙂 <b>To annotate, or <em>not</em> to annotate</b>, that is the question.</p>
         <p>Try selecting some text in this paragraph above.
           Upon a change of selection, a
           <a rel="external" href="https://www.w3.org/TR/2017/REC-annotation-model-20170223/#text-quote-selector" target="_blank">TextQuoteSelector</a>
@@ -84,7 +84,7 @@ under the License.
       </div>
       <div class="column">
         <h2>Text is found here</h2>
-        <p id="target" contenteditable>Hello, annotated world! To annotate, or not to annotate, that is the question.</p>
+        <p id="target" contenteditable><em>Hello, annotated</em> world! 🙂 To annotate, or not to annotate, <b><em>that</em> is the question.</b></p>
         <p>The selector is ‘anchored’ here: the segment it describes is found and highlighted.</p>
       </div>
       <div class="column" style="min-width: 20em;">


[incubator-annotator] 08/19: Create basic tests for describe text position

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 114b98cf0e2082d90629acc775e21f2023dc7e17
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 18 19:40:18 2020 +0100

    Create basic tests for describe text position
---
 packages/dom/test/text-position/describe.test.ts | 55 ++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/packages/dom/test/text-position/describe.test.ts b/packages/dom/test/text-position/describe.test.ts
new file mode 100644
index 0000000..2eefd38
--- /dev/null
+++ b/packages/dom/test/text-position/describe.test.ts
@@ -0,0 +1,55 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { assert } from 'chai';
+import { describeTextPosition } from '../../src/text-position/describe';
+import { hydrateRange } from '../utils';
+import { testCases } from './match-cases';
+
+const domParser = new window.DOMParser();
+
+describe('createTextPositionSelectorMatcher', () => {
+  describe('inverts test cases of text position matcher', () => {
+    for (const [name, { html, selector, expected }] of Object.entries(
+      testCases,
+    )) {
+      const range = expected[0];
+      it(`case: '${name}'`, async () => {
+        const doc = domParser.parseFromString(html, 'text/html');
+        const scope = doc.createRange();
+        scope.selectNodeContents(doc);
+        const result = await describeTextPosition(hydrateRange(range, doc), scope);
+        assert.deepEqual(result, selector);
+      });
+    }
+  });
+
+  it('works with a scope', () => {
+    // TODO
+  });
+
+  it('works with split text nodes', () => {
+    // TODO
+  });
+
+  it('works with code points split across text nodes', () => {
+    // TODO
+  });
+});


[incubator-annotator] 06/19: Handle half-characters in CodePointSeeker.seekToChunk

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 9c1ef9a738c5a3b2113c0973557047dfcdffdd9d
Author: Gerben <ge...@treora.com>
AuthorDate: Wed Nov 18 18:55:18 2020 +0100

    Handle half-characters in CodePointSeeker.seekToChunk
---
 packages/dom/src/code-point-seeker.ts | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/packages/dom/src/code-point-seeker.ts b/packages/dom/src/code-point-seeker.ts
index b0a95cd..10cb1b1 100644
--- a/packages/dom/src/code-point-seeker.ts
+++ b/packages/dom/src/code-point-seeker.ts
@@ -64,8 +64,21 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke
     const oldPosition = this.position;
     const oldRawPosition = this.raw.position;
 
-    let result = [...this.raw.readToChunk(target, 0)];
-    this.position = this.raw.position >= oldRawPosition
+    let s = this.raw.readToChunk(target, 0);
+
+    const movedForward = this.raw.position >= oldRawPosition;
+
+    if (movedForward && endsWithinCharacter(s)) {
+      this.raw.seekBy(-1);
+      s = s.slice(0, -1);
+    } else if (!movedForward && startsWithinCharacter(s)) {
+      this.raw.seekBy(1);
+      s = s.slice(1);
+    }
+
+    let result = [...s];
+
+    this.position = movedForward
       ? this.position + result.length
       : this.position - result.length;
 
@@ -86,8 +99,8 @@ export class CodePointSeeker<TChunk extends Chunk<string>> implements ChunkSeeke
         this.seekTo(oldPosition);
         result = this.readTo(targetPosition);
       }
+      return result;
     }
-    return result;
   }
 
   private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string[];


[incubator-annotator] 14/19: This is what do–while was invented for :)

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit f37e397174028751fc002f7a26311604bcef95d9
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 12:03:45 2020 +0100

    This is what do–while was invented for :)
---
 packages/dom/src/text-quote/match.ts | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index dea1f68..dd69227 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -66,9 +66,9 @@ export function abstractTextQuoteSelectorMatcher(
     }
     let partialMatches: PartialMatch[] = [];
 
-    let chunk: TChunk | null;
     let isFirstChunk = true;
-    while (chunk = textChunks.currentChunk) {
+    do {
+      const chunk = textChunks.currentChunk;
       const chunkValue = chunk.data;
 
       // 1. Continue checking any partial matches from the previous chunk(s).
@@ -158,10 +158,7 @@ export function abstractTextQuoteSelectorMatcher(
         partialMatches.push(partialMatch);
       }
 
-      if (textChunks.nextChunk() === null)
-        break;
-
       isFirstChunk = false;
-    }
+    } while (textChunks.nextChunk() !== null);
   };
 }


[incubator-annotator] 13/19: Require all Chunkers to be non-empty

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 2da2f5046722ea24d61a8d1a2fc4b04b96b21c5d
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 11:55:57 2020 +0100

    Require all Chunkers to be non-empty
---
 packages/dom/src/chunker.ts                | 27 ++++++++++++++++++++-------
 packages/dom/src/seek.ts                   |  6 +-----
 packages/dom/src/text-position/describe.ts |  8 ++++----
 packages/dom/src/text-position/match.ts    | 12 +++++-------
 packages/dom/src/text-quote/describe.ts    |  8 ++++----
 packages/dom/src/text-quote/match.ts       | 12 ++++++++++--
 6 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/packages/dom/src/chunker.ts b/packages/dom/src/chunker.ts
index f671b5e..bb71857 100644
--- a/packages/dom/src/chunker.ts
+++ b/packages/dom/src/chunker.ts
@@ -53,8 +53,8 @@ export function chunkRangeEquals(range1: ChunkRange<any>, range2: ChunkRange<any
 // It is inspired by, and similar to, the DOM’s NodeIterator. (but unlike
 // NodeIterator, it has no concept of being ‘before’ or ‘after’ a chunk)
 export interface Chunker<TChunk extends Chunk<any>> {
-  // currentChunk is null only if it contains no chunks at all.
-  readonly currentChunk: TChunk | null;
+  // The chunk currently being pointed at.
+  readonly currentChunk: TChunk;
 
   // Move currentChunk to the chunk following it, and return that chunk.
   // If there are no chunks following it, keep currentChunk unchanged and return null.
@@ -74,14 +74,22 @@ export interface PartialTextNode extends Chunk<string> {
   readonly endOffset: number;
 }
 
+export class EmptyScopeError extends TypeError {
+  constructor(message?: string) {
+    super(message || 'Scope contains no text nodes.');
+  }
+}
+
 export class TextNodeChunker implements Chunker<PartialTextNode> {
 
   private iter: NodeIterator;
 
   get currentChunk() {
     const node = this.iter.referenceNode;
-    if (!isText(node))
-      return null;
+
+    // This test should not actually be needed, but it keeps TypeScript happy.
+    if (!isText(node)) throw new EmptyScopeError();
+
     return this.nodeToChunk(node);
   }
 
@@ -131,6 +139,9 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
     return range;
   }
 
+  /**
+   * @param scope A Range that overlaps with at least one text node.
+   */
   constructor(private scope: Range) {
     this.iter = ownerDocument(scope).createNodeIterator(
       scope.commonAncestorContainer,
@@ -146,9 +157,11 @@ export class TextNodeChunker implements Chunker<PartialTextNode> {
 
     // Move the iterator to after the start (= root) node.
     this.iter.nextNode();
-    // If the start node is not a text node, move it to the first text node (if any).
-    if (!isText(this.iter.referenceNode))
-      this.iter.nextNode();
+    // If the start node is not a text node, move it to the first text node.
+    if (!isText(this.iter.referenceNode)) {
+      const nextNode = this.iter.nextNode();
+      if (nextNode === null) throw new EmptyScopeError();
+    }
   }
 
   nextChunk() {
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 3832b07..75627e9 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -22,10 +22,6 @@ import { Chunk, Chunker, chunkEquals } from "./chunker";
 
 const E_END = 'Iterator exhausted before seek ended.';
 
-export interface NonEmptyChunker<TChunk extends Chunk<any>> extends Chunker<TChunk> {
-  readonly currentChunk: TChunk;
-}
-
 export interface Seeker<T extends Iterable<any> = string> {
   readonly position: number;
   read(length?: number, roundUp?: boolean): T;
@@ -56,7 +52,7 @@ export class TextSeeker<TChunk extends Chunk<string>> implements ChunkSeeker<TCh
   // The current text position (measured in code units)
   get position() { return this.currentChunkPosition + this.offsetInChunk; }
 
-  constructor(protected chunker: NonEmptyChunker<TChunk>) {
+  constructor(protected chunker: Chunker<TChunk>) {
     // Walk to the start of the first non-empty chunk inside the scope.
     this.seekTo(0);
   }
diff --git a/packages/dom/src/text-position/describe.ts b/packages/dom/src/text-position/describe.ts
index 8baff8d..a711410 100644
--- a/packages/dom/src/text-position/describe.ts
+++ b/packages/dom/src/text-position/describe.ts
@@ -20,9 +20,9 @@
 
 import type { TextPositionSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
-import { Chunk, Chunker, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker } from '../chunker';
 import { CodePointSeeker } from '../code-point-seeker';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
 
 export async function describeTextPosition(
   range: Range,
@@ -51,13 +51,13 @@ export async function describeTextPosition(
 
   return await abstractDescribeTextPosition(
     textChunks.rangeToChunkRange(range),
-    textChunks as NonEmptyChunker<PartialTextNode>,
+    textChunks,
   );
 }
 
 async function abstractDescribeTextPosition<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
-  scope: NonEmptyChunker<TChunk>,
+  scope: Chunker<TChunk>,
 ): Promise<TextPositionSelector> {
   const codeUnitSeeker = new TextSeeker(scope);
   const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
diff --git a/packages/dom/src/text-position/match.ts b/packages/dom/src/text-position/match.ts
index 53bfae3..cc8044e 100644
--- a/packages/dom/src/text-position/match.ts
+++ b/packages/dom/src/text-position/match.ts
@@ -19,9 +19,9 @@
  */
 
 import type { Matcher, TextPositionSelector } from '@annotator/selector';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
 import { CodePointSeeker } from '../code-point-seeker';
-import { Chunk, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { Chunk, ChunkRange, TextNodeChunker, Chunker } from '../chunker';
 
 export function createTextPositionSelectorMatcher(
   selector: TextPositionSelector,
@@ -31,9 +31,7 @@ export function createTextPositionSelectorMatcher(
   return async function* matchAll(scope) {
     const textChunks = new TextNodeChunker(scope);
 
-    if (textChunks.currentChunk === null)
-      throw new RangeError('Range does not contain any Text nodes.');
-    const matches = abstractMatcher(textChunks as NonEmptyChunker<PartialTextNode>);
+    const matches = abstractMatcher(textChunks);
 
     for await (const abstractMatch of matches) {
       yield textChunks.chunkRangeToRange(abstractMatch);
@@ -43,10 +41,10 @@ export function createTextPositionSelectorMatcher(
 
 export function abstractTextPositionSelectorMatcher(
   selector: TextPositionSelector,
-): <TChunk extends Chunk<any>>(scope: NonEmptyChunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
+): <TChunk extends Chunk<any>>(scope: Chunker<TChunk>) => AsyncGenerator<ChunkRange<TChunk>, void, void> {
   const { start, end } = selector;
 
-  return async function* matchAll<TChunk extends Chunk<string>>(textChunks: NonEmptyChunker<TChunk>) {
+  return async function* matchAll<TChunk extends Chunk<string>>(textChunks: Chunker<TChunk>) {
     const codeUnitSeeker = new TextSeeker(textChunks);
     const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
 
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 4f0c70f..2e4693e 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -22,7 +22,7 @@ import type { TextQuoteSelector } from '@annotator/selector';
 import { ownerDocument } from '../owner-document';
 import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
 import { abstractTextQuoteSelectorMatcher } from '.';
-import { TextSeeker, NonEmptyChunker } from '../seek';
+import { TextSeeker } from '../seek';
 
 export async function describeTextQuote(
   range: Range,
@@ -57,7 +57,7 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
   scope: () => Chunker<TChunk>,
 ): Promise<TextQuoteSelector> {
-  const seeker = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
+  const seeker = new TextSeeker(scope());
 
   // Read the target’s exact text.
   seeker.seekToChunk(target.startChunk, target.startIndex);
@@ -95,8 +95,8 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
 
     // We’ll have to add more prefix/suffix to disqualify this unintended match.
     const unintendedMatch = nextMatch.value;
-    const seeker1 = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
-    const seeker2 = new TextSeeker(scope() as NonEmptyChunker<TChunk>);
+    const seeker1 = new TextSeeker(scope());
+    const seeker2 = new TextSeeker(scope());
 
     // Count how many characters we’d need as a prefix to disqualify this match.
     seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index 38e09d5..dea1f68 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -19,7 +19,7 @@
  */
 
 import type { Matcher, TextQuoteSelector } from '@annotator/selector';
-import { TextNodeChunker, Chunk, Chunker, ChunkRange, PartialTextNode } from '../chunker';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker, EmptyScopeError } from '../chunker';
 
 export function createTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
@@ -27,7 +27,15 @@ export function createTextQuoteSelectorMatcher(
   const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
 
   return async function* matchAll(scope) {
-    const textChunks = new TextNodeChunker(scope);
+    let textChunks;
+    try {
+      textChunks = new TextNodeChunker(scope);
+    } catch (err) {
+      if (err instanceof EmptyScopeError)
+        return; // An empty range contains no matches.
+      else
+        throw err;
+    }
 
     for await (const abstractMatch of abstractMatcher(textChunks)) {
       yield textChunks.chunkRangeToRange(abstractMatch);


[incubator-annotator] 04/19: Implement describe text position

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 5706821a20472b934fd7a524e2298bd7cb346b9c
Author: Gerben <ge...@treora.com>
AuthorDate: Mon Nov 16 23:01:25 2020 +0100

    Implement describe text position
---
 packages/dom/src/text-position/describe.ts | 74 ++++++++++++++++++++++++++++++
 packages/dom/src/text-position/index.ts    |  1 +
 2 files changed, 75 insertions(+)

diff --git a/packages/dom/src/text-position/describe.ts b/packages/dom/src/text-position/describe.ts
new file mode 100644
index 0000000..8baff8d
--- /dev/null
+++ b/packages/dom/src/text-position/describe.ts
@@ -0,0 +1,74 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import type { TextPositionSelector } from '@annotator/selector';
+import { ownerDocument } from '../owner-document';
+import { Chunk, Chunker, ChunkRange, TextNodeChunker, PartialTextNode } from '../chunker';
+import { CodePointSeeker } from '../code-point-seeker';
+import { TextSeeker, NonEmptyChunker } from '../seek';
+
+export async function describeTextPosition(
+  range: Range,
+  maybeScope?: Range,
+): Promise<TextPositionSelector> {
+  // Default to search in the whole document.
+  let scope: Range;
+  if (maybeScope !== undefined) {
+    scope = maybeScope;
+  } else {
+    const document = ownerDocument(range);
+    scope = document.createRange();
+    scope.selectNodeContents(document);
+  }
+
+  // Take the part of the range that falls within the scope.
+  range = range.cloneRange();
+  if (range.compareBoundaryPoints(Range.START_TO_START, scope) === -1)
+    range.setStart(scope.startContainer, scope.startOffset);
+  if (range.compareBoundaryPoints(Range.END_TO_END, scope) === 1)
+    range.setEnd(scope.endContainer, scope.endOffset);
+
+  const textChunks = new TextNodeChunker(scope);
+  if (textChunks.currentChunk === null)
+    throw new RangeError('Range does not contain any Text nodes.');
+
+  return await abstractDescribeTextPosition(
+    textChunks.rangeToChunkRange(range),
+    textChunks as NonEmptyChunker<PartialTextNode>,
+  );
+}
+
+async function abstractDescribeTextPosition<TChunk extends Chunk<string>>(
+  target: ChunkRange<TChunk>,
+  scope: NonEmptyChunker<TChunk>,
+): Promise<TextPositionSelector> {
+  const codeUnitSeeker = new TextSeeker(scope);
+  const codePointSeeker = new CodePointSeeker(codeUnitSeeker);
+
+  codePointSeeker.seekToChunk(target.startChunk, target.startIndex);
+  const start = codePointSeeker.position;
+  codePointSeeker.seekToChunk(target.endChunk, target.endIndex);
+  const end = codePointSeeker.position;
+  return {
+    type: 'TextPositionSelector',
+    start,
+    end,
+  };
+}
diff --git a/packages/dom/src/text-position/index.ts b/packages/dom/src/text-position/index.ts
index 011e994..bb73732 100644
--- a/packages/dom/src/text-position/index.ts
+++ b/packages/dom/src/text-position/index.ts
@@ -18,4 +18,5 @@
  * under the License.
  */
 
+export * from './describe';
 export * from './match';