You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/10/15 22:36:41 UTC

[incubator-annotator] 01/04: WIP

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit ff0bf4536364e2aa92e3c92dc9b3d175ffbbef75
Author: Gerben <ge...@treora.com>
AuthorDate: Thu Oct 15 22:30:51 2020 +0200

    WIP
---
 packages/dom/src/seek.ts | 123 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 108 insertions(+), 15 deletions(-)

diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 2d0efdc..78102fa 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -2,7 +2,28 @@ import { ownerDocument } from "./owner-document";
 
 const E_END = 'Iterator exhausted before seek ended.';
 
-export class Seeker {
+interface BoundaryPointer {
+  readonly referenceNode: Node;
+  readonly offsetInReferenceNode: number;
+}
+
+interface TextBoundaryPointer extends BoundaryPointer{
+  readonly referenceNode: Text;
+  readonly offsetInReferenceNode: number;
+}
+
+interface Chunker {
+  read1(): string;
+}
+
+interface Seeker extends Chunker {
+  readonly position: number;
+  read(length: number): string;
+  seekBy(length: number): void;
+  seekTo(target: number): void;
+}
+
+export class Seeker_ implements Seeker, TextBoundaryPointer {
   // The node containing our current text position.
   get referenceNode(): Text {
     // The NodeFilter will guarantee this is a Text node (except before the
@@ -14,12 +35,12 @@ export class Seeker {
   offsetInReferenceNode = 0;
 
   // The index of the first character of iter.referenceNode inside the text.
-  // get referenceNodeIndex() { return this.position - this.offsetInReferenceNode; }
-  referenceNodeIndex = 0;
+  // get referenceNodePosition() { return this.position - this.offsetInReferenceNode; }
+  private referenceNodePosition = 0;
 
   // The current text position, i.e. the number of code units passed so far.
   // position = 0;
-  get position() { return this.referenceNodeIndex + this.offsetInReferenceNode; }
+  get position() { return this.referenceNodePosition + this.offsetInReferenceNode; }
 
   // // The number of code points passed so far.
   // codePointCount = 0;
@@ -48,7 +69,7 @@ export class Seeker {
 
     if (isText(scope.startContainer)) {
       // The scope starts inside the text node. Adjust our index accordingly.
-      this.referenceNodeIndex = -scope.startOffset;
+      this.referenceNodePosition = -scope.startOffset;
       this.offsetInReferenceNode = scope.startOffset;
     }
     // TODO Handle the scope.endOffset as well, and fix behaviour in edge cases
@@ -61,34 +82,53 @@ export class Seeker {
   // seekCodePoints(count: number) {
   // }
 
-  seekBy(count: number) {
-    return this.seekTo(this.position + count);
+  read(length: number) {
+    return this._readOrSeekTo(true, this.position + length);
+  }
+
+  read1() {
+    return this._readOrSeekTo(true, this.position + 1, true);
+  }
+
+  seekBy(length: number) {
+    this.seekTo(this.position + length);
   }
 
   seekTo(target: number) {
+    this._readOrSeekTo(false, target);
+  }
+
+  private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string
+  private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void
+  private _readOrSeekTo(read: boolean, target: number, roundUp: boolean = false): string | void {
+    let result = '';
+
     // Move the iterator to after the current node, so nextNode() would cause a jump.
     if (this.iter.pointerBeforeReferenceNode)
       this.iter.nextNode();
 
     while (this.position <= target) {
-      if (target < this.referenceNodeIndex + this.referenceNode.length) {
+      if (!roundUp && target < this.referenceNodePosition + this.referenceNode.length) {
         // The target is before the end of the current node.
         // (we use < not ≤: if the target is *at* the end of the node, possibly
         // because the current node is empty, we prefer to take the next node)
-        this.offsetInReferenceNode = target - this.referenceNodeIndex;
+        const oldOffset = this.offsetInReferenceNode;
+        this.offsetInReferenceNode = target - this.referenceNodePosition;
+        if (read) result += this.referenceNode.data.substring(oldOffset, this.offsetInReferenceNode);
         // if (this.countCodePoints)
         //   this.codePointCount += [...this.referenceNode.data.substring(oldOffset, this.offsetInReferenceNode)].length;
         break;
       }
 
       // Move to the start of the next node, while counting the characters of the current one.
+      if (read) result += this.referenceNode.data.substring(this.offsetInReferenceNode);
       const curNode = this.referenceNode;
       const nextNode = this.iter.nextNode();
       if (nextNode !== null) {
-        this.referenceNodeIndex += curNode.length;
+        this.referenceNodePosition += curNode.length;
         this.offsetInReferenceNode = 0;
         // if (this.countCodePoints)
-        //   this.codePointCount += [...curNode.data].length;
+        //   this.codePointCount += [...curNode.data.substring(curOffset)].length;
       } else {
         // There is no next node. Finish at the end of the last node.
         this.offsetInReferenceNode = this.referenceNode.length;
@@ -102,13 +142,15 @@ export class Seeker {
       }
     }
 
-    // Move to the start of the current node.
+    if (read) return result;
+
+    // Move to the start of the current node to prepare for moving backwards.
     if (!this.iter.pointerBeforeReferenceNode)
       this.iter.previousNode();
 
     while (this.position > target) {
-      if (this.referenceNodeIndex <= target) {
-        this.offsetInReferenceNode = target - this.referenceNodeIndex;
+      if (this.referenceNodePosition <= target) {
+        this.offsetInReferenceNode = target - this.referenceNodePosition;
         // if (this.countCodePoints)
         //   this.codePointCount -= [...this.referenceNode.data.substring(this.offsetInReferenceNode, oldOffset)].length;
         break;
@@ -118,7 +160,7 @@ export class Seeker {
       // const curNode = this.referenceNode;
       const prevNode = this.iter.previousNode();
       if (prevNode !== null) {
-        this.referenceNodeIndex -= this.referenceNode.length;
+        this.referenceNodePosition -= this.referenceNode.length;
         this.offsetInReferenceNode = this.referenceNode.length;
         // if (this.countCodePoints)
         //   this.codePointCount -= [...curNode.data].length;
@@ -134,3 +176,54 @@ export class Seeker {
 function isText(node: Node): node is Text {
   return node.nodeType === Node.TEXT_NODE;
 }
+
+class CharSeeker implements Seeker, TextBoundaryPointer {
+  constructor(public readonly raw: Seeker & TextBoundaryPointer) {
+  }
+
+  position = 0;
+  get referenceNode() { return this.raw.referenceNode };
+  get offsetInReferenceNode() {
+    const substring = this.referenceNode.data.substring(0, this.raw.offsetInReferenceNode);
+    return [...substring].length;
+  };
+
+  seekBy(length: number) {
+    return this.seekTo(this.position + length);
+  }
+
+  seekTo(target: number) {
+    this._readOrSeekTo(target, false);
+  }
+
+  read(length: number) {
+    return this._readOrSeekTo(this.position + length, true);
+  }
+
+  read1() {
+    return this._read1().nextChunk;
+  }
+
+  private _read1() {
+    const nextChunk = this.raw.read1();
+    const characters = [...nextChunk];
+    this.position += characters.length;
+    return { nextChunk, characters };
+  }
+
+  private _readOrSeekTo(target: number, read: true): string;
+  private _readOrSeekTo(target: number, read: false): void;
+  private _readOrSeekTo(target: number, read: boolean): string | void {
+    let characters: string[] = [];
+    let result = '';
+    let nextChunk;
+    while (this.position < target) {
+      ({ nextChunk, characters } = this._read1());
+      if (read) result += nextChunk;
+    }
+    const overshootInCodePoints = this.position - target;
+    const overshootInCodeUnits = characters.slice(overshootInCodePoints).join('').length;
+    this.raw.seekBy(-overshootInCodeUnits);
+    if (read) return result;
+  }
+}