You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/10/15 22:36:41 UTC
[incubator-annotator] 01/04: WIP
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit ff0bf4536364e2aa92e3c92dc9b3d175ffbbef75
Author: Gerben <ge...@treora.com>
AuthorDate: Thu Oct 15 22:30:51 2020 +0200
WIP
---
packages/dom/src/seek.ts | 123 +++++++++++++++++++++++++++++++++++++++++------
1 file changed, 108 insertions(+), 15 deletions(-)
diff --git a/packages/dom/src/seek.ts b/packages/dom/src/seek.ts
index 2d0efdc..78102fa 100644
--- a/packages/dom/src/seek.ts
+++ b/packages/dom/src/seek.ts
@@ -2,7 +2,28 @@ import { ownerDocument } from "./owner-document";
const E_END = 'Iterator exhausted before seek ended.';
-export class Seeker {
+interface BoundaryPointer { // A location expressed as a node plus an offset relative to that node.
+ readonly referenceNode: Node; // The node that the offset refers to.
+ readonly offsetInReferenceNode: number; // Offset within referenceNode; the unit of counting is not fixed by this interface.
+}
+
+interface TextBoundaryPointer extends BoundaryPointer{ // A BoundaryPointer whose reference node is always a Text node.
+ readonly referenceNode: Text; // Narrows the inherited Node type to Text.
+ readonly offsetInReferenceNode: number; // Restated from BoundaryPointer with the same type.
+}
+
+interface Chunker { // A source of text that hands out one chunk per call.
+ read1(): string; // Returns a chunk as a string; the implementations in this file advance past the text they return.
+}
+
+interface Seeker extends Chunker { // A Chunker that also exposes a numeric position and ways to move it.
+ readonly position: number; // Current position; the unit of counting depends on the implementation.
+ read(length: number): string; // Reads from the current position up to position + length and returns that text.
+ seekBy(length: number): void; // Moves the position by `length`, relative to the current position.
+ seekTo(target: number): void; // Moves the position to the absolute value `target`.
+}
+
+export class Seeker_ implements Seeker, TextBoundaryPointer {
// The node containing our current text position.
get referenceNode(): Text {
// The NodeFilter will guarantee this is a Text node (except before the
@@ -14,12 +35,12 @@ export class Seeker {
offsetInReferenceNode = 0;
// The index of the first character of iter.referenceNode inside the text.
- // get referenceNodeIndex() { return this.position - this.offsetInReferenceNode; }
- referenceNodeIndex = 0;
+ // get referenceNodePosition() { return this.position - this.offsetInReferenceNode; }
+ private referenceNodePosition = 0;
// The current text position, i.e. the number of code units passed so far.
// position = 0;
- get position() { return this.referenceNodeIndex + this.offsetInReferenceNode; }
+ get position() { return this.referenceNodePosition + this.offsetInReferenceNode; }
// // The number of code points passed so far.
// codePointCount = 0;
@@ -48,7 +69,7 @@ export class Seeker {
if (isText(scope.startContainer)) {
// The scope starts inside the text node. Adjust our index accordingly.
- this.referenceNodeIndex = -scope.startOffset;
+ this.referenceNodePosition = -scope.startOffset;
this.offsetInReferenceNode = scope.startOffset;
}
// TODO Handle the scope.endOffset as well, and fix behaviour in edge cases
@@ -61,34 +82,53 @@ export class Seeker {
// seekCodePoints(count: number) {
// }
- seekBy(count: number) {
- return this.seekTo(this.position + count);
+ read(length: number) {
+ return this._readOrSeekTo(true, this.position + length);
+ }
+
+ read1() {
+ return this._readOrSeekTo(true, this.position + 1, true);
+ }
+
+ seekBy(length: number) {
+ this.seekTo(this.position + length);
}
seekTo(target: number) {
+ this._readOrSeekTo(false, target);
+ }
+
+ private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string
+ private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void
+ private _readOrSeekTo(read: boolean, target: number, roundUp: boolean = false): string | void {
+ let result = '';
+
// Move the iterator to after the current node, so nextNode() would cause a jump.
if (this.iter.pointerBeforeReferenceNode)
this.iter.nextNode();
while (this.position <= target) {
- if (target < this.referenceNodeIndex + this.referenceNode.length) {
+ if (!roundUp && target < this.referenceNodePosition + this.referenceNode.length) {
// The target is before the end of the current node.
// (we use < not ≤: if the target is *at* the end of the node, possibly
// because the current node is empty, we prefer to take the next node)
- this.offsetInReferenceNode = target - this.referenceNodeIndex;
+ const oldOffset = this.offsetInReferenceNode;
+ this.offsetInReferenceNode = target - this.referenceNodePosition;
+ if (read) result += this.referenceNode.data.substring(oldOffset, this.offsetInReferenceNode);
// if (this.countCodePoints)
// this.codePointCount += [...this.referenceNode.data.substring(oldOffset, this.offsetInReferenceNode)].length;
break;
}
// Move to the start of the next node, while counting the characters of the current one.
+ if (read) result += this.referenceNode.data.substring(this.offsetInReferenceNode);
const curNode = this.referenceNode;
const nextNode = this.iter.nextNode();
if (nextNode !== null) {
- this.referenceNodeIndex += curNode.length;
+ this.referenceNodePosition += curNode.length;
this.offsetInReferenceNode = 0;
// if (this.countCodePoints)
- // this.codePointCount += [...curNode.data].length;
+ // this.codePointCount += [...curNode.data.substring(curOffset)].length;
} else {
// There is no next node. Finish at the end of the last node.
this.offsetInReferenceNode = this.referenceNode.length;
@@ -102,13 +142,15 @@ export class Seeker {
}
}
- // Move to the start of the current node.
+ if (read) return result;
+
+ // Move to the start of the current node to prepare for moving backwards.
if (!this.iter.pointerBeforeReferenceNode)
this.iter.previousNode();
while (this.position > target) {
- if (this.referenceNodeIndex <= target) {
- this.offsetInReferenceNode = target - this.referenceNodeIndex;
+ if (this.referenceNodePosition <= target) {
+ this.offsetInReferenceNode = target - this.referenceNodePosition;
// if (this.countCodePoints)
// this.codePointCount -= [...this.referenceNode.data.substring(this.offsetInReferenceNode, oldOffset)].length;
break;
@@ -118,7 +160,7 @@ export class Seeker {
// const curNode = this.referenceNode;
const prevNode = this.iter.previousNode();
if (prevNode !== null) {
- this.referenceNodeIndex -= this.referenceNode.length;
+ this.referenceNodePosition -= this.referenceNode.length;
this.offsetInReferenceNode = this.referenceNode.length;
// if (this.countCodePoints)
// this.codePointCount -= [...curNode.data].length;
@@ -134,3 +176,54 @@ export class Seeker {
/** Type guard: reports whether `node` is a Text node, judged by its `nodeType`. */
function isText(node: Node): node is Text {
  const kind = node.nodeType;
  return kind === Node.TEXT_NODE;
}
+
+/**
+ * Wraps another seeker so that positions and lengths are counted in Unicode
+ * code points (as produced by string iteration) rather than in the units used
+ * by the wrapped seeker. The wrapped seeker is assumed to count in UTF-16
+ * code units, since its offsets are used directly as string indices here.
+ *
+ * NOTE(review): only forward movement is implemented. A target at or before
+ * the current position leaves `position` unchanged.
+ */
+class CharSeeker implements Seeker, TextBoundaryPointer {
+  /** The number of code points passed so far. */
+  position = 0;
+
+  /** @param raw - The underlying seeker that performs the actual reading and seeking. */
+  constructor(public readonly raw: Seeker & TextBoundaryPointer) {}
+
+  /** The Text node the underlying seeker currently points into. */
+  get referenceNode() {
+    return this.raw.referenceNode;
+  }
+
+  /** The offset inside `referenceNode`, counted in code points. */
+  get offsetInReferenceNode() {
+    const substring = this.referenceNode.data.substring(0, this.raw.offsetInReferenceNode);
+    return [...substring].length;
+  }
+
+  /** Moves by `length` code points relative to the current position. */
+  seekBy(length: number) {
+    this.seekTo(this.position + length);
+  }
+
+  /** Moves to the absolute position `target`, counted in code points. */
+  seekTo(target: number) {
+    this._readOrSeekTo(target, false);
+  }
+
+  /** Reads `length` code points forward and returns exactly that text. */
+  read(length: number) {
+    return this._readOrSeekTo(this.position + length, true);
+  }
+
+  /** Reads the next chunk from the underlying seeker and returns it whole. */
+  read1() {
+    return this._read1().nextChunk;
+  }
+
+  /** Reads one chunk from `raw` and advances `position` by its code point count. */
+  private _read1() {
+    const nextChunk = this.raw.read1();
+    const characters = [...nextChunk];
+    this.position += characters.length;
+    return { nextChunk, characters };
+  }
+
+  private _readOrSeekTo(target: number, read: true): string;
+  private _readOrSeekTo(target: number, read: false): void;
+  /**
+   * Advances chunk by chunk until `target` is reached or passed, then rewinds
+   * the underlying seeker by the part of the last chunk that lies beyond
+   * `target`.
+   *
+   * @param target - Absolute position to end at, in code points.
+   * @param read - Whether to collect and return the text that was passed.
+   * @returns The text between the old position and `target` when `read` is true.
+   */
+  private _readOrSeekTo(target: number, read: boolean): string | void {
+    let lastChunkCharacters: string[] = [];
+    let result = '';
+    const movedForward = this.position < target;
+    while (this.position < target) {
+      const { nextChunk, characters } = this._read1();
+      lastChunkCharacters = characters;
+      if (read) result += nextChunk;
+    }
+
+    let overshootInCodeUnits = 0;
+    if (movedForward) {
+      // The overshoot is the *tail* of the last chunk: its last N code points.
+      const overshootInCodePoints = this.position - target;
+      overshootInCodeUnits = lastChunkCharacters
+        .slice(lastChunkCharacters.length - overshootInCodePoints)
+        .join('').length;
+      // Keep our own counter in step with where `raw` ends up after rewinding.
+      this.position = target;
+    }
+    this.raw.seekBy(-overshootInCodeUnits);
+
+    // Drop the overshoot so the caller receives only the text up to `target`.
+    if (read) return result.slice(0, result.length - overshootInCodeUnits);
+  }
+}