You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/09/15 20:58:34 UTC

[incubator-annotator] branch improve-range-stuff updated (2be85b6 -> 18c9eab)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch improve-range-stuff
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


 discard 2be85b6  WIP reimplement range iteration
     add 8e24974  Get Babel module resolver working with TypeScript preset
     add 28821d6  Set up TypeScript linting with ESLint
     add 3aeb518  Do not lint the demo build output
     add e59fb15  Lint the webpack config
     add 230c0d2  Use names for inter-package references
     add 58a0421  Use a single tsconfig.json
     add 1320153  Remove JSON module resolution in TypeScript
     add d9dcd28  Fix remaining test typecheck failures
     add f348d4d  Fix lexical scope in switch case
     add 35cc009  Apply automatic lint fixes
     add cff896a  Remove excess space in package.json
     add 9642123  nitpicking
     add 6b38b35  Fix document->doc
     add 519b231  Move test utils.ts one folder up
     add d453e3a  Support highlighting empty ranges
     add aa5bd9e  Create tests for highlighter
     add d4d933c  Merge branch 'tweak-highlighter'
     add 6a614da  Lint the highlighter tests
     add 17fb8a5  Upgrade ESLint and Prettier related dependencies
     add 6a86565  Make babel-register a little terser
     add 9de88db  Add babel-register.js to ESLint files
     add defd047  Add tsconfig.json to .ratignore
     add 0053b2a  Add missing ASL headers
     add 1b31726  Apply automatic lint fixes
     add 3337df7  Use type-only exports where applicable
     add f4c41fc  Set @babel/typescript options
     add 5c3c9dc  Remove default exports
     add c00cf1c  Enable @typescript-eslint/explicit-function-return-type rule
     add 8eaef7c  Enable the downlevelIteration TypeScript compiler option
     add 3ea6e80  Upgrade mocha from v7 to v8
     add 994fc24  Upgrade typescript from v3.8 to v3.9
     add 43307f0  Enable esnext compilation for TypeScript
     add 3430aa5  Enable support for shipped ECMAScript proposals
     add d4e2c0f  Clean up unnecessary .js and .ts extension options
     add 70a6460  Fix swapped assertion result and expectation
     add ecca786  Run CI tests on Node.js 14
     add c98dea0  Switch to a solution-style TypeScript configuration
     add 9984acf  Upgrade Travis environment from xenial to bionic
     add 9f584cb  Simplify the mocha invocation by ignoring node_modules
     add bd010e0  Remove plain JavaScript from babel-register
     add e81082e  Target the current node version for running tests
     add 6935bfc  Make sure selector is built before dom
     add 5a75d7c  Remove dangling comma in a tsconfig.json file
     add db8b26e  Target only the latest ECMAScript standard
     add 7137b54  Enable minimal syntax polyfills in @babel/preset-env
     add 26df739  Do not lint generated declaration files
     add 1659c20  Include all packages as projects for Typescript ESLint
     add 6d03197  Sort .gitignore
     add 17e1d1a  Add tsconfig.base.json to .ratignore
     add a327076  Make .ratignore use the correct syntax
     add fbe4b94  Add TypeScript solution support to scripts
     add f9db3f2  Remove unnecessary /index in imports
     add 924ee09  Include the validation tests in the top-level solution
     add 32bba11  Remove baseUrl option in shared TypeScript configuration
     add f2246af  Updated supported Node.js versions
     add 06141b0  Add conditional exports
     add 7edcecc  Obssessive alphabetizing
     add ca3cd02  Add Node.js 13 to CI test matrix
     add 675517b  Deduplicate lockfile
     add 1ba5b4f  Commit post-deduplication lockfile changes
     add 21cb6da  Remove superfluous .js extension support in nyc
     add 710e235  Add type qualifier to type-only imports
     add acf90a5  Add extensions to import and require statements in build
     add bca65b9  Export all types from @annotator/selector
     add a2806d4  Use a unified output directory with .js or .mjs extensions
     add da48584  Add comment to highlighter about using normalize()
     add 500af96  Support empty ranges in highlighter & demo
     add 2cea3e3  Merge branch 'demo-empty-quote'
     add 90336e1  Output declarations to the lib directory
     add fd7c72c  Include all the tests in the root project
     add 05298c2  Raise the TypeScript target to es2018
     add 0121bd2  Turn off the @typescript-eslint/require-await lint rule
     add decfe8c  Restrict DOM scopes to be instances of Range
     add f4adabd  Reintroduce ownerDocument(), now in util.ts
     add b118253  Remove the no-relative-parent-imports eslint rule.
     add 983f005  Fix default scope in describeTextQuote
     add e5f3d46  Fix some references to global `document`
     add 14df40f  Delete unused types.ts
     add 1d8ca69  Rename util.ts→owner-document.ts
     add 267bbcb  Merge pull request #88 from apache/range-as-dom-scope
     add 9e36aaf  Fix forgotten scope→Range change
     add 934746a  Fix describeTextQuote’s clipping range to scope
     add 83e00f8  Fix spec mismatch: RangeSelector’s start is inclusive.
     add 871e51d  Support multi-selection in demo
     new 6a790ff  Update test names and tweak some tests.
     new 18c9eab  WIP Create chunk abstraction for text quote matching

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (2be85b6)
            \
             N -- N -- N   refs/heads/improve-range-stuff (18c9eab)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .eslintignore                                      |    7 +-
 .eslintrc.js                                       |   96 +-
 .gitignore                                         |    6 +-
 .mocharc.js                                        |    3 +
 .ratignore                                         |   31 +-
 .travis.yml                                        |    3 +
 @types/cartesian/index.d.ts                        |    3 -
 @types/dom-seek/index.d.ts                         |    3 -
 README.md                                          |    8 +-
 babel-register.js                                  |   22 +-
 babel.config.js                                    |   51 +-
 nyc.config.js                                      |    2 +-
 package.json                                       |   46 +-
 packages/dom/package.json                          |   14 +-
 packages/dom/src/css.ts                            |   23 +-
 packages/dom/src/highlight-range.ts                |   45 +-
 packages/dom/src/index.ts                          |    4 +-
 .mocharc.js => packages/dom/src/owner-document.ts  |    8 +-
 packages/dom/src/range/cartesian.ts                |   29 +-
 packages/dom/src/range/match.ts                    |   23 +-
 packages/dom/src/scope.ts                          |   41 -
 packages/dom/src/text-iterator.ts                  |   51 +-
 packages/dom/src/text-quote/describe.ts            |  128 +-
 packages/dom/src/text-quote/match.ts               |  107 +-
 packages/dom/src/types.ts                          |   25 -
 .../dom/src/types/cartesian.d.ts                   |    8 +-
 .../dom/src/types/dom-seek.d.ts                    |    9 +-
 .../test/highlight-range/highlight-range.test.ts   |  218 +++
 packages/dom/test/range/cartesian.test.ts          |    5 +-
 packages/dom/test/text-quote/describe-cases.ts     |   19 +-
 packages/dom/test/text-quote/describe.test.ts      |   48 +-
 packages/dom/test/text-quote/match-cases.ts        |   88 +-
 packages/dom/test/text-quote/match.test.ts         |  104 +-
 packages/dom/test/{text-quote => }/utils.ts        |   39 +-
 packages/dom/tsconfig.json                         |   11 +
 packages/selector/package.json                     |   11 +-
 packages/selector/src/index.ts                     |   15 +-
 packages/selector/src/types.ts                     |   22 +-
 packages/selector/tsconfig.json                    |    8 +
 test/data-model.test.ts                            |   46 +-
 tsconfig.base.json                                 |   19 +
 tsconfig.json                                      |   29 +-
 tsconfig.tests.json                                |   15 -
 web/demo/index.js                                  |   32 +-
 web/style.css                                      |    2 +-
 yarn.lock                                          | 1657 +++++++-------------
 46 files changed, 1574 insertions(+), 1610 deletions(-)
 delete mode 100644 @types/cartesian/index.d.ts
 delete mode 100644 @types/dom-seek/index.d.ts
 copy .mocharc.js => packages/dom/src/owner-document.ts (78%)
 delete mode 100644 packages/dom/src/scope.ts
 delete mode 100644 packages/dom/src/types.ts
 copy .mocharc.js => packages/dom/src/types/cartesian.d.ts (84%)
 copy .mocharc.js => packages/dom/src/types/dom-seek.d.ts (86%)
 create mode 100644 packages/dom/test/highlight-range/highlight-range.test.ts
 rename packages/dom/test/{text-quote => }/utils.ts (64%)
 create mode 100644 packages/dom/tsconfig.json
 create mode 100644 packages/selector/tsconfig.json
 create mode 100644 tsconfig.base.json
 delete mode 100644 tsconfig.tests.json


[incubator-annotator] 02/02: WIP Create chunk abstraction for text quote matching

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch improve-range-stuff
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 18c9eabffd175364385c19902b11f07a630d375a
Author: Gerben <ge...@treora.com>
AuthorDate: Tue Sep 15 19:55:35 2020 +0200

    WIP Create chunk abstraction for text quote matching
---
 packages/dom/src/text-iterator.ts           |  79 ++++++++++++++++++++++
 packages/dom/src/text-quote/match.ts        | 101 ++++++++++++++--------------
 packages/dom/test/text-quote/match-cases.ts |   8 +--
 3 files changed, 134 insertions(+), 54 deletions(-)

diff --git a/packages/dom/src/text-iterator.ts b/packages/dom/src/text-iterator.ts
new file mode 100644
index 0000000..dfc1384
--- /dev/null
+++ b/packages/dom/src/text-iterator.ts
@@ -0,0 +1,79 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { ownerDocument } from "./owner-document";
+
+export interface TextRange extends Range {
+  // We guarantee to always have Text nodes as start and end containers.
+  readonly startContainer: Text;
+  readonly endContainer: Text;
+  cloneRange(): TextRange;
+
+  // Allow only Text nodes to be passed to these methods.
+  insertNode(node: Text): void;
+  selectNodeContents(node: Text): void;
+  setEnd(node: Text, offset: number): void;
+  setStart(node: Text, offset: number): void;
+
+  // Do not allow these methods to be used at all.
+  selectNode(node: never): void;
+  setEndAfter(node: never): void;
+  setEndBefore(node: never): void;
+  setStartAfter(node: never): void;
+  setStartBefore(node: never): void;
+  surroundContents(newParent: never): void;
+}
+
+export interface Chunk {
+  toString(): string;
+}
+
+// Yields ranges whose start and end nodes are both the *same* Text node.
+export async function* chunkRange(scope: Range): AsyncIterable<TextRange> {
+  const document = ownerDocument(scope);
+
+  const iter = document.createNodeIterator(
+    scope.commonAncestorContainer,
+    NodeFilter.SHOW_TEXT,
+    {
+      acceptNode(node: Text) {
+        // Only reveal nodes within the range; and skip any empty text nodes.
+        return scope.intersectsNode(node) && node.length > 0
+          ? NodeFilter.FILTER_ACCEPT
+          : NodeFilter.FILTER_REJECT;
+      },
+    },
+  );
+
+  let node: Text | null;
+  while (node = iter.nextNode() as (Text | null)) {
+    const range = document.createRange() as TextRange;
+    range.selectNodeContents(node);
+
+    if (node === scope.startContainer) {
+      range.setStart(node, scope.startOffset);
+    }
+    if (node === scope.endContainer) {
+      range.setEnd(node, scope.endOffset);
+    }
+
+    yield range;
+  }
+}
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index a4a216f..6b7fd93 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -18,71 +18,72 @@
  * under the License.
  */
 
-import type { Matcher, TextQuoteSelector } from '@annotator/selector';
-import seek from 'dom-seek';
+import type { TextQuoteSelector } from '@annotator/selector';
 
-import { ownerDocument } from '../owner-document';
+import { chunkRange, Chunk, TextRange } from '../text-iterator';
 
 export function createTextQuoteSelectorMatcher(
   selector: TextQuoteSelector,
-): Matcher<Range, Range> {
+): (scope: Range) => AsyncGenerator<TextRange, void, void> {
+  const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
   return async function* matchAll(scope) {
-    const document = ownerDocument(scope);
-    const scopeText = scope.toString();
+    // Turn the scope into a stream of ranges, each wrapping exactly one text node. We wrap it in
+    // a range such that the first and last text node can be partially included. Could be changed
+    // to e.g. be an object { node: Text, startOffset, endOffset }.
+    const textChunks = chunkRange(scope);
 
+    for await (const abstractMatch of abstractMatcher(textChunks)) {
+      const match = document.createRange() as TextRange;
+      // The `+…startOffset` part is only relevant for the first chunk, whose text node might be partially in scope.
+      match.setStart(abstractMatch.startChunk.startContainer,
+        abstractMatch.startIndex + abstractMatch.startChunk.startOffset);
+      match.setEnd(abstractMatch.endChunk.startContainer, // (note that startContainer equals endContainer)
+        abstractMatch.endIndex + abstractMatch.endChunk.startOffset);
+      yield match;
+    }
+  }
+}
+
+interface AbstractRange<TChunk> {
+  startChunk: TChunk;
+  startIndex: number;
+  endChunk: TChunk;
+  endIndex: number;
+}
+
+export function abstractTextQuoteSelectorMatcher(
+  selector: TextQuoteSelector,
+): <TChunk extends Chunk>(textChunks: AsyncIterable<TChunk>) => AsyncGenerator<AbstractRange<TChunk>, void, void> {
+  return async function* matchAll(textChunks) {
     const exact = selector.exact;
     const prefix = selector.prefix || '';
     const suffix = selector.suffix || '';
     const searchPattern = prefix + exact + suffix;
 
-    const iter = document.createNodeIterator(
-      scope.commonAncestorContainer,
-      NodeFilter.SHOW_TEXT,
-      {
-        acceptNode(node: Text) {
-          // Only reveal nodes within the range; and skip any empty text nodes.
-          return scope.intersectsNode(node) && node.length > 0
-            ? NodeFilter.FILTER_ACCEPT
-            : NodeFilter.FILTER_REJECT;
-        },
-      },
-    );
+    for await (const chunk of textChunks) {
+      const chunkValue = chunk.toString();
 
-    // The index of the first character of iter.referenceNode inside the text.
-    let referenceNodeIndex = isTextNode(scope.startContainer)
-      ? -scope.startOffset
-      : 0;
+      // Find the pattern in the chunk (possibly multiple times)
+      // TODO allow pattern to be spread across chunks
+      let fromIndex = 0;
+      while (fromIndex <= chunkValue.length) {
+        const patternStartIndex = chunkValue.indexOf(searchPattern, fromIndex);
+        if (patternStartIndex === -1) break;
 
-    let fromIndex = 0;
-    while (fromIndex <= scopeText.length) {
-      // Find the quote with its prefix and suffix in the string.
-      const patternStartIndex = scopeText.indexOf(searchPattern, fromIndex);
-      if (patternStartIndex === -1) return;
+        // Correct for the prefix and suffix lengths.
+        const matchStartIndex = patternStartIndex + prefix.length;
+        const matchEndIndex = matchStartIndex + exact.length;
 
-      // Correct for the prefix and suffix lengths.
-      const matchStartIndex = patternStartIndex + prefix.length;
-      const matchEndIndex = matchStartIndex + exact.length;
+        yield {
+          startChunk: chunk,
+          startIndex: matchStartIndex,
+          endChunk: chunk,
+          endIndex: matchEndIndex,
+        };
 
-      // Create a range to represent this exact quote in the dom.
-      const match = document.createRange();
-
-      // Seek to the start of the match, make the range start there.
-      referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
-      match.setStart(iter.referenceNode, matchStartIndex - referenceNodeIndex);
-
-      // Seek to the end of the match, make the range end there.
-      referenceNodeIndex += seek(iter, matchEndIndex - referenceNodeIndex);
-      match.setEnd(iter.referenceNode, matchEndIndex - referenceNodeIndex);
-
-      // Yield the match.
-      yield match;
-
-      // Advance the search forward to detect multiple occurrences.
-      fromIndex = matchStartIndex + 1;
+        // Advance the search forward to detect multiple occurrences within the same chunk.
+        fromIndex = matchStartIndex + 1;
+      }
     }
   };
 }
-
-function isTextNode(node: Node): node is Text {
-  return node.nodeType === Node.TEXT_NODE;
-}
diff --git a/packages/dom/test/text-quote/match-cases.ts b/packages/dom/test/text-quote/match-cases.ts
index 099802c..d4c2acd 100644
--- a/packages/dom/test/text-quote/match-cases.ts
+++ b/packages/dom/test/text-quote/match-cases.ts
@@ -99,8 +99,8 @@ export const testCases: {
       {
         startContainerXPath: '//i/text()',
         startOffset: 0,
-        endContainerXPath: '//b/text()[2]',
-        endOffset: 0,
+        endContainerXPath: '//i/text()',
+        endOffset: 11,
       },
     ],
   },
@@ -115,8 +115,8 @@ export const testCases: {
       {
         startContainerXPath: '//title/text()',
         startOffset: 4,
-        endContainerXPath: '//b/text()[1]',
-        endOffset: 0,
+        endContainerXPath: '//title/text()',
+        endOffset: 9,
       },
     ],
   },


[incubator-annotator] 01/02: Update test names and tweak some tests.

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch improve-range-stuff
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 6a790ffcf39f6d3308ad17e0bc6ad4e36c454de6
Author: Gerben <ge...@treora.com>
AuthorDate: Tue Sep 15 19:48:35 2020 +0200

    Update test names and tweak some tests.
    
    All scopes are ranges now.
---
 packages/dom/test/text-quote/match.test.ts | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/packages/dom/test/text-quote/match.test.ts b/packages/dom/test/text-quote/match.test.ts
index 7bdd83c..7d52e15 100644
--- a/packages/dom/test/text-quote/match.test.ts
+++ b/packages/dom/test/text-quote/match.test.ts
@@ -95,36 +95,26 @@ describe('createTextQuoteSelectorMatcher', () => {
     ]);
   });
 
-  it('works with parent of text as scope', async () => {
-    const { html, selector, expected } = testCases['simple'];
-    const doc = domParser.parseFromString(html, 'text/html');
-
-    const scope = doc.createRange();
-    scope.selectNodeContents(evaluateXPath(doc, '//b'));
-
-    await testMatcher(doc, scope, selector, expected);
-  });
-
-  it('works with parent of text as scope, when matching its first characters', async () => {
+  it('works when scope spans one text node’s contents, matching its first characters', async () => {
     const { html, selector, expected } = testCases['first characters'];
     const doc = domParser.parseFromString(html, 'text/html');
 
     const scope = doc.createRange();
-    scope.selectNodeContents(evaluateXPath(doc, '//b'));
+    scope.selectNodeContents(evaluateXPath(doc, '//b/text()'));
 
     await testMatcher(doc, scope, selector, expected);
   });
 
-  it('works with parent of text as scope, when matching its first characters, with an empty text node', async () => {
+  it('works when scope starts with an empty text node, matching its first characters', async () => {
     const { html, selector } = testCases['first characters'];
     const doc = domParser.parseFromString(html, 'text/html');
 
-    const scope = doc.createRange();
-    scope.selectNodeContents(evaluateXPath(doc, '//b'));
-
     const textNode = evaluateXPath(doc, '//b/text()') as Text;
     textNode.splitText(0);
 
+    const scope = doc.createRange();
+    scope.selectNodeContents(evaluateXPath(doc, '//b'));
+
     await testMatcher(doc, scope, selector, [
       {
         startContainerXPath: '//b/text()[2]',
@@ -135,7 +125,7 @@ describe('createTextQuoteSelectorMatcher', () => {
     ]);
   });
 
-  it('works when scope is a Range within one text node', async () => {
+  it('works when scope has both ends within one text node', async () => {
     const { html, selector, expected } = testCases['simple'];
     const doc = domParser.parseFromString(html, 'text/html');
 
@@ -146,7 +136,7 @@ describe('createTextQuoteSelectorMatcher', () => {
     await testMatcher(doc, scope, selector, expected);
   });
 
-  it('works when scope is a Range with both ends inside text nodes', async () => {
+  it('works when scope has both ends inside text nodes', async () => {
     const { html, selector, expected } = testCases['across elements'];
     const doc = domParser.parseFromString(html, 'text/html');
 
@@ -157,7 +147,7 @@ describe('createTextQuoteSelectorMatcher', () => {
     await testMatcher(doc, scope, selector, expected);
   });
 
-  it('works when scope is a Range with both ends inside elements', async () => {
+  it('works when scope has both ends inside an element', async () => {
     const { html, selector, expected } = testCases['across elements'];
     const doc = domParser.parseFromString(html, 'text/html');