You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/09/15 20:58:34 UTC
[incubator-annotator] branch improve-range-stuff updated (2be85b6 -> 18c9eab)
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a change to branch improve-range-stuff
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.
discard 2be85b6 WIP reimplement range iteration
add 8e24974 Get Babel module resolver working with TypeScript preset
add 28821d6 Set up TypeScript linting with ESLint
add 3aeb518 Do not lint the demo build output
add e59fb15 Lint the webpack config
add 230c0d2 Use names for inter-package references
add 58a0421 Use a single tsconfig.json
add 1320153 Remove JSON module resolution in TypeScript
add d9dcd28 Fix remaining test typecheck failures
add f348d4d Fix lexical scope in switch case
add 35cc009 Apply automatic lint fixes
add cff896a Remove excess space in package.json
add 9642123 nitpicking
add 6b38b35 Fix document->doc
add 519b231 Move test utils.ts one folder up
add d453e3a Support highlighting empty ranges
add aa5bd9e Create tests for highlighter
add d4d933c Merge branch 'tweak-highlighter'
add 6a614da Lint the highlighter tests
add 17fb8a5 Upgrade ESLint and Prettier related dependencies
add 6a86565 Make babel-register a little terser
add 9de88db Add babel-register.js to ESLint files
add defd047 Add tsconfig.json to .ratignore
add 0053b2a Add missing ASL headers
add 1b31726 Apply automatic lint fixes
add 3337df7 Use type-only exports where applicable
add f4c41fc Set @babel/typescript options
add 5c3c9dc Remove default exports
add c00cf1c Enable @typescript-eslint/explicit-function-return-type rule
add 8eaef7c Enable the downlevelIteration TypeScript compiler option
add 3ea6e80 Upgrade mocha from v7 to v8
add 994fc24 Upgrade typescript from v3.8 to v3.9
add 43307f0 Enable esnext compilation for TypeScript
add 3430aa5 Enable support for shipped ECMAScript proposals
add d4e2c0f Clean up unnecessary .js and .ts extension options
add 70a6460 Fix swapped assertion result and expectation
add ecca786 Run CI tests on Node.js 14
add c98dea0 Switch to a solution-style TypeScript configuration
add 9984acf Upgrade Travis environment from xenial to bionic
add 9f584cb Simplify the mocha invocation by ignoring node_modules
add bd010e0 Remove plain JavaScript from babel-register
add e81082e Target the current node version for running tests
add 6935bfc Make sure selector is built before dom
add 5a75d7c Remove dangling comma in a tsconfig.json file
add db8b26e Target only the latest ECMAScript standard
add 7137b54 Enable minimal syntax polyfills in @babel/preset-env
add 26df739 Do not lint generated declaration files
add 1659c20 Include all packages as projects for Typescript ESLint
add 6d03197 Sort .gitignore
add 17e1d1a Add tsconfig.base.json to .ratignore
add a327076 Make .ratignore use the correct syntax
add fbe4b94 Add TypeScript solution support to scripts
add f9db3f2 Remove unnecessary /index in imports
add 924ee09 Include the validation tests in the top-level solution
add 32bba11 Remove baseUrl option in shared TypeScript configuration
add f2246af Updated supported Node.js versions
add 06141b0 Add conditional exports
add 7edcecc Obssessive alphabetizing
add ca3cd02 Add Node.js 13 to CI test matrix
add 675517b Deduplicate lockfile
add 1ba5b4f Commit post-deduplication lockfile changes
add 21cb6da Remove superfluous .js extension support in nyc
add 710e235 Add type qualifier to type-only imports
add acf90a5 Add extensions to import and require statements in build
add bca65b9 Export all types from @annotator/selector
add a2806d4 Use a unified output directory with .js or .mjs extensions
add da48584 Add comment to highlighter about using normalize()
add 500af96 Support empty ranges in highlighter & demo
add 2cea3e3 Merge branch 'demo-empty-quote'
add 90336e1 Output declarations to the lib directory
add fd7c72c Include all the tests in the root project
add 05298c2 Raise the TypeScript target to es2018
add 0121bd2 Turn off the @typescript-eslint/require-await lint rule
add decfe8c Restrict DOM scopes to be instances of Range
add f4adabd Reintroduce ownerDocument(), now in util.ts
add b118253 Remove the no-relative-parent-imports eslint rule.
add 983f005 Fix default scope in describeTextQuote
add e5f3d46 Fix some references to global `document`
add 14df40f Delete unused types.ts
add 1d8ca69 Rename util.ts→owner-document.ts
add 267bbcb Merge pull request #88 from apache/range-as-dom-scope
add 9e36aaf Fix forgotten scope→Range change
add 934746a Fix describeTextQuote’s clipping range to scope
add 83e00f8 Fix spec mismatch: RangeSelector’s start is inclusive.
add 871e51d Support multi-selection in demo
new 6a790ff Update test names and tweak some tests.
new 18c9eab WIP Create chunk abstraction for text quote matching
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
* -- * -- B -- O -- O -- O   (2be85b6)
           \
            N -- N -- N   refs/heads/improve-range-stuff (18c9eab)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.eslintignore | 7 +-
.eslintrc.js | 96 +-
.gitignore | 6 +-
.mocharc.js | 3 +
.ratignore | 31 +-
.travis.yml | 3 +
@types/cartesian/index.d.ts | 3 -
@types/dom-seek/index.d.ts | 3 -
README.md | 8 +-
babel-register.js | 22 +-
babel.config.js | 51 +-
nyc.config.js | 2 +-
package.json | 46 +-
packages/dom/package.json | 14 +-
packages/dom/src/css.ts | 23 +-
packages/dom/src/highlight-range.ts | 45 +-
packages/dom/src/index.ts | 4 +-
.mocharc.js => packages/dom/src/owner-document.ts | 8 +-
packages/dom/src/range/cartesian.ts | 29 +-
packages/dom/src/range/match.ts | 23 +-
packages/dom/src/scope.ts | 41 -
packages/dom/src/text-iterator.ts | 51 +-
packages/dom/src/text-quote/describe.ts | 128 +-
packages/dom/src/text-quote/match.ts | 107 +-
packages/dom/src/types.ts | 25 -
.../dom/src/types/cartesian.d.ts | 8 +-
.../dom/src/types/dom-seek.d.ts | 9 +-
.../test/highlight-range/highlight-range.test.ts | 218 +++
packages/dom/test/range/cartesian.test.ts | 5 +-
packages/dom/test/text-quote/describe-cases.ts | 19 +-
packages/dom/test/text-quote/describe.test.ts | 48 +-
packages/dom/test/text-quote/match-cases.ts | 88 +-
packages/dom/test/text-quote/match.test.ts | 104 +-
packages/dom/test/{text-quote => }/utils.ts | 39 +-
packages/dom/tsconfig.json | 11 +
packages/selector/package.json | 11 +-
packages/selector/src/index.ts | 15 +-
packages/selector/src/types.ts | 22 +-
packages/selector/tsconfig.json | 8 +
test/data-model.test.ts | 46 +-
tsconfig.base.json | 19 +
tsconfig.json | 29 +-
tsconfig.tests.json | 15 -
web/demo/index.js | 32 +-
web/style.css | 2 +-
yarn.lock | 1657 +++++++-------------
46 files changed, 1574 insertions(+), 1610 deletions(-)
delete mode 100644 @types/cartesian/index.d.ts
delete mode 100644 @types/dom-seek/index.d.ts
copy .mocharc.js => packages/dom/src/owner-document.ts (78%)
delete mode 100644 packages/dom/src/scope.ts
delete mode 100644 packages/dom/src/types.ts
copy .mocharc.js => packages/dom/src/types/cartesian.d.ts (84%)
copy .mocharc.js => packages/dom/src/types/dom-seek.d.ts (86%)
create mode 100644 packages/dom/test/highlight-range/highlight-range.test.ts
rename packages/dom/test/{text-quote => }/utils.ts (64%)
create mode 100644 packages/dom/tsconfig.json
create mode 100644 packages/selector/tsconfig.json
create mode 100644 tsconfig.base.json
delete mode 100644 tsconfig.tests.json
[incubator-annotator] 02/02: WIP Create chunk abstraction for text quote matching
Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch improve-range-stuff
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit 18c9eabffd175364385c19902b11f07a630d375a
Author: Gerben <ge...@treora.com>
AuthorDate: Tue Sep 15 19:55:35 2020 +0200
WIP Create chunk abstraction for text quote matching
---
packages/dom/src/text-iterator.ts | 79 ++++++++++++++++++++++
packages/dom/src/text-quote/match.ts | 101 ++++++++++++++--------------
packages/dom/test/text-quote/match-cases.ts | 8 +--
3 files changed, 134 insertions(+), 54 deletions(-)
diff --git a/packages/dom/src/text-iterator.ts b/packages/dom/src/text-iterator.ts
new file mode 100644
index 0000000..dfc1384
--- /dev/null
+++ b/packages/dom/src/text-iterator.ts
@@ -0,0 +1,79 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { ownerDocument } from "./owner-document";
+
+export interface TextRange extends Range {
+ // We guarantee that to always have Text nodes as start and end containers.
+ readonly startContainer: Text;
+ readonly endContainer: Text;
+ cloneRange(): TextRange;
+
+ // Allow only Text nodes to be passed to these methods.
+ insertNode(node: Text): void;
+ selectNodeContents(node: Text): void;
+ setEnd(node: Text, offset: number): void;
+ setStart(node: Text, offset: number): void;
+
+ // Do not allow these methods to be used at all.
+ selectNode(node: never): void;
+ setEndAfter(node: never): void;
+ setEndBefore(node: never): void;
+ setStartAfter(node: never): void;
+ setStartBefore(node: never): void;
+ surroundContents(newParent: never): void;
+}
+
+export interface Chunk {
+ toString(): string;
+}
+
+// Yields ranges whose start and end nodes are both the *same* Text node.
+export async function* chunkRange(scope: Range): AsyncIterable<TextRange> {
+ const document = ownerDocument(scope);
+
+ const iter = document.createNodeIterator(
+ scope.commonAncestorContainer,
+ NodeFilter.SHOW_TEXT,
+ {
+ acceptNode(node: Text) {
+ // Only reveal nodes within the range; and skip any empty text nodes.
+ return scope.intersectsNode(node) && node.length > 0
+ ? NodeFilter.FILTER_ACCEPT
+ : NodeFilter.FILTER_REJECT;
+ },
+ },
+ );
+
+ let node: Text | null;
+ while (node = iter.nextNode() as (Text | null)) {
+ const range = document.createRange() as TextRange;
+ range.selectNodeContents(node);
+
+ if (node === scope.startContainer) {
+ range.setStart(node, scope.startOffset);
+ }
+ if (node === scope.endContainer) {
+ range.setEnd(node, scope.endOffset);
+ }
+
+ yield range;
+ }
+}
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index a4a216f..6b7fd93 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -18,71 +18,72 @@
* under the License.
*/
-import type { Matcher, TextQuoteSelector } from '@annotator/selector';
-import seek from 'dom-seek';
+import type { TextQuoteSelector } from '@annotator/selector';
-import { ownerDocument } from '../owner-document';
+import { chunkRange, Chunk, TextRange } from '../text-iterator';
export function createTextQuoteSelectorMatcher(
selector: TextQuoteSelector,
-): Matcher<Range, Range> {
+): (scope: Range) => AsyncGenerator<TextRange, void, void> {
+ const abstractMatcher = abstractTextQuoteSelectorMatcher(selector);
return async function* matchAll(scope) {
- const document = ownerDocument(scope);
- const scopeText = scope.toString();
+ // Turn the scope into a stream of ranges, each wrapping exactly one text node. We wrap it in
+ // a range such that the first and last text node can be partially included. Could be changed
+ // to e.g. be an object { node: Text, startOffset, endOffset }.
+ const textChunks = chunkRange(scope);
+ for await (const abstractMatch of abstractMatcher(textChunks)) {
+ const match = document.createRange() as TextRange;
+ // The `+…startOffset` part is only relevant for the first chunk, whose text node might be partially in scope.
+ match.setStart(abstractMatch.startChunk.startContainer,
+ abstractMatch.startIndex + abstractMatch.startChunk.startOffset);
+ match.setEnd(abstractMatch.endChunk.startContainer, // (note that startContainer equals endContainer)
+ abstractMatch.endIndex + abstractMatch.endChunk.startOffset);
+ yield match;
+ }
+ }
+}
+
+interface AbstractRange<TChunk> {
+ startChunk: TChunk;
+ startIndex: number;
+ endChunk: TChunk;
+ endIndex: number;
+}
+
+export function abstractTextQuoteSelectorMatcher(
+ selector: TextQuoteSelector,
+): <TChunk extends Chunk>(textChunks: AsyncIterable<TChunk>) => AsyncGenerator<AbstractRange<TChunk>, void, void> {
+ return async function* matchAll(textChunks) {
const exact = selector.exact;
const prefix = selector.prefix || '';
const suffix = selector.suffix || '';
const searchPattern = prefix + exact + suffix;
- const iter = document.createNodeIterator(
- scope.commonAncestorContainer,
- NodeFilter.SHOW_TEXT,
- {
- acceptNode(node: Text) {
- // Only reveal nodes within the range; and skip any empty text nodes.
- return scope.intersectsNode(node) && node.length > 0
- ? NodeFilter.FILTER_ACCEPT
- : NodeFilter.FILTER_REJECT;
- },
- },
- );
+ for await (const chunk of textChunks) {
+ const chunkValue = chunk.toString();
- // The index of the first character of iter.referenceNode inside the text.
- let referenceNodeIndex = isTextNode(scope.startContainer)
- ? -scope.startOffset
- : 0;
+ // Find the pattern in the chunk (possibly multiple times)
+ // TODO allow pattern to be spread across chunks
+ let fromIndex = 0;
+ while (fromIndex <= chunkValue.length) {
+ const patternStartIndex = chunkValue.indexOf(searchPattern, fromIndex);
+ if (patternStartIndex === -1) break;
- let fromIndex = 0;
- while (fromIndex <= scopeText.length) {
- // Find the quote with its prefix and suffix in the string.
- const patternStartIndex = scopeText.indexOf(searchPattern, fromIndex);
- if (patternStartIndex === -1) return;
+ // Correct for the prefix and suffix lengths.
+ const matchStartIndex = patternStartIndex + prefix.length;
+ const matchEndIndex = matchStartIndex + exact.length;
- // Correct for the prefix and suffix lengths.
- const matchStartIndex = patternStartIndex + prefix.length;
- const matchEndIndex = matchStartIndex + exact.length;
+ yield {
+ startChunk: chunk,
+ startIndex: matchStartIndex,
+ endChunk: chunk,
+ endIndex: matchEndIndex,
+ };
- // Create a range to represent this exact quote in the dom.
- const match = document.createRange();
-
- // Seek to the start of the match, make the range start there.
- referenceNodeIndex += seek(iter, matchStartIndex - referenceNodeIndex);
- match.setStart(iter.referenceNode, matchStartIndex - referenceNodeIndex);
-
- // Seek to the end of the match, make the range end there.
- referenceNodeIndex += seek(iter, matchEndIndex - referenceNodeIndex);
- match.setEnd(iter.referenceNode, matchEndIndex - referenceNodeIndex);
-
- // Yield the match.
- yield match;
-
- // Advance the search forward to detect multiple occurrences.
- fromIndex = matchStartIndex + 1;
+ // Advance the search forward to detect multiple occurrences within the same chunk.
+ fromIndex = matchStartIndex + 1;
+ }
}
};
}
-
-function isTextNode(node: Node): node is Text {
- return node.nodeType === Node.TEXT_NODE;
-}
diff --git a/packages/dom/test/text-quote/match-cases.ts b/packages/dom/test/text-quote/match-cases.ts
index 099802c..d4c2acd 100644
--- a/packages/dom/test/text-quote/match-cases.ts
+++ b/packages/dom/test/text-quote/match-cases.ts
@@ -99,8 +99,8 @@ export const testCases: {
{
startContainerXPath: '//i/text()',
startOffset: 0,
- endContainerXPath: '//b/text()[2]',
- endOffset: 0,
+ endContainerXPath: '//i/text()',
+ endOffset: 11,
},
],
},
@@ -115,8 +115,8 @@ export const testCases: {
{
startContainerXPath: '//title/text()',
startOffset: 4,
- endContainerXPath: '//b/text()[1]',
- endOffset: 0,
+ endContainerXPath: '//title/text()',
+ endOffset: 9,
},
],
},
[incubator-annotator] 01/02: Update test names and tweak some tests.
Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch improve-range-stuff
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit 6a790ffcf39f6d3308ad17e0bc6ad4e36c454de6
Author: Gerben <ge...@treora.com>
AuthorDate: Tue Sep 15 19:48:35 2020 +0200
Update test names and tweak some tests.
All scopes are ranges now.
---
packages/dom/test/text-quote/match.test.ts | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)
diff --git a/packages/dom/test/text-quote/match.test.ts b/packages/dom/test/text-quote/match.test.ts
index 7bdd83c..7d52e15 100644
--- a/packages/dom/test/text-quote/match.test.ts
+++ b/packages/dom/test/text-quote/match.test.ts
@@ -95,36 +95,26 @@ describe('createTextQuoteSelectorMatcher', () => {
]);
});
- it('works with parent of text as scope', async () => {
- const { html, selector, expected } = testCases['simple'];
- const doc = domParser.parseFromString(html, 'text/html');
-
- const scope = doc.createRange();
- scope.selectNodeContents(evaluateXPath(doc, '//b'));
-
- await testMatcher(doc, scope, selector, expected);
- });
-
- it('works with parent of text as scope, when matching its first characters', async () => {
+ it('works when scope spans one text node’s contents, matching its first characters', async () => {
const { html, selector, expected } = testCases['first characters'];
const doc = domParser.parseFromString(html, 'text/html');
const scope = doc.createRange();
- scope.selectNodeContents(evaluateXPath(doc, '//b'));
+ scope.selectNodeContents(evaluateXPath(doc, '//b/text()'));
await testMatcher(doc, scope, selector, expected);
});
- it('works with parent of text as scope, when matching its first characters, with an empty text node', async () => {
+ it('works when scope starts with an empty text node, matching its first characters', async () => {
const { html, selector } = testCases['first characters'];
const doc = domParser.parseFromString(html, 'text/html');
- const scope = doc.createRange();
- scope.selectNodeContents(evaluateXPath(doc, '//b'));
-
const textNode = evaluateXPath(doc, '//b/text()') as Text;
textNode.splitText(0);
+ const scope = doc.createRange();
+ scope.selectNodeContents(evaluateXPath(doc, '//b'));
+
await testMatcher(doc, scope, selector, [
{
startContainerXPath: '//b/text()[2]',
@@ -135,7 +125,7 @@ describe('createTextQuoteSelectorMatcher', () => {
]);
});
- it('works when scope is a Range within one text node', async () => {
+ it('works when scope has both ends within one text node', async () => {
const { html, selector, expected } = testCases['simple'];
const doc = domParser.parseFromString(html, 'text/html');
@@ -146,7 +136,7 @@ describe('createTextQuoteSelectorMatcher', () => {
await testMatcher(doc, scope, selector, expected);
});
- it('works when scope is a Range with both ends inside text nodes', async () => {
+ it('works when scope has both ends inside text nodes', async () => {
const { html, selector, expected } = testCases['across elements'];
const doc = domParser.parseFromString(html, 'text/html');
@@ -157,7 +147,7 @@ describe('createTextQuoteSelectorMatcher', () => {
await testMatcher(doc, scope, selector, expected);
});
- it('works when scope is a Range with both ends inside elements', async () => {
+ it('works when scope has both ends inside an element', async () => {
const { html, selector, expected } = testCases['across elements'];
const doc = domParser.parseFromString(html, 'text/html');