You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/11/20 21:21:06 UTC
[incubator-annotator] 08/14: Refactor pre/suffix disambiguation
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch import-dom-seek
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit 91d245958ef43d092ecbd2ad777af9794c40c516
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Nov 20 12:59:29 2020 +0100
Refactor pre/suffix disambiguation
---
packages/dom/src/text-quote/describe.ts | 79 ++++++++++++++-------------------
1 file changed, 34 insertions(+), 45 deletions(-)
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 688089f..ae79ad0 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -22,7 +22,7 @@ import type { TextQuoteSelector } from '@annotator/selector';
import { ownerDocument } from '../owner-document';
import { Chunk, Chunker, ChunkRange, TextNodeChunker, chunkRangeEquals } from '../chunker';
import { abstractTextQuoteSelectorMatcher } from '.';
-import { TextSeeker } from '../seek';
+import { TextSeeker, Seeker } from '../seek';
export async function describeTextQuote(
range: Range,
@@ -94,54 +94,14 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
// Count how many characters we’d need as a prefix to disqualify this match.
seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
seeker2.seekToChunk(unintendedMatch.startChunk, unintendedMatch.startIndex - prefix.length);
- let sufficientPrefix: string | undefined = prefix;
- while (true) {
- let previousCharacter: string;
- try {
- previousCharacter = seeker1.read(-1);
- } catch (err) {
- sufficientPrefix = undefined; // Start of text reached.
- break;
- }
- sufficientPrefix = previousCharacter + sufficientPrefix;
-
- // Break if the newly added character makes the prefix unambiguous.
- try {
- const unintendedMatchPreviousCharacter = seeker2.read(-1);
- if (previousCharacter !== unintendedMatchPreviousCharacter) break;
- } catch (err) {
- if (err instanceof RangeError)
- break;
- else
- throw err;
- }
- }
+ const extraPrefix = readUntilDifferent(seeker1, seeker2, true);
+ let sufficientPrefix = extraPrefix !== undefined ? extraPrefix + prefix : undefined;
// Count how many characters we’d need as a suffix to disqualify this match.
seeker1.seekToChunk(target.endChunk, target.endIndex + suffix.length);
seeker2.seekToChunk(unintendedMatch.endChunk, unintendedMatch.endIndex + suffix.length);
- let sufficientSuffix: string | undefined = suffix;
- while (true) {
- let nextCharacter: string;
- try {
- nextCharacter = seeker1.read(1);
- } catch (err) {
- sufficientSuffix = undefined; // End of text reached.
- break;
- }
- sufficientSuffix += nextCharacter;
-
- // Break if the newly added character makes the suffix unambiguous.
- try {
- const unintendedMatchNextCharacter = seeker2.read(1);
- if (nextCharacter !== unintendedMatchNextCharacter) break;
- } catch (err) {
- if (err instanceof RangeError)
- break;
- else
- throw err;
- }
- }
+ const extraSuffix = readUntilDifferent(seeker1, seeker2, false);
+ let sufficientSuffix = extraSuffix !== undefined ? suffix + extraSuffix : undefined;
// Use either the prefix or suffix, whichever is shortest.
if (sufficientPrefix !== undefined && (sufficientSuffix === undefined || sufficientPrefix.length <= sufficientSuffix.length)) {
@@ -154,3 +114,32 @@ async function abstractDescribeTextQuote<TChunk extends Chunk<string>>(
}
}
}
+
+/**
+ * Reads from two seekers in lock-step, one `read(±1)` call each per iteration,
+ * accumulating what `seeker1` returns until it differs from what `seeker2`
+ * returns for the same step.
+ *
+ * Presumably each `read` call also moves the seeker's position, so both seekers
+ * are left wherever the last reads put them — confirm against `Seeker`.
+ *
+ * @param seeker1 - Seeker whose output is accumulated into the result.
+ * @param seeker2 - Seeker to compare against. A `RangeError` while reading it
+ *   counts as a difference.
+ * @param reverse - If true, calls `read(-1)` and prepends each piece to the
+ *   result; if false, calls `read(1)` and appends each piece.
+ * @returns Everything read from `seeker1`, up to and including the first piece
+ *   that differs from `seeker2`'s; or `undefined` if reading `seeker1` throws
+ *   before any difference was found.
+ * @throws Whatever `seeker2.read` throws, unless it is a `RangeError`.
+ */
+function readUntilDifferent(
+  seeker1: Seeker,
+  seeker2: Seeker,
+  reverse: boolean,
+): string | undefined {
+  let result = '';
+  while (true) {
+    let nextCharacter: string;
+    try {
+      nextCharacter = seeker1.read(reverse ? -1 : 1);
+    } catch (err) {
+      // NOTE(review): this swallows every error type, whereas the seeker2
+      // handling below rethrows anything that is not a RangeError — confirm
+      // whether that asymmetry is intended.
+      return undefined; // Start/end of text reached: cannot expand result.
+    }
+    // Keep the result in reading order of the text: prepend when going backwards.
+    result = reverse
+      ? nextCharacter + result
+      : result + nextCharacter;
+
+    // Check if the newly added character makes the result differ from the second seeker.
+    // If seeker2 throws a RangeError, comparisonCharacter stays undefined, which
+    // never equals the string just read, so the comparison below returns.
+    let comparisonCharacter: string | undefined;
+    try {
+      comparisonCharacter = seeker2.read(reverse ? -1 : 1);
+    } catch (err) { // A RangeError would merely mean seeker2 is exhausted.
+      if (!(err instanceof RangeError)) throw err;
+    }
+    if (nextCharacter !== comparisonCharacter)
+      return result;
+    // Otherwise both seekers yielded the same piece; read one step further.
+  }
+}