You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/05/27 18:44:36 UTC
[incubator-annotator] 05/06: Make match & describe handle empty quotes ‘correctly’
This is an automated email from the ASF dual-hosted git repository.
gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit da1981addaa5596cdf16ac268d8ff9665bfffb4b
Author: Gerben <ge...@treora.com>
AuthorDate: Wed May 27 17:32:30 2020 +0200
Make match & describe handle empty quotes ‘correctly’
What constitutes correct behaviour could be debated, but it seems
reasonable to match at every possible position.
---
packages/dom/src/text-quote/describe.ts | 2 +-
packages/dom/src/text-quote/match.ts | 2 +-
packages/dom/test/text-quote-describe-cases.ts | 16 ++++++-
packages/dom/test/text-quote-match-cases.ts | 66 ++++++++++++++++++++++++++
4 files changed, 83 insertions(+), 3 deletions(-)
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 572e218..784881c 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -52,7 +52,7 @@ function calculateContextForDisambiguation(
// Find all matches of the text in the scope.
const stringMatches: number[] = [];
let fromIndex = 0;
- while (fromIndex < scopeText.length) {
+ while (fromIndex <= scopeText.length) {
const matchIndex = scopeText.indexOf(exactText, fromIndex);
if (matchIndex === -1) break;
stringMatches.push(matchIndex);
diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index f32afce..18b077e 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts
@@ -54,7 +54,7 @@ export function createTextQuoteSelectorMatcher(selector: TextQuoteSelector): Dom
: 0;
let fromIndex = 0;
- while (fromIndex < scopeText.length) {
+ while (fromIndex <= scopeText.length) {
// Find the quote with its prefix and suffix in the string.
const patternStartIndex = scopeText.indexOf(searchPattern, fromIndex);
if (patternStartIndex === -1) return;
diff --git a/packages/dom/test/text-quote-describe-cases.ts b/packages/dom/test/text-quote-describe-cases.ts
index cfc1435..4f34c92 100644
--- a/packages/dom/test/text-quote-describe-cases.ts
+++ b/packages/dom/test/text-quote-describe-cases.ts
@@ -83,9 +83,23 @@ const testCases: {
suffix: '',
},
},
+ 'empty quote': {
+ html: '<b>To annotate or not to annotate</b>',
+ range: {
+ startContainerXPath: '//b/text()',
+ startOffset: 11,
+ endContainerXPath: '//b/text()',
+ endOffset: 11,
+ },
+ expected: {
+ type: 'TextQuoteSelector',
+ exact: '',
+ prefix: 'e',
+ suffix: ' ',
+ },
+ },
// TODO test for:
- // emtpy range
// empty scope
// custom scope
// element edges, across elements, etc.
diff --git a/packages/dom/test/text-quote-match-cases.ts b/packages/dom/test/text-quote-match-cases.ts
index 33d66de..0fd757a 100644
--- a/packages/dom/test/text-quote-match-cases.ts
+++ b/packages/dom/test/text-quote-match-cases.ts
@@ -264,6 +264,72 @@ const testCases: {
},
],
},
+ 'empty quote': {
+ html: '<b>lorem</b>',
+ selector: {
+ type: 'TextQuoteSelector',
+ exact: '',
+ },
+ // A five character string contains six spots to find an empty string
+ expected: Array(6).fill(null).map((_, i) => ({
+ startContainerXPath: '//b/text()',
+ startOffset: i,
+ endContainerXPath: '//b/text()',
+ endOffset: i,
+ }))
+ },
+ 'empty quote, with prefix': {
+ html: '<b>lorem ipsum dolor amet yada yada</b>',
+ selector: {
+ type: 'TextQuoteSelector',
+ exact: '',
+ prefix: 'dolor',
+ },
+ expected: [{
+ startContainerXPath: '//b/text()',
+ startOffset: 17,
+ endContainerXPath: '//b/text()',
+ endOffset: 17,
+ }]
+ },
+ 'empty quote, with suffix': {
+ html: '<b>lorem ipsum dolor amet yada yada</b>',
+ selector: {
+ type: 'TextQuoteSelector',
+ exact: '',
+ suffix: 'i',
+ },
+ expected: [{
+ startContainerXPath: '//b/text()',
+ startOffset: 6,
+ endContainerXPath: '//b/text()',
+ endOffset: 6,
+ }]
+ },
+ 'empty quote, with prefix and suffix': {
+ html: '<b>lorem ipsum dolor amet yada yada</b>',
+ selector: {
+ type: 'TextQuoteSelector',
+ exact: '',
+ prefix: 'lorem ',
+ suffix: 'ipsum',
+ },
+ expected: [{
+ startContainerXPath: '//b/text()',
+ startOffset: 6,
+ endContainerXPath: '//b/text()',
+ endOffset: 6,
+ }]
+ },
+ 'empty quote, no matches': {
+ html: '<b>lorem ipsum dolor amet yada yada</b>',
+ selector: {
+ type: 'TextQuoteSelector',
+ exact: '',
+ prefix: 'X',
+ },
+ expected: [],
+ }
};
export default testCases;