You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2020/05/27 18:44:37 UTC

[incubator-annotator] 06/06: Test with custom/empty scopes and fix revealed mistakes

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 15923384007efb0ea58cc251ede67b4081999f1d
Author: Gerben <ge...@treora.com>
AuthorDate: Wed May 27 20:36:27 2020 +0200

    Test with custom/empty scopes and fix revealed mistakes
    
    When describing a range that goes beyond the given scope, it will now
    trim the range to the part that is within the scope; is this derirable?
    We could make it optional and by default throw an error instead.
---
 packages/dom/src/text-quote/describe.ts        | 20 +++++++++----
 packages/dom/test/text-quote-describe-cases.ts | 20 +++++++++----
 packages/dom/test/text-quote-describe.ts       | 40 +++++++++++++++++++++++++-
 packages/dom/test/text-quote-match.ts          | 28 ++++++++++++++++++
 4 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 784881c..15749c9 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -28,6 +28,15 @@ export async function describeTextQuote(
   range: Range,
   scope: DomScope = ownerDocument(range).documentElement,
 ): Promise<TextQuoteSelector> {
+  range = range.cloneRange();
+
+  // Take the part of the range that falls within the scope.
+  const scopeAsRange = rangeFromScope(scope);
+  if (!scopeAsRange.isPointInRange(range.startContainer, range.startOffset))
+    range.setStart(scopeAsRange.startContainer, scopeAsRange.startOffset);
+  if (!scopeAsRange.isPointInRange(range.endContainer, range.endOffset))
+    range.setEnd(scopeAsRange.endContainer, scopeAsRange.endOffset);
+
   const exact = range.toString();
 
   const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact };
@@ -94,10 +103,8 @@ function calculateContextForDisambiguation(
 function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) {
   // Count how many characters the two strings have in common.
   let overlap = 0;
-  while (reverse
-    ? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap]
-    : target[overlap] === impostor[overlap]
-  )
+  const charAt = (s: string, i: number) => reverse ? s[s.length - 1 - i] : s[overlap];
+  while (overlap < target.length && charAt(target, overlap) === charAt(impostor, overlap))
     overlap++;
   if (overlap === target.length)
     return Infinity; // (no substring of target can make it distinguishable from its impostor)
@@ -140,10 +147,11 @@ function getRangeTextPosition(range: Range, scope: DomScope): number {
       },
     },
   );
+  const scopeOffset = isTextNode(scopeAsRange.startContainer) ? scopeAsRange.startOffset : 0;
   if (isTextNode(range.startContainer))
-    return seek(iter, range.startContainer) + range.startOffset;
+    return seek(iter, range.startContainer) + range.startOffset - scopeOffset;
   else
-    return seek(iter, firstTextNodeInRange(range));
+    return seek(iter, firstTextNodeInRange(range)) - scopeOffset;
 }
 
 function firstTextNodeInRange(range: Range): Text {
diff --git a/packages/dom/test/text-quote-describe-cases.ts b/packages/dom/test/text-quote-describe-cases.ts
index 4f34c92..a7e556e 100644
--- a/packages/dom/test/text-quote-describe-cases.ts
+++ b/packages/dom/test/text-quote-describe-cases.ts
@@ -98,11 +98,21 @@ const testCases: {
       suffix: ' ',
     },
   },
-
-  // TODO test for:
-  // empty scope
-  // custom scope
-  // element edges, across elements, etc.
+  'across elements': {
+    html: '<b>To annotate or <i>not</i> to <u>anno</u>tate</b>',
+    range: {
+      startContainerXPath: '//u/text()',
+      startOffset: 0,
+      endContainerXPath: '//b/text()[3]',
+      endOffset: 2,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'annota',
+      prefix: 'to ',
+      suffix: '',
+    },
+  },
 };
 
 export default testCases;
diff --git a/packages/dom/test/text-quote-describe.ts b/packages/dom/test/text-quote-describe.ts
index 749c083..9d2e68a 100644
--- a/packages/dom/test/text-quote-describe.ts
+++ b/packages/dom/test/text-quote-describe.ts
@@ -22,7 +22,7 @@ import { assert } from 'chai';
 import { describeTextQuote } from '../src/text-quote/describe';
 import testCases from './text-quote-describe-cases';
 import testMatchCases from './text-quote-match-cases';
-import { hydrateRange } from './utils';
+import { hydrateRange, evaluateXPath } from './utils';
 
 const domParser = new window.DOMParser();
 
@@ -35,6 +35,44 @@ describe('describeTextQuote', () => {
     })
   }
 
+  it('works with custom scope', async () => {
+    const { html, range } = testCases['minimal prefix'];
+    const doc = domParser.parseFromString(html, 'text/html');
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//b/text()'), 15);
+    scope.setEnd(evaluateXPath(doc, '//b/text()'), 30); // "not to annotate"
+    // const scope = hydrateRange(range, doc).cloneRange()
+    const result = await describeTextQuote(hydrateRange(range, doc), scope);
+    assert.deepEqual(result, {
+      type: 'TextQuoteSelector',
+      exact: 'anno',
+      prefix: '', // no prefix needed in this scope.
+      suffix: '',
+    });
+  });
+
+  it('strips part of the range outside the scope', async () => {
+    const { html, range } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//b/text()'), 6);
+    scope.setEnd(evaluateXPath(doc, '//b/text()'), 17); // "ipsum dolor"
+    const result = await describeTextQuote(hydrateRange(range, doc), scope);
+    assert.deepEqual(result, {
+      type: 'TextQuoteSelector',
+      exact: 'dolor',
+      prefix: '',
+      suffix: '',
+    });
+  });
+
+  it('works if the range equals the scope', async () => {
+    const { html, range, expected } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+    const result = await describeTextQuote(hydrateRange(range, doc), hydrateRange(range, doc));
+    assert.deepEqual(result, expected);
+  });
+
   describe('inverts test cases of text quote matcher', () => {
     const applicableTestCases = Object.entries(testMatchCases)
       .filter(([_, { expected }]) => expected.length > 0);
diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index 1144bc3..e7a025e 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -143,6 +143,34 @@ describe('createTextQuoteSelectorMatcher', () => {
     scope.setEnd(evaluateXPath(doc, '//b'), 4); // before the " yada yada"
     await testMatcher(doc, scope, selector, expected);
   });
+
+  it('ignores quote when scope is an empty range', async () => {
+    const { html, selector } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    const scope = document.createRange();
+    await testMatcher(doc, scope, selector, []);
+  });
+
+  it('ignores quote extending just beyond scope', async () => {
+    const { html, selector } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//b/text()'), 0);
+    scope.setEnd(evaluateXPath(doc, '//b/text()'), 19);
+    await testMatcher(doc, scope, selector, []);
+  });
+
+  it('ignores quote starting just before scope', async () => {
+    const { html, selector } = testCases['simple'];
+    const doc = domParser.parseFromString(html, 'text/html');
+
+    const scope = document.createRange();
+    scope.setStart(evaluateXPath(doc, '//b/text()'), 13);
+    scope.setEnd(evaluateXPath(doc, '//b/text()'), 32);
+    await testMatcher(doc, scope, selector, []);
+  });
 });
 
 async function testMatcher(