You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@annotator.apache.org by ge...@apache.org on 2021/01/04 17:40:44 UTC

[incubator-annotator] branch more-context created (now 50a17d7)

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a change to branch more-context
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git.


      at 50a17d7  Update tests

This branch includes the following new commits:

     new 4c826b3  Tweak Seeker documentation (add @throws)
     new d967698  Generate less minimal prefixes&suffixes
     new 50a17d7  Update tests

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-annotator] 01/03: Tweak Seeker documentation (add @throws)

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch more-context
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 4c826b3f5de9214aae8286aeff8d96959552fd7f
Author: Gerben <ge...@treora.com>
AuthorDate: Tue Dec 22 13:19:02 2020 +0100

    Tweak Seeker documentation (add @throws)
---
 packages/selector/src/text/seeker.ts | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/packages/selector/src/text/seeker.ts b/packages/selector/src/text/seeker.ts
index 948e615..2c1f788 100644
--- a/packages/selector/src/text/seeker.ts
+++ b/packages/selector/src/text/seeker.ts
@@ -55,6 +55,8 @@ export interface RelativeSeeker<TData extends Iterable<any> = string> {
    *
    * @param length - The number of characters to pass. A negative number moves
    * backwards in the file.
+   * @throws RangeError if there are not enough characters in the file. The
+   * pointer is left at the end/start of the file.
    */
   seekBy(length: number): void;
 
@@ -69,6 +71,8 @@ export interface RelativeSeeker<TData extends Iterable<any> = string> {
    * characters, read further until the end (or start) of the current chunk.
    * @returns The characters passed (in their normal order, even when moving
    * backwards)
+   * @throws RangeError if there are not enough characters in the file. The
+   * pointer is left at the end/start of the file.
    */
   read(length?: number, roundUp?: boolean): TData;
 }
@@ -87,6 +91,8 @@ export interface AbsoluteSeeker<TData extends Iterable<any> = string> {
    * Move to the given position in the file.
    *
    * @param target - The position to end up at.
+   * @throws RangeError if the given position is beyond the end/start of the
+   * file. The pointer is left at the end/start of the file.
    */
   seekTo(target: number): void;
 
@@ -101,6 +107,8 @@ export interface AbsoluteSeeker<TData extends Iterable<any> = string> {
    * further until the end (or start) of the current chunk.
    * @returns The characters passed (in their normal order, even when moving
    * backwards)
+   * @throws RangeError if the given position is beyond the end/start of the
+   * file. The pointer is left at the end/start of the file.
    */
   readTo(target: number, roundUp?: boolean): TData;
 }
@@ -141,6 +149,7 @@ export interface ChunkSeeker<
    * @param chunk - The chunk of the file to move to.
    * @param offset - The offset to move to, relative to the start of `chunk`.
    * Defaults to zero.
+   * @throws RangeError if the given chunk is not found in the file.
    */
   seekToChunk(chunk: TChunk, offset?: number): void;
 
@@ -152,6 +161,9 @@ export interface ChunkSeeker<
    * @param chunk - The chunk of the file to move to.
    * @param offset - The offset to move to, relative to the start of `chunk`.
    * Defaults to zero.
+   * @returns The characters passed (in their normal order, even when moving
+   * backwards)
+   * @throws RangeError if the given chunk is not found in the file.
    */
   readToChunk(chunk: TChunk, offset?: number): TData;
 }


[incubator-annotator] 02/03: Generate less minimal prefixes&suffixes

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch more-context
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit d967698f6c46515a7996bdfd9902511849a932f1
Author: Gerben <ge...@treora.com>
AuthorDate: Thu Dec 24 17:09:22 2020 +0100

    Generate less minimal prefixes&suffixes
    
    1. Round them up to the next whitespace.
    2. Optionally add prefix&suffix around a short quote even if it is not
       ambiguous.
    
    The previous behaviour can still be obtained using the option
    `minimalContext`; especially useful if robustness against document
    variations is not required.
    
    Also refactor a bit, reusing the seekers instead of creating new ones
    on every match.
---
 packages/dom/src/text-quote/describe.ts           |   4 +-
 packages/selector/src/text/describe-text-quote.ts | 182 ++++++++++++++++++----
 packages/selector/src/text/seeker.ts              |  24 +--
 web/demo/index.js                                 |   2 +-
 4 files changed, 167 insertions(+), 45 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 45b8adc..ecbbd36 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -18,7 +18,7 @@
  * under the License.
  */
 
-import type { TextQuoteSelector } from '@annotator/selector';
+import type { TextQuoteSelector, DescribeTextQuoteOptions } from '@annotator/selector';
 import { describeTextQuote as abstractDescribeTextQuote } from '@annotator/selector';
 import { TextNodeChunker } from '../text-node-chunker';
 import { ownerDocument } from '../owner-document';
@@ -26,6 +26,7 @@ import { ownerDocument } from '../owner-document';
 export async function describeTextQuote(
   range: Range,
   maybeScope?: Range,
+  options: DescribeTextQuoteOptions = {},
 ): Promise<TextQuoteSelector> {
   // Default to search in the whole document.
   let scope: Range;
@@ -42,5 +43,6 @@ export async function describeTextQuote(
   return await abstractDescribeTextQuote(
     chunker.rangeToChunkRange(range),
     () => new TextNodeChunker(scope),
+    options,
   );
 }
diff --git a/packages/selector/src/text/describe-text-quote.ts b/packages/selector/src/text/describe-text-quote.ts
index 9b6526a..b3c4d48 100644
--- a/packages/selector/src/text/describe-text-quote.ts
+++ b/packages/selector/src/text/describe-text-quote.ts
@@ -25,21 +25,105 @@ import type { RelativeSeeker } from './seeker';
 import { TextSeeker } from './seeker';
 import { textQuoteSelectorMatcher } from '.';
 
+export interface DescribeTextQuoteOptions {
+  /**
+   * Keep prefix and suffix to the minimum that is necessary to disambiguate
+   * the quote. Use only if robustness against text variations is not required.
+   */
+  minimalContext?: boolean;
+
+  /**
+   * Add prefix and suffix to quotes below this length, such that the total of
+   * prefix + exact + suffix is at least this length.
+   */
+  minimumQuoteLength?: number
+
+  /**
+   * When attempting to find a whitespace to make the prefix/suffix start/end
+   * (resp.) at a word boundary, give up after this number of characters.
+   */
+  maxWordLength?: number;
+}
+
+/**
+ * Returns a {@link TextQuoteSelector} that points at the target quote in the
+ * given text.
+ *
+ * @remarks
+ * The selector will contain the *exact* target quote, and in case this quote
+ * appears multiple times in the text, sufficient context around the quote will
+ * be included in the selector’s *prefix* and *suffix* attributes to
+ * disambiguate. By default, more prefix and suffix are included than strictly
+ * required; both in order to be robust against slight modifications, and in an
+ * attempt to not end halfway a word (mainly for the sake of human readability).
+ *
+ * @param target - The range of characters that the selector should describe
+ * @param scope - The text containing the target range; or, more accurately, a
+ * function creating {@link Chunker}s that allow walking through the text.
+ * @param options
+ * @returns the {@link TextQuoteSelector} that describes *target*.
+ */
 export async function describeTextQuote<TChunk extends Chunk<string>>(
   target: ChunkRange<TChunk>,
   scope: () => Chunker<TChunk>,
+  {
+    minimalContext = false,
+    minimumQuoteLength = 0,
+    maxWordLength = 50,
+  }: DescribeTextQuoteOptions = {},
 ): Promise<TextQuoteSelector> {
-  const seeker = new TextSeeker(scope());
+  // Create a seeker to read the target quote and the context around it.
+  // TODO Possible optimisation: as it need not be an AbsoluteSeeker, a
+  // different implementation could provide direct ‘jump’ access in seekToChunk
+  // (the scope’s Chunker would of course also have to support this).
+  const seekerAtTarget = new TextSeeker(scope());
+
+  // Create a second seeker so that we will be able to simultaneously read
+  // characters near both the target and an unintended match, if we find any.
+  const seekerAtUnintendedMatch = new TextSeeker(scope());
 
   // Read the target’s exact text.
-  seeker.seekToChunk(target.startChunk, target.startIndex);
-  const exact = seeker.readToChunk(target.endChunk, target.endIndex);
+  seekerAtTarget.seekToChunk(target.startChunk, target.startIndex);
+  const exact = seekerAtTarget.readToChunk(target.endChunk, target.endIndex);
 
-  // Starting with an empty prefix and suffix, we search for matches. At each unintended match
-  // we encounter, we extend the prefix or suffix just enough to ensure it will no longer match.
+  // Start with an empty prefix and suffix.
   let prefix = '';
   let suffix = '';
 
+  // If the quote is below the given minimum length, add some prefix & suffix.
+  const currentQuoteLength = () => prefix.length + exact.length + suffix.length;
+  if (currentQuoteLength() < minimumQuoteLength) {
+    // Expand the prefix, but only to reach halfway towards the desired length.
+    seekerAtTarget.seekToChunk(target.startChunk, target.startIndex - prefix.length);
+    const length = Math.floor((minimumQuoteLength - currentQuoteLength()) / 2);
+    prefix = seekerAtTarget.read(-length, false, true) + prefix;
+
+    // If needed, expand the suffix to achieve the minimum length.
+    if (currentQuoteLength() < minimumQuoteLength) {
+      seekerAtTarget.seekToChunk(target.endChunk, target.endIndex + suffix.length);
+      const length = minimumQuoteLength - currentQuoteLength();
+      suffix = suffix + seekerAtTarget.read(length, false, true);
+
+      // We might have to expand the prefix again (if at the end of the scope).
+      if (currentQuoteLength() < minimumQuoteLength) {
+        seekerAtTarget.seekToChunk(target.startChunk, target.startIndex - prefix.length);
+        const length = minimumQuoteLength - currentQuoteLength();
+        prefix = seekerAtTarget.read(-length, false, true) + prefix;
+      }
+    }
+  }
+
+  // Expand prefix & suffix to avoid them ending somewhere halfway in a word.
+  if (!minimalContext) {
+    seekerAtTarget.seekToChunk(target.startChunk, target.startIndex - prefix.length);
+    prefix = readUntilWhitespace(seekerAtTarget, maxWordLength, true) + prefix;
+    seekerAtTarget.seekToChunk(target.endChunk, target.endIndex + suffix.length);
+    suffix = suffix + readUntilWhitespace(seekerAtTarget, maxWordLength, false);
+  }
+
+  // Search for matches of the quote using the current prefix and suffix. At
+  // each unintended match we encounter, we extend the prefix or suffix to
+  // ensure it will no longer match.
   while (true) {
     const tentativeSelector: TextQuoteSelector = {
       type: 'TextQuoteSelector',
@@ -48,9 +132,7 @@ export async function describeTextQuote<TChunk extends Chunk<string>>(
       suffix,
     };
 
-    const matches = textQuoteSelectorMatcher(tentativeSelector)(
-      scope(),
-    );
+    const matches = textQuoteSelectorMatcher(tentativeSelector)(scope());
     let nextMatch = await matches.next();
 
     // If this match is the intended one, no need to act.
@@ -72,42 +154,44 @@ export async function describeTextQuote<TChunk extends Chunk<string>>(
     // We’ll have to add more prefix/suffix to disqualify this unintended match.
     const unintendedMatch = nextMatch.value;
 
-    // Create two seekers to simultaneously read characters near both the target
-    // and the unintended match.
-    // Possible optimisation: as these need not be AbsoluteSeekers, a different
-    // implementation could provide direct ‘jump’ access in seekToChunk (the
-    // scope’s Chunker would of course also have to support this).
-    const seeker1 = new TextSeeker(scope());
-    const seeker2 = new TextSeeker(scope());
-
     // Count how many characters we’d need as a prefix to disqualify this match.
-    seeker1.seekToChunk(target.startChunk, target.startIndex - prefix.length);
-    seeker2.seekToChunk(
+    seekerAtTarget.seekToChunk(target.startChunk, target.startIndex - prefix.length);
+    seekerAtUnintendedMatch.seekToChunk(
       unintendedMatch.startChunk,
       unintendedMatch.startIndex - prefix.length,
     );
-    const extraPrefix = readUntilDifferent(seeker1, seeker2, true);
+    let extraPrefix = readUntilDifferent(seekerAtTarget, seekerAtUnintendedMatch, true);
+    if (extraPrefix !== undefined && !minimalContext)
+      extraPrefix = readUntilWhitespace(seekerAtTarget, maxWordLength, true) + extraPrefix;
 
     // Count how many characters we’d need as a suffix to disqualify this match.
-    seeker1.seekToChunk(target.endChunk, target.endIndex + suffix.length);
-    seeker2.seekToChunk(
+    seekerAtTarget.seekToChunk(target.endChunk, target.endIndex + suffix.length);
+    seekerAtUnintendedMatch.seekToChunk(
       unintendedMatch.endChunk,
       unintendedMatch.endIndex + suffix.length,
     );
-    const extraSuffix = readUntilDifferent(seeker1, seeker2, false);
-
-    // Use either the prefix or suffix, whichever is shortest.
-    if (
-      extraPrefix !== undefined &&
-      (extraSuffix === undefined || extraPrefix.length <= extraSuffix.length)
-    ) {
-      prefix = extraPrefix + prefix;
-    } else if (extraSuffix !== undefined) {
-      suffix = suffix + extraSuffix;
+    let extraSuffix = readUntilDifferent(seekerAtTarget, seekerAtUnintendedMatch, false);
+    if (extraSuffix !== undefined && !minimalContext)
+      extraSuffix = extraSuffix + readUntilWhitespace(seekerAtTarget, maxWordLength, false);
+
+    if (minimalContext) {
+      // Use either the prefix or suffix, whichever is shortest.
+      if (
+        extraPrefix !== undefined &&
+        (extraSuffix === undefined || extraPrefix.length <= extraSuffix.length)
+      ) {
+        prefix = extraPrefix + prefix;
+      } else if (extraSuffix !== undefined) {
+        suffix = suffix + extraSuffix;
+      } else {
+        throw new Error(
+          'Target cannot be disambiguated; how could that have happened‽',
+        );
+      }
     } else {
-      throw new Error(
-        'Target cannot be disambiguated; how could that have happened‽',
-      );
+      // For redundancy, expand both prefix and suffix.
+      if (extraPrefix !== undefined) prefix = extraPrefix + prefix;
+      if (extraSuffix !== undefined) suffix = suffix + extraSuffix;
     }
   }
 }
@@ -138,3 +222,33 @@ function readUntilDifferent(
     if (nextCharacter !== comparisonCharacter) return result;
   }
 }
+
+function readUntilWhitespace(
+  seeker: RelativeSeeker,
+  limit: number = Infinity,
+  reverse = false
+): string {
+  let result = '';
+  while (result.length < limit) {
+    let nextCharacter: string;
+    try {
+      nextCharacter = seeker.read(reverse ? -1 : 1);
+    } catch (err) {
+      if (!(err instanceof RangeError)) throw err;
+      break; // End/start of text reached.
+    }
+
+    // Stop if we reached whitespace.
+    if (isWhitespace(nextCharacter)) {
+      seeker.seekBy(reverse ? 1 : -1); // ‘undo’ the last read.
+      break;
+    }
+
+    result = reverse ? nextCharacter + result : result + nextCharacter;
+  }
+  return result;
+}
+
+function isWhitespace(s: string): boolean {
+  return s.match(/^\s+$/) !== null;
+}
diff --git a/packages/selector/src/text/seeker.ts b/packages/selector/src/text/seeker.ts
index 2c1f788..1605d45 100644
--- a/packages/selector/src/text/seeker.ts
+++ b/packages/selector/src/text/seeker.ts
@@ -69,12 +69,14 @@ export interface RelativeSeeker<TData extends Iterable<any> = string> {
    * backwards in the file.
    * @param roundUp - If true, then, after reading the given number of
    * characters, read further until the end (or start) of the current chunk.
+   * @param lessIsFine - If true, and there are not enough characters in the
+   * file, return the result so far instead of throwing an error.
    * @returns The characters passed (in their normal order, even when moving
    * backwards)
-   * @throws RangeError if there are not enough characters in the file. The
-   * pointer is left at the end/start of the file.
+   * @throws RangeError if there are not enough characters in the file (unless
+   * `lessIsFine` is true). The pointer is left at the end/start of the file.
    */
-  read(length?: number, roundUp?: boolean): TData;
+  read(length?: number, roundUp?: boolean, lessIsFine?: boolean): TData;
 }
 
 /**
@@ -195,8 +197,8 @@ export class TextSeeker<TChunk extends Chunk<string>>
     this.seekTo(0);
   }
 
-  read(length: number, roundUp = false): string {
-    return this.readTo(this.position + length, roundUp);
+  read(length: number, roundUp = false, lessIsFine = false): string {
+    return this._readOrSeekTo(true, this.position + length, roundUp, lessIsFine);
   }
 
   readTo(target: number, roundUp = false): string {
@@ -277,12 +279,13 @@ export class TextSeeker<TChunk extends Chunk<string>>
     }
   }
 
-  private _readOrSeekTo(read: true, target: number, roundUp?: boolean): string;
-  private _readOrSeekTo(read: false, target: number, roundUp?: boolean): void;
+  private _readOrSeekTo(read: true, target: number, roundUp?: boolean, lessIsFine?: boolean): string;
+  private _readOrSeekTo(read: false, target: number, roundUp?: boolean, lessIsFine?: boolean): void;
   private _readOrSeekTo(
     read: boolean,
     target: number,
     roundUp = false,
+    lessIsFine = false,
   ): string | void {
     let result = '';
 
@@ -298,7 +301,7 @@ export class TextSeeker<TChunk extends Chunk<string>>
           const [data, nextChunk] = this._readToNextChunk();
           if (read) result += data;
           if (nextChunk === null) {
-            if (this.position === target) break;
+            if (this.position === target || lessIsFine) break;
             else throw new RangeError(E_END);
           }
         } else {
@@ -335,7 +338,10 @@ export class TextSeeker<TChunk extends Chunk<string>>
         } else {
           const [data, previousChunk] = this._readToPreviousChunk();
           if (read) result = data + result;
-          if (previousChunk === null) throw new RangeError(E_END);
+          if (previousChunk === null) {
+            if (lessIsFine) break;
+            else throw new RangeError(E_END);
+          }
         }
       }
     }
diff --git a/web/demo/index.js b/web/demo/index.js
index d513252..a9773a4 100644
--- a/web/demo/index.js
+++ b/web/demo/index.js
@@ -138,7 +138,7 @@ async function onSelectionChange() {
     const selector =
       describeMode === 'TextPosition'
         ? await describeTextPosition(range, scope)
-        : await describeTextQuote(range, scope);
+        : await describeTextQuote(range, scope, { minimumQuoteLength: 10 });
     await anchor(selector);
   }
 }


[incubator-annotator] 03/03: Update tests

Posted by ge...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch more-context
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 50a17d7e2eaea670b28a1518341e83a7f36650ae
Author: Gerben <ge...@treora.com>
AuthorDate: Fri Dec 25 00:26:36 2020 +0100

    Update tests
---
 packages/dom/test/text-quote/describe-cases.ts | 295 +++++++++++++++++++++++--
 packages/dom/test/text-quote/describe.test.ts  |  38 +++-
 2 files changed, 311 insertions(+), 22 deletions(-)

diff --git a/packages/dom/test/text-quote/describe-cases.ts b/packages/dom/test/text-quote/describe-cases.ts
index dec995e..9edb09b 100644
--- a/packages/dom/test/text-quote/describe-cases.ts
+++ b/packages/dom/test/text-quote/describe-cases.ts
@@ -18,17 +18,167 @@
  * under the License.
  */
 
-import type { TextQuoteSelector } from '@annotator/selector';
+import type { TextQuoteSelector, DescribeTextQuoteOptions } from '@annotator/selector';
 import type { RangeInfo } from '../utils';
 
-export const testCases: {
+export interface DescribeTextQuoteTestCases {
   [name: string]: {
     html: string;
     range: RangeInfo;
+    options: DescribeTextQuoteOptions;
     expected: TextQuoteSelector;
   };
-} = {
-  simple: {
+}
+
+export const testCasesWithoutOptions: DescribeTextQuoteTestCases = {
+  'no context': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 12,
+      endContainerXPath: '//b/text()',
+      endOffset: 22,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor amet',
+      prefix: '',
+      suffix: '',
+    },
+  },
+  'use prefix to complete word': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 14,
+      endContainerXPath: '//b/text()',
+      endOffset: 22,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'lor amet',
+      prefix: 'do',
+      suffix: '',
+    },
+  },
+  'use suffix to complete word': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 12,
+      endContainerXPath: '//b/text()',
+      endOffset: 20,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'dolor am',
+      prefix: '',
+      suffix: 'et',
+    },
+  },
+  'add context to disambiguate': {
+    html: '<b>To annotate or not to annotate</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 15,
+      endContainerXPath: '//b/text()',
+      endOffset: 18,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'not',
+      prefix: 'or ',
+      suffix: ' to',
+    },
+  },
+  'only prefix for end of text': {
+    html: '<b>To annotate or not to annotate</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 22,
+      endContainerXPath: '//b/text()',
+      endOffset: 30,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'annotate',
+      prefix: 'to ',
+      suffix: '',
+    },
+  },
+  'only suffix for start of text': {
+    html: '<b>annotate or not to annotate, yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 0,
+      endContainerXPath: '//b/text()',
+      endOffset: 8,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'annotate',
+      prefix: '',
+      suffix: ' or',
+    },
+  },
+  'multiple, overlapping false matches': {
+    html: '<b>a a a a a a a a a a</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 8,
+      endContainerXPath: '//b/text()',
+      endOffset: 13,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'a a a',
+      prefix: 'a a a a ',
+      suffix: ' a a a',
+    },
+  },
+  'empty quote': {
+    html: '<b>To annotate or not to annotate</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 11,
+      endContainerXPath: '//b/text()',
+      endOffset: 11,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'To annotate',
+      suffix: ' or',
+    },
+  },
+  'across elements': {
+    html: '<b>To annotate or <i>not</i> to <u>anno</u>tat</b>e',
+    range: {
+      startContainerXPath: '//u/text()',
+      startOffset: 0,
+      endContainerXPath: '//b/text()[3]',
+      endOffset: 2,
+    },
+    options: {},
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'annota',
+      prefix: 'to ',
+      suffix: 'te',
+    },
+  },
+};
+
+export const testCasesWithMinimalContext: DescribeTextQuoteTestCases = {
+  'no context': {
     html: '<b>lorem ipsum dolor amet yada yada</b>',
     range: {
       startContainerXPath: '//b/text()',
@@ -36,6 +186,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 20,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: 'dolor am',
@@ -51,6 +204,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 26,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: 'anno',
@@ -66,6 +222,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 11,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: 'tate',
@@ -81,6 +240,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 2,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: 'to',
@@ -96,6 +258,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 30,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: 'tate',
@@ -111,6 +276,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 7,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: 'aaa',
@@ -126,6 +294,9 @@ export const testCases: {
       endContainerXPath: '//b/text()',
       endOffset: 11,
     },
+    options: {
+      minimalContext: true,
+    },
     expected: {
       type: 'TextQuoteSelector',
       exact: '',
@@ -133,19 +304,117 @@ export const testCases: {
       suffix: ' ',
     },
   },
-  'across elements': {
-    html: '<b>To annotate or <i>not</i> to <u>anno</u>tate</b>',
+};
+
+export const testCasesWithMinimumQuoteLength: DescribeTextQuoteTestCases = {
+  'balance prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
     range: {
-      startContainerXPath: '//u/text()',
-      startOffset: 0,
-      endContainerXPath: '//b/text()[3]',
-      endOffset: 2,
+      startContainerXPath: '//b/text()',
+      startOffset: 12,
+      endContainerXPath: '//b/text()',
+      endOffset: 17,
+    },
+    options: {
+      minimumQuoteLength: 10,
     },
     expected: {
       type: 'TextQuoteSelector',
-      exact: 'annota',
-      prefix: 'to ',
-      suffix: '',
+      exact: 'dolor',
+      prefix: 'ipsum ',
+      suffix: ' amet',
+    },
+  },
+  'use prefix for end of text': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 28,
+      endContainerXPath: '//b/text()',
+      endOffset: 30,
+    },
+    options: {
+      minimumQuoteLength: 10,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'ya',
+      prefix: 'amet yada ',
+      suffix: 'da',
+    },
+  },
+  'use suffix for start of text': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 2,
+      endContainerXPath: '//b/text()',
+      endOffset: 3,
+    },
+    options: {
+      minimumQuoteLength: 10,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'r',
+      prefix: 'lo',
+      suffix: 'em ipsum',
+    },
+  },
+};
+
+export const testCasesWithMaxWordLength: DescribeTextQuoteTestCases = {
+  'too long prefix': {
+    html: '<b>Surely counterantidisintermediationism is too long to quote.</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 28,
+      endContainerXPath: '//b/text()',
+      endOffset: 31,
+    },
+    options: {
+      maxWordLength: 10,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'dia',
+      prefix: 'disinterme',
+      suffix: 'tionism',
+    },
+  },
+  'too long suffix': {
+    html: '<b>Surely counterantidisintermediationism is too long to quote.</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 14,
+      endContainerXPath: '//b/text()',
+      endOffset: 18,
+    },
+    options: {
+      maxWordLength: 10,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'anti',
+      prefix: 'counter',
+      suffix: 'disinterme',
+    },
+  },
+  'default should be 50': {
+    html: '<b>The chromosome is ACATATTACGTTAGATATGACACCCATATAGTTATTTATAAGATGGGACAGATATTAGTTTAAAAA</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 18,
+      endContainerXPath: '//b/text()',
+      endOffset: 27,
+    },
+    options: {
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: 'ACATATTAC',
+      prefix: '',
+      suffix: 'GTTAGATATGACACCCATATAGTTATTTATAAGATGGGACAGATATTAGT',
     },
   },
 };
diff --git a/packages/dom/test/text-quote/describe.test.ts b/packages/dom/test/text-quote/describe.test.ts
index 25bfd0f..147f6d3 100644
--- a/packages/dom/test/text-quote/describe.test.ts
+++ b/packages/dom/test/text-quote/describe.test.ts
@@ -21,29 +21,48 @@
 import { assert } from 'chai';
 import { describeTextQuote } from '../../src/text-quote/describe';
 import { hydrateRange, evaluateXPath } from '../utils';
-import { testCases } from './describe-cases';
+import { DescribeTextQuoteTestCases, testCasesWithMinimumQuoteLength, testCasesWithMaxWordLength } from './describe-cases';
+import { testCasesWithMinimalContext, testCasesWithoutOptions } from './describe-cases';
 import { testCases as testMatchCases } from './match-cases';
 
 const domParser = new window.DOMParser();
 
-describe('describeTextQuote', () => {
-  for (const [name, { html, range, expected }] of Object.entries(testCases)) {
+function runTestCases(testCases: DescribeTextQuoteTestCases) {
+  for (const [name, { html, range, expected, options }] of Object.entries(testCases)) {
     it(`works for case: ${name}`, async () => {
       const doc = domParser.parseFromString(html, 'text/html');
       const scope = doc.createRange();
       scope.selectNodeContents(doc);
-      const result = await describeTextQuote(hydrateRange(range, doc), scope);
+      const result = await describeTextQuote(hydrateRange(range, doc), scope, options);
       assert.deepEqual(result, expected);
     });
   }
+}
+
+describe('describeTextQuote', () => {
+  describe('without options', () => {
+    runTestCases(testCasesWithoutOptions);
+  });
+
+  describe('with minimal context', () => {
+    runTestCases(testCasesWithMinimalContext);
+  });
+
+  describe('with minimum quote length', () => {
+    runTestCases(testCasesWithMinimumQuoteLength);
+  });
+
+  describe('with max word length', () => {
+    runTestCases(testCasesWithMaxWordLength);
+  });
 
   it('works with custom scope', async () => {
-    const { html, range } = testCases['minimal prefix'];
+    const { html, range, options } = testCasesWithMinimalContext['minimal prefix'];
     const doc = domParser.parseFromString(html, 'text/html');
     const scope = doc.createRange();
     scope.setStart(evaluateXPath(doc, '//b/text()'), 15);
     scope.setEnd(evaluateXPath(doc, '//b/text()'), 30); // "not to annotate"
-    const result = await describeTextQuote(hydrateRange(range, doc), scope);
+    const result = await describeTextQuote(hydrateRange(range, doc), scope, options);
     assert.deepEqual(result, {
       type: 'TextQuoteSelector',
       exact: 'anno',
@@ -53,12 +72,12 @@ describe('describeTextQuote', () => {
   });
 
   it('strips part of the range outside the scope', async () => {
-    const { html, range } = testCases['simple'];
+    const { html, range, options } = testCasesWithMinimalContext['no context'];
     const doc = domParser.parseFromString(html, 'text/html');
     const scope = doc.createRange();
     scope.setStart(evaluateXPath(doc, '//b/text()'), 6);
     scope.setEnd(evaluateXPath(doc, '//b/text()'), 17); // "ipsum dolor"
-    const result = await describeTextQuote(hydrateRange(range, doc), scope);
+    const result = await describeTextQuote(hydrateRange(range, doc), scope, options);
     assert.deepEqual(result, {
       type: 'TextQuoteSelector',
       exact: 'dolor',
@@ -68,11 +87,12 @@ describe('describeTextQuote', () => {
   });
 
   it('works if the range equals the scope', async () => {
-    const { html, range, expected } = testCases['simple'];
+    const { html, range, expected, options } = testCasesWithMinimalContext['no context'];
     const doc = domParser.parseFromString(html, 'text/html');
     const result = await describeTextQuote(
       hydrateRange(range, doc),
       hydrateRange(range, doc),
+      options,
     );
     assert.deepEqual(result, expected);
   });