You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2020/05/04 17:29:23 UTC
[lucene-solr] branch master updated: LUCENE-9191: ensure
LineFileDocs random seeking effort does not seek into the middle of a
multi-byte UTF-8 encoded Unicode character
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 1783c4a LUCENE-9191: ensure LineFileDocs random seeking effort does not seek into the middle of a multi-byte UTF-8 encoded Unicode character
1783c4a is described below
commit 1783c4ad47990d1a88ac3bb44b2da2c2d2abcc79
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Mon May 4 13:29:00 2020 -0400
LUCENE-9191: ensure LineFileDocs random seeking effort does not seek into the middle of a multi-byte UTF-8 encoded Unicode character
---
.../src/java/org/apache/lucene/util/LineFileDocs.java | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
index fa409d1..6d0c4bf 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
@@ -88,7 +88,6 @@ public class LineFileDocs implements Closeable {
// true if the InputStream is not already randomly seek'd after the if/else block below:
boolean needSkip;
- boolean skipFirstLineFragment = false;
long size = 0L, seekTo = 0L;
if (is == null) {
@@ -109,8 +108,15 @@ public class LineFileDocs implements Closeable {
channel.position(seekTo);
is = Channels.newInputStream(channel);
- // we (likely) seeked to the middle of a line:
- skipFirstLineFragment = true;
+ // read until a newline char; otherwise we may hit a "java.nio.charset.MalformedInputException: Input length = 1"
+ // exception in readLine() below, because we seeked partway through a multi-byte (UTF-8 encoded)
+ // Unicode character:
+ if (seekTo > 0L) {
+ int b;
+ do {
+ b = is.read();
+ } while (b >= 0 && b != 13 && b != 10);
+ }
needSkip = false;
}
@@ -169,11 +175,6 @@ public class LineFileDocs implements Closeable {
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);
-
- if (skipFirstLineFragment) {
- // read until end of line:
- reader.readLine();
- }
}
public synchronized void reset() throws IOException {