You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/03/04 18:51:58 UTC
svn commit: r1574159 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
Author: rmuir
Date: Tue Mar 4 17:51:57 2014
New Revision: 1574159
URL: http://svn.apache.org/r1574159
Log:
SOLR-2934: increase the buffer size for recent dictionaries that have a large number of AF/AM lines before the charset (SET) declaration
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java?rev=1574159&r1=1574158&r2=1574159&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java Tue Mar 4 17:51:57 2014
@@ -154,9 +154,11 @@ public class Dictionary {
this.ignoreCase = ignoreCase;
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
- // hungarian has thousands of AF before the SET, so a 32k buffer is needed
- BufferedInputStream buffered = new BufferedInputStream(affix, 32768);
- buffered.mark(32768);
+ // TODO: we should probably buffer this on disk, since many newer dictionaries
+ // (en_GB, hu_HU, etc.) now have tons of AM lines (morph metadata) before they finally declare
+ // their encoding. For now, this large buffer is a workaround.
+ BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
+ buffered.mark(65536);
String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);