You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/02/12 11:16:33 UTC
svn commit: r909334 - in /lucene/java/branches/lucene_2_9: ./ docs/
src/java/org/apache/lucene/index/ src/site/src/documentation/content/xdocs/
Author: mikemccand
Date: Fri Feb 12 10:16:33 2010
New Revision: 909334
URL: http://svn.apache.org/viewvc?rev=909334&view=rev
Log:
LUCENE-2257: improve max per-segment term count limit from ~2.1B to ~274B
Modified:
lucene/java/branches/lucene_2_9/CHANGES.txt
lucene/java/branches/lucene_2_9/docs/fileformats.html
lucene/java/branches/lucene_2_9/docs/fileformats.pdf
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/SegmentTermEnum.java
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/TermInfosReader.java
lucene/java/branches/lucene_2_9/src/site/src/documentation/content/xdocs/fileformats.xml
Modified: lucene/java/branches/lucene_2_9/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/CHANGES.txt?rev=909334&r1=909333&r2=909334&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/CHANGES.txt Fri Feb 12 10:16:33 2010
@@ -27,6 +27,10 @@
* LUCENE-2158: At high indexing rates, NRT reader could temporarily
lose deletions. (Mike McCandless)
+ * LUCENE-2257: Increase max number of unique terms in one segment to
+ termIndexInterval (default 128) * ~2.1 billion = ~274 billion.
+ (Tom Burton-West via Mike McCandless)
+
API Changes
* LUCENE-2182: DEFAULT_ATTRIBUTE_FACTORY was failing to load
Modified: lucene/java/branches/lucene_2_9/docs/fileformats.html
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/docs/fileformats.html?rev=909334&r1=909333&r2=909334&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/docs/fileformats.html (original)
+++ lucene/java/branches/lucene_2_9/docs/fileformats.html Fri Feb 12 10:16:33 2010
@@ -2547,11 +2547,12 @@
<div class="section">
<p>
When referring to term numbers, Lucene's current
- implementation uses a Java <span class="codefrag">int</span>, which means
- the maximum number of unique terms in any single index
- segment is 2,147,483,648. This is technically not a
- limitation of the index file format, just of Lucene's
- current implementation.
+ implementation uses a Java <span class="codefrag">int</span> to hold the
+ term index, which means the maximum number of unique
+ terms in any single index segment is ~2.1 billion times
+ the term index interval (default 128) = ~274 billion.
+ This is technically not a limitation of the index file
+ format, just of Lucene's current implementation.
</p>
<p>
Similarly, Lucene uses a Java <span class="codefrag">int</span> to refer
Modified: lucene/java/branches/lucene_2_9/docs/fileformats.pdf
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/docs/fileformats.pdf?rev=909334&r1=909333&r2=909334&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/docs/fileformats.pdf (original)
+++ lucene/java/branches/lucene_2_9/docs/fileformats.pdf Fri Feb 12 10:16:33 2010
@@ -667,10 +667,10 @@
>>
endobj
111 0 obj
-<< /Length 1801 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1845 /Filter [ /ASCII85Decode /FlateDecode ]
>>
stream
-Gatm<>E@c%'Ro4HjK13fG\lPL<L(^M)=;1H]aSa"3di^?5e;fsk/+q5qDnr@j5@/#>)aB'A"VfQS_N=$kG,NXdH;*]f';idMD;tC&-V:5k;U.'/tp@-$LTH7^Q,1p?N)YM*ArPAGr!8(0!,&.e*ZcV$F&^;F6Ba=Bj-erc@iWHmO",>o\fW2U#KJ8%,]-0<;4\qcMW)dW(W4r')L'%gC?hm*f`HWnV%dbVZG#-_jUj#'P\O_oZ'&jIQr]%54&Rn#K1_F/7O2B740fG=lc[4Ro3YmceBq7Jn539+CM0NV+[Zoel+/JaFIhGpmP4I%\.R[ZC5-ce+e*fLJ4fAT\@3Qg<OUkg%TUHnGP,C=+#_'.pZ4H",NEsLTNJM(fBH(?KeXfo^Bc8fTuro@"@"D!KU38XjWeP#<ha-pVbZZknj=14l:`^lpdt5U*+CR:Rd:o1k!7K+CnUk"W%n9HHE4gBNuJ8`C+?h=TFac<%Jbl7LF%';\'KIXPn!+dA)t3eiE2m_tS#TS7_cUE%q?XTk5a=alQPQGf:S;O8pJoArGIg8D1^=@"9Xra$]:!Qj.`rXSB_"RPF;J`A:g<&58dgZ<6Y60G+Xe$CU3W``ZHtZ,gKj2a.5`>Gc1u-7W7h"cbCre?E8TM&At<U83)97qa[m#f:qI#Ki-S6k-)'9O'n)hi>8O!?\Zsm0q0)\(>&=We,.h1o`pZE7qOk1rFOCj3(%1n;J"B0i&M[BePZMTY8(,j_P+]_P5I?\J/(K8Z6DjQ_^H`SEO/<c%UDPoC1LgWl98*ZQ1]Y35)SuSu#ZX`icHJ?a0aBM$o43HuJb;OMG;$MM6UhF!JK?Ak#AJZ[A84JW*RL^jn2O7"f!cRE`_?KbQ6?b_61\#p$X"5_iJeHa38glO9^pJ:2R^=5R/3#XE]"\+C,=$RjI@3i)9d>Y">"_S/9Z@,81;O;nec;K.Sh35ViII!^N'h`d&Fa!oTbnAF-)SW:(h*^RW!ZkFM
nf4=9B6Til(FqYcoO?tniTMb>_+7;LbV\NtQAdON4g_="n/dHr.9:I`img>pmXGCoL.5dT;_%U,hKC-2."mArlK#2QtTjDcU^OZijUGX1BS&fM_IqtMc4YW2pchD7rNX.bkiX:CO+B#XE_l!(M&o8,(lC&:VFoWIf2JGO'0#O'r5)PBdF3_]8*R*2#c0Gb-m"MMVl"<Q\)h\G'())cK.N`6a-%YMn%^1Ye$lp!7*.*f0%_Tkd\nG$RNi()\E3I*dq/R_VI+tT[^l@#-T'\c[&GE1s;J9gn?>4r1ri%&PJjQZZUW)F>7c#SGN$u)DUp-W7j\O%/N"t%)+rC6*J5b&i/rg?gf)qQ>0oC<>?l$4CDGTap@Wn1#_`e#([onhm(7l/DLQ].K%&"i%@qog]SY54_K@#]&\CdSEi2a6_A-\ptml007OI2_pGssbFA@6[laqdrV2`R)'V"YX4fS*\3f>,V>4#L01afe)u\m0--fn,sQ)-]FFs,o^4H.qb\/+2kb`6l+lrN;Y4Gh8JS#)F1%g,CT;[NS:Jn>_(cEbKaGProLPjEZK-n>=?sW2b%F]e/'&0&"]e\7%IOAG78TS[D<B6J_lP+`S*fEMr*BXh*5qda[qM%Kd&rep2">N8C+WmlWBfB"=LHK?[dUc'ujV5J\KMr?G`JG;6k+/'3XN&F5`o:W;koaM4seWJhHt0u4mEMOr5Bo>0`iiqk,6HmeKLq4s5I7W&`*]&'g1*_%_OUd!0NMK,:Za1Fo%!78.6-^dpe7i'q*bg[A:V/te//^*dZeQmd3b+3">0,"]'IfX.'6n&~>
+Gatm<>BAg]'RnB3n/LcAW02O@J/Pq745JTKec65F#:5KGZ*oni-#N@,hi;M#d?,Yd>@9L+Eb+E>^2oQXAs9^1ai=_X<r-kr8MS(g'LhkJ&@T?O5+sCo9('qO5>Q0'^&@dco^idiB%[<2Vm-.F?@O`+.)LNo/%e)!DF@c9H;iPf\X6.,28n**]:&i5-/YXjesG6chX''KWD??0@/BgBgPg/i+("o=j<n96JHW2\NCtH4;(--Hcu8[Ho3me+HoMW^Nj.ai3S6_S8e,ZT<EZQ[*"p'`+#gT(P2urT+CM6TM?VH5S#$TYAAL$dI)Tj*"^5eHhXIk!>2NX(P0'ODl8@)CV8'q/0JJ'qT,2dZj@eRhl2)eG5hql,[U'&GA5g]tBkWd"ht*C]:.X,IJD:$Y8*\o"9@*4#=aI`*i2[+QM-hWkQX:B7;*(I#aIE4Y-7]I@hlm_I=Ys^3AIVYCG^U?e;aZ/$Qjg2,^d?/3#mjXsjHcBla:tk5Zc1Wp9=8F?5!XlfGL:G5q0gNL#ZR+WL!h;L)l;MY.r[`SLg6)`G/k_:L[KZnU*72gF,.XiE3`m5#7f1@JS.WIo@efS#26'!,qgh`+lfY'KqWP4WnNG$TdmPA&P1bNXJEpZ\B5_g1KufB?AQgcV/oL3562M1$jDUZ1WCYN83`gae1`4ETh\sT\c[KpZmarCa$tR*@=Y;>DfdgNV-pfL=BX>945KHrX\,OeJpGQrNP66?Z4=7n(UJ]F!FJr.#+@(,[$AS,(A(1%\_5*=<PGsSO*;ZEEi+/(;=T%W.k=A%=B:L<*HkrMe:Q"K!o(Z8M"5fTp5;nc@?lk,O^Rbo0TFml-2!Z@S",OMUdbOmPgq&m$kpp(/s\/TK^i\n,l!8D/"_jCY\W;Bet,j)"#4k9Y,V3e.jkl?8C,=oI"U('Om*36V"Gb`8+F[?+[^`ihp$Zpfdkg:i#!bSJpjj$%L?d`6_#Kp=0B4+LVMVirQss;"@\*>SV2g
op=(.1,XC28DF:0,;X?n:1j]'Wb.SUFW*)2i<_u...@-.LpP>=ki2,O,5a/`l#J;k\mu>9-Z2"<uSKA$uSF8t(>"?/*82.L2pc7M[%J+,^%=DgZ1dXbF=T#uc3+nuNVAp&*oI>jo`AQCR[td7;TqaI0,q(ah.jdG(2!m_[[pb3OpT`#>323Wj*LEW=Faj\2j3$aQbKW?iCt:Ss^4Pb5'-*c,geVDUOr:]L=E^(LLf<IncuH^"q'WVVKi%n&-Fh)tc^hYHL#BWt#t'qHTpGo9I+%V4UGf_^XO^[]a%=lBBh/Y$0oc[M^NB,OJ?GXh<)u6NY6Y$KXhG`+:!W*djgcNqfbL0&mZ^ps9@Co["2nl!!W~>
endstream
endobj
112 0 obj
@@ -1206,80 +1206,80 @@
xref
0 153
0000000000 65535 f
-0000054446 00000 n
-0000054652 00000 n
-0000054745 00000 n
+0000054490 00000 n
+0000054696 00000 n
+0000054789 00000 n
0000000015 00000 n
0000000071 00000 n
0000001333 00000 n
0000001453 00000 n
0000001639 00000 n
-0000054897 00000 n
+0000054941 00000 n
0000001774 00000 n
-0000054960 00000 n
+0000055004 00000 n
0000001909 00000 n
-0000055026 00000 n
+0000055070 00000 n
0000002046 00000 n
-0000055090 00000 n
+0000055134 00000 n
0000002183 00000 n
-0000055156 00000 n
+0000055200 00000 n
0000002320 00000 n
-0000055222 00000 n
+0000055266 00000 n
0000002457 00000 n
-0000055288 00000 n
+0000055332 00000 n
0000002594 00000 n
-0000055352 00000 n
+0000055396 00000 n
0000002731 00000 n
-0000055416 00000 n
+0000055460 00000 n
0000002868 00000 n
-0000055482 00000 n
+0000055526 00000 n
0000003005 00000 n
-0000055547 00000 n
+0000055591 00000 n
0000003142 00000 n
-0000055613 00000 n
+0000055657 00000 n
0000003279 00000 n
-0000055679 00000 n
+0000055723 00000 n
0000003416 00000 n
-0000055744 00000 n
+0000055788 00000 n
0000003553 00000 n
-0000055810 00000 n
+0000055854 00000 n
0000003690 00000 n
-0000055874 00000 n
+0000055918 00000 n
0000003826 00000 n
-0000055938 00000 n
+0000055982 00000 n
0000003963 00000 n
-0000056004 00000 n
+0000056048 00000 n
0000004100 00000 n
-0000056070 00000 n
+0000056114 00000 n
0000004237 00000 n
-0000056135 00000 n
+0000056179 00000 n
0000004373 00000 n
-0000056201 00000 n
+0000056245 00000 n
0000004510 00000 n
-0000056265 00000 n
+0000056309 00000 n
0000004647 00000 n
-0000056331 00000 n
+0000056375 00000 n
0000004783 00000 n
-0000056397 00000 n
+0000056441 00000 n
0000004920 00000 n
0000005673 00000 n
0000005796 00000 n
0000005872 00000 n
-0000056461 00000 n
+0000056505 00000 n
0000006004 00000 n
-0000056527 00000 n
+0000056571 00000 n
0000006137 00000 n
-0000056591 00000 n
+0000056635 00000 n
0000006270 00000 n
-0000056656 00000 n
+0000056700 00000 n
0000006403 00000 n
-0000056721 00000 n
+0000056765 00000 n
0000006536 00000 n
-0000056786 00000 n
+0000056830 00000 n
0000006669 00000 n
-0000056851 00000 n
+0000056895 00000 n
0000006801 00000 n
-0000056916 00000 n
+0000056960 00000 n
0000006934 00000 n
0000009083 00000 n
0000009191 00000 n
@@ -1317,47 +1317,47 @@
0000043725 00000 n
0000045456 00000 n
0000045566 00000 n
-0000047461 00000 n
-0000056981 00000 n
-0000047571 00000 n
-0000047771 00000 n
-0000047989 00000 n
-0000048195 00000 n
-0000048403 00000 n
-0000048571 00000 n
-0000048771 00000 n
-0000048929 00000 n
-0000049104 00000 n
-0000049367 00000 n
-0000049608 00000 n
-0000049737 00000 n
-0000049891 00000 n
-0000050045 00000 n
-0000050189 00000 n
-0000050339 00000 n
-0000050480 00000 n
-0000050715 00000 n
-0000050910 00000 n
-0000051150 00000 n
-0000051332 00000 n
-0000051505 00000 n
-0000051708 00000 n
-0000051896 00000 n
-0000052148 00000 n
-0000052289 00000 n
-0000052498 00000 n
-0000052684 00000 n
-0000052858 00000 n
-0000053103 00000 n
-0000053294 00000 n
-0000053500 00000 n
-0000053666 00000 n
-0000053780 00000 n
-0000053891 00000 n
-0000054003 00000 n
-0000054112 00000 n
-0000054219 00000 n
-0000054336 00000 n
+0000047505 00000 n
+0000057025 00000 n
+0000047615 00000 n
+0000047815 00000 n
+0000048033 00000 n
+0000048239 00000 n
+0000048447 00000 n
+0000048615 00000 n
+0000048815 00000 n
+0000048973 00000 n
+0000049148 00000 n
+0000049411 00000 n
+0000049652 00000 n
+0000049781 00000 n
+0000049935 00000 n
+0000050089 00000 n
+0000050233 00000 n
+0000050383 00000 n
+0000050524 00000 n
+0000050759 00000 n
+0000050954 00000 n
+0000051194 00000 n
+0000051376 00000 n
+0000051549 00000 n
+0000051752 00000 n
+0000051940 00000 n
+0000052192 00000 n
+0000052333 00000 n
+0000052542 00000 n
+0000052728 00000 n
+0000052902 00000 n
+0000053147 00000 n
+0000053338 00000 n
+0000053544 00000 n
+0000053710 00000 n
+0000053824 00000 n
+0000053935 00000 n
+0000054047 00000 n
+0000054156 00000 n
+0000054263 00000 n
+0000054380 00000 n
trailer
<<
/Size 153
@@ -1365,5 +1365,5 @@
/Info 4 0 R
>>
startxref
-57035
+57079
%%EOF
Modified: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/SegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/SegmentTermEnum.java?rev=909334&r1=909333&r2=909334&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/SegmentTermEnum.java (original)
+++ lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/SegmentTermEnum.java Fri Feb 12 10:16:33 2010
@@ -108,7 +108,7 @@
return clone;
}
- final void seek(long pointer, int p, Term t, TermInfo ti)
+ final void seek(long pointer, long p, Term t, TermInfo ti)
throws IOException {
input.seek(pointer);
position = p;
Modified: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/TermInfosReader.java?rev=909334&r1=909333&r2=909334&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/TermInfosReader.java (original)
+++ lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/TermInfosReader.java Fri Feb 12 10:16:33 2010
@@ -170,7 +170,7 @@
private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
enumerator.seek(indexPointers[indexOffset],
- (indexOffset * totalIndexInterval) - 1,
+ ((long) indexOffset * totalIndexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]);
}
@@ -241,28 +241,6 @@
return ti;
}
- /** Returns the nth term in the set. */
- final Term get(int position) throws IOException {
- if (size == 0) return null;
-
- SegmentTermEnum enumerator = getThreadResources().termEnum;
- if (enumerator.term() != null &&
- position >= enumerator.position &&
- position < (enumerator.position + totalIndexInterval))
- return scanEnum(enumerator, position); // can avoid seek
-
- seekEnum(enumerator, position/totalIndexInterval); // must seek
- return scanEnum(enumerator, position);
- }
-
- private final Term scanEnum(SegmentTermEnum enumerator, int position) throws IOException {
- while(enumerator.position < position)
- if (!enumerator.next())
- return null;
-
- return enumerator.term();
- }
-
private void ensureIndexIsRead() {
if (indexTerms == null) {
throw new IllegalStateException("terms index was not loaded when this reader was created");
Modified: lucene/java/branches/lucene_2_9/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/site/src/documentation/content/xdocs/fileformats.xml?rev=909334&r1=909333&r2=909334&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/java/branches/lucene_2_9/src/site/src/documentation/content/xdocs/fileformats.xml Fri Feb 12 10:16:33 2010
@@ -1845,11 +1845,12 @@
<p>
When referring to term numbers, Lucene's current
- implementation uses a Java <code>int</code>, which means
- the maximum number of unique terms in any single index
- segment is 2,147,483,648. This is technically not a
- limitation of the index file format, just of Lucene's
- current implementation.
+ implementation uses a Java <code>int</code> to hold the
+ term index, which means the maximum number of unique
+ terms in any single index segment is ~2.1 billion times
+ the term index interval (default 128) = ~274 billion.
+ This is technically not a limitation of the index file
+ format, just of Lucene's current implementation.
</p>
<p>
Similarly, Lucene uses a Java <code>int</code> to refer