You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/07 13:21:14 UTC
svn commit: r1479862 [2/38] - in /lucene/dev/branches/lucene4258: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/solr/ dev-tools/maven/solr/core/src/java/
dev-tools/maven/solr/solrj/src/java/ dev-to...
Modified: lucene/dev/branches/lucene4258/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/build.xml?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/build.xml (original)
+++ lucene/dev/branches/lucene4258/build.xml Tue May 7 11:20:55 2013
@@ -280,6 +280,8 @@
<target name="nightly-smoke" description="Builds an unsigned release and smoke tests it" depends="clean,-env-JAVA7_HOME">
<fail unless="JAVA7_HOME">JAVA7_HOME property or environment variable is not defined.</fail>
+ <property name="svnversion.exe" value="svnversion" />
+ <exec dir="." executable="${svnversion.exe}" outputproperty="fakeReleaseSvnRevision" failifexecutionfails="false"/>
<subant target="prepare-release-no-sign" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
@@ -301,6 +303,7 @@
<arg value="-B"/>
<arg file="dev-tools/scripts/smokeTestRelease.py"/>
<arg value="${fakeRelease.uri}"/>
+ <arg value="${fakeReleaseSvnRevision}"/>
<arg value="${fakeReleaseVersion}"/>
<arg file="${fakeReleaseTmp}"/>
<arg value="false"/>
Modified: lucene/dev/branches/lucene4258/dev-tools/idea/.idea/libraries/JUnit.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/idea/.idea/libraries/JUnit.xml?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/idea/.idea/libraries/JUnit.xml (original)
+++ lucene/dev/branches/lucene4258/dev-tools/idea/.idea/libraries/JUnit.xml Tue May 7 11:20:55 2013
@@ -2,7 +2,7 @@
<library name="JUnit">
<CLASSES>
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
- <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.0.8.jar!/" />
+ <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.0.9.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
Modified: lucene/dev/branches/lucene4258/dev-tools/idea/.idea/misc.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/idea/.idea/misc.xml?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/idea/.idea/misc.xml (original)
+++ lucene/dev/branches/lucene4258/dev-tools/idea/.idea/misc.xml Tue May 7 11:20:55 2013
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
- <component name="ProjectRootManager" version="2" languageLevel="JDK_1_6"/>
+ <component name="ProjectRootManager" version="2" languageLevel="JDK_1_7"/>
</project>
Modified: lucene/dev/branches/lucene4258/dev-tools/maven/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/maven/pom.xml.template?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/maven/pom.xml.template (original)
+++ lucene/dev/branches/lucene4258/dev-tools/maven/pom.xml.template Tue May 7 11:20:55 2013
@@ -44,8 +44,9 @@
<base.specification.version>5.0.0</base.specification.version>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
<java.compat.version>1.7</java.compat.version>
- <jetty.version>8.1.8.v20121106</jetty.version>
- <slf4j.version>1.6.4</slf4j.version>
+ <jetty.version>8.1.10.v20130312</jetty.version>
+ <slf4j.version>1.6.6</slf4j.version>
+ <log4j.version>1.2.16</log4j.version>
<tika.version>1.3</tika.version>
<httpcomponents.version>4.2.3</httpcomponents.version>
<commons-io.version>2.1</commons-io.version>
@@ -148,7 +149,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <version>13.0.1</version>
+ <version>14.0.1</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
@@ -413,20 +414,25 @@
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
+ <artifactId>jul-to-slf4j</artifactId>
<version>${slf4j.version}</version>
- </dependency>
+ </dependency>
<dependency>
<groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
+ <artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
- </dependency>
+ </dependency>
<dependency>
<groupId>org.slf4j</groupId>
- <artifactId>slf4j-simple</artifactId>
+ <artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>${log4j.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<version>3.0.0.v201112011016</version>
@@ -455,7 +461,7 @@
<dependency>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>randomizedtesting-runner</artifactId>
- <version>2.0.8</version>
+ <version>2.0.9</version>
</dependency>
</dependencies>
</dependencyManagement>
@@ -481,10 +487,10 @@
<plugin>
<groupId>de.thetaphi</groupId>
<artifactId>forbiddenapis</artifactId>
- <version>1.2</version>
+ <version>1.3</version>
<configuration>
<!--
- This is the default setting, we don't support Java 8 at the moment.
+ This is the default setting, we don't support too new Java versions.
The checker simply passes by default and only prints a warning.
-->
<failOnUnsupportedJava>false</failOnUnsupportedJava>
Modified: lucene/dev/branches/lucene4258/dev-tools/maven/solr/core/src/java/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/maven/solr/core/src/java/pom.xml.template?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/maven/solr/core/src/java/pom.xml.template (original)
+++ lucene/dev/branches/lucene4258/dev-tools/maven/solr/core/src/java/pom.xml.template Tue May 7 11:20:55 2013
@@ -71,6 +71,7 @@
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-morfologik</artifactId>
<version>${project.version}</version>
+ <optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
@@ -146,14 +147,6 @@
<version>${restlet.version}</version>
</dependency>
<dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>jcl-over-slf4j</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- </dependency>
- <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
Modified: lucene/dev/branches/lucene4258/dev-tools/maven/solr/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/maven/solr/pom.xml.template?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/maven/solr/pom.xml.template (original)
+++ lucene/dev/branches/lucene4258/dev-tools/maven/solr/pom.xml.template Tue May 7 11:20:55 2013
@@ -86,12 +86,28 @@
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
+ <optional>true</optional>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- <scope>test</scope>
+ <artifactId>slf4j-log4j12</artifactId>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <optional>true</optional>
</dependency>
</dependencies>
<build>
Modified: lucene/dev/branches/lucene4258/dev-tools/maven/solr/solrj/src/java/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/maven/solr/solrj/src/java/pom.xml.template?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/maven/solr/solrj/src/java/pom.xml.template (original)
+++ lucene/dev/branches/lucene4258/dev-tools/maven/solr/solrj/src/java/pom.xml.template Tue May 7 11:20:55 2013
@@ -77,11 +77,6 @@
</exclusions>
</dependency>
<dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>jcl-over-slf4j</artifactId>
- <optional>true</optional>
- </dependency>
- <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
Modified: lucene/dev/branches/lucene4258/dev-tools/maven/solr/webapp/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/maven/solr/webapp/pom.xml.template?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/maven/solr/webapp/pom.xml.template (original)
+++ lucene/dev/branches/lucene4258/dev-tools/maven/solr/webapp/pom.xml.template Tue May 7 11:20:55 2013
@@ -57,15 +57,6 @@
<artifactId>javax.servlet</artifactId>
<scope>provided</scope>
</dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>jcl-over-slf4j</artifactId>
- </dependency>
</dependencies>
<build>
<directory>${build-directory}</directory>
Modified: lucene/dev/branches/lucene4258/dev-tools/scripts/buildAndPushRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/scripts/buildAndPushRelease.py?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/scripts/buildAndPushRelease.py (original)
+++ lucene/dev/branches/lucene4258/dev-tools/scripts/buildAndPushRelease.py Tue May 7 11:20:55 2013
@@ -15,9 +15,11 @@
import datetime
import re
+import time
import shutil
import os
import sys
+import subprocess
# Usage: python3.2 -u buildAndPushRelease.py [-sign gpgKey(eg: 6E68DA61)] [-prepare] [-push userName] [-pushLocal dirName] [-smoke tmpDir] /path/to/checkout version(eg: 3.4.0) rcNum(eg: 0)
#
@@ -43,6 +45,25 @@ def run(command):
print(msg)
raise RuntimeError(msg)
+def runAndSendGPGPassword(command, password):
+ p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
+ f = open(LOG, 'ab')
+ while True:
+ line = p.stdout.readline()
+ if len(line) == 0:
+ break
+ f.write(line)
+ if line.find(b'Enter GPG keystore password:') != -1:
+ time.sleep(1.0)
+ p.stdin.write((password + '\n').encode('UTF-8'))
+ p.stdin.write('\n'.encode('UTF-8'))
+
+ result = p.poll()
+ if result != 0:
+ msg = ' FAILED: %s [see log %s]' % (command, LOG)
+ print(msg)
+ raise RuntimeError(msg)
+
def scrubCheckout():
# removes any files not checked into svn
@@ -68,7 +89,7 @@ def getSVNRev():
return rev
-def prepare(root, version, gpgKeyID, doTest):
+def prepare(root, version, gpgKeyID, gpgPassword, doTest):
print()
print('Prepare release...')
if os.path.exists(LOG):
@@ -98,7 +119,11 @@ def prepare(root, version, gpgKeyID, doT
cmd += ' -Dgpg.key=%s prepare-release' % gpgKeyID
else:
cmd += ' prepare-release-no-sign'
- run(cmd)
+
+ if gpgPassword is not None:
+ runAndSendGPGPassword(cmd, gpgPassword)
+ else:
+ run(cmd)
print(' solr prepare-release')
os.chdir('../solr')
@@ -107,7 +132,12 @@ def prepare(root, version, gpgKeyID, doT
cmd += ' -Dgpg.key=%s prepare-release' % gpgKeyID
else:
cmd += ' prepare-release-no-sign'
- run(cmd)
+
+ if gpgPassword is not None:
+ runAndSendGPGPassword(cmd, gpgPassword)
+ else:
+ run(cmd)
+
print(' done!')
print()
return rev
@@ -115,7 +145,7 @@ def prepare(root, version, gpgKeyID, doT
def push(version, root, rev, rcNum, username):
print('Push...')
dir = 'lucene-solr-%s-RC%d-rev%s' % (version, rcNum, rev)
- s = os.popen('ssh %s@people.apache.org "ls -ld public_html/staging_area/%s" 2>&1' % (username, dir)).read().decode('UTF-8')
+ s = os.popen('ssh %s@people.apache.org "ls -ld public_html/staging_area/%s" 2>&1' % (username, dir)).read()
if 'no such file or directory' not in s.lower():
print(' Remove old dir...')
run('ssh %s@people.apache.org "chmod -R u+rwX public_html/staging_area/%s; rm -rf public_html/staging_area/%s"' %
@@ -253,12 +283,16 @@ def main():
gpgKeyID = sys.argv[idx+1]
del sys.argv[idx:idx+2]
+ sys.stdout.flush()
+ import getpass
+ gpgPassword = getpass.getpass('Enter GPG keystore password: ')
+
root = os.path.abspath(sys.argv[1])
version = sys.argv[2]
rcNum = int(sys.argv[3])
if doPrepare:
- rev = prepare(root, version, gpgKeyID, smokeTmpDir is None)
+ rev = prepare(root, version, gpgKeyID, gpgPassword, smokeTmpDir is None)
else:
os.chdir(root)
rev = open('rev.txt', encoding='UTF-8').read()
@@ -276,7 +310,7 @@ def main():
if smokeTmpDir is not None:
import smokeTestRelease
smokeTestRelease.DEBUG = False
- smokeTestRelease.smokeTest(url, version, smokeTmpDir, gpgKeyID is not None)
+ smokeTestRelease.smokeTest(url, rev, version, smokeTmpDir, gpgKeyID is not None)
if __name__ == '__main__':
try:
Modified: lucene/dev/branches/lucene4258/dev-tools/scripts/poll-mirrors.pl
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/scripts/poll-mirrors.pl?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/scripts/poll-mirrors.pl (original)
+++ lucene/dev/branches/lucene4258/dev-tools/scripts/poll-mirrors.pl Tue May 7 11:20:55 2013
@@ -51,7 +51,7 @@ my $previously_selected = select STDOUT;
$| = 1; # turn off buffering of STDOUT, so status is printed immediately
select $previously_selected;
-my $apache_url_suffix = "lucene/java/$version/lucene-$version.zip.asc";
+my $apache_url_suffix = "lucene/java/$version/KEYS";
my $apache_mirrors_list_url = "http://www.apache.org/mirrors/";
my $maven_url = "http://repo1.maven.org/maven2/org/apache/lucene/lucene-core/$version/lucene-core-$version.pom.asc";
Modified: lucene/dev/branches/lucene4258/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/dev-tools/scripts/smokeTestRelease.py?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/lucene4258/dev-tools/scripts/smokeTestRelease.py Tue May 7 11:20:55 2013
@@ -14,11 +14,14 @@
# limitations under the License.
import os
+import zipfile
import codecs
import tarfile
import zipfile
import threading
import traceback
+import datetime
+import time
import subprocess
import signal
import shutil
@@ -71,11 +74,10 @@ def verifyJavaVersion(version):
# http://s.apache.org/lusolr32rc2
env = os.environ
-
try:
JAVA7_HOME = env['JAVA7_HOME']
except KeyError:
- JAVA7_HOME = '/usr/local/jdk1.7.0_01'
+ raise RuntimeError('please set JAVA7_HOME in the env before running smokeTestRelease')
print('JAVA7_HOME is %s' % JAVA7_HOME)
verifyJavaVersion('1.7')
@@ -126,31 +128,37 @@ def getHREFs(urlString):
return links
def download(name, urlString, tmpDir, quiet=False):
+ startTime = time.time()
fileName = '%s/%s' % (tmpDir, name)
if not FORCE_CLEAN and os.path.exists(fileName):
if not quiet and fileName.find('.asc') == -1:
print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
return
- fIn = urllib.request.urlopen(urlString)
- fOut = open(fileName, 'wb')
- success = False
try:
- while True:
- s = fIn.read(65536)
- if s == b'':
- break
- fOut.write(s)
- fOut.close()
- fIn.close()
- success = True
- finally:
- fIn.close()
- fOut.close()
- if not success:
- os.remove(fileName)
- if not quiet and fileName.find('.asc') == -1:
- print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
-
+ fIn = urllib.request.urlopen(urlString)
+ fOut = open(fileName, 'wb')
+ success = False
+ try:
+ while True:
+ s = fIn.read(65536)
+ if s == b'':
+ break
+ fOut.write(s)
+ fOut.close()
+ fIn.close()
+ success = True
+ finally:
+ fIn.close()
+ fOut.close()
+ if not success:
+ os.remove(fileName)
+ if not quiet and fileName.find('.asc') == -1:
+ t = time.time()-startTime
+ sizeMB = os.path.getsize(fileName)/1024./1024.
+ print(' %.1f MB in %.2f sec (%.1f MB/sec)' % (sizeMB, t, sizeMB/t))
+ except Exception as e:
+ raise RuntimeError('failed to download url "%s"' % urlString) from e
+
def load(urlString):
return urllib.request.urlopen(urlString).read().decode('utf-8')
@@ -167,7 +175,7 @@ MANIFEST_FILE_NAME = 'META-INF/MANIFEST.
NOTICE_FILE_NAME = 'META-INF/NOTICE.txt'
LICENSE_FILE_NAME = 'META-INF/LICENSE.txt'
-def checkJARMetaData(desc, jarFile, version):
+def checkJARMetaData(desc, jarFile, svnRevision, version):
with zipfile.ZipFile(jarFile, 'r') as z:
for name in (MANIFEST_FILE_NAME, NOTICE_FILE_NAME, LICENSE_FILE_NAME):
@@ -182,15 +190,21 @@ def checkJARMetaData(desc, jarFile, vers
s = decodeUTF8(z.read(MANIFEST_FILE_NAME))
for verify in (
+ 'Specification-Vendor: The Apache Software Foundation',
'Implementation-Vendor: The Apache Software Foundation',
# Make sure 1.7 compiler was used to build release bits:
'X-Compile-Source-JDK: 1.7',
+ # Make sure 1.8 ant was used to build release bits: (this will match 1.8+)
+ 'Ant-Version: Apache Ant 1.8',
# Make sure .class files are 1.7 format:
'X-Compile-Target-JDK: 1.7',
- # Make sure this matches the version we think we are releasing:
- 'Specification-Version: %s' % version):
+ # Make sure this matches the version and svn revision we think we are releasing:
+ 'Implementation-Version: %s %s ' % (version, svnRevision),
+ 'Specification-Version: %s' % version,
+ # Make sure the release was compiled with 1.7:
+ 'Created-By: 1.7'):
if s.find(verify) == -1:
- raise RuntimeError('%s is missing "%s" inside its META-INF/MANIFES.MF' % \
+ raise RuntimeError('%s is missing "%s" inside its META-INF/MANIFEST.MF' % \
(desc, verify))
notice = decodeUTF8(z.read(NOTICE_FILE_NAME))
@@ -230,7 +244,7 @@ def checkJARMetaData(desc, jarFile, vers
def normSlashes(path):
return path.replace(os.sep, '/')
-def checkAllJARs(topDir, project, version):
+def checkAllJARs(topDir, project, svnRevision, version):
print(' verify JAR/WAR metadata...')
for root, dirs, files in os.walk(topDir):
@@ -249,10 +263,10 @@ def checkAllJARs(topDir, project, versio
fullPath = '%s/%s' % (root, file)
noJavaPackageClasses('JAR file "%s"' % fullPath, fullPath)
if file.lower().find('lucene') != -1 or file.lower().find('solr') != -1:
- checkJARMetaData('JAR file "%s"' % fullPath, fullPath, version)
+ checkJARMetaData('JAR file "%s"' % fullPath, fullPath, svnRevision, version)
-def checkSolrWAR(warFileName, version):
+def checkSolrWAR(warFileName, svnRevision, version):
"""
Crawls for JARs inside the WAR and ensures there are no classes
@@ -261,7 +275,7 @@ def checkSolrWAR(warFileName, version):
print(' make sure WAR file has no javax.* or java.* classes...')
- checkJARMetaData(warFileName, warFileName, version)
+ checkJARMetaData(warFileName, warFileName, svnRevision, version)
with zipfile.ZipFile(warFileName, 'r') as z:
for name in z.namelist():
@@ -271,6 +285,7 @@ def checkSolrWAR(warFileName, version):
if name.lower().find('lucene') != -1 or name.lower().find('solr') != -1:
checkJARMetaData('JAR file %s inside WAR file %s' % (name, warFileName),
io.BytesIO(z.read(name)),
+ svnRevision,
version)
def checkSigs(project, urlString, version, tmpDir, isSigned):
@@ -414,7 +429,9 @@ def testChangesText(dir, version, projec
fullPath = '%s/CHANGES.txt' % root
#print 'CHECK %s' % fullPath
checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
-
+
+reChangesSectionHREF = re.compile('<a id="(.*?)".*?>(.*?)</a>', re.IGNORECASE)
+
def checkChangesContent(s, version, name, project, isHTML):
if isHTML and s.find('Release %s' % version) == -1:
@@ -434,6 +451,23 @@ def checkChangesContent(s, version, name
if name.find('/benchmark/') == -1:
raise RuntimeError('did not see "%s" in %s' % (sub, name))
+ if isHTML:
+ # Make sure a section only appears once under each release:
+ seenIDs = set()
+ seenText = set()
+
+ release = None
+ for id, text in reChangesSectionHREF.findall(s):
+ if text.lower().startswith('release '):
+ release = text[8:].strip()
+ seenText.clear()
+ if id in seenIDs:
+ raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
+ seenIDs.add(id)
+ if text in seenText:
+ raise RuntimeError('%s has duplicate section "%s" under release "%s"' % (name, text, release))
+ seenText.add(text)
+
reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s])*' + r"|'(?:\\'|[^'])*')" \
+ r'|(/(?:\\.|[^"\'\s])*)' \
+ r'|("/(?:\\.|[^"])*")' \
@@ -533,7 +567,7 @@ def getDirEntries(urlString):
if text == 'Parent Directory' or text == '..':
return links[(i+1):]
-def unpackAndVerify(project, tmpDir, artifact, version):
+def unpackAndVerify(project, tmpDir, artifact, svnRevision, version):
destDir = '%s/unpack' % tmpDir
if os.path.exists(destDir):
shutil.rmtree(destDir)
@@ -553,14 +587,14 @@ def unpackAndVerify(project, tmpDir, art
raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected))
unpackPath = '%s/%s' % (destDir, expected)
- verifyUnpacked(project, artifact, unpackPath, version, tmpDir)
+ verifyUnpacked(project, artifact, unpackPath, svnRevision, version, tmpDir)
LUCENE_NOTICE = None
LUCENE_LICENSE = None
SOLR_NOTICE = None
SOLR_LICENSE = None
-def verifyUnpacked(project, artifact, unpackPath, version, tmpDir):
+def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, tmpDir):
global LUCENE_NOTICE
global LUCENE_LICENSE
global SOLR_NOTICE
@@ -678,13 +712,13 @@ def verifyUnpacked(project, artifact, un
else:
- checkAllJARs(os.getcwd(), project, version)
+ checkAllJARs(os.getcwd(), project, svnRevision, version)
if project == 'lucene':
testDemo(isSrc, version, '1.7')
else:
- checkSolrWAR('%s/example/webapps/solr.war' % unpackPath, version)
+ checkSolrWAR('%s/example/webapps/solr.war' % unpackPath, svnRevision, version)
print(' copying unpacked distribution for Java 7 ...')
java7UnpackPath = '%s-java7' %unpackPath
@@ -722,7 +756,7 @@ def readSolrOutput(p, startupEvent, fail
f = open(logFile, 'wb')
try:
while True:
- line = p.stderr.readline()
+ line = p.stdout.readline()
if len(line) == 0:
p.poll()
if not startupEvent.isSet():
@@ -756,7 +790,7 @@ def testSolrExample(unpackPath, javaPath
env.update(os.environ)
env['JAVA_HOME'] = javaPath
env['PATH'] = '%s/bin:%s' % (javaPath, env['PATH'])
- server = subprocess.Popen(['java', '-jar', 'start.jar'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, env=env)
+ server = subprocess.Popen(['java', '-jar', 'start.jar'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, env=env)
startupEvent = threading.Event()
failureEvent = threading.Event()
@@ -764,19 +798,21 @@ def testSolrExample(unpackPath, javaPath
serverThread.setDaemon(True)
serverThread.start()
- # Make sure Solr finishes startup:
- if not startupEvent.wait(1800):
- raise RuntimeError('startup took more than 30 minutes')
- if failureEvent.isSet():
- logFile = os.path.abspath(logFile)
- print
- print('Startup failed; see log %s' % logFile)
- printFileContents(logFile)
- raise RuntimeError('failure on startup; see log %s' % logFile)
-
- print(' startup done')
-
try:
+
+ # Make sure Solr finishes startup:
+ if not startupEvent.wait(1800):
+ raise RuntimeError('startup took more than 30 minutes')
+
+ if failureEvent.isSet():
+ logFile = os.path.abspath(logFile)
+ print
+ print('Startup failed; see log %s' % logFile)
+ printFileContents(logFile)
+ raise RuntimeError('failure on startup; see log %s' % logFile)
+
+ print(' startup done')
+
print(' test utf8...')
run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
print(' index example docs...')
@@ -858,7 +894,7 @@ def testDemo(isSrc, version, jdk):
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
print(' got %d hits for query "lucene"' % numHits)
-def checkMaven(baseURL, tmpDir, version, isSigned):
+def checkMaven(baseURL, tmpDir, svnRevision, version, isSigned):
# Locate the release branch in subversion
m = re.match('(\d+)\.(\d+)', version) # Get Major.minor version components
releaseBranchText = 'lucene_solr_%s_%s/' % (m.group(1), m.group(2))
@@ -907,8 +943,8 @@ def checkMaven(baseURL, tmpDir, version,
print(' verify that Maven artifacts are same as in the binary distribution...')
checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
- checkAllJARs('%s/maven/org/apache/lucene' % tmpDir, 'lucene', version)
- checkAllJARs('%s/maven/org/apache/solr' % tmpDir, 'solr', version)
+ checkAllJARs('%s/maven/org/apache/lucene' % tmpDir, 'lucene', svnRevision, version)
+ checkAllJARs('%s/maven/org/apache/solr' % tmpDir, 'solr', svnRevision, version)
def getDistributionsForMavenChecks(tmpDir, version, baseURL):
distributionFiles = defaultdict()
@@ -938,7 +974,7 @@ def getDistributionsForMavenChecks(tmpDi
def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
for project in ('lucene', 'solr'):
for artifact in artifacts[project]:
- if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
+ if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps:
javadocJar = artifact[:-4] + '-javadoc.jar'
if javadocJar not in artifacts[project]:
raise RuntimeError('missing: %s' % javadocJar)
@@ -951,7 +987,7 @@ def checkIdenticalNonMavenizedDeps(distr
distFilenames = dict()
for file in distributionFiles[project]:
distFilenames[os.path.basename(file)] = file
- for dep in list(nonMavenizedDeps.keys()):
+ for dep in nonMavenizedDeps.keys():
if ('/%s/' % project) in dep:
depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
if not depOrigFilename in distFilenames:
@@ -961,6 +997,15 @@ def checkIdenticalNonMavenizedDeps(distr
raise RuntimeError('Deployed non-mavenized dep %s differs from distribution dep %s'
% (dep, distFilenames[depOrigFilename]))
+def getZipFileEntries(fileName):
+ entries = []
+ with zipfile.ZipFile(fileName) as zf:
+ for zi in zf.infolist():
+ entries.append(zi.filename)
+ # Sort by name:
+ entries.sort()
+ return entries
+
def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version):
reJarWar = re.compile(r'%s\.[wj]ar$' % version) # exclude *-javadoc.jar and *-sources.jar
for project in ('lucene', 'solr'):
@@ -970,11 +1015,20 @@ def checkIdenticalMavenArtifacts(distrib
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
- if artifact not in list(nonMavenizedDeps.keys()):
+ entries = getZipFileEntries(artifact)
+ if artifact not in nonMavenizedDeps:
artifactFilename = os.path.basename(artifact)
- if artifactFilename not in list(distFilenames.keys()):
+ if artifactFilename not in distFilenames:
raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
% (artifact, project))
+ else:
+ binaryEntries = getZipFileEntries(distFilenames[artifactFilename])
+ if binaryEntries != entries:
+ raise RuntimeError('Maven artifact %s has different contents than binary distribution\n maven:\n%s\n binary:\n%s\n' % \
+ (artifactFilename,
+ '\n'.join(entries),
+ '\n'.join(binaryEntries)))
+
# TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
# identical = filecmp.cmp(artifact, distFilenames[artifactFilename], shallow=False)
# if not identical:
@@ -1245,27 +1299,32 @@ reAllowedVersion = re.compile(r'^\d+\.\d
def main():
- if len(sys.argv) < 4:
+ if len(sys.argv) < 5:
+ print()
+ print('Usage python -u %s BaseURL SvnRevision version tmpDir' % sys.argv[0])
print()
- print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
+ print(' example: python3.2 -u dev-tools/scripts/smokeTestRelease.py http://people.apache.org/~whoever/staging_area/lucene-solr-4.3.0-RC1-rev1469340 1469340 4.3.0 /path/to/a/tmp/dir')
print()
sys.exit(1)
baseURL = sys.argv[1]
- version = sys.argv[2]
+ svnRevision = sys.argv[2]
+ version = sys.argv[3]
if not reAllowedVersion.match(version):
raise RuntimeError('version "%s" does not match format X.Y.Z[-ALPHA|-BETA]' % version)
- tmpDir = os.path.abspath(sys.argv[3])
+ tmpDir = os.path.abspath(sys.argv[4])
isSigned = True
- if len(sys.argv) == 5:
- isSigned = (sys.argv[4] == "True")
+ if len(sys.argv) == 6:
+ isSigned = (sys.argv[5] == "True")
- smokeTest(baseURL, version, tmpDir, isSigned)
+ smokeTest(baseURL, svnRevision, version, tmpDir, isSigned)
-def smokeTest(baseURL, version, tmpDir, isSigned):
+def smokeTest(baseURL, svnRevision, version, tmpDir, isSigned):
+ startTime = datetime.datetime.now()
+
if FORCE_CLEAN:
if os.path.exists(tmpDir):
raise RuntimeError('temp dir %s exists; please remove first' % tmpDir)
@@ -1297,21 +1356,21 @@ def smokeTest(baseURL, version, tmpDir,
print('Test Lucene...')
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
- unpackAndVerify('lucene', tmpDir, artifact, version)
- unpackAndVerify('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
+ unpackAndVerify('lucene', tmpDir, artifact, svnRevision, version)
+ unpackAndVerify('lucene', tmpDir, 'lucene-%s-src.tgz' % version, svnRevision, version)
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version):
- unpackAndVerify('solr', tmpDir, artifact, version)
- unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, version)
+ unpackAndVerify('solr', tmpDir, artifact, svnRevision, version)
+ unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, svnRevision, version)
print()
print('Test Maven artifacts for Lucene and Solr...')
- checkMaven(baseURL, tmpDir, version, isSigned)
+ checkMaven(baseURL, tmpDir, svnRevision, version, isSigned)
- print('\nSUCCESS!\n')
+ print('\nSUCCESS! [%s]\n' % (datetime.datetime.now() - startTime))
if __name__ == '__main__':
print('NOTE: output encoding is %s' % sys.stdout.encoding)
Modified: lucene/dev/branches/lucene4258/extra-targets.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/extra-targets.xml?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/extra-targets.xml (original)
+++ lucene/dev/branches/lucene4258/extra-targets.xml Tue May 7 11:20:55 2013
@@ -89,7 +89,7 @@
statusClient.doStatus(basedir, SVNRevision.WORKING, SVNDepth.fromRecurse(true), false, true, false, false, {
status ->
SVNStatusType nodeStatus = status.getNodeStatus();
- if (nodeStatus == SVNStatusType.STATUS_UNVERSIONED) {
+ if (nodeStatus == SVNStatusType.STATUS_UNVERSIONED || nodeStatus == SVNStatusType.STATUS_MISSING) {
unversioned.add(convertRelative(status.getFile()));
} else if (status.getKind() == SVNNodeKind.FILE && nodeStatus != SVNStatusType.STATUS_DELETED) {
missingProps.add(convertRelative(status.getFile()));
Modified: lucene/dev/branches/lucene4258/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/CHANGES.txt?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4258/lucene/CHANGES.txt Tue May 7 11:20:55 2013
@@ -19,11 +19,133 @@ Changes in backwards compatibility polic
(Nikola Tanković, Uwe Schindler, Chris Male, Mike McCandless,
Robert Muir)
+* LUCENE-4924: DocIdSetIterator.docID() must now return -1 when the iterator is
+ not positioned. This change affects all classes that inherit from
+ DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand)
+
New Features
* LUCENE-4747: Move to Java 7 as minimum Java version.
(Robert Muir, Uwe Schindler)
+Optimizations
+
+* LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile
+ for NIOFSDirectory and MMapDirectory. This allows deleting open files
+ on Windows if NIOFSDirectory is used, mmapped files are still locked.
+ (Michael Poindexter, Robert Muir, Uwe Schindler)
+
+======================= Lucene 4.4.0 =======================
+
+Changes in backwards compatibility policy
+
+* LUCENE-4955: NGramTokenFilter now emits all n-grams for the same token at the
+ same position and preserves the position length and the offsets of the
+ original token. (Simon Willnauer, Adrien Grand)
+
+* LUCENE-4955: NGramTokenizer now emits n-grams in a different order
+ (a, ab, b, bc, c) instead of (a, b, c, ab, bc) and doesn't trim trailing
+ whitespaces. (Adrien Grand)
+
+* LUCENE-4967: NRTManager is replaced by
+ ControlledRealTimeReopenThread, for controlling which requests must
+ see which indexing changes, so that it can work with any
+ ReferenceManager (Mike McCandless)
+
+* LUCENE-4973: SnapshotDeletionPolicy no longer requires a unique
+ String id (Mike McCandless, Shai Erera)
+
+* LUCENE-4946: The internal sorting API (SorterTemplate, now Sorter) has been
+ completely refactored to allow for a better implementation of TimSort.
+ (Adrien Grand, Uwe Schindler, Dawid Weiss)
+
+* LUCENE-4963: Some TokenFilter options that generate broken TokenStreams have
+ been deprecated: updateOffsets=true on TrimFilter and
+ enablePositionIncrements=false on all classes that inherit from
+ FilteringTokenFilter: JapanesePartOfSpeechStopFilter, KeepWordFilter,
+ LengthFilter, StopFilter and TypeTokenFilter. (Adrien Grand)
+
+* LUCENE-4963: In order not to take position increments into account in
+ suggesters, you now need to call setPreservePositionIncrements(false) instead
+ of configuring the token filters to not increment positions. (Adrien Grand)
+
+Bug Fixes
+
+* LUCENE-4935: CustomScoreQuery wrongly applied its query boost twice
+ (boost^2). (Robert Muir)
+
+* LUCENE-4948: Fixed ArrayIndexOutOfBoundsException in PostingsHighlighter
+ if you had a 64-bit JVM without compressed OOPS: IBM J9, or Oracle with
+ large heap/explicitly disabled. (Mike McCandless, Uwe Schindler, Robert Muir)
+
+* LUCENE-4955: NGramTokenizer now supports inputs larger than 1024 chars.
+ (Adrien Grand)
+
+* LUCENE-4953: Fixed ParallelCompositeReader to inform ReaderClosedListeners of
+ its synthetic subreaders. FieldCaches keyed on the atomic children will be purged
+ earlier and FC insanity prevented. In addition, ParallelCompositeReader's
+ toString() was changed to better reflect the reader structure.
+ (Mike McCandless, Uwe Schindler)
+
+* LUCENE-4959: Fix incorrect return value in
+ SimpleNaiveBayesClassifier.assignClass. (Alexey Kutin via Adrien Grand)
+
+* LUCENE-4968: Fixed ToParentBlockJoinQuery/Collector: correctly handle parent
+ hits that had no child matches, don't throw IllegalArgumentEx when
+ the child query has no hits, more aggressively catch cases where childQuery
+ incorrectly matches parent documents (Mike McCandless)
+
+* LUCENE-4970: Fix boost value of rewritten NGramPhraseQuery.
+ (Shingo Sasaki via Adrien Grand)
+
+* LUCENE-4974: CommitIndexTask was broken if no params were set. (Shai Erera)
+
+* LUCENE-4972: DirectoryTaxonomyWriter created empty commits even if no changes
+ were made. (Shai Erera, Michael McCandless)
+
+* LUCENE-949: AnalyzingQueryParser can't work with leading wildcards.
+ (Tim Allison, Robert Muir, Steve Rowe)
+
+Optimizations
+
+* LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher
+ methods that take top-N. (Uwe Schindler, Mike McCandless, Robert Muir)
+
+* LUCENE-4936: Improve numeric doc values compression in case all values share
+ a common divisor. In particular, this improves the compression ratio of dates
+ without time when they are encoded as milliseconds since Epoch. Also support
+ TABLE compressed numerics in the Disk codec. (Robert Muir, Adrien Grand)
+
+* LUCENE-4951: DrillSideways uses the new Scorer.cost() method to make
+ better decisions about which scorer to use internally. (Mike McCandless)
+
+* LUCENE-4976: PersistentSnapshotDeletionPolicy writes its state to a
+ single snapshots_N file, and no longer requires closing (Mike
+ McCandless, Shai Erera)
+
+New Features
+
+* LUCENE-4766: Added a PatternCaptureGroupTokenFilter that uses Java regexes to
+ emit multiple tokens, one for each capture group in one or more patterns.
+ (Simon Willnauer, Clinton Gormley)
+
+* LUCENE-4952: Expose control (protected method) in DrillSideways to
+ force all sub-scorers to be on the same document being collected.
+ This is necessary when using collectors like
+ ToParentBlockJoinCollector with DrillSideways. (Mike McCandless)
+
+* SOLR-4761: Add SimpleMergedSegmentWarmer, which just initializes terms,
+ norms, docvalues, and so on. (Mark Miller, Mike McCandless, Robert Muir)
+
+* LUCENE-4964: Allow arbitrary Query for per-dimension drill-down to
+ DrillDownQuery and DrillSideways, to support future dynamic faceting
+ methods (Mike McCandless)
+
+* LUCENE-4966: Add CachingWrapperFilter.sizeInBytes() (Mike McCandless)
+
+* LUCENE-4965: Add dynamic (no taxonomy index used) numeric range
+ faceting to Lucene's facet module (Mike McCandless, Shai Erera)
+
======================= Lucene 4.3.0 =======================
Changes in backwards compatibility policy
@@ -43,12 +165,39 @@ Changes in backwards compatibility polic
* LUCENE-4833: IndexWriterConfig used to use LogByteSizeMergePolicy when
calling setMergePolicy(null) although the default merge policy is
TieredMergePolicy. IndexWriterConfig setters now throw an exception when
- passed null if null is not a valid value.
+ passed null if null is not a valid value. (Adrien Grand)
* LUCENE-4849: Made ParallelTaxonomyArrays abstract with a concrete
implementation for DirectoryTaxonomyWriter/Reader. Also moved it under
o.a.l.facet.taxonomy. (Shai Erera)
+* LUCENE-4876: IndexDeletionPolicy is now an abstract class instead of an
+ interface. IndexDeletionPolicy, MergeScheduler and InfoStream now implement
+ Cloneable. (Adrien Grand)
+
+* LUCENE-4874: FilterAtomicReader and related classes (FilterTerms,
+ FilterDocsEnum, ...) don't forward anymore to the filtered instance when the
+ method has a default implementation through other abstract methods.
+ (Adrien Grand, Robert Muir)
+
+* LUCENE-4642, LUCENE-4877: Implementors of TokenizerFactory, TokenFilterFactory,
+ and CharFilterFactory now need to provide at least one constructor taking
+ Map<String,String> to be able to be loaded by the SPI framework (e.g., from Solr).
+ In addition, TokenizerFactory needs to implement the abstract
+ create(AttributeFactory,Reader) method. (Renaud Delbru, Uwe Schindler,
+ Steve Rowe, Robert Muir)
+
+API Changes
+
+* LUCENE-4896: Made PassageFormatter abstract in PostingsHighlighter, made
+ members of DefaultPassageFormatter protected. (Luca Cavanna via Robert Muir)
+
+* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
+ TaxonomyReader.getParallelArrays().parents() instead. (Shai Erera)
+
+* LUCENE-4742: Renamed spatial 'Node' to 'Cell', along with any method names
+ and variables using this terminology. (David Smiley)
+
New Features
* LUCENE-4815: DrillSideways now allows more than one FacetRequest per
@@ -78,8 +227,8 @@ New Features
* LUCENE-4820: Add payloads to Analyzing/FuzzySuggester, to record an
arbitrary byte[] per suggestion (Mike McCandless)
-* LUCENE-4816: Passing null as the BreakIterator to PostingsHighlighter
- now highlights the entire content as a single Passage. (Robert
+* LUCENE-4816: Add WholeBreakIterator to PostingsHighlighter
+ for treating the entire content as a single Passage. (Robert
Muir, Mike McCandless)
* LUCENE-4827: Add additional ctor to PostingsHighlighter PassageScorer
@@ -116,10 +265,165 @@ New Features
* LUCENE-4859: IndexReader now exposes Terms statistics: getDocCount,
getSumDocFreq, getSumTotalTermFreq. (Shai Erera)
-API Changes
+* LUCENE-4862: It is now possible to terminate collection of a single
+ IndexReader leaf by throwing a CollectionTerminatedException in
+ Collector.collect. (Adrien Grand, Shai Erera)
+
+* LUCENE-4752: New SortingMergePolicy (in lucene/misc) that sorts documents
+ before merging segments. (Adrien Grand, Shai Erera, David Smiley)
+
+* LUCENE-4860: Customize scoring and formatting per-field in
+ PostingsHighlighter by subclassing and overriding the getFormatter
+ and/or getScorer methods. This also changes Passage.getMatchTerms()
+ to return BytesRef[] instead of Term[]. (Robert Muir, Mike
+ McCandless)
-* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
- TaxonomyReader.getParallelArrays().parents() instead. (Shai Erera)
+* LUCENE-4839: Added SorterTemplate.timSort, a O(n log n) stable sort algorithm
+ that performs well on partially sorted data. (Adrien Grand)
+
+* LUCENE-4644: Added support for the "IsWithin" spatial predicate for
+ RecursivePrefixTreeStrategy. It's for matching non-point indexed shapes; if
+ you only have points (1/doc) then "Intersects" is equivalent and faster.
+ See the javadocs. (David Smiley)
+
+* LUCENE-4861: Make BreakIterator per-field in PostingsHighlighter. This means
+ you can override getBreakIterator(String field) to use different mechanisms
+ for e.g. title vs. body fields. (Mike McCandless, Robert Muir)
+
+* LUCENE-4645: Added support for the "Contains" spatial predicate for
+ RecursivePrefixTreeStrategy. (David Smiley)
+
+* LUCENE-4898: DirectoryReader.openIfChanged now allows opening a reader
+ on an IndexCommit starting from a near-real-time reader (previously
+ this would throw IllegalArgumentException). (Mike McCandless)
+
+* LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter.
+ (Robert Muir)
+
+* LUCENE-4897: Added TaxonomyReader.getChildren for traversing a category's
+ children. (Shai Erera)
+
+* LUCENE-4902: Added FilterDirectoryReader to allow easy filtering of a
+ DirectoryReader's subreaders. (Alan Woodward, Adrien Grand, Uwe Schindler)
+
+* LUCENE-4858: Added EarlyTerminatingSortingCollector to be used in conjunction
+ with SortingMergePolicy, which allows to early terminate queries on sorted
+ indexes, when the sort order matches the index order. (Adrien Grand, Shai Erera)
+
+* LUCENE-4904: Added descending sort order to NumericDocValuesSorter. (Shai Erera)
+
+* LUCENE-3786: Added SearcherTaxonomyManager, to manage access to both
+ IndexSearcher and DirectoryTaxonomyReader for near-real-time
+ faceting. (Shai Erera, Mike McCandless)
+
+* LUCENE-4915: DrillSideways now allows drilling down on fields that
+ are not faceted. (Mike McCandless)
+
+* LUCENE-4895: Added support for the "IsDisjointTo" spatial predicate for
+ RecursivePrefixTreeStrategy. (David Smiley)
+
+* LUCENE-4774: Added FieldComparator that allows sorting parent documents based on
+ fields on the child / nested document level. (Martijn van Groningen)
+
+Optimizations
+
+* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
+ the default implementation which merges in-place by a faster implementation
+ that could require fewer swaps at the expense of some extra memory.
+ ArrayUtil and CollectionUtil override it so that their mergeSort and timSort
+ methods are faster but only require up to 1% of extra memory. (Adrien Grand)
+
+* LUCENE-4571: Speed up BooleanQuerys with minNrShouldMatch to use
+ skipping. (Stefan Pohl via Robert Muir)
+
+* LUCENE-4863: StemmerOverrideFilter now uses a FST to represent its overrides
+ in memory. (Simon Willnauer)
+
+* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
+ non-array-lookup version. (Dawid Weiss)
+
+* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
+ (Robert Muir)
+
+* LUCENE-4926: Speed up DisjunctionMatchQuery. (Robert Muir)
+
+* LUCENE-4930: Reduce contention in older/buggy JVMs when using
+ AttributeSource#addAttribute() because java.lang.ref.ReferenceQueue#poll()
+ is implemented using synchronization. (Christian Ziech, Karl Wright,
+ Uwe Schindler)
+
+Bug Fixes
+
+* LUCENE-4868: SumScoreFacetsAggregator used an incorrect index into
+ the scores array. (Shai Erera)
+
+* LUCENE-4882: FacetsAccumulator did not allow to count ROOT category (i.e.
+ count dimensions). (Shai Erera)
+
+* LUCENE-4876: IndexWriterConfig.clone() now clones its MergeScheduler,
+ IndexDeletionPolicy and InfoStream in order to make an IndexWriterConfig and
+ its clone fully independent. (Adrien Grand)
+
+* LUCENE-4893: Facet counts were multiplied as many times as
+ FacetsCollector.getFacetResults() is called. (Shai Erera)
+
+* LUCENE-4888: Fixed SloppyPhraseScorer, MultiDocs(AndPositions)Enum and
+ MultiSpansWrapper which happened to sometimes call DocIdSetIterator.advance
+ with target<=current (in this case the behavior of advance is undefined).
+ (Adrien Grand)
+
+* LUCENE-4899: FastVectorHighlighter failed with StringIndexOutOfBoundsException
+ if a single highlight phrase or term was greater than the fragCharSize producing
+ negative string offsets. (Simon Willnauer)
+
+* LUCENE-4877: Throw exception for invalid arguments in analysis factories.
+ (Steve Rowe, Uwe Schindler, Robert Muir)
+
+* LUCENE-4914: SpatialPrefixTree's Node/Cell.reset() forgot to reset the 'leaf'
+ flag. It affects SpatialRecursivePrefixTreeStrategy on non-point indexed
+ shapes, as of Lucene 4.2. (David Smiley)
+
+* LUCENE-4913: FacetResultNode.ordinal was always 0 when all children
+ are returned. (Mike McCandless)
+
+* LUCENE-4918: Highlighter closes the given IndexReader if QueryScorer
+ is used with an external IndexReader. (Simon Willnauer, Sirvan Yahyaei)
+
+* LUCENE-4880: Fix MemoryIndex to consume empty terms from the tokenstream consistent
+ with IndexWriter. Previously it discarded them. (Timothy Allison via Robert Muir)
+
+* LUCENE-4885: FacetsAccumulator did not set the correct value for
+ FacetResult.numValidDescendants. (Mike McCandless, Shai Erera)
+
+* LUCENE-4925: Fixed IndexSearcher.search when the argument list contains a Sort
+ and one of the sort fields is the relevance score. Only IndexSearchers created
+ with an ExecutorService are concerned. (Adrien Grand)
+
+* LUCENE-4738, LUCENE-2727, LUCENE-2812: Simplified
+ DirectoryReader.indexExists so that it's more robust to transient
+ IOExceptions (e.g. due to issues like file descriptor exhaustion),
+ but this will also cause it to err towards returning true for
+ example if the directory contains a corrupted index or an incomplete
+ initial commit. In addition, IndexWriter with OpenMode.CREATE will
+ now succeed even if the directory contains a corrupted index (Billow
+ Gao, Robert Muir, Mike McCandless)
+
+* LUCENE-4928: Stored fields and term vectors could become super slow in case
+ of tiny documents (a few bytes). This is especially problematic when switching
+ codecs since bulk-merge strategies can't be applied and the same chunk of
+ documents can end up being decompressed thousands of times. A hard limit on
+ the number of documents per chunk has been added to fix this issue.
+ (Robert Muir, Adrien Grand)
+
+* LUCENE-4934: Fix minor equals/hashcode problems in facet/DrillDownQuery,
+ BoostingQuery, MoreLikeThisQuery, FuzzyLikeThisQuery, and block join queries.
+ (Robert Muir, Uwe Schindler)
+
+* LUCENE-4504: Fix broken sort comparator in ValueSource.getSortField,
+ used when sorting by a function query. (Tom Shally via Robert Muir)
+
+* LUCENE-4937: Fix incorrect sorting of float/double values (+/-0, NaN).
+ (Robert Muir, Uwe Schindler)
Documentation
@@ -127,6 +431,12 @@ Documentation
to use the new SortedSetDocValues backed facet implementation.
(Shai Erera, Mike McCandless)
+Build
+
+* LUCENE-4879: Upgrade randomized testing to version 2.0.9:
+ Filter stack traces on console output. (Dawid Weiss, Robert Muir)
+
+
======================= Lucene 4.2.1 =======================
Bug Fixes
@@ -155,6 +465,15 @@ Bug Fixes
* SOLR-4589: Fixed CPU spikes and poor performance in lazy field loading
of multivalued fields. (hossman)
+* LUCENE-4870: Fix bug where an entire index might be deleted by the IndexWriter
+ due to false detection if an index exists in the directory when
+ OpenMode.CREATE_OR_APPEND is used. This might also affect application that set
+ the open mode manually using DirectoryReader#indexExists. (Simon Willnauer)
+
+* LUCENE-4878: Override getRegexpQuery in MultiFieldQueryParser to prevent
+ NullPointerException when regular expression syntax is used with
+ MultiFieldQueryParser. (Simon Willnauer, Adam Rauch)
+
Optimizations
* LUCENE-4819: Added Sorted[Set]DocValues.termsEnum(), and optimized the
@@ -672,9 +991,6 @@ Bug Fixes
romaji even for out-of-vocabulary kana cases (e.g. half-width forms).
(Robert Muir)
-* LUCENE-4504: Fix broken sort comparator in ValueSource.getSortField,
- used when sorting by a function query. (Tom Shally via Robert Muir)
-
* LUCENE-4511: TermsFilter might return wrong results if a field is not
indexed or doesn't exist in the index. (Simon Willnauer)
Modified: lucene/dev/branches/lucene4258/lucene/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/NOTICE.txt?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/NOTICE.txt (original)
+++ lucene/dev/branches/lucene4258/lucene/NOTICE.txt Tue May 7 11:20:55 2013
@@ -27,11 +27,6 @@ Jean-Philippe Barrette-LaPierre. This li
see http://sites.google.com/site/rrettesite/moman and
http://bitbucket.org/jpbarrette/moman/overview/
-The class org.apache.lucene.util.SorterTemplate was inspired by CGLIB's class
-with the same name. The implementation part is mainly done using pre-existing
-Lucene sorting code. In-place stable mergesort was borrowed from CGLIB,
-which is Apache-licensed.
-
The class org.apache.lucene.util.WeakIdentityMap was derived from
the Apache CXF project and is Apache License 2.0.
Modified: lucene/dev/branches/lucene4258/lucene/SYSTEM_REQUIREMENTS.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/SYSTEM_REQUIREMENTS.txt?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/SYSTEM_REQUIREMENTS.txt (original)
+++ lucene/dev/branches/lucene4258/lucene/SYSTEM_REQUIREMENTS.txt Tue May 7 11:20:55 2013
@@ -5,7 +5,7 @@ install at least Update 1! With all Java
recommended to not use experimental `-XX` JVM options. It is also
recommended to always use the latest update version of your Java VM,
because bugs may affect Lucene. An overview of known JVM bugs can be
-found on http://wiki.apache.org/lucene-java/SunJavaBugs.
+found on http://wiki.apache.org/lucene-java/JavaBugs.
CPU, disk and memory requirements are based on the many choices made in
implementing Lucene (document size, number of documents, and number of
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,26 +17,34 @@ package org.apache.lucene.analysis.ar;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-
/**
* Factory for {@link ArabicNormalizationFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
* <filter class="solr.ArabicNormalizationFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class ArabicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+ /** Creates a new ArabicNormalizationFilterFactory */
+ public ArabicNormalizationFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public ArabicNormalizationFilter create(TokenStream input) {
return new ArabicNormalizationFilter(input);
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,14 +17,15 @@ package org.apache.lucene.analysis.ar;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-
/**
* Factory for {@link ArabicStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -32,10 +33,16 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.ArabicStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class ArabicStemFilterFactory extends TokenFilterFactory {
+ /** Creates a new ArabicStemFilterFactory */
+ public ArabicStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public ArabicStemFilter create(TokenStream input) {
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.bg;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link BulgarianStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.BulgarianStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class BulgarianStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new BulgarianStemFilterFactory */
+ public BulgarianStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new BulgarianStemFilter(input);
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.br;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link BrazilianStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.BrazilianStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class BrazilianStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new BrazilianStemFilterFactory */
+ public BrazilianStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public BrazilianStemFilter create(TokenStream in) {
return new BrazilianStemFilter(in);
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,31 +17,35 @@ package org.apache.lucene.analysis.charf
* limitations under the License.
*/
-import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.analysis.util.CharFilterFactory;
import java.io.Reader;
-import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
-* Factory for {@link HTMLStripCharFilter}.
- * <pre class="prettyprint" >
+ * Factory for {@link HTMLStripCharFilter}.
+ * <pre class="prettyprint">
* <fieldType name="text_html" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" />
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
- public class HTMLStripCharFilterFactory extends CharFilterFactory {
+public class HTMLStripCharFilterFactory extends CharFilterFactory {
+ final Set<String> escapedTags;
+ static final Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
- Set<String> escapedTags = null;
- Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
+ /** Creates a new HTMLStripCharFilterFactory */
+ public HTMLStripCharFilterFactory(Map<String,String> args) {
+ super(args);
+ escapedTags = getSet(args, "escapedTags");
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public HTMLStripCharFilter create(Reader input) {
@@ -53,19 +57,4 @@ import java.util.regex.Pattern;
}
return charFilter;
}
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
- String escapedTagsArg = args.get("escapedTags");
- if (null != escapedTagsArg) {
- Matcher matcher = TAG_NAME_PATTERN.matcher(escapedTagsArg);
- while (matcher.find()) {
- if (null == escapedTags) {
- escapedTags = new HashSet<String>();
- }
- escapedTags.add(matcher.group(0));
- }
- }
- }
}
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java Tue May 7 11:20:55 2013
@@ -22,16 +22,19 @@ import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.charfilter.MappingCharFilter;
-import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
/**
* Factory for {@link MappingCharFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_map" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
@@ -39,21 +42,26 @@ import org.apache.lucene.analysis.util.*
* </analyzer>
* </fieldType></pre>
*
- *
* @since Solr 1.4
- *
*/
public class MappingCharFilterFactory extends CharFilterFactory implements
ResourceLoaderAware, MultiTermAwareComponent {
protected NormalizeCharMap normMap;
- private String mapping;
+ private final String mapping;
+
+ /** Creates a new MappingCharFilterFactory */
+ public MappingCharFilterFactory(Map<String,String> args) {
+ super(args);
+ mapping = get(args, "mapping");
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
// TODO: this should use inputstreams from the loader, not File!
@Override
public void inform(ResourceLoader loader) throws IOException {
- mapping = args.get("mapping");
-
if (mapping != null) {
List<String> wlist = null;
File mappingFile = new File(mapping);
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java Tue May 7 11:20:55 2013
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.T
/**
* Factory for {@link CJKBigramFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_cjk" class="solr.TextField">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -38,26 +38,30 @@ import org.apache.lucene.analysis.util.T
* </fieldType></pre>
*/
public class CJKBigramFilterFactory extends TokenFilterFactory {
- int flags;
- boolean outputUnigrams;
+ final int flags;
+ final boolean outputUnigrams;
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
- flags = 0;
- if (getBoolean("han", true)) {
+ /** Creates a new CJKBigramFilterFactory */
+ public CJKBigramFilterFactory(Map<String,String> args) {
+ super(args);
+ int flags = 0;
+ if (getBoolean(args, "han", true)) {
flags |= CJKBigramFilter.HAN;
}
- if (getBoolean("hiragana", true)) {
+ if (getBoolean(args, "hiragana", true)) {
flags |= CJKBigramFilter.HIRAGANA;
}
- if (getBoolean("katakana", true)) {
+ if (getBoolean(args, "katakana", true)) {
flags |= CJKBigramFilter.KATAKANA;
}
- if (getBoolean("hangul", true)) {
+ if (getBoolean(args, "hangul", true)) {
flags |= CJKBigramFilter.HANGUL;
}
- outputUnigrams = getBoolean("outputUnigrams", false);
+ this.flags = flags;
+ this.outputUnigrams = getBoolean(args, "outputUnigrams", false);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cjk;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.T
/**
* Factory for {@link CJKWidthFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_cjk" class="solr.TextField">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -35,9 +37,16 @@ import org.apache.lucene.analysis.util.T
* </analyzer>
* </fieldType></pre>
*/
-
public class CJKWidthFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+ /** Creates a new CJKWidthFilterFactory */
+ public CJKWidthFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new CJKWidthFilter(input);
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java Tue May 7 11:20:55 2013
@@ -1,10 +1,18 @@
/*
- * Licensed under the Apache License,
- * Version 2.0 (the "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and limitations under the License.
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
package org.apache.lucene.analysis.commongrams;
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java Tue May 7 11:20:55 2013
@@ -18,37 +18,45 @@ package org.apache.lucene.analysis.commo
*/
import java.io.IOException;
+import java.util.Map;
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.util.*;
/**
* Constructs a {@link CommonGramsFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
* </analyzer>
* </fieldType></pre>
- *
*/
-
-/*
- * This is pretty close to a straight copy from StopFilterFactory
- */
-public class CommonGramsFilterFactory extends TokenFilterFactory implements
- ResourceLoaderAware {
+public class CommonGramsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ // TODO: shared base class for Stop/Keep/CommonGrams?
+ private CharArraySet commonWords;
+ private final String commonWordFiles;
+ private final String format;
+ private final boolean ignoreCase;
+
+ /** Creates a new CommonGramsFilterFactory */
+ public CommonGramsFilterFactory(Map<String,String> args) {
+ super(args);
+ commonWordFiles = get(args, "words");
+ format = get(args, "format");
+ ignoreCase = getBoolean(args, "ignoreCase", false);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public void inform(ResourceLoader loader) throws IOException {
- String commonWordFiles = args.get("words");
- ignoreCase = getBoolean("ignoreCase", false);
-
if (commonWordFiles != null) {
- if ("snowball".equalsIgnoreCase(args.get("format"))) {
+ if ("snowball".equalsIgnoreCase(format)) {
commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
} else {
commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
@@ -57,10 +65,6 @@ public class CommonGramsFilterFactory ex
commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
}
}
-
- //Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
- private CharArraySet commonWords;
- private boolean ignoreCase;
public boolean isIgnoreCase() {
return ignoreCase;
@@ -71,7 +75,7 @@ public class CommonGramsFilterFactory ex
}
@Override
- public CommonGramsFilter create(TokenStream input) {
+ public TokenFilter create(TokenStream input) {
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
return commonGrams;
}
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,77 +17,37 @@ package org.apache.lucene.analysis.commo
* limitations under the License.
*/
-import java.io.IOException;
import java.util.Map;
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilterFactory;
-import org.apache.lucene.analysis.util.*;
/**
* Construct {@link CommonGramsQueryFilter}.
*
- * This is pretty close to a straight copy from {@link StopFilterFactory}.
- *
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
* </analyzer>
* </fieldType></pre>
- *
*/
-public class CommonGramsQueryFilterFactory extends TokenFilterFactory
- implements ResourceLoaderAware {
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
- assureMatchVersion();
- }
-
- @Override
- public void inform(ResourceLoader loader) throws IOException {
- String commonWordFiles = args.get("words");
- ignoreCase = getBoolean("ignoreCase", false);
-
- if (commonWordFiles != null) {
- if ("snowball".equalsIgnoreCase(args.get("format"))) {
- commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
- } else {
- commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
- }
- } else {
- commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
- }
- }
-
- // Force the use of a char array set, as it is the most performant, although
- // this may break things if Lucene ever goes away from it. See SOLR-1095
- private CharArraySet commonWords;
-
- private boolean ignoreCase;
-
- public boolean isIgnoreCase() {
- return ignoreCase;
- }
+public class CommonGramsQueryFilterFactory extends CommonGramsFilterFactory {
- public CharArraySet getCommonWords() {
- return commonWords;
+ /** Creates a new CommonGramsQueryFilterFactory */
+ public CommonGramsQueryFilterFactory(Map<String,String> args) {
+ super(args);
}
/**
* Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
*/
@Override
- public CommonGramsQueryFilter create(TokenStream input) {
- CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
- CommonGramsQueryFilter commonGramsQuery = new CommonGramsQueryFilter(
- commonGrams);
- return commonGramsQuery;
+ public TokenFilter create(TokenStream input) {
+ CommonGramsFilter commonGrams = (CommonGramsFilter) super.create(input);
+ return new CommonGramsQueryFilter(commonGrams);
}
}
Modified: lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/branches/lucene4258/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java Tue May 7 11:20:55 2013
@@ -17,15 +17,18 @@ package org.apache.lucene.analysis.compo
* limitations under the License.
*/
-import org.apache.lucene.analysis.util.*;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.util.Map;
import java.io.IOException;
/**
* Factory for {@link DictionaryCompoundWordTokenFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -33,37 +36,39 @@ import java.io.IOException;
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
* </analyzer>
* </fieldType></pre>
- *
*/
-public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
- private String dictFile;
- private int minWordSize;
- private int minSubwordSize;
- private int maxSubwordSize;
- private boolean onlyLongestMatch;
- @Override
- public void init(Map<String, String> args) {
- super.init(args);
+ private final String dictFile;
+ private final int minWordSize;
+ private final int minSubwordSize;
+ private final int maxSubwordSize;
+ private final boolean onlyLongestMatch;
+
+ /** Creates a new DictionaryCompoundWordTokenFilterFactory */
+ public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
+ super(args);
assureMatchVersion();
- dictFile = args.get("dictionary");
- if (null == dictFile) {
- throw new IllegalArgumentException("Missing required parameter: dictionary");
+ dictFile = require(args, "dictionary");
+ minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+ minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+ maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+ onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
}
-
- minWordSize= getInt("minWordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
- minSubwordSize= getInt("minSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
- maxSubwordSize= getInt("maxSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
- onlyLongestMatch = getBoolean("onlyLongestMatch",true);
}
+
@Override
public void inform(ResourceLoader loader) throws IOException {
dictionary = super.getWordSet(loader, dictFile, false);
}
+
@Override
public TokenStream create(TokenStream input) {
// if the dictionary is null, it means it was empty
- return dictionary == null ? input : new DictionaryCompoundWordTokenFilter(luceneMatchVersion,input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
+ return dictionary == null ? input : new DictionaryCompoundWordTokenFilter
+ (luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
}