You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/08/13 15:53:27 UTC
svn commit: r1372423 [3/45] - in /lucene/dev/branches/LUCENE-2878: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ de...
Modified: lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py (original)
+++ lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py Mon Aug 13 13:52:46 2012
@@ -23,7 +23,7 @@ reMarkup = re.compile('<.*?>')
def checkSummary(fullPath):
printed = False
- f = open(fullPath)
+ f = open(fullPath, encoding='UTF-8')
anyMissing = False
sawPackage = False
desc = []
@@ -41,10 +41,10 @@ def checkSummary(fullPath):
desc = desc.strip()
if desc == '':
if not printed:
- print
- print fullPath
+ print()
+ print(fullPath)
printed = True
- print ' no package description (missing package.html in src?)'
+ print(' no package description (missing package.html in src?)')
anyMissing = True
desc = None
else:
@@ -52,17 +52,17 @@ def checkSummary(fullPath):
if lineLower in ('<td> </td>', '<td></td>', '<td class="collast"> </td>'):
if not printed:
- print
- print fullPath
+ print()
+ print(fullPath)
printed = True
- print ' missing: %s' % unescapeHTML(lastHREF)
+ print(' missing: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:
- print
- print fullPath
+ print()
+ print(fullPath)
printed = True
- print ' license-is-javadoc: %s' % unescapeHTML(lastHREF)
+ print(' license-is-javadoc: %s' % unescapeHTML(lastHREF))
anyMissing = True
m = reHREF.search(line)
if m is not None:
@@ -85,17 +85,17 @@ def checkPackageSummaries(root, level='c
"""
if level != 'class' and level != 'package':
- print 'unsupported level: %s, must be "class" or "package"' % level
+ print('unsupported level: %s, must be "class" or "package"' % level)
sys.exit(1)
#for dirPath, dirNames, fileNames in os.walk('%s/lucene/build/docs/api' % root):
if False:
os.chdir(root)
- print
- print 'Run "ant javadocs" > javadocs.log...'
+ print()
+ print('Run "ant javadocs" > javadocs.log...')
if os.system('ant javadocs > javadocs.log 2>&1'):
- print ' FAILED'
+ print(' FAILED')
sys.exit(1)
anyMissing = False
@@ -116,14 +116,14 @@ def checkPackageSummaries(root, level='c
if __name__ == '__main__':
if len(sys.argv) < 2 or len(sys.argv) > 3:
- print 'usage: %s <dir> [class|package]' % sys.argv[0]
+ print('usage: %s <dir> [class|package]' % sys.argv[0])
sys.exit(1)
if len(sys.argv) == 2:
level = 'class'
else:
level = sys.argv[2]
if checkPackageSummaries(sys.argv[1], level):
- print
- print 'Missing javadocs were found!'
+ print()
+ print('Missing javadocs were found!')
sys.exit(1)
sys.exit(0)
Modified: lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py (original)
+++ lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py Mon Aug 13 13:52:46 2012
@@ -65,7 +65,7 @@ class FindHyperlinks(HTMLParser):
href = href.strip()
self.links.append(urlparse.urljoin(self.baseURL, href))
else:
- if self.baseURL.endswith(os.path.sep + 'AttributeSource.html'):
+ if self.baseURL.endswith('/AttributeSource.html'):
# LUCENE-4010: AttributeSource's javadocs has an unescaped <A> generics!! Seems to be a javadocs bug... (fixed in Java 7)
pass
else:
@@ -126,7 +126,7 @@ def checkAll(dirName):
main not in ('deprecated-list',):
# Somehow even w/ java 7 generated javadocs,
# deprecated-list.html can fail to escape generics types
- fullPath = os.path.join(root, f)
+ fullPath = os.path.join(root, f).replace(os.path.sep,'/')
#print ' %s' % fullPath
allFiles[fullPath] = parse(fullPath, open('%s/%s' % (root, f), encoding='UTF-8').read())
@@ -193,6 +193,14 @@ def checkAll(dirName):
# see LUCENE-4011: this is a javadocs bug for constants
# on annotations it seems?
pass
+ elif link.startswith('file:'):
+ filepath = urlparse.unquote(urlparse.urlparse(link).path)
+ if not (os.path.exists(filepath) or os.path.exists(filepath[1:])):
+ if not printed:
+ printed = True
+ print()
+ print(fullPath)
+ print(' BROKEN LINK: %s' % link)
elif link not in allFiles:
# We only load HTML... so if the link is another resource (eg
# SweetSpotSimilarity refs
Modified: lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py Mon Aug 13 13:52:46 2012
@@ -20,12 +20,12 @@ import subprocess
import signal
import shutil
import hashlib
-import httplib
+import http.client
import re
-import urllib2
-import urlparse
+import urllib.request, urllib.error, urllib.parse
+import urllib.parse
import sys
-import HTMLParser
+import html.parser
from collections import defaultdict
import xml.etree.ElementTree as ET
import filecmp
@@ -38,9 +38,9 @@ import checkJavadocLinks
# tested on Linux and on Cygwin under Windows 7.
def unshortenURL(url):
- parsed = urlparse.urlparse(url)
+ parsed = urllib.parse.urlparse(url)
if parsed[0] in ('http', 'https'):
- h = httplib.HTTPConnection(parsed.netloc)
+ h = http.client.HTTPConnection(parsed.netloc)
h.request('HEAD', parsed.path)
response = h.getresponse()
if response.status/100 == 3 and response.getheader('Location'):
@@ -58,7 +58,7 @@ def javaExe(version):
def verifyJavaVersion(version):
s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
- if s.find('java version "%s.' % version) == -1:
+ if s.find(' version "%s.' % version) == -1:
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
# http://s.apache.org/lusolr32rc2
@@ -101,8 +101,8 @@ def getHREFs(urlString):
# Deref any redirects
while True:
- url = urlparse.urlparse(urlString)
- h = httplib.HTTPConnection(url.netloc)
+ url = urllib.parse.urlparse(urlString)
+ h = http.client.HTTPConnection(url.netloc)
h.request('GET', url.path)
r = h.getresponse()
newLoc = r.getheader('location')
@@ -112,8 +112,8 @@ def getHREFs(urlString):
break
links = []
- for subUrl, text in reHREF.findall(urllib2.urlopen(urlString).read()):
- fullURL = urlparse.urljoin(urlString, subUrl)
+ for subUrl, text in reHREF.findall(urllib.request.urlopen(urlString).read().decode('UTF-8')):
+ fullURL = urllib.parse.urljoin(urlString, subUrl)
links.append((text, fullURL))
return links
@@ -121,15 +121,15 @@ def download(name, urlString, tmpDir, qu
fileName = '%s/%s' % (tmpDir, name)
if DEBUG and os.path.exists(fileName):
if not quiet and fileName.find('.asc') == -1:
- print ' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
+ print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
return
- fIn = urllib2.urlopen(urlString)
+ fIn = urllib.request.urlopen(urlString)
fOut = open(fileName, 'wb')
success = False
try:
while True:
s = fIn.read(65536)
- if s == '':
+ if s == b'':
break
fOut.write(s)
fOut.close()
@@ -141,14 +141,14 @@ def download(name, urlString, tmpDir, qu
if not success:
os.remove(fileName)
if not quiet and fileName.find('.asc') == -1:
- print ' %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
+ print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
def load(urlString):
- return urllib2.urlopen(urlString).read()
+ return urllib.request.urlopen(urlString).read().decode('utf-8')
def checkSigs(project, urlString, version, tmpDir, isSigned):
- print ' test basics...'
+ print(' test basics...')
ents = getDirEntries(urlString)
artifact = None
keysURL = None
@@ -210,7 +210,7 @@ def checkSigs(project, urlString, versio
if keysURL is None:
raise RuntimeError('%s is missing KEYS' % project)
- print ' get KEYS'
+ print(' get KEYS')
download('%s.KEYS' % project, keysURL, tmpDir)
keysFile = '%s/%s.KEYS' % (tmpDir, project)
@@ -219,7 +219,7 @@ def checkSigs(project, urlString, versio
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
- os.makedirs(gpgHomeDir, 0700)
+ os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
@@ -232,12 +232,12 @@ def checkSigs(project, urlString, versio
testChanges(project, version, changesURL)
for artifact, urlString in artifacts:
- print ' download %s...' % artifact
+ print(' download %s...' % artifact)
download(artifact, urlString, tmpDir)
verifyDigests(artifact, urlString, tmpDir)
if isSigned:
- print ' verify sig'
+ print(' verify sig')
# Test sig (this is done with a clean brand-new GPG world)
download(artifact + '.asc', urlString + '.asc', tmpDir)
sigFile = '%s/%s.asc' % (tmpDir, artifact)
@@ -246,28 +246,28 @@ def checkSigs(project, urlString, versio
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since it's a clean world)
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
run('gpg --import %s' % (keysFile),
'%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
- print ' verify trust'
+ print(' verify trust')
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
def testChanges(project, version, changesURLString):
- print ' check changes HTML...'
+ print(' check changes HTML...')
changesURL = None
for text, subURL in getDirEntries(changesURLString):
if text == 'Changes.html':
@@ -287,7 +287,7 @@ def testChangesText(dir, version, projec
if 'CHANGES.txt' in files:
fullPath = '%s/CHANGES.txt' % root
#print 'CHECK %s' % fullPath
- checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
+ checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
def checkChangesContent(s, version, name, project, isHTML):
@@ -336,7 +336,7 @@ def run(command, logFile):
raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath))
def verifyDigests(artifact, urlString, tmpDir):
- print ' verify md5/sha1 digests'
+ print(' verify md5/sha1 digests')
md5Expected, t = load(urlString + '.md5').strip().split()
if t != '*'+artifact:
raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))
@@ -347,10 +347,10 @@ def verifyDigests(artifact, urlString, t
m = hashlib.md5()
s = hashlib.sha1()
- f = open('%s/%s' % (tmpDir, artifact))
+ f = open('%s/%s' % (tmpDir, artifact), 'rb')
while True:
x = f.read(65536)
- if x == '':
+ if len(x) == 0:
break
m.update(x)
s.update(x)
@@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, t
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
def getDirEntries(urlString):
+ if urlString.startswith('file:/') and not urlString.startswith('file://'):
+ # stupid bogus ant URI
+ urlString = "file:///" + urlString[6:]
+
if urlString.startswith('file://'):
path = urlString[7:]
if path.endswith('/'):
@@ -388,7 +392,7 @@ def unpack(project, tmpDir, artifact, ve
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
- print ' unpack %s...' % artifact
+ print(' unpack %s...' % artifact)
unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'):
run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
@@ -437,12 +441,14 @@ def verifyUnpacked(project, artifact, un
if project == 'lucene':
# TODO: clean this up to not be a list of modules that we must maintain
- extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework')
+ extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
if isSrc:
extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
else:
extras = ()
+ # TODO: if solr, verify lucene/licenses, solr/licenses are present
+
for e in extras:
if e not in l:
raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
@@ -453,81 +459,81 @@ def verifyUnpacked(project, artifact, un
raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
if isSrc:
- print ' make sure no JARs/WARs in src dist...'
+ print(' make sure no JARs/WARs in src dist...')
lines = os.popen('find . -name \\*.jar').readlines()
if len(lines) != 0:
- print ' FAILED:'
+ print(' FAILED:')
for line in lines:
- print ' %s' % line.strip()
+ print(' %s' % line.strip())
raise RuntimeError('source release has JARs...')
lines = os.popen('find . -name \\*.war').readlines()
if len(lines) != 0:
- print ' FAILED:'
+ print(' FAILED:')
for line in lines:
- print ' %s' % line.strip()
+ print(' %s' % line.strip())
raise RuntimeError('source release has WARs...')
- print ' run "ant validate"'
+ print(' run "ant validate"')
run('%s; ant validate' % javaExe('1.7'), '%s/validate.log' % unpackPath)
if project == 'lucene':
- print ' run tests w/ Java 6...'
+ print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath)
testDemo(isSrc, version)
# test javadocs
- print ' generate javadocs w/ Java 6...'
+ print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
else:
- print ' run tests w/ Java 6...'
+ print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
# test javadocs
- print ' generate javadocs w/ Java 6...'
+ print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
- print ' run tests w/ Java 7...'
+ print(' run tests w/ Java 7...')
run('%s; ant test' % javaExe('1.7'), '%s/test.log' % unpackPath)
# test javadocs
- print ' generate javadocs w/ Java 7...'
+ print(' generate javadocs w/ Java 7...')
run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
os.chdir('solr')
- print ' test solr example w/ Java 6...'
+ print(' test solr example w/ Java 6...')
run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA6_HOME, True)
- print ' test solr example w/ Java 7...'
+ print(' test solr example w/ Java 7...')
run('%s; ant clean example' % javaExe('1.7'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA7_HOME, True)
os.chdir('..')
- print ' check NOTICE'
+ print(' check NOTICE')
testNotice(unpackPath)
else:
if project == 'lucene':
testDemo(isSrc, version)
else:
- print ' test solr example w/ Java 6...'
+ print(' test solr example w/ Java 6...')
testSolrExample(unpackPath, JAVA6_HOME, False)
- print ' test solr example w/ Java 7...'
+ print(' test solr example w/ Java 7...')
testSolrExample(unpackPath, JAVA7_HOME, False)
testChangesText('.', version, project)
if project == 'lucene' and not isSrc:
- print ' check Lucene\'s javadoc JAR'
+ print(' check Lucene\'s javadoc JAR')
checkJavadocpath('%s/docs' % unpackPath)
def testNotice(unpackPath):
- solrNotice = open('%s/NOTICE.txt' % unpackPath).read()
- luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath).read()
+ solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
+ luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
expected = """
=========================================================================
@@ -545,12 +551,12 @@ def readSolrOutput(p, startupEvent, logF
try:
while True:
line = p.readline()
- if line == '':
+ if len(line) == 0:
break
f.write(line)
f.flush()
# print 'SOLR: %s' % line.strip()
- if line.find('Started SocketConnector@0.0.0.0:8983') != -1:
+ if line.decode('UTF-8').find('Started SocketConnector@0.0.0.0:8983') != -1:
startupEvent.set()
finally:
f.close()
@@ -558,7 +564,7 @@ def readSolrOutput(p, startupEvent, logF
def testSolrExample(unpackPath, javaPath, isSrc):
logFile = '%s/solr-example.log' % unpackPath
os.chdir('example')
- print ' start Solr instance (log=%s)...' % logFile
+ print(' start Solr instance (log=%s)...' % logFile)
env = {}
env.update(os.environ)
env['JAVA_HOME'] = javaPath
@@ -572,21 +578,21 @@ def testSolrExample(unpackPath, javaPath
# Make sure Solr finishes startup:
startupEvent.wait()
- print ' startup done'
+ print(' startup done')
try:
- print ' test utf8...'
+ print(' test utf8...')
run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
- print ' index example docs...'
+ print(' index example docs...')
run('sh ./exampledocs/post.sh ./exampledocs/*.xml', 'post-example-docs.log')
- print ' run query...'
- s = urllib2.urlopen('http://localhost:8983/solr/select/?q=video').read()
+ print(' run query...')
+ s = urllib.request.urlopen('http://localhost:8983/solr/select/?q=video').read().decode('UTF-8')
if s.find('<result name="response" numFound="3" start="0">') == -1:
- print 'FAILED: response is:\n%s' % s
+ print('FAILED: response is:\n%s' % s)
raise RuntimeError('query on solr example instance failed')
finally:
# Stop server:
- print ' stop server (SIGINT)...'
+ print(' stop server (SIGINT)...')
os.kill(server.pid, signal.SIGINT)
# Give it 10 seconds to gracefully shut down
@@ -594,14 +600,14 @@ def testSolrExample(unpackPath, javaPath
if serverThread.isAlive():
# Kill server:
- print '***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...'
+ print('***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...')
os.kill(server.pid, signal.SIGKILL)
serverThread.join(10.0)
if serverThread.isAlive():
# Shouldn't happen unless something is seriously wrong...
- print '***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...'
+ print('***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...')
os.chdir('..')
@@ -615,13 +621,13 @@ def checkJavadocpath(path):
if checkJavaDocs.checkPackageSummaries(path):
# disabled: RM cannot fix all this, see LUCENE-3887
# raise RuntimeError('javadoc problems')
- print '\n***WARNING***: javadocs want to fail!\n'
+ print('\n***WARNING***: javadocs want to fail!\n')
if checkJavadocLinks.checkAll(path):
raise RuntimeError('broken javadocs links found!')
def testDemo(isSrc, version):
- print ' test demo...'
+ print(' test demo...')
sep = ';' if cygwin else ':'
if isSrc:
cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep)
@@ -632,14 +638,14 @@ def testDemo(isSrc, version):
run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log')
run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log')
reMatchingDocs = re.compile('(\d+) total matching documents')
- m = reMatchingDocs.search(open('search.log', 'rb').read())
+ m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
if m is None:
raise RuntimeError('lucene demo\'s SearchFiles found no results')
else:
numHits = int(m.group(1))
if numHits < 100:
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
- print ' got %d hits for query "lucene"' % numHits
+ print(' got %d hits for query "lucene"' % numHits)
def checkMaven(baseURL, tmpDir, version, isSigned):
# Locate the release branch in subversion
@@ -652,11 +658,11 @@ def checkMaven(baseURL, tmpDir, version,
if text == releaseBranchText:
releaseBranchSvnURL = subURL
- print ' get POM templates',
+ print(' get POM templates', end=' ')
POMtemplates = defaultdict()
getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
- print
- print ' download artifacts',
+ print()
+ print(' download artifacts', end=' ')
artifacts = {'lucene': [], 'solr': []}
for project in ('lucene', 'solr'):
artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
@@ -664,30 +670,30 @@ def checkMaven(baseURL, tmpDir, version,
if not os.path.exists(targetDir):
os.makedirs(targetDir)
crawl(artifacts[project], artifactsURL, targetDir)
- print
- print ' verify that each binary artifact has a deployed POM...'
+ print()
+ print(' verify that each binary artifact has a deployed POM...')
verifyPOMperBinaryArtifact(artifacts, version)
- print ' verify that there is an artifact for each POM template...'
+ print(' verify that there is an artifact for each POM template...')
verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
- print " verify Maven artifacts' md5/sha1 digests..."
+ print(" verify Maven artifacts' md5/sha1 digests...")
verifyMavenDigests(artifacts)
- print ' verify that all non-Mavenized deps are deployed...'
+ print(' verify that all non-Mavenized deps are deployed...')
nonMavenizedDeps = dict()
checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
version, releaseBranchSvnURL)
- print ' check for javadoc and sources artifacts...'
+ print(' check for javadoc and sources artifacts...')
checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
- print " verify deployed POMs' coordinates..."
+ print(" verify deployed POMs' coordinates...")
verifyDeployedPOMsCoordinates(artifacts, version)
if isSigned:
- print ' verify maven artifact sigs',
+ print(' verify maven artifact sigs', end=' ')
verifyMavenSigs(baseURL, tmpDir, artifacts)
distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
- print ' verify that non-Mavenized deps are same as in the binary distribution...'
+ print(' verify that non-Mavenized deps are same as in the binary distribution...')
checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
- print ' verify that Maven artifacts are same as in the binary distribution...'
+ print(' verify that Maven artifacts are same as in the binary distribution...')
checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
def getDistributionsForMavenChecks(tmpDir, version, baseURL):
@@ -697,19 +703,19 @@ def getDistributionsForMavenChecks(tmpDi
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
- print ' download %s...' % distribution,
+ print(' download %s...' % distribution, end=' ')
download(distribution, distURL, tmpDir)
destDir = '%s/unpack-%s-maven' % (tmpDir, project)
if os.path.exists(destDir):
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
- print ' unpack %s...' % distribution
+ print(' unpack %s...' % distribution)
unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
if project == 'solr': # unpack the Solr war
unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
- print ' unpack Solr war...'
+ print(' unpack Solr war...')
run('jar xvf */dist/*.war', unpackLogFile)
distributionFiles[project] = []
for root, dirs, files in os.walk(destDir):
@@ -719,7 +725,7 @@ def getDistributionsForMavenChecks(tmpDi
def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
for project in ('lucene', 'solr'):
for artifact in artifacts[project]:
- if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
+ if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
javadocJar = artifact[:-4] + '-javadoc.jar'
if javadocJar not in artifacts[project]:
raise RuntimeError('missing: %s' % javadocJar)
@@ -732,7 +738,7 @@ def checkIdenticalNonMavenizedDeps(distr
distFilenames = dict()
for file in distributionFiles[project]:
distFilenames[os.path.basename(file)] = file
- for dep in nonMavenizedDeps.keys():
+ for dep in list(nonMavenizedDeps.keys()):
if ('/%s/' % project) in dep:
depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
if not depOrigFilename in distFilenames:
@@ -753,9 +759,9 @@ def checkIdenticalMavenArtifacts(distrib
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
- if artifact not in nonMavenizedDeps.keys():
+ if artifact not in list(nonMavenizedDeps.keys()):
artifactFilename = os.path.basename(artifact)
- if artifactFilename not in distFilenames.keys():
+ if artifactFilename not in list(distFilenames.keys()):
raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
% (artifact, project))
# TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
@@ -772,16 +778,17 @@ def verifyMavenDigests(artifacts):
raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
if artifactFile + '.sha1' not in artifacts[project]:
raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
- with open(artifactFile + '.md5', 'r') as md5File:
+ with open(artifactFile + '.md5', encoding='UTF-8') as md5File:
md5Expected = md5File.read().strip()
- with open(artifactFile + '.sha1', 'r') as sha1File:
+ with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File:
sha1Expected = sha1File.read().strip()
md5 = hashlib.md5()
sha1 = hashlib.sha1()
- inputFile = open(artifactFile)
+ inputFile = open(artifactFile, 'rb')
while True:
bytes = inputFile.read(65536)
- if bytes == '': break
+ if len(bytes) == 0:
+ break
md5.update(bytes)
sha1.update(bytes)
inputFile.close()
@@ -846,7 +853,7 @@ def checkNonMavenizedDeps(nonMavenizedDe
if releaseBranchSvnURL is None:
pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
if os.path.exists(pomPath):
- doc2 = ET.XML(open(pomPath).read())
+ doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
break
else:
entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
@@ -891,7 +898,7 @@ def verifyMavenSigs(baseURL, tmpDir, art
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
- os.makedirs(gpgHomeDir, 0700)
+ os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log' % (tmpDir, project))
@@ -904,12 +911,12 @@ def verifyMavenSigs(baseURL, tmpDir, art
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since it's a clean world)
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
@@ -918,16 +925,16 @@ def verifyMavenSigs(baseURL, tmpDir, art
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
sys.stdout.write('.')
- print
+ print()
def verifyPOMperBinaryArtifact(artifacts, version):
"""verify that each binary jar and war has a corresponding POM file"""
@@ -1023,17 +1030,20 @@ def crawl(downloadedFiles, urlString, ta
def main():
- if len(sys.argv) != 4:
- print
- print 'Usage python -u %s BaseURL version tmpDir' % sys.argv[0]
- print
+ if len(sys.argv) < 4:
+ print()
+ print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
+ print()
sys.exit(1)
baseURL = sys.argv[1]
version = sys.argv[2]
tmpDir = os.path.abspath(sys.argv[3])
+ isSigned = True
+ if len(sys.argv) == 5:
+ isSigned = (sys.argv[4] == "True")
- smokeTest(baseURL, version, tmpDir, True)
+ smokeTest(baseURL, version, tmpDir, isSigned)
def smokeTest(baseURL, version, tmpDir, isSigned):
@@ -1046,11 +1056,11 @@ def smokeTest(baseURL, version, tmpDir,
lucenePath = None
solrPath = None
- print
- print 'Load release URL "%s"...' % baseURL
+ print()
+ print('Load release URL "%s"...' % baseURL)
newBaseURL = unshortenURL(baseURL)
if newBaseURL != baseURL:
- print ' unshortened: %s' % newBaseURL
+ print(' unshortened: %s' % newBaseURL)
baseURL = newBaseURL
for text, subURL in getDirEntries(baseURL):
@@ -1064,23 +1074,28 @@ def smokeTest(baseURL, version, tmpDir,
if solrPath is None:
raise RuntimeError('could not find solr subdir')
- print
- print 'Test Lucene...'
+ print()
+ print('Test Lucene...')
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
unpack('lucene', tmpDir, artifact, version)
unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
- print
- print 'Test Solr...'
+ print()
+ print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
unpack('solr', tmpDir, artifact, version)
unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
- print 'Test Maven artifacts for Lucene and Solr...'
+ print('Test Maven artifacts for Lucene and Solr...')
checkMaven(baseURL, tmpDir, version, isSigned)
if __name__ == '__main__':
- main()
-
+ try:
+ main()
+ except:
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
+ sys.exit(0)
Modified: lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt Mon Aug 13 13:52:46 2012
@@ -6,11 +6,68 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
+======================= Lucene 4.0.0 =======================
+
+New Features
+
+* LUCENE-1888: Added the option to store payloads in the term
+ vectors (IndexableFieldType.storeTermVectorPayloads()). Note
+ that you must store term vector positions to store payloads.
+ (Robert Muir)
+
+API Changes
+
+* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
+ Previously you had no real way to know that a term vector field
+ had positions or offsets, since this can be configured on a
+ per-field-per-document basis. (Robert Muir)
+
+* Removed DocsAndPositionsEnum.hasPayload() and simplified the
+ contract of getPayload(). It returns null if there is no payload,
+ otherwise returns the current payload. You can now call it multiple
+ times per position if you want. (Robert Muir)
+
+* Removed FieldsEnum. Fields API instead implements Iterable<String>
+ and exposes Iterator, so you can iterate over field names with
+ for (String field : fields) instead. (Robert Muir)
+
+Bug Fixes
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+ twice for conjunctions: for most users this is no problem, but
+ if you had a customized Similarity that returned something other
+ than 1 when overlap == maxOverlap (always the case for conjunctions),
+ then the score would be incorrect. (Pascal Chollet, Robert Muir)
+
+* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
+ did not work at all, it would infinitely recurse.
+ (Alberto Paro via Robert Muir)
+
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+ had a custom Similarity where coord(1,1) != 1F, then the rewritten
+ query would be scored differently. (Robert Muir)
+
+* Don't allow negatives in the positions file. If you have an index
+ from 2.4.0 or earlier with such negative positions, and you already
+ upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
+ CheckIndex. If it fails, then you need to upgrade again to 4.0 (Robert Muir)
+
+Build
+
+* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
+ thread leak detection. Added support for suite timeouts. (Dawid Weiss)
======================= Lucene 4.0.0-BETA =======================
New features
+* LUCENE-4249: Changed the explanation of the PayloadTermWeight to use the
+ underlying PayloadFunction's explanation as the explanation
+ for the payload score. (Scott Smerchek via Robert Muir)
+
+* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
+ such as primary keys (Mark Harwood, Mike McCandless)
+
* LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
iteration marks. (Robert Muir, Christian Moen)
@@ -22,6 +79,30 @@ New features
respect field boundaries in the case of highlighting for multivalued fields.
(Martijn van Groningen)
+* LUCENE-4227: Added DirectPostingsFormat, to hold all postings in
+ memory as uncompressed simple arrays. This uses a tremendous amount
+ of RAM but gives good search performance gains. (Mike McCandless)
+
+* LUCENE-2510, LUCENE-4044: Migrated Solr's Tokenizer-, TokenFilter-, and
+ CharFilterFactories to the lucene-analysis module. The API is still
+ experimental. (Chris Male, Robert Muir, Uwe Schindler)
+
+* LUCENE-4230: When pulling a DocsAndPositionsEnum you can now
+ specify whether or not you require payloads (in addition to
+ offsets); turning one or both off may allow some codec
+ implementations to optimize the enum implementation. (Robert Muir,
+ Mike McCandless)
+
+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
+ int docID), to attempt deletion by docID as long as the provided
+ reader is an NRT reader, and the segment has not yet been merged
+ away (Mike McCandless).
+
+* LUCENE-4286: Added option to CJKBigramFilter to always also output
+ unigrams. This can be used for a unigram+bigram approach, or at
+ index-time only for better support of short queries.
+ (Tom Burton-West, Robert Muir)
+
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@@ -46,6 +127,34 @@ API Changes
filter another reader and you override correct() for offset correction.
(Robert Muir)
+* LUCENE-4240: Analyzer api now just takes fieldName for getOffsetGap. If the
+ field is not analyzed (e.g. StringField), then the analyzer is not invoked
+ at all. If you want to tweak things like positionIncrementGap and offsetGap,
+ analyze the field with KeywordTokenizer instead. (Grant Ingersoll, Robert Muir)
+
+* LUCENE-4250: Pass fieldName to the PayloadFunction explain method, so it
+ parallels with docScore and the default implementation is correct.
+ (Robert Muir)
+
+* LUCENE-3747: Support Unicode 6.1.0. (Steve Rowe)
+
+* LUCENE-3884: Moved ElisionFilter out of org.apache.lucene.analysis.fr
+ package into org.apache.lucene.analysis.util. (Robert Muir)
+
+* LUCENE-4230: When pulling a DocsAndPositionsEnum you now pass an int
+ flags instead of the previous boolean needOffsets. Currently
+ recognized flags are DocsAndPositionsEnum.FLAG_PAYLOADS and
+ DocsAndPositionsEnum.FLAG_OFFSETS (Robert Muir, Mike McCandless)
+
+* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
+ instead of the previous boolean needsFlags; consistent with the changes
+ for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
+ is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
+
+* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
+ as the Store parameter didn't make sense: if you supplied Store.YES, you would only
+ receive an exception anyway. (Robert Muir)
+
Optimizations
* LUCENE-4171: Performance improvements to Packed64.
@@ -57,8 +166,19 @@ Optimizations
* LUCENE-4235: Remove enforcing of Filter rewrite for NRQ queries.
(Uwe Schindler)
+* LUCENE-4279: Regenerated snowball Stemmers from snowball r554,
+ making them substantially more lightweight. Behavior is unchanged.
+ (Robert Muir)
+
+* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers
+ such as StandardTokenizer from 32kb to 8kb.
+ (Raintung Li, Steven Rowe, Robert Muir)
+
Bug Fixes
+* LUCENE-4109: BooleanQueries are not parsed correctly with the
+ flexible query parser. (Karsten Rauch via Robert Muir)
+
* LUCENE-4176: Fix AnalyzingQueryParser to analyze range endpoints as bytes,
so that it works correctly with Analyzers that produce binary non-UTF-8 terms
such as CollationAnalyzer. (Nattapong Sirilappanich via Robert Muir)
@@ -84,6 +204,36 @@ Bug Fixes
all queries. Made Scorer.freq() abstract.
(Koji Sekiguchi, Mike McCandless, Robert Muir)
+* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest,
+ and the number of matching documents is too large. (Gilad Barkai via Shai Erera)
+
+* LUCENE-4245: Make IndexWriter#close() and MergeScheduler#close()
+ non-interruptible. (Mark Miller, Uwe Schindler)
+
+* LUCENE-4190: restrict allowed filenames that a codec may create to
+ the patterns recognized by IndexFileNames. This also fixes
+ IndexWriter to only delete files matching this pattern from an index
+ directory, to reduce risk when the wrong index path is accidentally
+ passed to IndexWriter (Robert Muir, Mike McCandless)
+
+* LUCENE-4277: Fix IndexWriter deadlock during rollback if flushable DWPT
+ instance are already checked out and queued up but not yet flushed.
+ (Simon Willnauer)
+
+* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
+ (Johannes Christen, Uwe Schindler, Robert Muir)
+
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+ (Robert Muir)
+
+Changes in Runtime Behavior
+
+* LUCENE-4109: Enable position increments in the flexible queryparser by default.
+ (Karsten Rauch via Robert Muir)
+
+* LUCENE-3616: Field throws exception if you try to set a boost on an
+ unindexed field or one that omits norms. (Robert Muir)
+
Build
* LUCENE-4094: Support overriding file.encoding on forked test JVMs
@@ -1234,7 +1384,60 @@ Build
tasks) to correctly encode build file names as URIs for later processing by
XSL. (Greg Bowyer, Uwe Schindler)
+
+======================= Lucene 3.6.1 =======================
+More information about this release, including any errata related to the
+release notes, upgrade instructions, or other changes may be found online at:
+ https://wiki.apache.org/lucene-java/Lucene3.6.1
+
+Bug Fixes
+
+* LUCENE-3969: Throw IAE on bad arguments that could cause confusing
+ errors in KeywordTokenizer.
+ (Uwe Schindler, Mike McCandless, Robert Muir)
+
+* LUCENE-3971: MappingCharFilter could return invalid final token position.
+ (Dawid Weiss, Robert Muir)
+
+* LUCENE-4023: DisjunctionMaxScorer now implements visitSubScorers().
+ (Uwe Schindler)
+
+* LUCENE-2566: + - operators allow any amount of whitespace (yonik, janhoy)
+
+* LUCENE-3590: Fix AIOOBE in BytesRef/CharsRef copyBytes/copyChars when
+ offset is nonzero, fix off-by-one in CharsRef.subSequence, and fix
+ CharsRef's CharSequence methods to throw exceptions in boundary cases
+ to properly meet the specification. (Robert Muir)
+
+* LUCENE-4222: TieredMergePolicy.getFloorSegmentMB was returning the
+ size in bytes not MB (Chris Fuller via Mike McCandless)
+
+API Changes
+
+* LUCENE-4023: Changed the visibility of Scorer#visitSubScorers() to
+ public, otherwise it's impossible to implement Scorers outside
+ the Lucene package. (Uwe Schindler)
+
+Optimizations
+
+* LUCENE-4163: Improve concurrency of MMapIndexInput.clone() by using
+ the new WeakIdentityMap on top of a ConcurrentHashMap to manage
+ the cloned instances. WeakIdentityMap was extended to support
+ iterating over its keys. (Uwe Schindler)
+
+Tests
+
+* LUCENE-3873: add MockGraphTokenFilter, testing analyzers with
+ random graph tokens. (Mike McCandless)
+
+* LUCENE-3968: factor out LookaheadTokenFilter from
+ MockGraphTokenFilter (Mike McCandless)
+
+
======================= Lucene 3.6.0 =======================
+More information about this release, including any errata related to the
+release notes, upgrade instructions, or other changes may be found online at:
+ https://wiki.apache.org/lucene-java/Lucene3.6
Changes in backwards compatibility policy
@@ -1290,7 +1493,7 @@ Changes in backwards compatibility polic
* LUCENE-3712: Removed unused and untested ReaderUtil#subReader methods.
(Uwe Schindler)
-
+
* LUCENE-3672: Deprecate Directory.fileModified,
IndexCommit.getTimestamp and .getVersion and
IndexReader.lastModified and getCurrentVersion (Andrzej Bialecki,
@@ -1313,6 +1516,10 @@ Changes in backwards compatibility polic
* LUCENE-3738: All readXxx methods in BufferedIndexInput were made
final. Subclasses should only override protected readInternal /
seekInternal. (Uwe Schindler)
+
+* LUCENE-2599: Deprecated the spatial contrib module, which was buggy and not
+ well maintained. Lucene 4 includes a new spatial module that replaces this.
+ (David Smiley, Ryan McKinley, Chris Male)
Changes in Runtime Behavior
@@ -1354,7 +1561,7 @@ API Changes
query time, wrap your IndexReader using FilterIndexReader, overriding
FilterIndexReader.norms(). To persist the changes on disk, copy the
FilteredIndexReader to a new index using IndexWriter.addIndexes().
- In Lucene 4.0, Similarity will allow you to customize scoring
+ In Lucene 4.0, SimilarityProvider will allow you to customize scoring
using external norms, too. (Uwe Schindler, Robert Muir)
* LUCENE-3735: PayloadProcessorProvider was changed to return a
@@ -1379,7 +1586,7 @@ API Changes
never applying deletes). (MJB, Shai Erera, Mike McCandless)
* LUCENE-3761: Generalize SearcherManager into an abstract ReferenceManager.
- SearcherManager remains a concrete class, but due to the refactoring, the
+ SearcherManager remains a concrete class, but due to the refactoring, the
method maybeReopen has been deprecated in favor of maybeRefresh().
(Shai Erera, Mike McCandless, Simon Willnauer)
@@ -1404,7 +1611,7 @@ New Features
queries. Literal asterisks may be represented by quoting or escaping
(i.e. \* or "*") Custom QueryParser subclasses overriding getRangeQuery()
will be passed null for any open endpoint. (Ingo Renner, Adriano
- Crestani, yonik, Mike McCandless
+ Crestani, yonik, Mike McCandless
* LUCENE-3121: Add sugar reverse lookup (given an output, find the
input mapping to it) for FSTs that have strictly monotonic long
@@ -1424,7 +1631,7 @@ New Features
* LUCENE-3789: Expose MTQ TermsEnum via RewriteMethod for non package private
access (Simon Willnauer)
-
+
* LUCENE-3881: Added UAX29URLEmailAnalyzer: a standard analyzer that recognizes
URLs and emails. (Steve Rowe)
Modified: lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt Mon Aug 13 13:52:46 2012
@@ -9,7 +9,7 @@ enumeration APIs. Here are the major ch
by the BytesRef class (which provides an offset + length "slice"
into an existing byte[]).
- * Fields are separately enumerated (FieldsEnum) from the terms
+ * Fields are separately enumerated (Fields.iterator()) from the terms
within each field (TermEnum). So instead of this:
TermEnum termsEnum = ...;
@@ -20,10 +20,8 @@ enumeration APIs. Here are the major ch
Do this:
- FieldsEnum fieldsEnum = ...;
- String field;
- while((field = fieldsEnum.next()) != null) {
- TermsEnum termsEnum = fieldsEnum.terms();
+ for(String field : fields) {
+ TermsEnum termsEnum = fields.terms(field);
BytesRef text;
while((text = termsEnum.next()) != null) {
System.out.println("field=" + field + "; text=" + text.utf8ToString());
@@ -57,30 +55,6 @@ enumeration APIs. Here are the major ch
...
}
- The bulk read API has also changed. Instead of this:
-
- int[] docs = new int[256];
- int[] freqs = new int[256];
-
- while(true) {
- int count = td.read(docs, freqs)
- if (count == 0) {
- break;
- }
- // use docs[i], freqs[i]
- }
-
- do this:
-
- DocsEnum.BulkReadResult bulk = td.getBulkResult();
- while(true) {
- int count = td.read();
- if (count == 0) {
- break;
- }
- // use bulk.docs.ints[i] and bulk.freqs.ints[i]
- }
-
* TermPositions is renamed to DocsAndPositionsEnum, and no longer
extends the docs only enumerator (DocsEnum).
@@ -170,7 +144,7 @@ enumeration APIs. Here are the major ch
Bits liveDocs = reader.getLiveDocs();
DocsEnum docsEnum = null;
- docsEnum = termsEnum.docs(liveDocs, docsEnum);
+ docsEnum = termsEnum.docs(liveDocs, docsEnum, needsFreqs);
You can pass in a prior DocsEnum and it will be reused if possible.
@@ -187,7 +161,7 @@ enumeration APIs. Here are the major ch
String field;
BytesRef text;
- DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text);
+ DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text, needsFreqs);
Likewise for DocsAndPositionsEnum.
@@ -340,11 +314,12 @@ an AtomicReader. Note: using "atomicity
slowdowns due to the need to merge terms, postings, DocValues, and
FieldCache, use them with care!
-## LUCENE-2413: Analyzer package changes
+## LUCENE-2413,LUCENE-3396: Analyzer package changes
Lucene's core and contrib analyzers, along with Solr's analyzers,
were consolidated into lucene/analysis. During the refactoring some
-package names have changed:
+package names have changed, and ReusableAnalyzerBase was renamed to
+Analyzer:
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@@ -369,7 +344,7 @@ package names have changed:
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
- - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+ - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
@@ -629,3 +604,8 @@ you can now do this:
instance exposing the inverted index of the one document. From
Fields you can enumerate all fields, terms, positions, offsets.
+* LUCENE-4227: If you were previously using Instantiated index, you
+ may want to use DirectPostingsFormat after upgrading: it stores all
+ postings in simple arrays (byte[] for terms, int[] for docs, freqs,
+ positions, offsets). Note that this only covers postings, whereas
+ Instantiated covered all other parts of the index as well.
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java Mon Aug 13 13:52:46 2012
@@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -32,6 +31,7 @@ import org.apache.lucene.analysis.snowba
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
@@ -127,7 +127,7 @@ public final class CatalanAnalyzer exten
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
- result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+ result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro Mon Aug 13 13:52:46 2012
@@ -14,43 +14,49 @@
* limitations under the License.
*/
-// Generated using ICU4J 4.8.1.1 on Friday, January 13, 2012 6:20:39 PM UTC
+// Generated using ICU4J 49.1.0.0 on Sunday, July 15, 2012 5:42:00 AM UTC
// by org.apache.lucene.analysis.icu.GenerateHTMLStripCharFilterSupplementaryMacros
ID_Start_Supp = (
- [\uD81A][\uDC00-\uDE38]
+ [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
+ | [\uD81A][\uDC00-\uDE38]
| [\uD869][\uDC00-\uDED6\uDF00-\uDFFF]
| [\uD835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]
| [\uD80C\uD840-\uD868\uD86A-\uD86C][\uDC00-\uDFFF]
| [\uD82C][\uDC00\uDC01]
- | [\uD804][\uDC03-\uDC37\uDC83-\uDCAF]
| [\uD86D][\uDC00-\uDF34\uDF40-\uDFFF]
+ | [\uD81B][\uDF00-\uDF44\uDF50\uDF93-\uDF9F]
| [\uD87E][\uDC00-\uDE1D]
+ | [\uD804][\uDC03-\uDC37\uDC83-\uDCAF\uDCD0-\uDCE8\uDD03-\uDD26\uDD83-\uDDB2\uDDC1-\uDDC4]
+ | [\uD83B][\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]
| [\uD809][\uDC00-\uDC62]
| [\uD808][\uDC00-\uDF6E]
| [\uD803][\uDC00-\uDC48]
| [\uD800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]
| [\uD80D][\uDC00-\uDC2E]
+ | [\uD805][\uDE80-\uDEAA]
| [\uD86E][\uDC00-\uDC1D]
- | [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
| [\uD801][\uDC00-\uDC9D]
)
ID_Continue_Supp = (
[\uD81A][\uDC00-\uDE38]
| [\uD869][\uDC00-\uDED6\uDF00-\uDFFF]
| [\uD80C\uD840-\uD868\uD86A-\uD86C][\uDC00-\uDFFF]
- | [\uD804][\uDC00-\uDC46\uDC66-\uDC6F\uDC80-\uDCBA]
| [\uD82C][\uDC00\uDC01]
- | [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE38-\uDE3A\uDE3F\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
+ | [\uD81B][\uDF00-\uDF44\uDF50-\uDF7E\uDF8F-\uDF9F]
| [\uD801][\uDC00-\uDC9D\uDCA0-\uDCA9]
| [\uD86D][\uDC00-\uDF34\uDF40-\uDFFF]
| [\uD87E][\uDC00-\uDE1D]
+ | [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE38-\uDE3A\uDE3F\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
+ | [\uD805][\uDE80-\uDEB7\uDEC0-\uDEC9]
+ | [\uD83B][\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]
| [\uD809][\uDC00-\uDC62]
| [\uD808][\uDC00-\uDF6E]
| [\uD803][\uDC00-\uDC48]
| [\uD80D][\uDC00-\uDC2E]
| [\uD800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDDFD\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]
+ | [\uD804][\uDC00-\uDC46\uDC66-\uDC6F\uDC80-\uDCBA\uDCD0-\uDCE8\uDCF0-\uDCF9\uDD00-\uDD34\uDD36-\uDD3F\uDD80-\uDDC4\uDDD0-\uDDD9]
| [\uD86E][\uDC00-\uDC1D]
| [\uDB40][\uDD00-\uDDEF]
| [\uD834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]