You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/08/13 15:53:27 UTC
svn commit: r1372423 [3/45] - in /lucene/dev/branches/LUCENE-2878: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ de...
Modified: lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py (original)
+++ lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavaDocs.py Mon Aug 13 13:52:46 2012
@@ -23,7 +23,7 @@ reMarkup = re.compile('<.*?>')
def checkSummary(fullPath):
printed = False
- f = open(fullPath)
+ f = open(fullPath, encoding='UTF-8')
anyMissing = False
sawPackage = False
desc = []
@@ -41,10 +41,10 @@ def checkSummary(fullPath):
desc = desc.strip()
if desc == '':
if not printed:
- print
- print fullPath
+ print()
+ print(fullPath)
printed = True
- print ' no package description (missing package.html in src?)'
+ print(' no package description (missing package.html in src?)')
anyMissing = True
desc = None
else:
@@ -52,17 +52,17 @@ def checkSummary(fullPath):
if lineLower in ('<td> </td>', '<td></td>', '<td class="collast"> </td>'):
if not printed:
- print
- print fullPath
+ print()
+ print(fullPath)
printed = True
- print ' missing: %s' % unescapeHTML(lastHREF)
+ print(' missing: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:
- print
- print fullPath
+ print()
+ print(fullPath)
printed = True
- print ' license-is-javadoc: %s' % unescapeHTML(lastHREF)
+ print(' license-is-javadoc: %s' % unescapeHTML(lastHREF))
anyMissing = True
m = reHREF.search(line)
if m is not None:
@@ -85,17 +85,17 @@ def checkPackageSummaries(root, level='c
"""
if level != 'class' and level != 'package':
- print 'unsupported level: %s, must be "class" or "package"' % level
+ print('unsupported level: %s, must be "class" or "package"' % level)
sys.exit(1)
#for dirPath, dirNames, fileNames in os.walk('%s/lucene/build/docs/api' % root):
if False:
os.chdir(root)
- print
- print 'Run "ant javadocs" > javadocs.log...'
+ print()
+ print('Run "ant javadocs" > javadocs.log...')
if os.system('ant javadocs > javadocs.log 2>&1'):
- print ' FAILED'
+ print(' FAILED')
sys.exit(1)
anyMissing = False
@@ -116,14 +116,14 @@ def checkPackageSummaries(root, level='c
if __name__ == '__main__':
if len(sys.argv) < 2 or len(sys.argv) > 3:
- print 'usage: %s <dir> [class|package]' % sys.argv[0]
+ print('usage: %s <dir> [class|package]' % sys.argv[0])
sys.exit(1)
if len(sys.argv) == 2:
level = 'class'
else:
level = sys.argv[2]
if checkPackageSummaries(sys.argv[1], level):
- print
- print 'Missing javadocs were found!'
+ print()
+ print('Missing javadocs were found!')
sys.exit(1)
sys.exit(0)
Modified: lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py (original)
+++ lucene/dev/branches/LUCENE-2878/dev-tools/scripts/checkJavadocLinks.py Mon Aug 13 13:52:46 2012
@@ -65,7 +65,7 @@ class FindHyperlinks(HTMLParser):
href = href.strip()
self.links.append(urlparse.urljoin(self.baseURL, href))
else:
- if self.baseURL.endswith(os.path.sep + 'AttributeSource.html'):
+ if self.baseURL.endswith('/AttributeSource.html'):
# LUCENE-4010: AttributeSource's javadocs has an unescaped <A> generics!! Seems to be a javadocs bug... (fixed in Java 7)
pass
else:
@@ -126,7 +126,7 @@ def checkAll(dirName):
main not in ('deprecated-list',):
# Somehow even w/ java 7 generated javadocs,
# deprecated-list.html can fail to escape generics types
- fullPath = os.path.join(root, f)
+ fullPath = os.path.join(root, f).replace(os.path.sep,'/')
#print ' %s' % fullPath
allFiles[fullPath] = parse(fullPath, open('%s/%s' % (root, f), encoding='UTF-8').read())
@@ -193,6 +193,14 @@ def checkAll(dirName):
# see LUCENE-4011: this is a javadocs bug for constants
# on annotations it seems?
pass
+ elif link.startswith('file:'):
+ filepath = urlparse.unquote(urlparse.urlparse(link).path)
+ if not (os.path.exists(filepath) or os.path.exists(filepath[1:])):
+ if not printed:
+ printed = True
+ print()
+ print(fullPath)
+ print(' BROKEN LINK: %s' % link)
elif link not in allFiles:
# We only load HTML... so if the link is another resource (eg
# SweetSpotSimilarity refs
Modified: lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/LUCENE-2878/dev-tools/scripts/smokeTestRelease.py Mon Aug 13 13:52:46 2012
@@ -20,12 +20,12 @@ import subprocess
import signal
import shutil
import hashlib
-import httplib
+import http.client
import re
-import urllib2
-import urlparse
+import urllib.request, urllib.error, urllib.parse
+import urllib.parse
import sys
-import HTMLParser
+import html.parser
from collections import defaultdict
import xml.etree.ElementTree as ET
import filecmp
@@ -38,9 +38,9 @@ import checkJavadocLinks
# tested on Linux and on Cygwin under Windows 7.
def unshortenURL(url):
- parsed = urlparse.urlparse(url)
+ parsed = urllib.parse.urlparse(url)
if parsed[0] in ('http', 'https'):
- h = httplib.HTTPConnection(parsed.netloc)
+ h = http.client.HTTPConnection(parsed.netloc)
h.request('HEAD', parsed.path)
response = h.getresponse()
if response.status/100 == 3 and response.getheader('Location'):
@@ -58,7 +58,7 @@ def javaExe(version):
def verifyJavaVersion(version):
s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
- if s.find('java version "%s.' % version) == -1:
+ if s.find(' version "%s.' % version) == -1:
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
# http://s.apache.org/lusolr32rc2
@@ -101,8 +101,8 @@ def getHREFs(urlString):
# Deref any redirects
while True:
- url = urlparse.urlparse(urlString)
- h = httplib.HTTPConnection(url.netloc)
+ url = urllib.parse.urlparse(urlString)
+ h = http.client.HTTPConnection(url.netloc)
h.request('GET', url.path)
r = h.getresponse()
newLoc = r.getheader('location')
@@ -112,8 +112,8 @@ def getHREFs(urlString):
break
links = []
- for subUrl, text in reHREF.findall(urllib2.urlopen(urlString).read()):
- fullURL = urlparse.urljoin(urlString, subUrl)
+ for subUrl, text in reHREF.findall(urllib.request.urlopen(urlString).read().decode('UTF-8')):
+ fullURL = urllib.parse.urljoin(urlString, subUrl)
links.append((text, fullURL))
return links
@@ -121,15 +121,15 @@ def download(name, urlString, tmpDir, qu
fileName = '%s/%s' % (tmpDir, name)
if DEBUG and os.path.exists(fileName):
if not quiet and fileName.find('.asc') == -1:
- print ' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
+ print(' already done: %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
return
- fIn = urllib2.urlopen(urlString)
+ fIn = urllib.request.urlopen(urlString)
fOut = open(fileName, 'wb')
success = False
try:
while True:
s = fIn.read(65536)
- if s == '':
+ if s == b'':
break
fOut.write(s)
fOut.close()
@@ -141,14 +141,14 @@ def download(name, urlString, tmpDir, qu
if not success:
os.remove(fileName)
if not quiet and fileName.find('.asc') == -1:
- print ' %.1f MB' % (os.path.getsize(fileName)/1024./1024.)
+ print(' %.1f MB' % (os.path.getsize(fileName)/1024./1024.))
def load(urlString):
- return urllib2.urlopen(urlString).read()
+ return urllib.request.urlopen(urlString).read().decode('utf-8')
def checkSigs(project, urlString, version, tmpDir, isSigned):
- print ' test basics...'
+ print(' test basics...')
ents = getDirEntries(urlString)
artifact = None
keysURL = None
@@ -210,7 +210,7 @@ def checkSigs(project, urlString, versio
if keysURL is None:
raise RuntimeError('%s is missing KEYS' % project)
- print ' get KEYS'
+ print(' get KEYS')
download('%s.KEYS' % project, keysURL, tmpDir)
keysFile = '%s/%s.KEYS' % (tmpDir, project)
@@ -219,7 +219,7 @@ def checkSigs(project, urlString, versio
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
- os.makedirs(gpgHomeDir, 0700)
+ os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log 2>&1' % (tmpDir, project))
@@ -232,12 +232,12 @@ def checkSigs(project, urlString, versio
testChanges(project, version, changesURL)
for artifact, urlString in artifacts:
- print ' download %s...' % artifact
+ print(' download %s...' % artifact)
download(artifact, urlString, tmpDir)
verifyDigests(artifact, urlString, tmpDir)
if isSigned:
- print ' verify sig'
+ print(' verify sig')
# Test sig (this is done with a clean brand-new GPG world)
download(artifact + '.asc', urlString + '.asc', tmpDir)
sigFile = '%s/%s.asc' % (tmpDir, artifact)
@@ -246,28 +246,28 @@ def checkSigs(project, urlString, versio
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since it's a clean world)
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
run('gpg --import %s' % (keysFile),
'%s/%s.gpg.trust.import.log 2>&1' % (tmpDir, project))
- print ' verify trust'
+ print(' verify trust')
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
def testChanges(project, version, changesURLString):
- print ' check changes HTML...'
+ print(' check changes HTML...')
changesURL = None
for text, subURL in getDirEntries(changesURLString):
if text == 'Changes.html':
@@ -287,7 +287,7 @@ def testChangesText(dir, version, projec
if 'CHANGES.txt' in files:
fullPath = '%s/CHANGES.txt' % root
#print 'CHECK %s' % fullPath
- checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
+ checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False)
def checkChangesContent(s, version, name, project, isHTML):
@@ -336,7 +336,7 @@ def run(command, logFile):
raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath))
def verifyDigests(artifact, urlString, tmpDir):
- print ' verify md5/sha1 digests'
+ print(' verify md5/sha1 digests')
md5Expected, t = load(urlString + '.md5').strip().split()
if t != '*'+artifact:
raise RuntimeError('MD5 %s.md5 lists artifact %s but expected *%s' % (urlString, t, artifact))
@@ -347,10 +347,10 @@ def verifyDigests(artifact, urlString, t
m = hashlib.md5()
s = hashlib.sha1()
- f = open('%s/%s' % (tmpDir, artifact))
+ f = open('%s/%s' % (tmpDir, artifact), 'rb')
while True:
x = f.read(65536)
- if x == '':
+ if len(x) == 0:
break
m.update(x)
s.update(x)
@@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, t
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
def getDirEntries(urlString):
+ if urlString.startswith('file:/') and not urlString.startswith('file://'):
+ # stupid bogus ant URI
+ urlString = "file:///" + urlString[6:]
+
if urlString.startswith('file://'):
path = urlString[7:]
if path.endswith('/'):
@@ -388,7 +392,7 @@ def unpack(project, tmpDir, artifact, ve
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
- print ' unpack %s...' % artifact
+ print(' unpack %s...' % artifact)
unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact)
if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'):
run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile)
@@ -437,12 +441,14 @@ def verifyUnpacked(project, artifact, un
if project == 'lucene':
# TODO: clean this up to not be a list of modules that we must maintain
- extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework')
+ extras = ('analysis', 'benchmark', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
if isSrc:
extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
else:
extras = ()
+ # TODO: if solr, verify lucene/licenses, solr/licenses are present
+
for e in extras:
if e not in l:
raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact))
@@ -453,81 +459,81 @@ def verifyUnpacked(project, artifact, un
raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l))
if isSrc:
- print ' make sure no JARs/WARs in src dist...'
+ print(' make sure no JARs/WARs in src dist...')
lines = os.popen('find . -name \\*.jar').readlines()
if len(lines) != 0:
- print ' FAILED:'
+ print(' FAILED:')
for line in lines:
- print ' %s' % line.strip()
+ print(' %s' % line.strip())
raise RuntimeError('source release has JARs...')
lines = os.popen('find . -name \\*.war').readlines()
if len(lines) != 0:
- print ' FAILED:'
+ print(' FAILED:')
for line in lines:
- print ' %s' % line.strip()
+ print(' %s' % line.strip())
raise RuntimeError('source release has WARs...')
- print ' run "ant validate"'
+ print(' run "ant validate"')
run('%s; ant validate' % javaExe('1.7'), '%s/validate.log' % unpackPath)
if project == 'lucene':
- print ' run tests w/ Java 6...'
+ print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath)
testDemo(isSrc, version)
# test javadocs
- print ' generate javadocs w/ Java 6...'
+ print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
else:
- print ' run tests w/ Java 6...'
+ print(' run tests w/ Java 6...')
run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath)
# test javadocs
- print ' generate javadocs w/ Java 6...'
+ print(' generate javadocs w/ Java 6...')
run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
- print ' run tests w/ Java 7...'
+ print(' run tests w/ Java 7...')
run('%s; ant test' % javaExe('1.7'), '%s/test.log' % unpackPath)
# test javadocs
- print ' generate javadocs w/ Java 7...'
+ print(' generate javadocs w/ Java 7...')
run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath)
checkJavadocpath('%s/build/docs' % unpackPath)
os.chdir('solr')
- print ' test solr example w/ Java 6...'
+ print(' test solr example w/ Java 6...')
run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA6_HOME, True)
- print ' test solr example w/ Java 7...'
+ print(' test solr example w/ Java 7...')
run('%s; ant clean example' % javaExe('1.7'), '%s/antexample.log' % unpackPath)
testSolrExample(unpackPath, JAVA7_HOME, True)
os.chdir('..')
- print ' check NOTICE'
+ print(' check NOTICE')
testNotice(unpackPath)
else:
if project == 'lucene':
testDemo(isSrc, version)
else:
- print ' test solr example w/ Java 6...'
+ print(' test solr example w/ Java 6...')
testSolrExample(unpackPath, JAVA6_HOME, False)
- print ' test solr example w/ Java 7...'
+ print(' test solr example w/ Java 7...')
testSolrExample(unpackPath, JAVA7_HOME, False)
testChangesText('.', version, project)
if project == 'lucene' and not isSrc:
- print ' check Lucene\'s javadoc JAR'
+ print(' check Lucene\'s javadoc JAR')
checkJavadocpath('%s/docs' % unpackPath)
def testNotice(unpackPath):
- solrNotice = open('%s/NOTICE.txt' % unpackPath).read()
- luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath).read()
+ solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
+ luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read()
expected = """
=========================================================================
@@ -545,12 +551,12 @@ def readSolrOutput(p, startupEvent, logF
try:
while True:
line = p.readline()
- if line == '':
+ if len(line) == 0:
break
f.write(line)
f.flush()
# print 'SOLR: %s' % line.strip()
- if line.find('Started SocketConnector@0.0.0.0:8983') != -1:
+ if line.decode('UTF-8').find('Started SocketConnector@0.0.0.0:8983') != -1:
startupEvent.set()
finally:
f.close()
@@ -558,7 +564,7 @@ def readSolrOutput(p, startupEvent, logF
def testSolrExample(unpackPath, javaPath, isSrc):
logFile = '%s/solr-example.log' % unpackPath
os.chdir('example')
- print ' start Solr instance (log=%s)...' % logFile
+ print(' start Solr instance (log=%s)...' % logFile)
env = {}
env.update(os.environ)
env['JAVA_HOME'] = javaPath
@@ -572,21 +578,21 @@ def testSolrExample(unpackPath, javaPath
# Make sure Solr finishes startup:
startupEvent.wait()
- print ' startup done'
+ print(' startup done')
try:
- print ' test utf8...'
+ print(' test utf8...')
run('sh ./exampledocs/test_utf8.sh', 'utf8.log')
- print ' index example docs...'
+ print(' index example docs...')
run('sh ./exampledocs/post.sh ./exampledocs/*.xml', 'post-example-docs.log')
- print ' run query...'
- s = urllib2.urlopen('http://localhost:8983/solr/select/?q=video').read()
+ print(' run query...')
+ s = urllib.request.urlopen('http://localhost:8983/solr/select/?q=video').read().decode('UTF-8')
if s.find('<result name="response" numFound="3" start="0">') == -1:
- print 'FAILED: response is:\n%s' % s
+ print('FAILED: response is:\n%s' % s)
raise RuntimeError('query on solr example instance failed')
finally:
# Stop server:
- print ' stop server (SIGINT)...'
+ print(' stop server (SIGINT)...')
os.kill(server.pid, signal.SIGINT)
# Give it 10 seconds to gracefully shut down
@@ -594,14 +600,14 @@ def testSolrExample(unpackPath, javaPath
if serverThread.isAlive():
# Kill server:
- print '***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...'
+ print('***WARNING***: Solr instance didn\'t respond to SIGINT; using SIGKILL now...')
os.kill(server.pid, signal.SIGKILL)
serverThread.join(10.0)
if serverThread.isAlive():
# Shouldn't happen unless something is seriously wrong...
- print '***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...'
+ print('***WARNING***: Solr instance didn\'t respond to SIGKILL; ignoring...')
os.chdir('..')
@@ -615,13 +621,13 @@ def checkJavadocpath(path):
if checkJavaDocs.checkPackageSummaries(path):
# disabled: RM cannot fix all this, see LUCENE-3887
# raise RuntimeError('javadoc problems')
- print '\n***WARNING***: javadocs want to fail!\n'
+ print('\n***WARNING***: javadocs want to fail!\n')
if checkJavadocLinks.checkAll(path):
raise RuntimeError('broken javadocs links found!')
def testDemo(isSrc, version):
- print ' test demo...'
+ print(' test demo...')
sep = ';' if cygwin else ':'
if isSrc:
cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep)
@@ -632,14 +638,14 @@ def testDemo(isSrc, version):
run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log')
run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log')
reMatchingDocs = re.compile('(\d+) total matching documents')
- m = reMatchingDocs.search(open('search.log', 'rb').read())
+ m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read())
if m is None:
raise RuntimeError('lucene demo\'s SearchFiles found no results')
else:
numHits = int(m.group(1))
if numHits < 100:
raise RuntimeError('lucene demo\'s SearchFiles found too few results: %s' % numHits)
- print ' got %d hits for query "lucene"' % numHits
+ print(' got %d hits for query "lucene"' % numHits)
def checkMaven(baseURL, tmpDir, version, isSigned):
# Locate the release branch in subversion
@@ -652,11 +658,11 @@ def checkMaven(baseURL, tmpDir, version,
if text == releaseBranchText:
releaseBranchSvnURL = subURL
- print ' get POM templates',
+ print(' get POM templates', end=' ')
POMtemplates = defaultdict()
getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
- print
- print ' download artifacts',
+ print()
+ print(' download artifacts', end=' ')
artifacts = {'lucene': [], 'solr': []}
for project in ('lucene', 'solr'):
artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
@@ -664,30 +670,30 @@ def checkMaven(baseURL, tmpDir, version,
if not os.path.exists(targetDir):
os.makedirs(targetDir)
crawl(artifacts[project], artifactsURL, targetDir)
- print
- print ' verify that each binary artifact has a deployed POM...'
+ print()
+ print(' verify that each binary artifact has a deployed POM...')
verifyPOMperBinaryArtifact(artifacts, version)
- print ' verify that there is an artifact for each POM template...'
+ print(' verify that there is an artifact for each POM template...')
verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
- print " verify Maven artifacts' md5/sha1 digests..."
+ print(" verify Maven artifacts' md5/sha1 digests...")
verifyMavenDigests(artifacts)
- print ' verify that all non-Mavenized deps are deployed...'
+ print(' verify that all non-Mavenized deps are deployed...')
nonMavenizedDeps = dict()
checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
version, releaseBranchSvnURL)
- print ' check for javadoc and sources artifacts...'
+ print(' check for javadoc and sources artifacts...')
checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
- print " verify deployed POMs' coordinates..."
+ print(" verify deployed POMs' coordinates...")
verifyDeployedPOMsCoordinates(artifacts, version)
if isSigned:
- print ' verify maven artifact sigs',
+ print(' verify maven artifact sigs', end=' ')
verifyMavenSigs(baseURL, tmpDir, artifacts)
distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
- print ' verify that non-Mavenized deps are same as in the binary distribution...'
+ print(' verify that non-Mavenized deps are same as in the binary distribution...')
checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
- print ' verify that Maven artifacts are same as in the binary distribution...'
+ print(' verify that Maven artifacts are same as in the binary distribution...')
checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
def getDistributionsForMavenChecks(tmpDir, version, baseURL):
@@ -697,19 +703,19 @@ def getDistributionsForMavenChecks(tmpDi
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
- print ' download %s...' % distribution,
+ print(' download %s...' % distribution, end=' ')
download(distribution, distURL, tmpDir)
destDir = '%s/unpack-%s-maven' % (tmpDir, project)
if os.path.exists(destDir):
shutil.rmtree(destDir)
os.makedirs(destDir)
os.chdir(destDir)
- print ' unpack %s...' % distribution
+ print(' unpack %s...' % distribution)
unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
if project == 'solr': # unpack the Solr war
unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
- print ' unpack Solr war...'
+ print(' unpack Solr war...')
run('jar xvf */dist/*.war', unpackLogFile)
distributionFiles[project] = []
for root, dirs, files in os.walk(destDir):
@@ -719,7 +725,7 @@ def getDistributionsForMavenChecks(tmpDi
def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
for project in ('lucene', 'solr'):
for artifact in artifacts[project]:
- if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps.keys():
+ if artifact.endswith(version + '.jar') and artifact not in list(nonMavenizedDeps.keys()):
javadocJar = artifact[:-4] + '-javadoc.jar'
if javadocJar not in artifacts[project]:
raise RuntimeError('missing: %s' % javadocJar)
@@ -732,7 +738,7 @@ def checkIdenticalNonMavenizedDeps(distr
distFilenames = dict()
for file in distributionFiles[project]:
distFilenames[os.path.basename(file)] = file
- for dep in nonMavenizedDeps.keys():
+ for dep in list(nonMavenizedDeps.keys()):
if ('/%s/' % project) in dep:
depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
if not depOrigFilename in distFilenames:
@@ -753,9 +759,9 @@ def checkIdenticalMavenArtifacts(distrib
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
- if artifact not in nonMavenizedDeps.keys():
+ if artifact not in list(nonMavenizedDeps.keys()):
artifactFilename = os.path.basename(artifact)
- if artifactFilename not in distFilenames.keys():
+ if artifactFilename not in list(distFilenames.keys()):
raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
% (artifact, project))
# TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
@@ -772,16 +778,17 @@ def verifyMavenDigests(artifacts):
raise RuntimeError('missing: MD5 digest for %s' % artifactFile)
if artifactFile + '.sha1' not in artifacts[project]:
raise RuntimeError('missing: SHA1 digest for %s' % artifactFile)
- with open(artifactFile + '.md5', 'r') as md5File:
+ with open(artifactFile + '.md5', encoding='UTF-8') as md5File:
md5Expected = md5File.read().strip()
- with open(artifactFile + '.sha1', 'r') as sha1File:
+ with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File:
sha1Expected = sha1File.read().strip()
md5 = hashlib.md5()
sha1 = hashlib.sha1()
- inputFile = open(artifactFile)
+ inputFile = open(artifactFile, 'rb')
while True:
bytes = inputFile.read(65536)
- if bytes == '': break
+ if len(bytes) == 0:
+ break
md5.update(bytes)
sha1.update(bytes)
inputFile.close()
@@ -846,7 +853,7 @@ def checkNonMavenizedDeps(nonMavenizedDe
if releaseBranchSvnURL is None:
pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
if os.path.exists(pomPath):
- doc2 = ET.XML(open(pomPath).read())
+ doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
break
else:
entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
@@ -891,7 +898,7 @@ def verifyMavenSigs(baseURL, tmpDir, art
gpgHomeDir = '%s/%s.gpg' % (tmpDir, project)
if os.path.exists(gpgHomeDir):
shutil.rmtree(gpgHomeDir)
- os.makedirs(gpgHomeDir, 0700)
+ os.makedirs(gpgHomeDir, 0o700)
run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile),
'%s/%s.gpg.import.log' % (tmpDir, project))
@@ -904,12 +911,12 @@ def verifyMavenSigs(baseURL, tmpDir, art
run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile),
logFile)
# Forward any GPG warnings, except the expected one (since it's a clean world)
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
# Test trust (this is done with the real users config)
@@ -918,16 +925,16 @@ def verifyMavenSigs(baseURL, tmpDir, art
logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact)
run('gpg --verify %s %s' % (sigFile, artifactFile), logFile)
# Forward any GPG warnings:
- f = open(logFile, 'rb')
+ f = open(logFile, encoding='UTF-8')
for line in f.readlines():
if line.lower().find('warning') != -1 \
and line.find('WARNING: This key is not certified with a trusted signature') == -1 \
and line.find('WARNING: using insecure memory') == -1:
- print ' GPG: %s' % line.strip()
+ print(' GPG: %s' % line.strip())
f.close()
sys.stdout.write('.')
- print
+ print()
def verifyPOMperBinaryArtifact(artifacts, version):
"""verify that each binary jar and war has a corresponding POM file"""
@@ -1023,17 +1030,20 @@ def crawl(downloadedFiles, urlString, ta
def main():
- if len(sys.argv) != 4:
- print
- print 'Usage python -u %s BaseURL version tmpDir' % sys.argv[0]
- print
+ if len(sys.argv) < 4:
+ print()
+ print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
+ print()
sys.exit(1)
baseURL = sys.argv[1]
version = sys.argv[2]
tmpDir = os.path.abspath(sys.argv[3])
+ isSigned = True
+ if len(sys.argv) == 5:
+ isSigned = (sys.argv[4] == "True")
- smokeTest(baseURL, version, tmpDir, True)
+ smokeTest(baseURL, version, tmpDir, isSigned)
def smokeTest(baseURL, version, tmpDir, isSigned):
@@ -1046,11 +1056,11 @@ def smokeTest(baseURL, version, tmpDir,
lucenePath = None
solrPath = None
- print
- print 'Load release URL "%s"...' % baseURL
+ print()
+ print('Load release URL "%s"...' % baseURL)
newBaseURL = unshortenURL(baseURL)
if newBaseURL != baseURL:
- print ' unshortened: %s' % newBaseURL
+ print(' unshortened: %s' % newBaseURL)
baseURL = newBaseURL
for text, subURL in getDirEntries(baseURL):
@@ -1064,23 +1074,28 @@ def smokeTest(baseURL, version, tmpDir,
if solrPath is None:
raise RuntimeError('could not find solr subdir')
- print
- print 'Test Lucene...'
+ print()
+ print('Test Lucene...')
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
unpack('lucene', tmpDir, artifact, version)
unpack('lucene', tmpDir, 'lucene-%s-src.tgz' % version, version)
- print
- print 'Test Solr...'
+ print()
+ print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
unpack('solr', tmpDir, artifact, version)
unpack('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
- print 'Test Maven artifacts for Lucene and Solr...'
+ print('Test Maven artifacts for Lucene and Solr...')
checkMaven(baseURL, tmpDir, version, isSigned)
if __name__ == '__main__':
- main()
-
+ try:
+ main()
+ except:
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
+ sys.exit(0)
Modified: lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/CHANGES.txt Mon Aug 13 13:52:46 2012
@@ -6,11 +6,68 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
+======================= Lucene 4.0.0 =======================
+
+New Features
+
+* LUCENE-1888: Added the option to store payloads in the term
+ vectors (IndexableFieldType.storeTermVectorPayloads()). Note
+ that you must store term vector positions to store payloads.
+ (Robert Muir)
+
+API Changes
+
+* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
+ Previously you had no real way to know that a term vector field
+ had positions or offsets, since this can be configured on a
+ per-field-per-document basis. (Robert Muir)
+
+* Removed DocsAndPositionsEnum.hasPayload() and simplified the
+ contract of getPayload(). It returns null if there is no payload,
+ otherwise returns the current payload. You can now call it multiple
+ times per position if you want. (Robert Muir)
+
+* Removed FieldsEnum. Fields API instead implements Iterable<String>
+ and exposes Iterator, so you can iterate over field names with
+ for (String field : fields) instead. (Robert Muir)
+
+Bug Fixes
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+ twice for conjunctions: for most users this is no problem, but
+ if you had a customized Similarity that returned something other
+ than 1 when overlap == maxOverlap (always the case for conjunctions),
+ then the score would be incorrect. (Pascal Chollet, Robert Muir)
+
+* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
+ did not work at all, it would infinitely recurse.
+ (Alberto Paro via Robert Muir)
+
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+ had a custom Similarity where coord(1,1) != 1F, then the rewritten
+ query would be scored differently. (Robert Muir)
+
+* Don't allow negatives in the positions file. If you have an index
+ from 2.4.0 or earlier with such negative positions, and you already
+ upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
+ CheckIndex. If it fails, then you need to upgrade again to 4.0 (Robert Muir)
+
+Build
+
+* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
+ thread leak detection. Added support for suite timeouts. (Dawid Weiss)
======================= Lucene 4.0.0-BETA =======================
New features
+* LUCENE-4249: Changed the explanation of the PayloadTermWeight to use the
+ underlying PayloadFunction's explanation as the explanation
+ for the payload score. (Scott Smerchek via Robert Muir)
+
+* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
+ such as primary keys (Mark Harwood, Mike McCandless)
+
* LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
iteration marks. (Robert Muir, Christian Moen)
@@ -22,6 +79,30 @@ New features
respect field boundaries in the case of highlighting for multivalued fields.
(Martijn van Groningen)
+* LUCENE-4227: Added DirectPostingsFormat, to hold all postings in
+ memory as uncompressed simple arrays. This uses a tremendous amount
+ of RAM but gives good search performance gains. (Mike McCandless)
+
+* LUCENE-2510, LUCENE-4044: Migrated Solr's Tokenizer-, TokenFilter-, and
+ CharFilterFactories to the lucene-analysis module. The API is still
+ experimental. (Chris Male, Robert Muir, Uwe Schindler)
+
+* LUCENE-4230: When pulling a DocsAndPositionsEnum you can now
+ specify whether or not you require payloads (in addition to
+ offsets); turning one or both off may allow some codec
+ implementations to optimize the enum implementation. (Robert Muir,
+ Mike McCandless)
+
+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
+ int docID), to attempt deletion by docID as long as the provided
+ reader is an NRT reader, and the segment has not yet been merged
+ away (Mike McCandless).
+
+* LUCENE-4286: Added option to CJKBigramFilter to always also output
+ unigrams. This can be used for a unigram+bigram approach, or at
+ index-time only for better support of short queries.
+ (Tom Burton-West, Robert Muir)
+
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
@@ -46,6 +127,34 @@ API Changes
filter another reader and you override correct() for offset correction.
(Robert Muir)
+* LUCENE-4240: Analyzer api now just takes fieldName for getOffsetGap. If the
+ field is not analyzed (e.g. StringField), then the analyzer is not invoked
+ at all. If you want to tweak things like positionIncrementGap and offsetGap,
+ analyze the field with KeywordTokenizer instead. (Grant Ingersoll, Robert Muir)
+
+* LUCENE-4250: Pass fieldName to the PayloadFunction explain method, so it
+ parallels with docScore and the default implementation is correct.
+ (Robert Muir)
+
+* LUCENE-3747: Support Unicode 6.1.0. (Steve Rowe)
+
+* LUCENE-3884: Moved ElisionFilter out of org.apache.lucene.analysis.fr
+ package into org.apache.lucene.analysis.util. (Robert Muir)
+
+* LUCENE-4230: When pulling a DocsAndPositionsEnum you now pass an int
+ flags instead of the previous boolean needOffsets. Currently
+ recognized flags are DocsAndPositionsEnum.FLAG_PAYLOADS and
+ DocsAndPositionsEnum.FLAG_OFFSETS (Robert Muir, Mike McCandless)
+
+* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
+ instead of the previous boolean needsFlags; consistent with the changes
+ for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
+ is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
+
+* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
+ as the Store parameter didn't make sense: if you supplied Store.YES, you would only
+ receive an exception anyway. (Robert Muir)
+
Optimizations
* LUCENE-4171: Performance improvements to Packed64.
@@ -57,8 +166,19 @@ Optimizations
* LUCENE-4235: Remove enforcing of Filter rewrite for NRQ queries.
(Uwe Schindler)
+* LUCENE-4279: Regenerated snowball Stemmers from snowball r554,
+ making them substantially more lightweight. Behavior is unchanged.
+ (Robert Muir)
+
+* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers
+ such as StandardTokenizer from 32kb to 8kb.
+ (Raintung Li, Steven Rowe, Robert Muir)
+
Bug Fixes
+* LUCENE-4109: BooleanQueries are not parsed correctly with the
+ flexible query parser. (Karsten Rauch via Robert Muir)
+
* LUCENE-4176: Fix AnalyzingQueryParser to analyze range endpoints as bytes,
so that it works correctly with Analyzers that produce binary non-UTF-8 terms
such as CollationAnalyzer. (Nattapong Sirilappanich via Robert Muir)
@@ -84,6 +204,36 @@ Bug Fixes
all queries. Made Scorer.freq() abstract.
(Koji Sekiguchi, Mike McCandless, Robert Muir)
+* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest,
+ and the number of matching documents is too large. (Gilad Barkai via Shai Erera)
+
+* LUCENE-4245: Make IndexWriter#close() and MergeScheduler#close()
+ non-interruptible. (Mark Miller, Uwe Schindler)
+
+* LUCENE-4190: restrict allowed filenames that a codec may create to
+ the patterns recognized by IndexFileNames. This also fixes
+ IndexWriter to only delete files matching this pattern from an index
+ directory, to reduce risk when the wrong index path is accidentally
+ passed to IndexWriter (Robert Muir, Mike McCandless)
+
+* LUCENE-4277: Fix IndexWriter deadlock during rollback if flushable DWPT
+ instance are already checked out and queued up but not yet flushed.
+ (Simon Willnauer)
+
+* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
+ (Johannes Christen, Uwe Schindler, Robert Muir)
+
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+ (Robert Muir)
+
+Changes in Runtime Behavior
+
+* LUCENE-4109: Enable position increments in the flexible queryparser by default.
+ (Karsten Rauch via Robert Muir)
+
+* LUCENE-3616: Field throws exception if you try to set a boost on an
+ unindexed field or one that omits norms. (Robert Muir)
+
Build
* LUCENE-4094: Support overriding file.encoding on forked test JVMs
@@ -1234,7 +1384,60 @@ Build
tasks) to correctly encode build file names as URIs for later processing by
XSL. (Greg Bowyer, Uwe Schindler)
+
+======================= Lucene 3.6.1 =======================
+More information about this release, including any errata related to the
+release notes, upgrade instructions, or other changes may be found online at:
+ https://wiki.apache.org/lucene-java/Lucene3.6.1
+
+Bug Fixes
+
+* LUCENE-3969: Throw IAE on bad arguments that could cause confusing
+ errors in KeywordTokenizer.
+ (Uwe Schindler, Mike McCandless, Robert Muir)
+
+* LUCENE-3971: MappingCharFilter could return invalid final token position.
+ (Dawid Weiss, Robert Muir)
+
+* LUCENE-4023: DisjunctionMaxScorer now implements visitSubScorers().
+ (Uwe Schindler)
+
+* LUCENE-2566: + - operators allow any amount of whitespace (yonik, janhoy)
+
+* LUCENE-3590: Fix AIOOBE in BytesRef/CharsRef copyBytes/copyChars when
+ offset is nonzero, fix off-by-one in CharsRef.subSequence, and fix
+ CharsRef's CharSequence methods to throw exceptions in boundary cases
+ to properly meet the specification. (Robert Muir)
+
+* LUCENE-4222: TieredMergePolicy.getFloorSegmentMB was returning the
+ size in bytes not MB (Chris Fuller via Mike McCandless)
+
+API Changes
+
+* LUCENE-4023: Changed the visibility of Scorer#visitSubScorers() to
+ public, otherwise it's impossible to implement Scorers outside
+ the Lucene package. (Uwe Schindler)
+
+Optimizations
+
+* LUCENE-4163: Improve concurrency of MMapIndexInput.clone() by using
+ the new WeakIdentityMap on top of a ConcurrentHashMap to manage
+ the cloned instances. WeakIdentityMap was extended to support
+ iterating over its keys. (Uwe Schindler)
+
+Tests
+
+* LUCENE-3873: add MockGraphTokenFilter, testing analyzers with
+ random graph tokens. (Mike McCandless)
+
+* LUCENE-3968: factor out LookaheadTokenFilter from
+ MockGraphTokenFilter (Mike McCandless)
+
+
======================= Lucene 3.6.0 =======================
+More information about this release, including any errata related to the
+release notes, upgrade instructions, or other changes may be found online at:
+ https://wiki.apache.org/lucene-java/Lucene3.6
Changes in backwards compatibility policy
@@ -1290,7 +1493,7 @@ Changes in backwards compatibility polic
* LUCENE-3712: Removed unused and untested ReaderUtil#subReader methods.
(Uwe Schindler)
-
+
* LUCENE-3672: Deprecate Directory.fileModified,
IndexCommit.getTimestamp and .getVersion and
IndexReader.lastModified and getCurrentVersion (Andrzej Bialecki,
@@ -1313,6 +1516,10 @@ Changes in backwards compatibility polic
* LUCENE-3738: All readXxx methods in BufferedIndexInput were made
final. Subclasses should only override protected readInternal /
seekInternal. (Uwe Schindler)
+
+* LUCENE-2599: Deprecated the spatial contrib module, which was buggy and not
+ well maintained. Lucene 4 includes a new spatial module that replaces this.
+ (David Smiley, Ryan McKinley, Chris Male)
Changes in Runtime Behavior
@@ -1354,7 +1561,7 @@ API Changes
query time, wrap your IndexReader using FilterIndexReader, overriding
FilterIndexReader.norms(). To persist the changes on disk, copy the
FilteredIndexReader to a new index using IndexWriter.addIndexes().
- In Lucene 4.0, Similarity will allow you to customize scoring
+ In Lucene 4.0, SimilarityProvider will allow you to customize scoring
using external norms, too. (Uwe Schindler, Robert Muir)
* LUCENE-3735: PayloadProcessorProvider was changed to return a
@@ -1379,7 +1586,7 @@ API Changes
never applying deletes). (MJB, Shai Erera, Mike McCandless)
* LUCENE-3761: Generalize SearcherManager into an abstract ReferenceManager.
- SearcherManager remains a concrete class, but due to the refactoring, the
+ SearcherManager remains a concrete class, but due to the refactoring, the
method maybeReopen has been deprecated in favor of maybeRefresh().
(Shai Erera, Mike McCandless, Simon Willnauer)
@@ -1404,7 +1611,7 @@ New Features
queries. Literal asterisks may be represented by quoting or escaping
(i.e. \* or "*") Custom QueryParser subclasses overriding getRangeQuery()
will be passed null for any open endpoint. (Ingo Renner, Adriano
- Crestani, yonik, Mike McCandless
+ Crestani, yonik, Mike McCandless
* LUCENE-3121: Add sugar reverse lookup (given an output, find the
input mapping to it) for FSTs that have strictly monotonic long
@@ -1424,7 +1631,7 @@ New Features
* LUCENE-3789: Expose MTQ TermsEnum via RewriteMethod for non package private
access (Simon Willnauer)
-
+
* LUCENE-3881: Added UAX29URLEmailAnalyzer: a standard analyzer that recognizes
URLs and emails. (Steve Rowe)
Modified: lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/MIGRATE.txt Mon Aug 13 13:52:46 2012
@@ -9,7 +9,7 @@ enumeration APIs. Here are the major ch
by the BytesRef class (which provides an offset + length "slice"
into an existing byte[]).
- * Fields are separately enumerated (FieldsEnum) from the terms
+ * Fields are separately enumerated (Fields.iterator()) from the terms
within each field (TermEnum). So instead of this:
TermEnum termsEnum = ...;
@@ -20,10 +20,8 @@ enumeration APIs. Here are the major ch
Do this:
- FieldsEnum fieldsEnum = ...;
- String field;
- while((field = fieldsEnum.next()) != null) {
- TermsEnum termsEnum = fieldsEnum.terms();
+ for(String field : fields) {
+ TermsEnum termsEnum = fields.terms(field);
BytesRef text;
while((text = termsEnum.next()) != null) {
System.out.println("field=" + field + "; text=" + text.utf8ToString());
@@ -57,30 +55,6 @@ enumeration APIs. Here are the major ch
...
}
- The bulk read API has also changed. Instead of this:
-
- int[] docs = new int[256];
- int[] freqs = new int[256];
-
- while(true) {
- int count = td.read(docs, freqs)
- if (count == 0) {
- break;
- }
- // use docs[i], freqs[i]
- }
-
- do this:
-
- DocsEnum.BulkReadResult bulk = td.getBulkResult();
- while(true) {
- int count = td.read();
- if (count == 0) {
- break;
- }
- // use bulk.docs.ints[i] and bulk.freqs.ints[i]
- }
-
* TermPositions is renamed to DocsAndPositionsEnum, and no longer
extends the docs only enumerator (DocsEnum).
@@ -170,7 +144,7 @@ enumeration APIs. Here are the major ch
Bits liveDocs = reader.getLiveDocs();
DocsEnum docsEnum = null;
- docsEnum = termsEnum.docs(liveDocs, docsEnum);
+ docsEnum = termsEnum.docs(liveDocs, docsEnum, needsFreqs);
You can pass in a prior DocsEnum and it will be reused if possible.
@@ -187,7 +161,7 @@ enumeration APIs. Here are the major ch
String field;
BytesRef text;
- DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text);
+ DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text, needsFreqs);
Likewise for DocsAndPositionsEnum.
@@ -340,11 +314,12 @@ an AtomicReader. Note: using "atomicity
slowdowns due to the need to merge terms, postings, DocValues, and
FieldCache, use them with care!
-## LUCENE-2413: Analyzer package changes
+## LUCENE-2413,LUCENE-3396: Analyzer package changes
Lucene's core and contrib analyzers, along with Solr's analyzers,
were consolidated into lucene/analysis. During the refactoring some
-package names have changed:
+package names have changed, and ReusableAnalyzerBase was renamed to
+Analyzer:
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@@ -369,7 +344,7 @@ package names have changed:
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
- - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+ - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
@@ -629,3 +604,8 @@ you can now do this:
instance exposing the inverted index of the one document. From
Fields you can enumerate all fields, terms, positions, offsets.
+* LUCENE-4227: If you were previously using Instantiated index, you
+ may want to use DirectPostingsFormat after upgrading: it stores all
+ postings in simple arrays (byte[] for terms, int[] for docs, freqs,
+ positions, offsets). Note that this only covers postings, whereas
+ Instantiated covered all other parts of the index as well.
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java Mon Aug 13 13:52:46 2012
@@ -24,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -32,6 +31,7 @@ import org.apache.lucene.analysis.snowba
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
@@ -127,7 +127,7 @@ public final class CatalanAnalyzer exten
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
- result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+ result = new ElisionFilter(result, DEFAULT_ARTICLES);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro Mon Aug 13 13:52:46 2012
@@ -14,43 +14,49 @@
* limitations under the License.
*/
-// Generated using ICU4J 4.8.1.1 on Friday, January 13, 2012 6:20:39 PM UTC
+// Generated using ICU4J 49.1.0.0 on Sunday, July 15, 2012 5:42:00 AM UTC
// by org.apache.lucene.analysis.icu.GenerateHTMLStripCharFilterSupplementaryMacros
ID_Start_Supp = (
- [\uD81A][\uDC00-\uDE38]
+ [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
+ | [\uD81A][\uDC00-\uDE38]
| [\uD869][\uDC00-\uDED6\uDF00-\uDFFF]
| [\uD835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]
| [\uD80C\uD840-\uD868\uD86A-\uD86C][\uDC00-\uDFFF]
| [\uD82C][\uDC00\uDC01]
- | [\uD804][\uDC03-\uDC37\uDC83-\uDCAF]
| [\uD86D][\uDC00-\uDF34\uDF40-\uDFFF]
+ | [\uD81B][\uDF00-\uDF44\uDF50\uDF93-\uDF9F]
| [\uD87E][\uDC00-\uDE1D]
+ | [\uD804][\uDC03-\uDC37\uDC83-\uDCAF\uDCD0-\uDCE8\uDD03-\uDD26\uDD83-\uDDB2\uDDC1-\uDDC4]
+ | [\uD83B][\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]
| [\uD809][\uDC00-\uDC62]
| [\uD808][\uDC00-\uDF6E]
| [\uD803][\uDC00-\uDC48]
| [\uD800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]
| [\uD80D][\uDC00-\uDC2E]
+ | [\uD805][\uDE80-\uDEAA]
| [\uD86E][\uDC00-\uDC1D]
- | [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
| [\uD801][\uDC00-\uDC9D]
)
ID_Continue_Supp = (
[\uD81A][\uDC00-\uDE38]
| [\uD869][\uDC00-\uDED6\uDF00-\uDFFF]
| [\uD80C\uD840-\uD868\uD86A-\uD86C][\uDC00-\uDFFF]
- | [\uD804][\uDC00-\uDC46\uDC66-\uDC6F\uDC80-\uDCBA]
| [\uD82C][\uDC00\uDC01]
- | [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE38-\uDE3A\uDE3F\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
+ | [\uD81B][\uDF00-\uDF44\uDF50-\uDF7E\uDF8F-\uDF9F]
| [\uD801][\uDC00-\uDC9D\uDCA0-\uDCA9]
| [\uD86D][\uDC00-\uDF34\uDF40-\uDFFF]
| [\uD87E][\uDC00-\uDE1D]
+ | [\uD802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE38-\uDE3A\uDE3F\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72]
+ | [\uD805][\uDE80-\uDEB7\uDEC0-\uDEC9]
+ | [\uD83B][\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]
| [\uD809][\uDC00-\uDC62]
| [\uD808][\uDC00-\uDF6E]
| [\uD803][\uDC00-\uDC48]
| [\uD80D][\uDC00-\uDC2E]
| [\uD800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDDFD\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]
+ | [\uD804][\uDC00-\uDC46\uDC66-\uDC6F\uDC80-\uDCBA\uDCD0-\uDCE8\uDCF0-\uDCF9\uDD00-\uDD34\uDD36-\uDD3F\uDD80-\uDDC4\uDDD0-\uDDD9]
| [\uD86E][\uDC00-\uDC1D]
| [\uDB40][\uDD00-\uDDEF]
| [\uD834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]