Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/14 20:38:25 UTC
[06/12] lucene-solr:master: LUCENE-7318: graduate StandardAnalyzer and make it the default for IndexWriterConfig
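
The import churn in the hunks below reflects the classes that graduate alongside StandardAnalyzer: CharArraySet, StopFilter, LowerCaseFilter, FilteringTokenFilter, WordlistLoader and StopwordAnalyzerBase move from org.apache.lucene.analysis.util / org.apache.lucene.analysis.core into org.apache.lucene.analysis in lucene-core. A minimal client-side sketch of what that means, assuming the post-LUCENE-7318 no-arg IndexWriterConfig constructor and the relocated CharArraySet on a Lucene 6.x-era API; this is illustrative only, not part of the commit:

import java.util.Arrays;

import org.apache.lucene.analysis.CharArraySet; // was org.apache.lucene.analysis.util.CharArraySet
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DefaultAnalyzerSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    // After this change, the no-arg config is equivalent to
    // new IndexWriterConfig(new StandardAnalyzer()).
    IndexWriterConfig config = new IndexWriterConfig();
    try (IndexWriter writer = new IndexWriter(dir, config)) {
      // add documents here
    }
    // Stopword sets now come from the relocated org.apache.lucene.analysis.CharArraySet.
    CharArraySet stopWords = new CharArraySet(Arrays.asList("a", "the"), true);
    System.out.println(stopWords.contains("THE")); // true: ignoreCase == true
  }
}
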
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
deleted file mode 100644
index ec37924..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/usr/bin/perl
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-use warnings;
-use strict;
-use File::Spec;
-use Getopt::Long;
-use LWP::UserAgent;
-
-my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
-
-my $version = '';
-unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
- print STDERR "Usage: $script_name -v <version>\n";
- print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
- if ($version);
- exit 1;
-}
-my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
-my $scripts_url = "${url_prefix}/Scripts.txt";
-my $line_break_url = "${url_prefix}/LineBreak.txt";
-my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
-my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
-my $underscore_version = $version;
-$underscore_version =~ s/\./_/g;
-my $class_name = "WordBreakTestUnicode_${underscore_version}";
-my $output_filename = "${class_name}.java";
-my $header =<<"__HEADER__";
-package org.apache.lucene.analysis.core;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by ${script_name}
- * from: ${url_prefix}/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words. This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- * \\p{Script = Han} (From $scripts_url)
- * \\p{Script = Hiragana}
- * \\p{LineBreak = Complex_Context} (From $line_break_url)
- * \\p{WordBreak = ALetter} (From $word_break_url)
- * \\p{WordBreak = Hebrew_Letter}
- * \\p{WordBreak = Katakana}
- * \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
- * [\\uFF10-\\uFF19] (Full-width Arabic digits)
- */
-\@Ignore
-public class ${class_name} extends BaseTokenStreamTestCase {
-
- public void test(Analyzer analyzer) throws Exception {
-__HEADER__
-
-my $codepoints = [];
-map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
-# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
-# Using lowercase versions of property value names to allow for case-
-# insensitive comparison with the names in the Unicode data files.
-parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
-parse_Unicode_data_file($scripts_url, $codepoints,
- {'han' => 1, 'hiragana' => 1});
-parse_Unicode_data_file($word_break_url, $codepoints,
- {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
-my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
-
-my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
-open OUT, ">$output_path"
- || die "Error opening '$output_path' for writing: $!";
-
-print STDERR "Writing '$output_path'...";
-
-print OUT $header;
-
-for my $line (@tests) {
- next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
- # Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
- my ($sequence) = $line =~ /^(.*?)\s*\#/;
- $line =~ s/\t/  /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
- print OUT " // $line\n";
- $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
- my $test_string = $sequence;
- $test_string =~ s/\s*÷\s*/\\u/g;
- $test_string =~ s/\s*×\s*/\\u/g;
- $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
- $test_string =~ s/\\u000A/\\n/g;
- $test_string =~ s/\\u000D/\\r/g;
- $test_string =~ s/\\u0022/\\\"/g;
- $sequence =~ s/^\s*÷\s*//; # Trim leading break character
- my @tokens = ();
- for my $candidate (split /\s*÷\s*/, $sequence) {
- my @chars = ();
- my $has_wanted_char = 0;
- while ($candidate =~ /([0-9A-F]+)/gi) {
- my $hexchar = $1;
- if (4 == length($hexchar)) {
- push @chars, $hexchar;
- } else {
- push @chars, above_BMP_char_to_surrogates($hexchar);
- }
- unless ($has_wanted_char) {
- $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
- }
- }
- if ($has_wanted_char) {
- push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
- }
- }
- print OUT " assertAnalyzesTo(analyzer, \"${test_string}\",\n";
- print OUT " new String[] { ";
- print OUT join(", ", @tokens), " });\n\n";
-}
-
-print OUT " }\n}\n";
-close OUT;
-print STDERR "done.\n";
-
-
-# sub above_BMP_char_to_surrogates
-#
-# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
-# to the corresponding UTF-16 surrogate pair
-#
-# Assumption: input string is a sequence of more than four hex digits
-#
-sub above_BMP_char_to_surrogates {
- my $ch = hex(shift);
- my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10);
- my $low_surrogate = 0xDC00 + ($ch & 0x3FF);
- return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate);
-}
-
-
-# sub parse_Unicode_data_file
-#
-# Downloads the specified Unicode data file, parses it, and
-# extracts code points assigned any of the given property values, defining
-# the corresponding array position in the passed-in target array.
-#
-# Takes in the following parameters:
-#
-# - URL of the Unicode data file to download and parse
-# - Reference to target array
-# - Reference to hash of property values to get code points for
-#
-sub parse_Unicode_data_file {
- my $url = shift;
- my $target = shift;
- my $wanted_property_values = shift;
- my $content = get_URL_content($url);
- print STDERR "Parsing '$url'...";
- my @lines = split /\r?\n/, $content;
- for (@lines) {
- s/\s*#.*//; # Strip trailing comments
- s/\s+$//; # Strip trailing space
- next unless (/\S/); # Skip empty lines
- my ($start, $end, $property_value);
- if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) {
- # 00AA ; LATIN
- $start = $end = hex $1;
- $property_value = lc $2; # Property value names are case-insensitive
- } elsif (/^([0-9A-F]{4,5})..([0-9A-F]{4,5})\s*;\s*(.+)/i) {
- # 0AE6..0AEF ; Gujarati
- $start = hex $1;
- $end = hex $2;
- $property_value = lc $3; # Property value names are case-insensitive
- } else {
- next;
- }
- if (defined($wanted_property_values->{$property_value})) {
- for my $code_point ($start..$end) {
- $target->[$code_point] = 1;
- }
- }
- }
- print STDERR "done.\n";
-}
-
-# sub get_URL_content
-#
-# Retrieves and returns the content of the given URL.
-#
-sub get_URL_content {
- my $url = shift;
- print STDERR "Retrieving '$url'...";
- my $user_agent = LWP::UserAgent->new;
- my $request = HTTP::Request->new(GET => $url);
- my $response = $user_agent->request($request);
- unless ($response->is_success) {
- print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
- exit 1;
- }
- print STDERR "done.\n";
- return $response->content;
-}
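
The deleted generator's above_BMP_char_to_surrogates() turns code points above the BMP into UTF-16 surrogate pairs before emitting them as \uXXXX escapes. A worked example of the same arithmetic in Java (the helper name and wrapper class are illustrative; Character.toChars() performs the identical conversion):

public class SurrogateSketch {
  // Same computation as the deleted Perl sub above_BMP_char_to_surrogates().
  static char[] toSurrogates(int codePoint) {
    char high = (char) (0xD800 + ((codePoint - 0x10000) >> 10)); // high (lead) surrogate
    char low  = (char) (0xDC00 + (codePoint & 0x3FF));           // low (trail) surrogate
    return new char[] { high, low };
  }

  public static void main(String[] args) {
    // U+1041C appears in the deleted CharArraySet tests below as "\ud801\udc1c":
    // 0x1041C - 0x10000 = 0x041C; 0x041C >> 10 = 1 -> 0xD801; 0x041C & 0x3FF = 0x1C -> 0xDC1C.
    char[] pair = toSurrogates(0x1041C);
    System.out.printf("%04X %04X%n", (int) pair[0], (int) pair[1]); // D801 DC1C
    assert java.util.Arrays.equals(pair, Character.toChars(0x1041C));
  }
}
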
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
index b9d586e..b7f45cb 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
index ebe1034..fd7aefd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
index 37983de..6eeb9af 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.th;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
index 9972702..9cfc6fc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
deleted file mode 100644
index 66b0dce..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-import java.util.*;
-
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestCharArrayMap extends LuceneTestCase {
- public void doRandom(int iter, boolean ignoreCase) {
- CharArrayMap<Integer> map = new CharArrayMap<>(1, ignoreCase);
- HashMap<String,Integer> hmap = new HashMap<>();
-
- char[] key;
- for (int i=0; i<iter; i++) {
- int len = random().nextInt(5);
- key = new char[len];
- for (int j=0; j<key.length; j++) {
- key[j] = (char)random().nextInt(127);
- }
- String keyStr = new String(key);
- String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr;
-
- int val = random().nextInt();
-
- Object o1 = map.put(key, val);
- Object o2 = hmap.put(hmapKey,val);
- assertEquals(o1,o2);
-
- // add it again with the string method
- assertEquals(val, map.put(keyStr,val).intValue());
-
- assertEquals(val, map.get(key,0,key.length).intValue());
- assertEquals(val, map.get(key).intValue());
- assertEquals(val, map.get(keyStr).intValue());
-
- assertEquals(hmap.size(), map.size());
- }
- }
-
- public void testCharArrayMap() {
- int num = 5 * RANDOM_MULTIPLIER;
- for (int i = 0; i < num; i++) { // pump this up for more random testing
- doRandom(1000,false);
- doRandom(1000,true);
- }
- }
-
- public void testMethods() {
- CharArrayMap<Integer> cm = new CharArrayMap<>(2, false);
- HashMap<String,Integer> hm = new HashMap<>();
- hm.put("foo",1);
- hm.put("bar",2);
- cm.putAll(hm);
- assertEquals(hm.size(), cm.size());
- hm.put("baz", 3);
- cm.putAll(hm);
- assertEquals(hm.size(), cm.size());
-
- CharArraySet cs = cm.keySet();
- int n=0;
- for (Object o : cs) {
- assertTrue(cm.containsKey(o));
- char[] co = (char[]) o;
- assertTrue(cm.containsKey(co, 0, co.length));
- n++;
- }
- assertEquals(hm.size(), n);
- assertEquals(hm.size(), cs.size());
- assertEquals(cm.size(), cs.size());
- cs.clear();
- assertEquals(0, cs.size());
- assertEquals(0, cm.size());
- // keySet() should not allow adding new keys
- expectThrows(UnsupportedOperationException.class, () -> {
- cs.add("test");
- });
-
- cm.putAll(hm);
- assertEquals(hm.size(), cs.size());
- assertEquals(cm.size(), cs.size());
-
- Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
- n=0;
- while (iter1.hasNext()) {
- Map.Entry<Object,Integer> entry = iter1.next();
- Object key = entry.getKey();
- Integer val = entry.getValue();
- assertEquals(cm.get(key), val);
- entry.setValue(val*100);
- assertEquals(val*100, (int)cm.get(key));
- n++;
- }
- assertEquals(hm.size(), n);
- cm.clear();
- cm.putAll(hm);
- assertEquals(cm.size(), n);
-
- CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
- n=0;
- while (iter2.hasNext()) {
- char[] keyc = iter2.nextKey();
- Integer val = iter2.currentValue();
- assertEquals(hm.get(new String(keyc)), val);
- iter2.setValue(val*100);
- assertEquals(val*100, (int)cm.get(keyc));
- n++;
- }
- assertEquals(hm.size(), n);
-
- cm.entrySet().clear();
- assertEquals(0, cm.size());
- assertEquals(0, cm.entrySet().size());
- assertTrue(cm.isEmpty());
- }
-
- // TODO: break this up into simpler test methods vs. "telling a story"
- public void testModifyOnUnmodifiable(){
- CharArrayMap<Integer> map = new CharArrayMap<>(2, false);
- map.put("foo",1);
- map.put("bar",2);
- final int size = map.size();
- assertEquals(2, size);
- assertTrue(map.containsKey("foo"));
- assertEquals(1, map.get("foo").intValue());
- assertTrue(map.containsKey("bar"));
- assertEquals(2, map.get("bar").intValue());
-
- map = CharArrayMap.unmodifiableMap(map);
- assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
- String NOT_IN_MAP = "SirGallahad";
- assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
-
- try{
- map.put(NOT_IN_MAP.toCharArray(), 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.put(NOT_IN_MAP, 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.put(new StringBuilder(NOT_IN_MAP), 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.clear();
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.entrySet().clear();
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.keySet().clear();
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.put((Object) NOT_IN_MAP, 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- assertTrue(map.containsKey("foo"));
- assertEquals(1, map.get("foo").intValue());
- assertTrue(map.containsKey("bar"));
- assertEquals(2, map.get("bar").intValue());
- }
-
- public void testToString() {
- CharArrayMap<Integer> cm = new CharArrayMap<>(Collections.singletonMap("test",1), false);
- assertEquals("[test]",cm.keySet().toString());
- assertEquals("[1]",cm.values().toString());
- assertEquals("[test=1]",cm.entrySet().toString());
- assertEquals("{test=1}",cm.toString());
- cm.put("test2", 2);
- assertTrue(cm.keySet().toString().contains(", "));
- assertTrue(cm.values().toString().contains(", "));
- assertTrue(cm.entrySet().toString().contains(", "));
- assertTrue(cm.toString().contains(", "));
- }
-}
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
deleted file mode 100644
index 1fcee65..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.*;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-
-public class TestCharArraySet extends LuceneTestCase {
-
- static final String[] TEST_STOP_WORDS = {
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with"
- };
-
-
- public void testRehash() throws Exception {
- CharArraySet cas = new CharArraySet(0, true);
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- cas.add(TEST_STOP_WORDS[i]);
- assertEquals(TEST_STOP_WORDS.length, cas.size());
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- assertTrue(cas.contains(TEST_STOP_WORDS[i]));
- }
-
- public void testNonZeroOffset() {
- String[] words={"Hello","World","this","is","a","test"};
- char[] findme="xthisy".toCharArray();
- CharArraySet set= new CharArraySet(10, true);
- set.addAll(Arrays.asList(words));
- assertTrue(set.contains(findme, 1, 4));
- assertTrue(set.contains(new String(findme,1,4)));
-
- // test unmodifiable
- set = CharArraySet.unmodifiableSet(set);
- assertTrue(set.contains(findme, 1, 4));
- assertTrue(set.contains(new String(findme,1,4)));
- }
-
- public void testObjectContains() {
- CharArraySet set = new CharArraySet(10, true);
- Integer val = Integer.valueOf(1);
- set.add(val);
- assertTrue(set.contains(val));
- assertTrue(set.contains(new Integer(1))); // another integer
- assertTrue(set.contains("1"));
- assertTrue(set.contains(new char[]{'1'}));
- // test unmodifiable
- set = CharArraySet.unmodifiableSet(set);
- assertTrue(set.contains(val));
- assertTrue(set.contains(new Integer(1))); // another integer
- assertTrue(set.contains("1"));
- assertTrue(set.contains(new char[]{'1'}));
- }
-
- public void testClear(){
- CharArraySet set=new CharArraySet(10,true);
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
- set.clear();
- assertEquals("not empty", 0, set.size());
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- assertFalse(set.contains(TEST_STOP_WORDS[i]));
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- assertTrue(set.contains(TEST_STOP_WORDS[i]));
- }
-
- // TODO: break this up into simpler test methods, vs "telling a story"
- public void testModifyOnUnmodifiable(){
- CharArraySet set=new CharArraySet(10, true);
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- final int size = set.size();
- set = CharArraySet.unmodifiableSet(set);
- assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
- String NOT_IN_SET = "SirGallahad";
- assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
-
- try{
- set.add(NOT_IN_SET.toCharArray());
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.add(NOT_IN_SET);
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.add(new StringBuilder(NOT_IN_SET));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.clear();
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
- try{
- set.add((Object) NOT_IN_SET);
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
- // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
- // remove() on the iterator
- try{
- set.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.addAll(Arrays.asList(NOT_IN_SET));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- }
-
- for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
- assertTrue(set.contains(TEST_STOP_WORDS[i]));
- }
- }
-
- public void testUnmodifiableSet(){
- CharArraySet set = new CharArraySet(10,true);
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- set.add(Integer.valueOf(1));
- final int size = set.size();
- set = CharArraySet.unmodifiableSet(set);
- assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
- for (String stopword : TEST_STOP_WORDS) {
- assertTrue(set.contains(stopword));
- }
- assertTrue(set.contains(Integer.valueOf(1)));
- assertTrue(set.contains("1"));
- assertTrue(set.contains(new char[]{'1'}));
-
- expectThrows(NullPointerException.class, () -> {
- CharArraySet.unmodifiableSet(null);
- });
- }
-
- public void testSupplementaryChars() {
- String missing = "Term %s is missing in the set";
- String falsePos = "Term %s is in the set but shouldn't";
- // for reference see
- // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
- String[] upperArr = new String[] {"Abc\ud801\udc1c",
- "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
- String[] lowerArr = new String[] {"abc\ud801\udc44",
- "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
- CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
- }
- set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
- }
- }
-
- public void testSingleHighSurrogate() {
- String missing = "Term %s is missing in the set";
- String falsePos = "Term %s is in the set but shouldn't";
- String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
- "\uD800EfG", "\uD800\ud801\udc1cB" };
-
- String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
- "\uD800efg", "\uD800\ud801\udc44b" };
- CharArraySet set = new CharArraySet(Arrays
- .asList(TEST_STOP_WORDS), true);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
- }
- set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS),
- false);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
- .contains(lowerArr[i]));
- }
- }
-
- @SuppressWarnings("deprecated")
- public void testCopyCharArraySetBWCompat() {
- CharArraySet setIngoreCase = new CharArraySet(10, true);
- CharArraySet setCaseSensitive = new CharArraySet(10, false);
-
- List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
- List<String> stopwordsUpper = new ArrayList<>();
- for (String string : stopwords) {
- stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
- }
- setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
- setIngoreCase.add(Integer.valueOf(1));
- setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
- setCaseSensitive.add(Integer.valueOf(1));
-
- CharArraySet copy = CharArraySet.copy(setIngoreCase);
- CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
- assertEquals(setIngoreCase.size(), copy.size());
- assertEquals(setCaseSensitive.size(), copy.size());
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copyCaseSens.containsAll(stopwords));
- for (String string : stopwordsUpper) {
- assertFalse(copyCaseSens.contains(string));
- }
- // test adding terms to the copy
- List<String> newWords = new ArrayList<>();
- for (String string : stopwords) {
- newWords.add(string+"_1");
- }
- copy.addAll(newWords);
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copy.containsAll(newWords));
- // new added terms are not in the source set
- for (String string : newWords) {
- assertFalse(setIngoreCase.contains(string));
- assertFalse(setCaseSensitive.contains(string));
-
- }
- }
-
- /**
- * Test the static #copy() function with a CharArraySet as a source
- */
- public void testCopyCharArraySet() {
- CharArraySet setIngoreCase = new CharArraySet(10, true);
- CharArraySet setCaseSensitive = new CharArraySet(10, false);
-
- List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
- List<String> stopwordsUpper = new ArrayList<>();
- for (String string : stopwords) {
- stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
- }
- setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
- setIngoreCase.add(Integer.valueOf(1));
- setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
- setCaseSensitive.add(Integer.valueOf(1));
-
- CharArraySet copy = CharArraySet.copy(setIngoreCase);
- CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
- assertEquals(setIngoreCase.size(), copy.size());
- assertEquals(setCaseSensitive.size(), copy.size());
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copyCaseSens.containsAll(stopwords));
- for (String string : stopwordsUpper) {
- assertFalse(copyCaseSens.contains(string));
- }
- // test adding terms to the copy
- List<String> newWords = new ArrayList<>();
- for (String string : stopwords) {
- newWords.add(string+"_1");
- }
- copy.addAll(newWords);
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copy.containsAll(newWords));
- // new added terms are not in the source set
- for (String string : newWords) {
- assertFalse(setIngoreCase.contains(string));
- assertFalse(setCaseSensitive.contains(string));
-
- }
- }
-
- /**
- * Test the static #copy() function with a JDK {@link Set} as a source
- */
- public void testCopyJDKSet() {
- Set<String> set = new HashSet<>();
-
- List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
- List<String> stopwordsUpper = new ArrayList<>();
- for (String string : stopwords) {
- stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
- }
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
-
- CharArraySet copy = CharArraySet.copy(set);
-
- assertEquals(set.size(), copy.size());
- assertEquals(set.size(), copy.size());
-
- assertTrue(copy.containsAll(stopwords));
- for (String string : stopwordsUpper) {
- assertFalse(copy.contains(string));
- }
-
- List<String> newWords = new ArrayList<>();
- for (String string : stopwords) {
- newWords.add(string+"_1");
- }
- copy.addAll(newWords);
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(newWords));
- // new added terms are not in the source set
- for (String string : newWords) {
- assertFalse(set.contains(string));
- }
- }
-
- /**
- * Tests a special case of {@link CharArraySet#copy(Set)} where the
- * set to copy is the {@link CharArraySet#EMPTY_SET}
- */
- public void testCopyEmptySet() {
- assertSame(CharArraySet.EMPTY_SET,
- CharArraySet.copy(CharArraySet.EMPTY_SET));
- }
-
- /**
- * Smoketests the static empty set
- */
- public void testEmptySet() {
- assertEquals(0, CharArraySet.EMPTY_SET.size());
-
- assertTrue(CharArraySet.EMPTY_SET.isEmpty());
- for (String stopword : TEST_STOP_WORDS) {
- assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
- }
- assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
- assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
- assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
- assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
- }
-
- /**
- * Test for NPE
- */
- public void testContainsWithNull() {
- CharArraySet set = new CharArraySet(1, true);
-
- expectThrows(NullPointerException.class, () -> {
- set.contains((char[]) null, 0, 10);
- });
-
- expectThrows(NullPointerException.class, () -> {
- set.contains((CharSequence) null);
- });
-
- expectThrows(NullPointerException.class, () -> {
- set.contains((Object) null);
- });
- }
-
- public void testToString() {
- CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
- assertEquals("[test]", set.toString());
- set.add("test2");
- assertTrue(set.toString().contains(", "));
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
deleted file mode 100644
index 04e96ea..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.junit.Test;
-
-/**
- * TestCase for the {@link CharacterUtils} class.
- */
-public class TestCharacterUtils extends LuceneTestCase {
-
- public void testConversions() {
- final char[] orig = TestUtil.randomUnicodeString(random(), 100).toCharArray();
- final int[] buf = new int[orig.length];
- final char[] restored = new char[buf.length];
- final int o1 = TestUtil.nextInt(random(), 0, Math.min(5, orig.length));
- final int o2 = TestUtil.nextInt(random(), 0, o1);
- final int o3 = TestUtil.nextInt(random(), 0, o1);
- final int codePointCount = CharacterUtils.toCodePoints(orig, o1, orig.length - o1, buf, o2);
- final int charCount = CharacterUtils.toChars(buf, o2, codePointCount, restored, o3);
- assertEquals(orig.length - o1, charCount);
- assertArrayEquals(Arrays.copyOfRange(orig, o1, o1 + charCount), Arrays.copyOfRange(restored, o3, o3 + charCount));
- }
-
- @Test
- public void testNewCharacterBuffer() {
- CharacterBuffer newCharacterBuffer = CharacterUtils.newCharacterBuffer(1024);
- assertEquals(1024, newCharacterBuffer.getBuffer().length);
- assertEquals(0, newCharacterBuffer.getOffset());
- assertEquals(0, newCharacterBuffer.getLength());
-
- newCharacterBuffer = CharacterUtils.newCharacterBuffer(2);
- assertEquals(2, newCharacterBuffer.getBuffer().length);
- assertEquals(0, newCharacterBuffer.getOffset());
- assertEquals(0, newCharacterBuffer.getLength());
-
- // length must be >= 2
- expectThrows(IllegalArgumentException.class, () -> {
- CharacterUtils.newCharacterBuffer(1);
- });
- }
-
- @Test
- public void testFillNoHighSurrogate() throws IOException {
- Reader reader = new StringReader("helloworld");
- CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
- assertTrue(CharacterUtils.fill(buffer,reader));
- assertEquals(0, buffer.getOffset());
- assertEquals(6, buffer.getLength());
- assertEquals("hellow", new String(buffer.getBuffer()));
- assertFalse(CharacterUtils.fill(buffer,reader));
- assertEquals(4, buffer.getLength());
- assertEquals(0, buffer.getOffset());
-
- assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(),
- buffer.getLength()));
- assertFalse(CharacterUtils.fill(buffer,reader));
- }
-
- @Test
- public void testFill() throws IOException {
- String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
- Reader reader = new StringReader(input);
- CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
- assertTrue(CharacterUtils.fill(buffer, reader));
- assertEquals(4, buffer.getLength());
- assertEquals("1234", new String(buffer.getBuffer(), buffer.getOffset(),
- buffer.getLength()));
- assertTrue(CharacterUtils.fill(buffer, reader));
- assertEquals(5, buffer.getLength());
- assertEquals("\ud801\udc1c789", new String(buffer.getBuffer()));
- assertTrue(CharacterUtils.fill(buffer, reader));
- assertEquals(4, buffer.getLength());
- assertEquals("123\ud801", new String(buffer.getBuffer(),
- buffer.getOffset(), buffer.getLength()));
- assertFalse(CharacterUtils.fill(buffer, reader));
- assertEquals(3, buffer.getLength());
- assertEquals("\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer
- .getOffset(), buffer.getLength()));
- assertFalse(CharacterUtils.fill(buffer, reader));
- assertEquals(0, buffer.getLength());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
index 5e1d3c1..f8c1198 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
@@ -24,13 +24,13 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
*
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
index be90611..eaa6174 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
@@ -24,6 +24,8 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
deleted file mode 100644
index b1dd1b5..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-import org.apache.lucene.analysis.util.WordlistLoader;
-
-public class TestWordlistLoader extends LuceneTestCase {
-
- public void testWordlistLoading() throws IOException {
- String s = "ONE\n two \nthree";
- CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s));
- checkSet(wordSet1);
- CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)));
- checkSet(wordSet2);
- }
-
- public void testComments() throws Exception {
- String s = "ONE\n two \nthree\n#comment";
- CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#");
- checkSet(wordSet1);
- assertFalse(wordSet1.contains("#comment"));
- assertFalse(wordSet1.contains("comment"));
- }
-
-
- private void checkSet(CharArraySet wordset) {
- assertEquals(3, wordset.size());
- assertTrue(wordset.contains("ONE")); // case is not modified
- assertTrue(wordset.contains("two")); // surrounding whitespace is removed
- assertTrue(wordset.contains("three"));
- assertFalse(wordset.contains("four"));
- }
-
- /**
- * Test stopwords in snowball format
- */
- public void testSnowballListLoading() throws IOException {
- String s =
- "|comment\n" + // commented line
- " |comment\n" + // commented line with leading whitespace
- "\n" + // blank line
- " \t\n" + // line with only whitespace
- " |comment | comment\n" + // commented line with comment
- "ONE\n" + // stopword, in uppercase
- " two \n" + // stopword with leading/trailing space
- " three four five \n" + // multiple stopwords
- "six seven | comment\n"; //multiple stopwords + comment
- CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s));
- assertEquals(7, wordset.size());
- assertTrue(wordset.contains("ONE"));
- assertTrue(wordset.contains("two"));
- assertTrue(wordset.contains("three"));
- assertTrue(wordset.contains("four"));
- assertTrue(wordset.contains("five"));
- assertTrue(wordset.contains("six"));
- assertTrue(wordset.contains("seven"));
- }
-}
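
TestWordlistLoader is removed here along with the other util tests; WordlistLoader itself now lives in org.apache.lucene.analysis (see the added import in TestFilesystemResourceLoader above). A minimal sketch of the same calls against the relocated class, assuming getWordSet/getSnowballWordSet keep the signatures exercised by the deleted test:

import java.io.StringReader;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.WordlistLoader;

public class WordlistSketch {
  public static void main(String[] args) throws Exception {
    CharArraySet plain = WordlistLoader.getWordSet(new StringReader("ONE\n two \nthree"));
    System.out.println(plain.contains("two"));         // true: surrounding whitespace is trimmed

    CharArraySet snowball =
        WordlistLoader.getSnowballWordSet(new StringReader("six seven | comment\n"));
    System.out.println(snowball.contains("seven"));    // true
    System.out.println(snowball.contains("comment"));  // false: '|' starts a comment
  }
}
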
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/icu/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html
index abb2e2a..bdace97 100644
--- a/lucene/analysis/icu/src/java/overview.html
+++ b/lucene/analysis/icu/src/java/overview.html
@@ -103,7 +103,7 @@ algorithm.
</li>
<li>
Effective Locale-specific normalization (case differences, diacritics, etc.).
- ({@link org.apache.lucene.analysis.core.LowerCaseFilter} and
+ ({@link org.apache.lucene.analysis.LowerCaseFilter} and
{@link org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter} provide these services
in a generic way that doesn't take into account locale-specific needs.)
</li>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
index f2fd50a..17ea967 100644
--- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
+++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
index bff30f1..46d40b1 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.analysis.ja.dict.UserDictionary;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* Analyzer for Japanese that uses morphological analysis.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
index b8d0a78..a1af95e 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
@@ -44,7 +44,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
* input tokens \uff13 and \uff12\u5343 and give outputs 3 and 2000 instead of 3200, which is
* likely not the intended result. If you want to remove punctuation characters from your
* index that are not part of normalized numbers, add a
- * {@link org.apache.lucene.analysis.core.StopFilter} with the punctuation you wish to
+ * {@link org.apache.lucene.analysis.StopFilter} with the punctuation you wish to
* remove after {@link JapaneseNumberFilter} in your analyzer chain.
* <p>
* Below are some examples of normalizations this filter supports. The input is untokenized
@@ -615,4 +615,4 @@ public class JapaneseNumberFilter extends TokenFilter {
return position;
}
}
-}
\ No newline at end of file
+}
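
The reworded javadoc above points users at the relocated org.apache.lucene.analysis.StopFilter for stripping punctuation once numbers have been normalized. A sketch of such a chain, assuming the Lucene 6.x JapaneseTokenizer(UserDictionary, boolean, Mode) constructor and the StopFilter(TokenStream, CharArraySet) constructor; the punctuation set is illustrative:

import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ja.JapaneseNumberFilter;
import org.apache.lucene.analysis.ja.JapaneseTokenizer;

public class NumberNormalizingAnalyzer extends Analyzer {
  // Punctuation to strip once numbers have been normalized; adjust as needed.
  private static final CharArraySet PUNCTUATION =
      new CharArraySet(Arrays.asList("\uff0e", "\uff0c"), false); // full-width "." and ","

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // discardPunctuation=false so JapaneseNumberFilter still sees "3.2千" as one number.
    Tokenizer tokenizer = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.SEARCH);
    TokenStream stream = new JapaneseNumberFilter(tokenizer);
    stream = new StopFilter(stream, PUNCTUATION); // drop leftover punctuation tokens
    return new TokenStreamComponents(tokenizer, stream);
  }
}
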
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
index 0ee9ccf..342295d 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ja;
import java.util.Set;
-import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
/**
* Removes tokens that match a set of part-of-speech tags.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
index 8b5483c..a59de44 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
@@ -22,8 +22,8 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
index b9ebd36..ab6c473 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
@@ -21,11 +21,11 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
index bd14be3..bc57f56 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
@@ -17,16 +17,16 @@
package org.apache.lucene.analysis.ja;
+import java.io.IOException;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-
-import java.io.IOException;
/**
* Tests for {@link JapaneseKatakanaStemFilter}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
index 27cef33..b8a987a 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
@@ -27,11 +27,11 @@ import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.junit.Ignore;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
index 3429d86..b35523e 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@@ -23,12 +23,6 @@ import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
-import morfologik.stemming.Dictionary;
-import morfologik.stemming.DictionaryLookup;
-import morfologik.stemming.IStemmer;
-import morfologik.stemming.WordData;
-import morfologik.stemming.polish.PolishStemmer;
-
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -36,6 +30,12 @@ import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.CharsRefBuilder;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import morfologik.stemming.polish.PolishStemmer;
+
/**
* {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
* morphosyntactic (POS) tokens. Applies to Polish only.
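A small sketch of consuming the lemmas this filter produces, via MorfologikAnalyzer (which wraps the filter with its default Polish dictionary); the field name and sample input are illustrative assumptions, and the exact lemmas emitted depend on the dictionary:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.morfologik.MorfologikAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PolishLemmaExample {
  public static void main(String[] args) throws IOException {
    try (Analyzer analyzer = new MorfologikAnalyzer();
         TokenStream stream = analyzer.tokenStream("field", "liście")) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println(term.toString());  // one line per lemma token emitted
      }
      stream.end();
    }
  }
}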
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
index d8967c7..c4294e3 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
@@ -22,13 +22,13 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* TODO: The tests below rely on the order of returned lemmas, which is probably not good.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
index bd1fc7b..5f0347b 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
@@ -21,12 +21,12 @@ import java.nio.charset.StandardCharsets;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
index 999ce86..6ed4fda 100644
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
@@ -22,18 +22,18 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.stempel.StempelFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.util.IOUtils;
import org.egothor.stemmer.Trie;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
index b0ef008..c37cedb 100644
--- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index b4074ac..94b7910 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2585,4 +2585,34 @@ The following arguments can be provided to ant to alter its behaviour and target
</sequential>
</macrodef>
+ <macrodef name="run-jflex">
+ <attribute name="dir"/>
+ <attribute name="name"/>
+ <sequential>
+ <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+ </sequential>
+ </macrodef>
+
+ <macrodef name="run-jflex-and-disable-buffer-expansion">
+ <attribute name="dir"/>
+ <attribute name="name"/>
+ <sequential>
+ <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+ <!-- LUCENE-5897: Disallow scanner buffer expansion -->
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
+ replace="" flags="s" />
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="private static final int ZZ_BUFFERSIZE ="
+ replace="private int ZZ_BUFFERSIZE ="/>
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="int requested = zzBuffer.length - zzEndRead;"
+ replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="(zzFinalHighSurrogate = 1;)(\r?\n)"
+ replace="\1\2 if (totalRead == 1) { return true; }\2"/>
+ </sequential>
+ </macrodef>
+
+
</project>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/core/build.xml
----------------------------------------------------------------------
diff --git a/lucene/core/build.xml b/lucene/core/build.xml
index 90da238..4e62e1c 100644
--- a/lucene/core/build.xml
+++ b/lucene/core/build.xml
@@ -133,7 +133,7 @@
<delete file="${build.dir}/moman.zip"/>
</target>
- <target name="regenerate" depends="createLevAutomata,createPackedIntSources"/>
+ <target name="regenerate" depends="createLevAutomata,createPackedIntSources,jflex"/>
<macrodef name="startLockStressTestClient">
<attribute name="clientId"/>
@@ -223,4 +223,20 @@
<target name="test" depends="common.test, test-lock-factory"/>
+ <target name="clean-jflex">
+ <delete>
+ <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="**/*.java">
+ <containsregexp expression="generated.*by.*JFlex"/>
+ </fileset>
+ </delete>
+ </target>
+
+ <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-StandardAnalyzer"/>
+
+ <target name="-jflex-StandardAnalyzer" depends="init,-install-jflex">
+ <run-jflex-and-disable-buffer-expansion
+ dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
+ </target>
+
+
</project>