You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/07/25 08:33:24 UTC

[1/4] lucenenet git commit: ENHANCEMENT: Lucene.Net.Store.NativeFSLockFactory: Refactored implementation to take advantage of .NET FileStream.Lock. Implementation provided by Vincent Van Den Berghe.

Repository: lucenenet
Updated Branches:
  refs/heads/master c3f60b29f -> db1f605cd


ENHANCEMENT: Lucene.Net.Store.NativeFSLockFactory: Refactored implementation to take advantage of .NET FileStream.Lock. Implementation provided by Vincent Van Den Berghe.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7e6b0bca
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7e6b0bca
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7e6b0bca

Branch: refs/heads/master
Commit: 7e6b0bcac9e441fb6f17bdb5e324cbbef1bdead3
Parents: c3f60b2
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Mon Jul 24 21:06:12 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Jul 24 21:06:12 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net/Lucene.Net.csproj            |   2 +-
 src/Lucene.Net/Store/NativeFSLockFactory.cs | 236 +++++++++++++++--------
 src/Lucene.Net/project.json                 |   3 +-
 3 files changed, 162 insertions(+), 79 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7e6b0bca/src/Lucene.Net/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net/Lucene.Net.csproj b/src/Lucene.Net/Lucene.Net.csproj
index d2b8e6e..3dc6d30 100644
--- a/src/Lucene.Net/Lucene.Net.csproj
+++ b/src/Lucene.Net/Lucene.Net.csproj
@@ -53,7 +53,7 @@
     <Prefer32Bit>false</Prefer32Bit>
   </PropertyGroup>
   <PropertyGroup>
-    <DefineConstants>$(DefineConstants);FEATURE_CLONEABLE;FEATURE_CONCURRENTMERGESCHEDULER;FEATURE_SERIALIZABLE;FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM</DefineConstants>
+    <DefineConstants>$(DefineConstants);FEATURE_CLONEABLE;FEATURE_CONCURRENTMERGESCHEDULER;FEATURE_SERIALIZABLE;FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM;FEATURE_FILESTREAM_LOCK</DefineConstants>
   </PropertyGroup>
   <ItemGroup>
     <Reference Include="Microsoft.CSharp" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7e6b0bca/src/Lucene.Net/Store/NativeFSLockFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net/Store/NativeFSLockFactory.cs b/src/Lucene.Net/Store/NativeFSLockFactory.cs
index 47b5233..8157e13 100644
--- a/src/Lucene.Net/Store/NativeFSLockFactory.cs
+++ b/src/Lucene.Net/Store/NativeFSLockFactory.cs
@@ -1,26 +1,26 @@
 using Lucene.Net.Util;
 using System;
-using System.Collections.Concurrent;
 using System.IO;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Store
 {
     /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
+    * Licensed to the Apache Software Foundation (ASF) under one or more
+    * contributor license agreements.  See the NOTICE file distributed with
+    * this work for additional information regarding copyright ownership.
+    * The ASF licenses this file to You under the Apache License, Version 2.0
+    * (the "License"); you may not use this file except in compliance with
+    * the License.  You may obtain a copy of the License at
+    *
+    *     http://www.apache.org/licenses/LICENSE-2.0
+    *
+    * Unless required by applicable law or agreed to in writing, software
+    * distributed under the License is distributed on an "AS IS" BASIS,
+    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    * See the License for the specific language governing permissions and
+    * limitations under the License.
+    */
 
     /// <summary>
     /// <para>Implements <see cref="LockFactory"/> using native OS file
@@ -49,7 +49,7 @@ namespace Lucene.Net.Store
     ///
     /// <para>If you suspect that this or any other <see cref="LockFactory"/> is
     /// not working properly in your environment, you can easily
-    /// test it by using <see cref="VerifyingLockFactory"/>, 
+    /// test it by using <see cref="VerifyingLockFactory"/>,
     /// <see cref="LockVerifyServer"/> and <see cref="LockStressTest"/>.</para>
     /// </summary>
     /// <seealso cref="LockFactory"/>
@@ -89,33 +89,109 @@ namespace Lucene.Net.Store
 
         // LUCENENET: NativeFSLocks in Java are infact singletons; this is how we mimick that to track instances and make sure
         // IW.Unlock and IW.IsLocked works correctly
-        internal static readonly ConcurrentDictionary<string, Lazy<NativeFSLock>> _locks = new ConcurrentDictionary<string, Lazy<NativeFSLock>>();
+        internal static readonly Dictionary<string, NativeFSLock> _locks = new Dictionary<string, NativeFSLock>();
+
+        /// <summary>
+        /// Given a lock name, return the full prefixed path of the actual lock file.
+        /// </summary>
+        /// <param name="lockName"></param>
+        /// <returns></returns>
+        private string GetPathOfLockFile(string lockName)
+        {
+            if (m_lockPrefix != null)
+            {
+                lockName = m_lockPrefix + "-" + lockName;
+            }
+            return Path.Combine(m_lockDir.FullName, lockName);
+        }
 
         public override Lock MakeLock(string lockName)
         {
-            var path = new DirectoryInfo(System.IO.Path.Combine(m_lockDir.FullName, lockName));
-            return _locks.GetOrAdd(path.FullName, s => new Lazy<NativeFSLock>(() => new NativeFSLock(this, m_lockDir, s))).Value;
+            var path = GetPathOfLockFile(lockName);
+            NativeFSLock l;
+            lock (_locks)
+                if (!_locks.TryGetValue(path, out l))
+                    _locks.Add(path, l = new NativeFSLock(this, m_lockDir, path));
+            return l;
         }
 
         public override void ClearLock(string lockName)
         {
-            using (var _ = MakeLock(lockName)) { }
+            var path = GetPathOfLockFile(lockName);
+            NativeFSLock l;
+            // this is the reason why we can't use ConcurrentDictionary: we need the removal and disposal of the lock to be atomic
+            // otherwise it may clash with MakeLock making a lock and ClearLock disposing of it in another thread.
+            lock (_locks)
+                if (_locks.TryGetValue(path, out l))
+                {
+                    _locks.Remove(path);
+                    l.Dispose();
+                }
         }
     }
 
     internal class NativeFSLock : Lock
     {
+#if FEATURE_FILESTREAM_LOCK
+        private const int ERROR_LOCK_VIOLATION = 0x21;
+#else
+        private const int ERROR_SHARE_VIOLATION = 0x20;
+#endif
+
         private readonly NativeFSLockFactory outerInstance;
 
         private FileStream channel;
-        private readonly DirectoryInfo path;
+        private readonly string path;
         private readonly DirectoryInfo lockDir;
 
-        public NativeFSLock(NativeFSLockFactory outerInstance, DirectoryInfo lockDir, string lockFileName)
+        public NativeFSLock(NativeFSLockFactory outerInstance, DirectoryInfo lockDir, string path)
         {
             this.outerInstance = outerInstance;
             this.lockDir = lockDir;
-            path = new DirectoryInfo(System.IO.Path.Combine(lockDir.FullName, lockFileName));
+            this.path = path;
+        }
+
+        /// <summary>
+        /// Return true if the <see cref="IOException"/> is the result of a lock violation (or, when FileStream.Lock is unavailable, a sharing violation)
+        /// </summary>
+        /// <param name="e"></param>
+        /// <returns></returns>
+        private static bool IsLockOrShareViolation(IOException e)
+        {
+            var result = e.HResult & 0xFFFF;
+#if FEATURE_FILESTREAM_LOCK
+            return result == ERROR_LOCK_VIOLATION;
+#else
+            return result == ERROR_SHARE_VIOLATION;
+#endif
+        }
+
+        private FileStream GetLockFileStream(FileMode mode)
+        {
+            if (!System.IO.Directory.Exists(lockDir.FullName))
+            {
+                try
+                {
+                    System.IO.Directory.CreateDirectory(lockDir.FullName);
+                }
+                catch (Exception e)
+                {
+                    // note that several processes might have been trying to create the same directory at the same time.
+                    // if one succeeded, the directory will exist and the exception can be ignored. In all other cases we should report it.
+                    if (!System.IO.Directory.Exists(lockDir.FullName))
+                        throw new IOException("Cannot create directory: " + lockDir.FullName, e);
+                }
+            }
+            else if (File.Exists(lockDir.FullName))
+            {
+                throw new IOException("Found regular file where directory expected: " + lockDir.FullName);
+            }
+
+#if FEATURE_FILESTREAM_LOCK
+            return new FileStream(path, mode, FileAccess.Write, FileShare.ReadWrite);
+#else
+            return new FileStream(path, mode, FileAccess.Write, FileShare.None, 1, mode == FileMode.Open ? FileOptions.None : FileOptions.DeleteOnClose);
+#endif
         }
 
         public override bool Obtain()
@@ -130,28 +206,48 @@ namespace Lucene.Net.Store
                     return false;
                 }
 
-                if (!System.IO.Directory.Exists(lockDir.FullName))
+#if FEATURE_FILESTREAM_LOCK
+                FileStream stream = null;
+                try
+                {
+                    stream = GetLockFileStream(FileMode.OpenOrCreate);
+                }
+                catch (IOException e)
+                {
+                    FailureReason = e;
+                }
+                // LUCENENET: UnauthorizedAccessException does not derive from IOException like in java
+                catch (UnauthorizedAccessException e)
+                {
+                    // On Windows, we can get intermittent "Access
+                    // Denied" here.  So, we treat this as failure to
+                    // acquire the lock, but, store the reason in case
+                    // there is in fact a real error case.
+                    FailureReason = e;
+                }
+
+                if (stream != null)
                 {
                     try
                     {
-                        System.IO.Directory.CreateDirectory(lockDir.FullName);
+                        stream.Lock(0, 1);
+                        // only assign the channel if the lock succeeds
+                        channel = stream;
                     }
-                    catch
+                    catch (Exception e)
                     {
-                        throw new IOException("Cannot create directory: " + lockDir.FullName);
+                        FailureReason = e;
+                        IOUtils.DisposeWhileHandlingException(stream);
                     }
                 }
-                else if (File.Exists(lockDir.FullName))
+#else
+                try
                 {
-                    throw new IOException("Found regular file where directory expected: " + lockDir.FullName);
+                    channel = GetLockFileStream(FileMode.OpenOrCreate);
                 }
-
-                var success = false;
-                try
+                catch (IOException e) when (IsLockOrShareViolation(e))
                 {
-                    channel = new FileStream(path.FullName, FileMode.Create, FileAccess.Write, FileShare.None);
-
-                    success = true;
+                    // no failure reason to be recorded, since this is the expected error if a lock exists
                 }
                 catch (IOException e)
                 {
@@ -166,15 +262,7 @@ namespace Lucene.Net.Store
                     // there is in fact a real error case.
                     FailureReason = e;
                 }
-                finally
-                {
-                    if (!success)
-                    {
-                        IOUtils.DisposeWhileHandlingException(channel);
-                        channel = null;
-                    }
-                }
-
+#endif
                 return channel != null;
             }
         }
@@ -187,28 +275,25 @@ namespace Lucene.Net.Store
                 {
                     if (channel != null)
                     {
-                        IOUtils.DisposeWhileHandlingException(channel);
-                        channel = null;
-
-                        bool tmpBool;
-                        if (File.Exists(path.FullName))
+                        try
                         {
-                            File.Delete(path.FullName);
-                            tmpBool = true;
+                            NativeFSLockFactory._locks.Remove(path);
                         }
-                        else if (System.IO.Directory.Exists(path.FullName))
+                        finally
                         {
-                            System.IO.Directory.Delete(path.FullName);
-                            tmpBool = true;
+                            IOUtils.DisposeWhileHandlingException(channel);
+                            channel = null;
                         }
-                        else
+#if FEATURE_FILESTREAM_LOCK
+                        // try to delete the file if we created it, but it's not an error if we can't.
+                        try
                         {
-                            tmpBool = false;
+                            File.Delete(path);
                         }
-                        if (!tmpBool)
+                        catch
                         {
-                            throw new LockReleaseFailedException("failed to delete " + path);
                         }
+#endif
                     }
                 }
             }
@@ -218,35 +303,32 @@ namespace Lucene.Net.Store
         {
             lock (this)
             {
-                // The test for is isLocked is not directly possible with native file locks:
-
                 // First a shortcut, if a lock reference in this instance is available
                 if (channel != null)
                 {
                     return true;
                 }
 
-                // Look if lock file is present; if not, there can definitely be no lock!
-                bool tmpBool;
-                if (System.IO.File.Exists(path.FullName))
-                    tmpBool = true;
-                else
-                    tmpBool = System.IO.Directory.Exists(path.FullName);
-                if (!tmpBool)
-                    return false;
-
-                // Try to obtain and release (if was locked) the lock
                 try
                 {
-                    bool obtained = Obtain();
-                    if (obtained)
+                    using (var stream = GetLockFileStream(FileMode.Open))
                     {
-                        Dispose();
+#if FEATURE_FILESTREAM_LOCK
+                        // try to find out if the file is locked by writing a byte. Note that we need to flush the stream to find out.
+                        stream.WriteByte(0);
+                        stream.Flush();   // this *may* throw an IOException if the file is locked, but...
+                                          // ... closing the stream is the real test
+#endif
                     }
-                    return !obtained;
+                    return false;
+                }
+                catch (IOException e) when (IsLockOrShareViolation(e))
+                {
+                    return true;
                 }
-                catch (IOException)
+                catch (FileNotFoundException)
                 {
+                    // if the file doesn't exist, there can be no lock active
                     return false;
                 }
             }
@@ -257,4 +339,4 @@ namespace Lucene.Net.Store
             return "NativeFSLock@" + path;
         }
     }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7e6b0bca/src/Lucene.Net/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net/project.json b/src/Lucene.Net/project.json
index 9a38729..b820797 100644
--- a/src/Lucene.Net/project.json
+++ b/src/Lucene.Net/project.json
@@ -46,7 +46,8 @@
           "FEATURE_CLONEABLE",
           "FEATURE_CONCURRENTMERGESCHEDULER",
           "FEATURE_SERIALIZABLE",
-          "FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM"
+          "FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM",
+          "FEATURE_FILESTREAM_LOCK"
         ]
       }
     }

[3/4] lucenenet git commit: Squashed commit of the following:

Posted by ni...@apache.org.

Squashed commit of the following:

commit 480d8cf0f169258b1536d376fc63031de7fe9091
Author: Shad Storhaug <sh...@shadstorhaug.com>
Date:   Tue Jul 25 15:21:15 2017 +0700

    Fixed .NET Core compilation issues

commit dc336e73b7a45e822d74cd14228d728dc0d6bf22
Author: Shad Storhaug <sh...@shadstorhaug.com>
Date:   Tue Jul 25 14:50:09 2017 +0700

    Fixed up documentation comments

commit 25f40064828b32f3f30b688dd6a7a42ea5069b8b
Author: Shad Storhaug <sh...@shadstorhaug.com>
Date:   Tue Jul 25 13:59:06 2017 +0700

    Integrated Collation into Lucene.Net.ICU

commit 4145820997b5329ce7116f9bd723e36a7e064a17
Author: Shad Storhaug <sh...@shadstorhaug.com>
Date:   Sat Jul 22 15:30:16 2017 +0700

    Lucene.Net.Analysis.ICU: Fixed all collation bugs, added tests for TestCharArrayIterator, TestICUNormalizer2Filter, and TestICUNormalizer2FilterFactory.

commit 2580df83fe6f5b04639279455fdf61568f34e451
Author: Shad Storhaug <sh...@shadstorhaug.com>
Date:   Fri Jul 21 02:58:37 2017 +0700

    WIP: Analysis.ICU

commit 7be4d43ae06663610d0ff321dcf93ae5cc8499b7
Author: Shad Storhaug <sh...@shadstorhaug.com>
Date:   Thu Jul 20 15:26:53 2017 +0700

    First poke at Analysis.ICU


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/a4989ea1
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/a4989ea1
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/a4989ea1

Branch: refs/heads/master
Commit: a4989ea194f4b73f5cd9dcb81b6ae4ef358588b9
Parents: 7e6b0bc
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Jul 25 15:22:31 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Jul 25 15:25:20 2017 +0700

----------------------------------------------------------------------
 .../Analysis/ICU/ICUFoldingFilter.cs            |  32 ++
 .../Analysis/ICU/ICUFoldingFilterFactory.cs     |  31 ++
 .../Analysis/ICU/ICUNormalizer2CharFilter.cs    | 235 +++++++++++++
 .../ICU/ICUNormalizer2CharFilterFactory.cs      |  60 ++++
 .../Analysis/ICU/ICUNormalizer2Filter.cs        |  60 ++++
 .../Analysis/ICU/ICUNormalizer2FilterFactory.cs |  59 ++++
 .../Analysis/ICU/ICUTransformFilter.cs          | 152 +++++++++
 .../Analysis/ICU/ICUTransformFilterFactory.cs   |  38 +++
 .../ICU/Segmentation/BreakIteratorWrapper.cs    | 166 ++++++++++
 .../ICU/Segmentation/CharArrayIterator.cs       | 134 ++++++++
 .../ICU/Segmentation/CompositeBreakIterator.cs  | 132 ++++++++
 .../Segmentation/DefaultICUTokenizerConfig.cs   | 127 +++++++
 .../Analysis/ICU/Segmentation/ICUTokenizer.cs   | 229 +++++++++++++
 .../ICU/Segmentation/ICUTokenizerConfig.cs      |  33 ++
 .../ICU/Segmentation/ICUTokenizerFactory.cs     | 139 ++++++++
 .../Analysis/ICU/Segmentation/ScriptIterator.cs | 206 ++++++++++++
 .../ICU/TokenAttributes/ScriptAttribute.cs      |  42 +++
 .../ICU/TokenAttributes/ScriptAttributeImpl.cs  |  80 +++++
 .../Collation/ICUCollationAttributeFactory.cs   |  75 +++++
 .../Collation/ICUCollationDocValuesField.cs     |  62 ++++
 .../Collation/ICUCollationKeyAnalyzer.cs        |  96 ++++++
 .../Collation/ICUCollationKeyFilter.cs          |  86 +++++
 .../Collation/ICUCollationKeyFilterFactory.cs   | 245 ++++++++++++++
 .../ICUCollatedTermAttributeImpl.cs             |  39 +++
 src/Lucene.Net.ICU/Lucene.Net.ICU.csproj        |  18 +
 .../Analysis/CollationTestBase.cs               |  12 +-
 src/Lucene.Net.TestFramework/project.json       |   1 +
 .../Icu/Segmentation/TestCharArrayIterator.cs   | 110 ++++++
 .../Analysis/Icu/TestICUNormalizer2Filter.cs    |  92 ++++++
 .../Icu/TestICUNormalizer2FilterFactory.cs      |  45 +++
 .../Collation/TestICUCollationDocValuesField.cs | 121 +++++++
 .../Collation/TestICUCollationKeyAnalyzer.cs    |  98 ++++++
 .../Collation/TestICUCollationKeyFilter.cs      | 101 ++++++
 .../TestICUCollationKeyFilterFactory.cs         | 331 +++++++++++++++++++
 .../Lucene.Net.Tests.ICU.csproj                 |  12 +
 src/Lucene.Net.Tests.ICU/project.json           |   1 +
 .../Support/StringBuilderExtensions.cs          |  23 ++
 37 files changed, 3516 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs
new file mode 100644
index 0000000..4ca8278
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs
@@ -0,0 +1,32 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public sealed class ICUFoldingFilter : ICUNormalizer2Filter
+//    {
+//        private static readonly Normalizer2 normalizer;
+
+//        /// <summary>
+//        /// Create a new ICUFoldingFilter on the specified input
+//        /// </summary>
+//        public ICUFoldingFilter(TokenStream input)
+//            : base(input, normalizer)
+//        {
+//        }
+
+//        static ICUFoldingFilter()
+//        {
+//            normalizer = Normalizer2.GetInstance(
+//                typeof(ICUFoldingFilter).Assembly.FindAndGetManifestResourceStream(typeof(ICUFoldingFilter), "utr30.nrm"),
+//                "utr30", Normalizer2.Mode.COMPOSE);
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs
new file mode 100644
index 0000000..c25cf93
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs
@@ -0,0 +1,31 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Lucene.Net.Analysis.Util;
+//using System;
+//using System.Collections.Generic;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUFoldingFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+//    {
+//        /// <summary>Creates a new ICUFoldingFilterFactory</summary>
+//        public ICUFoldingFilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//        }
+
+//        public override TokenStream Create(TokenStream input)
+//        {
+//            return new ICUFoldingFilter(input);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs
new file mode 100644
index 0000000..4254298
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs
@@ -0,0 +1,235 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Lucene.Net.Analysis.CharFilters;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    /// <summary>
+//    /// Normalize token text with ICU's <see cref="Normalizer2"/>.
+//    /// </summary>
+//    public sealed class ICUNormalizer2CharFilter : BaseCharFilter
+//    {
+//        private static readonly int IO_BUFFER_SIZE = 128;
+
+//        private readonly Normalizer2 normalizer;
+//        private readonly StringBuilder inputBuffer = new StringBuilder();
+//        private readonly StringBuilder resultBuffer = new StringBuilder();
+
+//        private bool inputFinished;
+//        private bool afterQuickCheckYes;
+//        private int checkedInputBoundary;
+//        private int charCount;
+
+
+//        /**
+//         * Create a new Normalizer2CharFilter that combines NFKC normalization, Case
+//         * Folding, and removes Default Ignorables (NFKC_Casefold)
+//         */
+//        public ICUNormalizer2CharFilter(TextReader input)
+//            : this(input, new Normalizer2(Icu.Normalizer.UNormalizationMode.UNORM_NFKC) /*Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)*/)
+//        {
+//        }
+
+//        /**
+//         * Create a new Normalizer2CharFilter with the specified Normalizer2
+//         * @param in text
+//         * @param normalizer normalizer to use
+//         */
+//        public ICUNormalizer2CharFilter(TextReader input, Normalizer2 normalizer)
+//            : base(input)
+//        {
+//            if (normalizer == null)
+//            {
+//                throw new ArgumentNullException("normalizer");
+//            }
+//            this.normalizer = normalizer;
+//        }
+
+//        public override int Read(char[] cbuf, int off, int len)
+//        {
+//            if (off < 0) throw new ArgumentException("off < 0");
+//            if (off >= cbuf.Length) throw new ArgumentException("off >= cbuf.length");
+//            if (len <= 0) throw new ArgumentException("len <= 0");
+
+//            while (!inputFinished || inputBuffer.Length > 0 || resultBuffer.Length > 0)
+//            {
+//                int retLen;
+
+//                if (resultBuffer.Length > 0)
+//                {
+//                    retLen = OutputFromResultBuffer(cbuf, off, len);
+//                    if (retLen > 0)
+//                    {
+//                        return retLen;
+//                    }
+//                }
+
+//                int resLen = ReadAndNormalizeFromInput();
+//                if (resLen > 0)
+//                {
+//                    retLen = OutputFromResultBuffer(cbuf, off, len);
+//                    if (retLen > 0)
+//                    {
+//                        return retLen;
+//                    }
+//                }
+
+//                ReadInputToBuffer();
+//            }
+
+//            return -1;
+//        }
+
+//        private readonly char[] tmpBuffer = new char[IO_BUFFER_SIZE];
+
+//        private int ReadInputToBuffer()
+//        {
+//            int len = m_input.Read(tmpBuffer, 0, tmpBuffer.Length);
+//            if (len == -1)
+//            {
+//                inputFinished = true;
+//                return 0;
+//            }
+//            inputBuffer.Append(tmpBuffer, 0, len);
+
+//            // if checkedInputBoundary was at the end of a buffer, we need to check that char again
+//            checkedInputBoundary = Math.Max(checkedInputBoundary - 1, 0);
+//            // this loop depends on 'isInert' (changes under normalization) but looks only at characters.
+//            // so we treat all surrogates as non-inert for simplicity
+//            if (normalizer.IsInert(tmpBuffer[len - 1]) && !char.IsSurrogate(tmpBuffer[len - 1]))
+//            {
+//                return len;
+//            }
+//            else return len + ReadInputToBuffer();
+//        }
+
+//        private int ReadAndNormalizeFromInput()
+//        {
+//            if (inputBuffer.Length <= 0)
+//            {
+//                afterQuickCheckYes = false;
+//                return 0;
+//            }
+//            if (!afterQuickCheckYes)
+//            {
+//                int resLen2 = ReadFromInputWhileSpanQuickCheckYes();
+//                afterQuickCheckYes = true;
+//                if (resLen2 > 0) return resLen2;
+//            }
+//            int resLen = ReadFromIoNormalizeUptoBoundary();
+//            if (resLen > 0)
+//            {
+//                afterQuickCheckYes = false;
+//            }
+//            return resLen;
+//        }
+
+//        private int ReadFromInputWhileSpanQuickCheckYes()
+//        {
+//            int end = normalizer.spanQuickCheckYes(inputBuffer);
+//            if (end > 0)
+//            {
+//                //resultBuffer.Append(inputBuffer.subSequence(0, end));
+//                resultBuffer.Append(inputBuffer.ToString(0, end));
+//                //inputBuffer.delete(0, end);
+//                inputBuffer.Remove(0, end);
+//                checkedInputBoundary = Math.Max(checkedInputBoundary - end, 0);
+//                charCount += end;
+//            }
+//            return end;
+//        }
+
+//        private int ReadFromIoNormalizeUptoBoundary()
+//        {
+//            // if there's no buffer to normalize, return 0
+//            if (inputBuffer.Length <= 0)
+//            {
+//                return 0;
+//            }
+
+//            bool foundBoundary = false;
+//            int bufLen = inputBuffer.Length;
+
+//            while (checkedInputBoundary <= bufLen - 1)
+//            {
+//                int charLen = Character.CharCount(inputBuffer.CodePointAt(checkedInputBoundary));
+//                checkedInputBoundary += charLen;
+//                if (checkedInputBoundary < bufLen && normalizer.HasBoundaryBefore(inputBuffer
+//                  .CodePointAt(checkedInputBoundary)))
+//                {
+//                    foundBoundary = true;
+//                    break;
+//                }
+//            }
+//            if (!foundBoundary && checkedInputBoundary >= bufLen && inputFinished)
+//            {
+//                foundBoundary = true;
+//                checkedInputBoundary = bufLen;
+//            }
+
+//            if (!foundBoundary)
+//            {
+//                return 0;
+//            }
+
+//            return NormalizeInputUpto(checkedInputBoundary);
+//        }
+
+//        private int NormalizeInputUpto(int length)
+//        {
+//            int destOrigLen = resultBuffer.Length;
+//            normalizer.NormalizeSecondAndAppend(resultBuffer, inputBuffer.ToString(0, length));
+//              //inputBuffer.SubSequence(0, length));
+
+//            //inputBuffer.Delete(0, length);
+//            inputBuffer.Remove(0, length);
+//            checkedInputBoundary = Math.Max(checkedInputBoundary - length, 0);
+//            int resultLength = resultBuffer.Length - destOrigLen;
+//            RecordOffsetDiff(length, resultLength);
+//            return resultLength;
+//        }
+
+//        private void RecordOffsetDiff(int inputLength, int outputLength)
+//        {
+//            if (inputLength == outputLength)
+//            {
+//                charCount += outputLength;
+//                return;
+//            }
+//            int diff = inputLength - outputLength;
+//            int cumuDiff = LastCumulativeDiff;
+//            if (diff < 0)
+//            {
+//                for (int i = 1; i <= -diff; ++i)
+//                {
+//                    AddOffCorrectMap(charCount + i, cumuDiff - i);
+//                }
+//            }
+//            else
+//            {
+//                AddOffCorrectMap(charCount + outputLength, cumuDiff + diff);
+//            }
+//            charCount += outputLength;
+//        }
+
+//        private int OutputFromResultBuffer(char[] cbuf, int begin, int len)
+//        {
+//            len = Math.Min(resultBuffer.Length, len);
+//            //resultBuffer.GetChars(0, len, cbuf, begin);
+//            resultBuffer.CopyTo(0, cbuf, begin, len);
+//            if (len > 0)
+//            {
+//                //resultBuffer.delete(0, len);
+//                resultBuffer.Remove(0, len);
+//            }
+//            return len;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs
new file mode 100644
index 0000000..bd4cbe5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs
@@ -0,0 +1,60 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUNormalizer2CharFilterFactory : CharFilterFactory, IMultiTermAwareComponent
+//    {
+//        private readonly Normalizer2 normalizer;
+
+//        /// <summary>Creates a new ICUNormalizer2CharFilterFactory</summary>
+//        public ICUNormalizer2CharFilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            string name = Get(args, "name", "NFKC");
+//            //string name = Get(args, "name", "nfkc_cf");
+//            //string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");
+//            //Normalizer2 normalizer = Normalizer2.getInstance
+//            //    (null, name, "compose".Equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
+
+//            var mode = (Icu.Normalizer.UNormalizationMode)Enum.Parse(typeof(Icu.Normalizer.UNormalizationMode), "UNORM_" + name);
+//            Normalizer2 normalizer = new Normalizer2(mode);
+
+//            string filter = Get(args, "filter");
+//            if (filter != null)
+//            {
+//                //UnicodeSet set = new UnicodeSet(filter);
+//                var set = UnicodeSet.ToCharacters(filter);
+//                if (set.Any())
+//                {
+//                    //set.freeze();
+//                    normalizer = new FilteredNormalizer2(normalizer, set);
+//                }
+//            }
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//            this.normalizer = normalizer;
+//        }
+
+//        public override TextReader Create(TextReader input)
+//        {
+//            return new ICUNormalizer2CharFilter(input, normalizer);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs
new file mode 100644
index 0000000..bca3d24
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs
@@ -0,0 +1,60 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Analysis.TokenAttributes;
+//using Lucene.Net.Support;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUNormalizer2Filter : TokenFilter
+//    {
+//        private readonly ICharTermAttribute termAtt;
+//        private readonly Normalizer2 normalizer;
+
+//        /// <summary>
+//        /// Create a new <see cref="ICUNormalizer2Filter"/> that combines NFKC normalization, Case
+//        /// Folding, and removes Default Ignorables (NFKC_Casefold)
+//        /// </summary>
+//        /// <param name="input"></param>
+//        public ICUNormalizer2Filter(TokenStream input)
+//            : this(input, new Normalizer2(Normalizer.UNormalizationMode.UNORM_NFKC) /*Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)*/)
+//        {
+//        }
+
+//        /// <summary>
+//        /// Create a new <see cref="ICUNormalizer2Filter"/> with the specified <see cref="Normalizer2"/>
+//        /// </summary>
+//        /// <param name="input">stream</param>
+//        /// <param name="normalizer">normalizer to use</param>
+//        public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer)
+//            : base(input)
+//        {
+//            this.normalizer = normalizer;
+//            this.termAtt = AddAttribute<ICharTermAttribute>();
+//        }
+
+//        public override sealed bool IncrementToken()
+//        {
+//            if (m_input.IncrementToken())
+//            {
+//                var term = termAtt.ToString();
+//                try
+//                {
+//                    if (!normalizer.IsNormalized(term))
+//                    {
+//                        termAtt.SetEmpty().Append(normalizer.Normalize(term));
+//                    }
+//                }
+//                catch (System.Exception ex)
+//                {
+
+//                }
+//                return true;
+//            }
+//            else
+//            {
+//                return false;
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs
new file mode 100644
index 0000000..c17fb7f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs
@@ -0,0 +1,59 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUNormalizer2FilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+//    {
+//        private readonly Normalizer2 normalizer;
+
+//        /// <summary>Creates a new ICUNormalizer2FilterFactory</summary>
+//        public ICUNormalizer2FilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            string name = Get(args, "name", "NFKC");
+//            //string name = Get(args, "name", "nfkc_cf");
+//            //string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");
+
+//            var mode = (Normalizer.UNormalizationMode)Enum.Parse(typeof(Normalizer.UNormalizationMode), "UNORM_" + name);
+//            Normalizer2 normalizer = new Normalizer2(mode);
+
+//            //Normalizer2 normalizer = Normalizer2.getInstance
+//            //    (null, name, "compose".Equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
+
+//            string filter = Get(args, "filter");
+//            if (filter != null)
+//            {
+//                //UnicodeSet set = new UnicodeSet(filter);
+//                var set = UnicodeSet.ToCharacters(filter);
+//                if (set.Any())
+//                {
+//                    //set.freeze();
+//                    normalizer = new FilteredNormalizer2(normalizer, set);
+//                }
+//            }
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//            this.normalizer = normalizer;
+//        }
+
+//        // TODO: support custom normalization
+//        public override TokenStream Create(TokenStream input)
+//        {
+//            return new ICUNormalizer2Filter(input, normalizer);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs
new file mode 100644
index 0000000..7f22c3d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs
@@ -0,0 +1,152 @@
+// LUCENENET TODO: Port issues - missing Transliterator dependency from icu.net
+
+//using Lucene.Net.Analysis.TokenAttributes;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public sealed class ICUTransformFilter : TokenFilter
+//    {
+//        // Transliterator to transform the text
+//        private readonly Transliterator transform;
+
+//        // Reusable position object
+//        private readonly Transliterator.Position position = new Transliterator.Position();
+
+//        // term attribute, will be updated with transformed text.
+//        private readonly ICharTermAttribute termAtt;
+
+//        // Wraps a termAttribute around the replaceable interface.
+//        private readonly ReplaceableTermAttribute replaceableAttribute = new ReplaceableTermAttribute();
+
+//        /// <summary>
+//        /// Create a new ICUTransformFilter that transforms text on the given stream.
+//        /// </summary>
+//        /// <param name="input"><see cref="TokenStream"/> to filter.</param>
+//        /// <param name="transform">Transliterator to transform the text.</param>
+//        public ICUTransformFilter(TokenStream input, Transliterator transform)
+//            : base(input)
+//        {
+//            this.transform = transform;
+//            this.termAtt = AddAttribute<ICharTermAttribute>();
+
+//            /* 
+//             * This is cheating, but speeds things up a lot.
+//             * If we wanted to use pkg-private APIs we could probably do better.
+//             */
+//            if (transform.getFilter() == null && transform is com.ibm.icu.text.RuleBasedTransliterator)
+//            {
+//                UnicodeSet sourceSet = transform.getSourceSet();
+//                if (sourceSet != null && !sourceSet.isEmpty())
+//                    transform.setFilter(sourceSet);
+//            }
+//        }
+
+//        public override bool IncrementToken()
+//        {
+//            /*
+//             * Wrap around replaceable. clear the positions, and transliterate.
+//             */
+//            if (m_input.IncrementToken())
+//            {
+//                replaceableAttribute.SetText(termAtt);
+
+//                int length = termAtt.Length;
+//                position.start = 0;
+//                position.limit = length;
+//                position.contextStart = 0;
+//                position.contextLimit = length;
+
+//                transform.FilteredTransliterate(replaceableAttribute, position, false);
+//                return true;
+//            }
+//            else
+//            {
+//                return false;
+//            }
+//        }
+
+//        /// <summary>
+//        /// Wrap a <see cref="ICharTermAttribute"/> with the Replaceable API.
+//        /// </summary>
+//        private sealed class ReplaceableTermAttribute //: IReplaceable
+//        {
+//            private char[] buffer;
+//            private int length;
+//            private ICharTermAttribute token;
+
+//            public void SetText(ICharTermAttribute token)
+//            {
+//                this.token = token;
+//                this.buffer = token.Buffer;
+//                this.length = token.Length;
+//            }
+
+//            public int Char32At(int pos)
+//            {
+//                return UTF16.charAt(buffer, 0, length, pos);
+//            }
+
+//            public char CharAt(int pos)
+//            {
+//                return buffer[pos];
+//            }
+
+//            public void Copy(int start, int limit, int dest)
+//            {
+//                char[] text = new char[limit - start];
+//                GetChars(start, limit, text, 0);
+//                Replace(dest, dest, text, 0, limit - start);
+//            }
+
+//            public void GetChars(int srcStart, int srcLimit, char[] dst, int dstStart)
+//            {
+//                System.Array.Copy(buffer, srcStart, dst, dstStart, srcLimit - srcStart);
+//            }
+
+//            public bool HasMetaData
+//            {
+//                get { return false; }
+//            }
+
+//            public int Length
+//            {
+//                get { return length; }
+//            }
+
+//            public void Replace(int start, int limit, string text)
+//            {
+//                int charsLen = text.Length;
+//                int newLength = ShiftForReplace(start, limit, charsLen);
+//                // insert the replacement text
+//                //text.getChars(0, charsLen, buffer, start);
+//                text.CopyTo(0, buffer, start, charsLen);
+//                token.Length = (length = newLength);
+//            }
+
+//            public void Replace(int start, int limit, char[] text, int charsStart,
+//                int charsLen)
+//            {
+//                // shift text if necessary for the replacement
+//                int newLength = ShiftForReplace(start, limit, charsLen);
+//                // insert the replacement text
+//                System.Array.Copy(text, charsStart, buffer, start, charsLen);
+//                token.Length = (length = newLength);
+//            }
+
+//            /// <summary>shift text (if necessary) for a replacement operation</summary>
+//            private int ShiftForReplace(int start, int limit, int charsLen)
+//            {
+//                int replacementLength = limit - start;
+//                int newLength = length - replacementLength + charsLen;
+//                // resize if necessary
+//                if (newLength > length)
+//                    buffer = token.ResizeBuffer(newLength);
+//                // if the substring being replaced is longer or shorter than the
+//                // replacement, need to shift things around
+//                if (replacementLength != charsLen && limit < length)
+//                    System.Array.Copy(buffer, limit, buffer, start + charsLen, length - limit);
+//                return newLength;
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs
new file mode 100644
index 0000000..081ebf5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs
@@ -0,0 +1,38 @@
+// LUCENENET TODO: Port issues - missing Transliterator dependency from icu.net
+
+//using Lucene.Net.Analysis.Util;
+//using System;
+//using System.Collections.Generic;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUTransformFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+//    {
+//        private readonly Transliterator transliterator;
+
+//        // TODO: add support for custom rules
+//        /// <summary>Creates a new ICUTransformFilterFactory</summary>
+//        public ICUTransformFilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            string id = Require(args, "id");
+//            string direction = Get(args, "direction", new string[] { "forward", "reverse" }, "forward", false);
+//            int dir = "forward".Equals(direction) ? Transliterator.FORWARD : Transliterator.REVERSE;
+//            transliterator = Transliterator.getInstance(id, dir);
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//        }
+
+//        public override TokenStream Create(TokenStream input)
+//        {
+//            return new ICUTransformFilter(input, transliterator);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs
new file mode 100644
index 0000000..c124a88
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs
@@ -0,0 +1,166 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Contain all the issues surrounding BreakIterators in ICU in one place.
+//    /// Basically this boils down to the fact that they aren't very friendly to any
+//    /// sort of OO design.
+//    /// <para/>
+//    /// http://bugs.icu-project.org/trac/ticket/5901: RBBI.getRuleStatus(), hoist to
+//    /// BreakIterator from RuleBasedBreakIterator
+//    /// <para/>
+//    /// DictionaryBasedBreakIterator is a subclass of RuleBasedBreakIterator, but
+//    /// doesn't actually behave as a subclass: it always returns 0 for
+//    /// getRuleStatus(): 
+//    /// http://bugs.icu-project.org/trac/ticket/4730: Thai RBBI, no boundary type
+//    /// tags
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    internal abstract class BreakIteratorWrapper
+//    {
+//        protected readonly CharArrayIterator textIterator = new CharArrayIterator();
+//        protected char[] text;
+//        protected int start;
+//        protected int length;
+
+//        public abstract int Next();
+//        public abstract int Current { get; }
+//        public abstract int GetRuleStatus();
+//        public abstract void SetText(CharacterIterator text);
+
+//        public void SetText(char[] text, int start, int length)
+//        {
+//            this.text = text;
+//            this.start = start;
+//            this.length = length;
+//            textIterator.SetText(text, start, length);
+//            SetText(textIterator);
+//        }
+
+//        /**
+//         * If it's a RuleBasedBreakIterator, the rule status can be used for token type. If it's
+//         * any other BreakIterator, the rulestatus method is not available, so treat
+//         * it like a generic BreakIterator.
+//         */
+//        public static BreakIteratorWrapper Wrap(Icu.BreakIterator breakIterator)
+//        {
+//            if (breakIterator is Icu.RuleBasedBreakIterator)
+//                return new RBBIWrapper((Icu.RuleBasedBreakIterator)breakIterator);
+//            else
+//                return new BIWrapper(breakIterator);
+//        }
+
+//        /**
+//         * RuleBasedBreakIterator wrapper: RuleBasedBreakIterator (as long as it's not
+//         * a DictionaryBasedBreakIterator) behaves correctly.
+//         */
+//        private sealed class RBBIWrapper : BreakIteratorWrapper
+//        {
+//            private readonly Icu.RuleBasedBreakIterator rbbi;
+
+//            internal RBBIWrapper(Icu.RuleBasedBreakIterator rbbi)
+//            {
+//                this.rbbi = rbbi;
+//            }
+
+//            public override int Current
+//            {
+//                get { return rbbi.Current; }
+//            }
+
+//            public override int GetRuleStatus()
+//            {
+//                return rbbi.GetRuleStatus();
+//            }
+
+//            public override int Next()
+//            {
+//                return rbbi.Next();
+//            }
+
+//            public override void SetText(CharacterIterator text)
+//            {
+//                rbbi.SetText(text);
+//            }
+//        }
+
+//        /**
+//         * Generic BreakIterator wrapper: Either the rulestatus method is not
+//         * available or always returns 0. Calculate a rulestatus here so it behaves
+//         * like RuleBasedBreakIterator.
+//         * 
+//         * Note: This is slower than RuleBasedBreakIterator.
+//         */
+//        private sealed class BIWrapper : BreakIteratorWrapper
+//        {
+//            private readonly Support.BreakIterator bi;
+//            private int status;
+
+//            internal BIWrapper(Support.BreakIterator bi)
+//            {
+//                this.bi = bi;
+//            }
+
+//            public override int Current
+//            {
+//                get { return bi.Current; }
+//            }
+
+//            public override int GetRuleStatus()
+//            {
+//                return status;
+//            }
+
+//            public override int Next()
+//            {
+//                int current = bi.Current;
+//                int next = bi.Next();
+//                status = CalcStatus(current, next);
+//                return next;
+//            }
+
+//            private int CalcStatus(int current, int next)
+//            {
+//                if (current == Support.BreakIterator.DONE || next == Support.BreakIterator.DONE)
+//                    return RuleBasedBreakIterator.WORD_NONE;
+
+//                int begin = start + current;
+//                int end = start + next;
+
+//                int codepoint;
+//                for (int i = begin; i < end; i += UTF16.getCharCount(codepoint))
+//                {
+//                    codepoint = UTF16.charAt(text, 0, end, begin);
+
+//                    if (UCharacter.isDigit(codepoint))
+//                        return RuleBasedBreakIterator.WORD_NUMBER;
+//                    else if (UCharacter.isLetter(codepoint))
+//                    {
+//                        // TODO: try to separately specify ideographic, kana? 
+//                        // [currently all bundled as letter for this case]
+//                        return RuleBasedBreakIterator.WORD_LETTER;
+//                    }
+//                }
+
+//                return RuleBasedBreakIterator.WORD_NONE;
+//            }
+
+//            public override void SetText(CharacterIterator text)
+//            {
+//                bi.SetText(text);
+//                status = RuleBasedBreakIterator.WORD_NONE;
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs
new file mode 100644
index 0000000..209d583
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs
@@ -0,0 +1,185 @@
+using Lucene.Net.Support;
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Lucene.Net.Analysis.Icu.Segmentation
+{
+    /// <summary>
+    /// Wraps a region of a char[] as CharacterIterator for processing with a BreakIterator.
+    /// <para/>
+    /// Positions exposed through <see cref="BeginIndex"/>, <see cref="EndIndex"/>,
+    /// <see cref="Index"/> and <see cref="SetIndex(int)"/> are relative to the start of the
+    /// region, not to the start of the underlying array.
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    internal sealed class CharArrayIterator : CharacterIterator
+    {
+        private char[] array;
+        private int start;  // offset of the region within array
+        private int index;  // current absolute position within array
+        private int length; // number of chars in the region
+        private int limit;  // start + length: one past the last char of the region
+
+        /// <summary>
+        /// The underlying buffer that was passed to <see cref="SetText(char[], int, int)"/>
+        /// (the whole array, not only the examined region).
+        /// </summary>
+        [WritableArray]
+        [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
+        public char[] Text
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>Offset of the examined region within <see cref="Text"/>.</summary>
+        public int Start
+        {
+            get { return start; }
+        }
+
+        /// <summary>Number of chars in the examined region.</summary>
+        public int Length
+        {
+            get { return length; }
+        }
+
+        /// <summary>
+        /// Set a new region of text to be examined by this iterator
+        /// and reset the current position to the start of that region.
+        /// </summary>
+        /// <param name="array">text buffer to examine</param>
+        /// <param name="start">offset into buffer</param>
+        /// <param name="length">maximum length to examine</param>
+        public void SetText(char[] array, int start, int length)
+        {
+            this.array = array;
+            this.start = start;
+            this.index = start;
+            this.length = length;
+            this.limit = start + length;
+        }
+
+        /// <summary>
+        /// The char at the current position, or <c>DONE</c> when the position is at the end of the region.
+        /// </summary>
+        public override char Current
+        {
+            get { return (index == limit) ? DONE : array[index]; }
+        }
+
+        /// <summary>Moves to the start of the region and returns <see cref="Current"/>.</summary>
+        public override char First()
+        {
+            index = start;
+            return Current;
+        }
+
+        /// <summary>Always 0: positions are relative to the start of the region.</summary>
+        public override int BeginIndex
+        {
+            get { return 0; }
+        }
+
+        /// <summary>The relative end position, which equals the region length.</summary>
+        public override int EndIndex
+        {
+            get { return length; }
+        }
+
+        /// <summary>The current position relative to the start of the region.</summary>
+        public override int Index
+        {
+            get { return index - start; }
+        }
+
+        /// <summary>
+        /// Moves to the last char of the region (or stays at the end when the region is empty)
+        /// and returns <see cref="Current"/>.
+        /// </summary>
+        public override char Last()
+        {
+            index = (limit == start) ? limit : limit - 1;
+            return Current;
+        }
+
+        /// <summary>
+        /// Advances one char. Returns <c>DONE</c>, leaving the position at the end of the region,
+        /// when there is no further char; otherwise returns the char at the new position.
+        /// </summary>
+        public override char Next()
+        {
+            if (++index >= limit)
+            {
+                index = limit;
+                return DONE;
+            }
+            else
+            {
+                return Current;
+            }
+        }
+
+        /// <summary>
+        /// Steps back one char. Returns <c>DONE</c>, leaving the position at the start of the region,
+        /// when already at the start; otherwise returns the char at the new position.
+        /// </summary>
+        public override char Previous()
+        {
+            if (--index < start)
+            {
+                index = start;
+                return DONE;
+            }
+            else
+            {
+                return Current;
+            }
+        }
+
+        /// <summary>
+        /// Moves to <paramref name="position"/> (relative to the start of the region) and returns <see cref="Current"/>.
+        /// </summary>
+        /// <param name="position">relative position between <see cref="BeginIndex"/> and <see cref="EndIndex"/>, inclusive</param>
+        /// <exception cref="ArgumentException">if <paramref name="position"/> is outside that range</exception>
+        public override char SetIndex(int position)
+        {
+            if (position < BeginIndex || position > EndIndex)
+                throw new ArgumentException("Illegal Position: " + position);
+            index = start + position;
+            return Current;
+        }
+
+        /// <summary>
+        /// Returns the examined region as a string, so that offsets into the result line up with
+        /// <see cref="BeginIndex"/>, <see cref="EndIndex"/> and <see cref="Index"/>.
+        /// </summary>
+        public override string GetTextAsString()
+        {
+            // No text set yet: keep returning an empty string, as new string((char[])null) does.
+            if (array == null)
+            {
+                return string.Empty;
+            }
+
+            // Only [start, start + length) belongs to this iterator; copying the whole buffer
+            // would leak chars outside the region and shift every relative offset by start.
+            return new string(array, start, length);
+        }
+
+        /// <summary>
+        /// Creates an iterator over the same region and at the same position.
+        /// The underlying array is shared with the clone, not copied.
+        /// </summary>
+        public override object Clone()
+        {
+            CharArrayIterator clone = new CharArrayIterator();
+            clone.SetText(array, start, length);
+            clone.index = index;
+            return clone;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs
new file mode 100644
index 0000000..a004193
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs
@@ -0,0 +1,132 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// An internal BreakIterator for multilingual text, following recommendations
+//    /// from: UAX #29: Unicode Text Segmentation. (http://unicode.org/reports/tr29/)
+//    /// <para/>
+//    /// See http://unicode.org/reports/tr29/#Tailoring for the motivation of this
+//    /// design.
+//    /// <para/>
+//    /// Text is first divided into script boundaries. The processing is then
+//    /// delegated to the appropriate break iterator for that specific script.
+//    /// <para/>
+//    /// This break iterator also allows you to retrieve the ISO 15924 script code
+//    /// associated with a piece of text.
+//    /// <para/>
+//    /// See also UAX #29, UTR #24
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    internal sealed class CompositeBreakIterator
+//    {
+//        private readonly ICUTokenizerConfig config;
+//        private readonly BreakIteratorWrapper[] wordBreakers = new BreakIteratorWrapper[UScript.CODE_LIMIT];
+
+//        private BreakIteratorWrapper rbbi;
+//        private readonly ScriptIterator scriptIterator;
+
+//        private char[] text;
+
+//        public CompositeBreakIterator(ICUTokenizerConfig config)
+//        {
+//            this.config = config;
+//            this.scriptIterator = new ScriptIterator(config.CombineCJ);
+//        }
+
+//        /**
+//         * Retrieve the next break position. If the RBBI range is exhausted within the
+//         * script boundary, examine the next script boundary.
+//         * 
+//         * @return the next break position or BreakIterator.DONE
+//         */
+//        public int Next()
+//        {
+//            int next = rbbi.Next();
+//            while (next == Support.BreakIterator.DONE && scriptIterator.Next())
+//            {
+//                rbbi = GetBreakIterator(scriptIterator.GetScriptCode());
+//                rbbi.SetText(text, scriptIterator.GetScriptStart(),
+//                    scriptIterator.GetScriptLimit() - scriptIterator.GetScriptStart());
+//                next = rbbi.Next();
+//            }
+//            return (next == Support.BreakIterator.DONE) ? Support.BreakIterator.DONE : next
+//                + scriptIterator.GetScriptStart();
+//        }
+
+//        /**
+//         * Retrieve the current break position.
+//         * 
+//         * @return the current break position or BreakIterator.DONE
+//         */
+//        public int Current
+//        {
+//            get
+//            {
+//                int current = rbbi.Current;
+//                return (current == Support.BreakIterator.DONE) ? Support.BreakIterator.DONE : current
+//                    + scriptIterator.GetScriptStart();
+//            }
+//        }
+
+//        /**
+//         * Retrieve the rule status code (token type) from the underlying break
+//         * iterator
+//         * 
+//         * @return rule status code (see RuleBasedBreakIterator constants)
+//         */
+//        public int GetRuleStatus()
+//        {
+//            return rbbi.GetRuleStatus();
+//        }
+
+//        /**
+//         * Retrieve the UScript script code for the current token. This code can be
+//         * decoded with UScript into a name or ISO 15924 code.
+//         * 
+//         * @return UScript script code for the current token.
+//         */
+//        public int GetScriptCode()
+//        {
+//            return scriptIterator.GetScriptCode();
+//        }
+
+//        /**
+//         * Set a new region of text to be examined by this iterator
+//         * 
+//         * @param text buffer of text
+//         * @param start offset into buffer
+//         * @param length maximum length to examine
+//         */
+//        public void SetText(char[] text, int start, int length)
+//        {
+//            this.text = text;
+//            scriptIterator.SetText(text, start, length);
+//            if (scriptIterator.Next())
+//            {
+//                rbbi = GetBreakIterator(scriptIterator.GetScriptCode());
+//                rbbi.SetText(text, scriptIterator.GetScriptStart(),
+//                    scriptIterator.GetScriptLimit() - scriptIterator.GetScriptStart());
+//            }
+//            else
+//            {
+//                rbbi = GetBreakIterator(UScript.COMMON);
+//                rbbi.SetText(text, 0, 0);
+//            }
+//        }
+
+//        private BreakIteratorWrapper GetBreakIterator(int scriptCode)
+//        {
+//            if (wordBreakers[scriptCode] == null)
+//                wordBreakers[scriptCode] = BreakIteratorWrapper.Wrap(config.GetBreakIterator(scriptCode));
+//            return wordBreakers[scriptCode];
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs
new file mode 100644
index 0000000..fc2a989
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs
@@ -0,0 +1,127 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Icu;
+//using Lucene.Net.Analysis.Standard;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Globalization;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Default <see cref="ICUTokenizerConfig"/> that is generally applicable
+//    /// to many languages.
+//    /// </summary>
+//    /// <remarks>
+//    /// Generally tokenizes Unicode text according to UAX#29 
+//    /// ({@link BreakIterator#getWordInstance(ULocale) BreakIterator.getWordInstance(ULocale.ROOT)}), 
+//    /// but with the following tailorings:
+//    /// <list type="bullet">
+//    ///     <item><description>Thai, Lao, and CJK text is broken into words with a dictionary.</description></item>
+//    ///     <item><description>Myanmar, and Khmer text is broken into syllables based on custom BreakIterator rules.</description></item>
+//    /// </list>
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </remarks>
+//    public class DefaultICUTokenizerConfig : ICUTokenizerConfig
+//    {
+//        /** Token type for words containing ideographic characters */
+//        public static readonly string WORD_IDEO = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
+//        /** Token type for words containing Japanese hiragana */
+//        public static readonly string WORD_HIRAGANA = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
+//        /** Token type for words containing Japanese katakana */
+//        public static readonly string WORD_KATAKANA = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
+//        /** Token type for words containing Korean hangul  */
+//        public static readonly string WORD_HANGUL = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
+//        /** Token type for words that contain letters */
+//        public static readonly string WORD_LETTER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
+//        /** Token type for words that appear to be numbers */
+//        public static readonly string WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
+
+//        /*
+//         * the default breakiterators in use. these can be expensive to
+//         * instantiate, cheap to clone.
+//         */
+//        // we keep the cjk breaking separate, that's because it cannot be customized (because dictionary
+//        // is only triggered when kind = WORD, but kind = LINE by default and we have no non-evil way to change it)
+//        private static readonly Icu.BreakIterator cjkBreakIterator = new Icu.RuleBasedBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new Locale()); //BreakIterator.getWordInstance(ULocale.ROOT);
+//                                                                                                                                                                                // the same as ROOT, except no dictionary segmentation for cjk
+//        private static readonly Icu.BreakIterator defaultBreakIterator =
+//            ReadBreakIterator("Default.brk");
+//        private static readonly Icu.BreakIterator khmerBreakIterator =
+//            ReadBreakIterator("Khmer.brk");
+//        private static readonly Icu.BreakIterator myanmarBreakIterator =
+//            ReadBreakIterator("Myanmar.brk");
+
+//        // TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
+//        private readonly bool cjkAsWords;
+
+//        /** 
+//         * Creates a new config. This object is lightweight, but the first
+//         * time the class is referenced, breakiterators will be initialized.
+//         * @param cjkAsWords true if cjk text should undergo dictionary-based segmentation, 
+//         *                   otherwise text will be segmented according to UAX#29 defaults.
+//         *                   If this is true, all Han+Hiragana+Katakana words will be tagged as
+//         *                   IDEOGRAPHIC.
+//         */
+//        public DefaultICUTokenizerConfig(bool cjkAsWords)
+//        {
+//            this.cjkAsWords = cjkAsWords;
+//        }
+
+//        public override bool CombineCJ
+//        {
+//            get { return cjkAsWords; }
+//        }
+
+//        public override Icu.BreakIterator GetBreakIterator(int script)
+//        {
+//            switch (script)
+//            {
+//                case UScript.KHMER: return (Icu.BreakIterator)khmerBreakIterator.Clone();
+//                case UScript.MYANMAR: return (Icu.BreakIterator)myanmarBreakIterator.Clone();
+//                case UScript.JAPANESE: return (Icu.BreakIterator)cjkBreakIterator.Clone();
+//                default: return (Icu.BreakIterator)defaultBreakIterator.Clone();
+//            }
+//        }
+
+//        public override string GetType(int script, int ruleStatus)
+//        {
+//            switch (ruleStatus)
+//            {
+//                case RuleBasedBreakIterator.WORD_IDEO:
+//                    return WORD_IDEO;
+//                case RuleBasedBreakIterator.WORD_KANA:
+//                    return script == UScript.HIRAGANA ? WORD_HIRAGANA : WORD_KATAKANA;
+//                case RuleBasedBreakIterator.WORD_LETTER:
+//                    return script == UScript.HANGUL ? WORD_HANGUL : WORD_LETTER;
+//                case RuleBasedBreakIterator.WORD_NUMBER:
+//                    return WORD_NUMBER;
+//                default: /* some other custom code */
+//                    return "<OTHER>";
+//            }
+//        }
+
+//        private static RuleBasedBreakIterator ReadBreakIterator(string filename)
+//        {
+//            Stream @is =
+//              typeof(DefaultICUTokenizerConfig).Assembly.FindAndGetManifestResourceStream(typeof(DefaultICUTokenizerConfig), filename);
+//            try
+//            {
+//                RuleBasedBreakIterator bi =
+//                    RuleBasedBreakIterator.GetInstanceFromCompiledRules(@is);
+//                @is.Dispose();
+//                return bi;
+//            }
+//            catch (IOException e)
+//            {
+//                throw new Exception(e.ToString(), e);
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs
new file mode 100644
index 0000000..7677c0c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs
@@ -0,0 +1,229 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Analysis.ICU.TokenAttributes;
+//using Lucene.Net.Analysis.TokenAttributes;
+//using System;
+//using System.Collections.Generic;
+//using System.Diagnostics;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Breaks text into words according to UAX #29: Unicode Text Segmentation
+//    /// (http://www.unicode.org/reports/tr29/)
+//    /// <para/>
+//    /// Words are broken across script boundaries, then segmented according to
+//    /// the BreakIterator and typing provided by the <see cref="ICUTokenizerConfig"/>
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    /// <seealso cref="ICUTokenizerConfig"/>
+//    public sealed class ICUTokenizer : Tokenizer
+//    {
+//        private static readonly int IOBUFFER = 4096;
+//        private readonly char[] buffer = new char[IOBUFFER];
+//        /** true length of text in the buffer */
+//        private int length = 0;
+//        /** length in buffer that can be evaluated safely, up to a safe end point */
+//        private int usableLength = 0;
+//        /** accumulated offset of previous buffers for this reader, for offsetAtt */
+//        private int offset = 0;
+
+//        private readonly CompositeBreakIterator breaker; /* tokenizes a char[] of text */
+//        private readonly ICUTokenizerConfig config;
+//        private readonly IOffsetAttribute offsetAtt;
+//        private readonly ICharTermAttribute termAtt;
+//        private readonly ITypeAttribute typeAtt;
+//        private readonly IScriptAttribute scriptAtt;
+
+//        /**
+//        * Construct a new ICUTokenizer that breaks text into words from the given
+//        * Reader.
+//        * <p>
+//        * The default script-specific handling is used.
+//        * <p>
+//        * The default attribute factory is used.
+//        * 
+//        * @param input Reader containing text to tokenize.
+//        * @see DefaultICUTokenizerConfig
+//        */
+//        public ICUTokenizer(TextReader input)
+//            : this(input, new DefaultICUTokenizerConfig(true))
+//        {
+//        }
+
+//        /**
+//         * Construct a new ICUTokenizer that breaks text into words from the given
+//         * Reader, using a tailored BreakIterator configuration.
+//         * <p>
+//         * The default attribute factory is used.
+//         *
+//         * @param input Reader containing text to tokenize.
+//         * @param config Tailored BreakIterator configuration 
+//         */
+//        public ICUTokenizer(TextReader input, ICUTokenizerConfig config)
+//            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, config)
+//        {
+//        }
+
+//        /**
+//         * Construct a new ICUTokenizer that breaks text into words from the given
+//         * Reader, using a tailored BreakIterator configuration.
+//         *
+//         * @param factory AttributeFactory to use
+//         * @param input Reader containing text to tokenize.
+//         * @param config Tailored BreakIterator configuration 
+//         */
+//        public ICUTokenizer(AttributeFactory factory, TextReader input, ICUTokenizerConfig config)
+//            : base(factory, input)
+//        {
+//            this.config = config;
+//            breaker = new CompositeBreakIterator(config);
+
+//            this.offsetAtt = AddAttribute<IOffsetAttribute>();
+//            this.termAtt = AddAttribute<ICharTermAttribute>();
+//            this.typeAtt = AddAttribute<ITypeAttribute>();
+//            this.scriptAtt = AddAttribute<IScriptAttribute>();
+//        }
+
+
+//        public override bool IncrementToken()
+//        {
+//            ClearAttributes();
+//            if (length == 0)
+//                Refill();
+//            while (!IncrementTokenBuffer())
+//            {
+//                Refill();
+//                if (length <= 0) // no more bytes to read;
+//                    return false;
+//            }
+//            return true;
+//        }
+
+
+//        public override void Reset()
+//        {
+//            base.Reset();
+//            breaker.SetText(buffer, 0, 0);
+//            length = usableLength = offset = 0;
+//        }
+
+//        public override void End()
+//        {
+//            base.End();
+//            int finalOffset = (length < 0) ? offset : offset + length;
+//            offsetAtt.SetOffset(CorrectOffset(finalOffset), CorrectOffset(finalOffset));
+//        }
+
+//        /*
+//         * This tokenizes text based upon the longest matching rule, and because of 
+//         * this, isn't friendly to a Reader.
+//         * 
+//         * Text is read from the input stream in 4kB chunks. Within a 4kB chunk of
+//         * text, the last unambiguous break point is found (in this implementation:
+//         * white space character). Any remaining characters represent possible partial
+//         * words, so are appended to the front of the next chunk.
+//         * 
+//         * There is the possibility that there are no unambiguous break points within
+//         * an entire 4kB chunk of text (binary data). So there is a maximum word limit
+//         * of 4kB since it will not try to grow the buffer in this case.
+//         */
+
+//        /**
+//         * Returns the last unambiguous break position in the text.
+//         * 
+//         * @return position of character, or -1 if one does not exist
+//         */
+//        private int FindSafeEnd()
+//        {
+//            for (int i = length - 1; i >= 0; i--)
+//                if (char.IsWhiteSpace(buffer[i]))
+//                    return i + 1;
+//            return -1;
+//        }
+
+//        /**
+//         * Refill the buffer, accumulating the offset and setting usableLength to the
+//         * last unambiguous break position
+//         * 
+//         * @throws IOException If there is a low-level I/O error.
+//         */
+//        private void Refill()
+//        {
+//            offset += usableLength;
+//            int leftover = length - usableLength;
+//            System.Array.Copy(buffer, usableLength, buffer, 0, leftover);
+//            int requested = buffer.Length - leftover;
+//            int returned = Read(m_input, buffer, leftover, requested);
+//            length = returned + leftover;
+//            if (returned < requested) /* reader has been emptied, process the rest */
+//                usableLength = length;
+//            else
+//            { /* still more data to be read, find a safe-stopping place */
+//                usableLength = FindSafeEnd();
+//                if (usableLength < 0)
+//                    usableLength = length; /*
+//                                * more than IOBUFFER of text without space,
+//                                * gonna possibly truncate tokens
+//                                */
+//            }
+
+//            breaker.SetText(buffer, 0, Math.Max(0, usableLength));
+//        }
+
+//        // TODO: refactor to a shared readFully somewhere
+//        // (NGramTokenizer does this too):
+//        /** commons-io's readFully, but without bugs if offset != 0 */
+//        private static int Read(TextReader input, char[] buffer, int offset, int length)
+//        {
+//            Debug.Assert(length >= 0, "length must not be negative: " + length);
+
+//            int remaining = length;
+//            while (remaining > 0)
+//            {
+//                int location = length - remaining;
+//                int count = input.Read(buffer, offset + location, remaining);
+//                if (-1 == count)
+//                { // EOF
+//                    break;
+//                }
+//                remaining -= count;
+//            }
+//            return length - remaining;
+//        }
+
+//        /*
+//         * return true if there is a token from the buffer, or false if it is
+//         * exhausted.
+//         */
+//        private bool IncrementTokenBuffer()
+//        {
+//            int start = breaker.Current;
+//            if (start == Support.BreakIterator.DONE)
+//                return false; // BreakIterator exhausted
+
+//            // find the next set of boundaries, skipping over non-tokens (rule status 0)
+//            int end = breaker.Next();
+//            while (start != Support.BreakIterator.DONE && breaker.GetRuleStatus() == 0)
+//            {
+//                start = end;
+//                end = breaker.Next();
+//            }
+
+//            if (start == Support.BreakIterator.DONE)
+//                return false; // BreakIterator exhausted
+
+//            termAtt.CopyBuffer(buffer, start, end - start);
+//            offsetAtt.SetOffset(CorrectOffset(offset + start), CorrectOffset(offset + end));
+//            typeAtt.Type = config.GetType(breaker.GetScriptCode(), breaker.GetRuleStatus());
+//            scriptAtt.Code = breaker.GetScriptCode();
+
+//            return true;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs
new file mode 100644
index 0000000..0c13316
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs
@@ -0,0 +1,33 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Support;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Class that allows for tailored Unicode Text Segmentation on
+//    /// a per-writing system basis.
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    public abstract class ICUTokenizerConfig
+//    {
+//        /// <summary>
+//        /// Sole constructor. (For invocation by subclass 
+//        /// constructors, typically implicit.)
+//        /// </summary>
+//        public ICUTokenizerConfig() { }
+//        /// <summary>
+//        /// Return a breakiterator capable of processing a given script.
+//        /// </summary>
+//        public abstract Icu.BreakIterator GetBreakIterator(int script);
+//        /// <summary>
+//        /// Return a token type value for a given script and BreakIterator rule status.
+//        /// </summary>
+//        public abstract string GetType(int script, int ruleStatus);
+//        /// <summary>
+//        /// true if Han, Hiragana, and Katakana scripts should all be returned as Japanese
+//        /// </summary>
+//        public abstract bool CombineCJ { get; }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs
new file mode 100644
index 0000000..14aa9c0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs
@@ -0,0 +1,139 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using Lucene.Net.Util;
+//using System;
+//using System.Collections.Generic;
+//using System.Diagnostics;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+
+//    public class ICUTokenizerFactory : TokenizerFactory, IResourceLoaderAware
+//    {
+//        internal static readonly string RULEFILES = "rulefiles";
+//        private readonly IDictionary<int, string> tailored;
+//        private ICUTokenizerConfig config;
+//        private readonly bool cjkAsWords;
+
+//        /// <summary>Creates a new ICUTokenizerFactory</summary>
+//        public ICUTokenizerFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            tailored = new Dictionary<int, string>();
+//            string rulefilesArg = Get(args, RULEFILES);
+//            if (rulefilesArg != null)
+//            {
+//                IList<string> scriptAndResourcePaths = SplitFileNames(rulefilesArg);
+//                foreach (string scriptAndResourcePath in scriptAndResourcePaths)
+//                {
+//                    int colonPos = scriptAndResourcePath.IndexOf(":");
+//                    string scriptCode = scriptAndResourcePath.Substring(0, colonPos - 0).Trim();
+//                    string resourcePath = scriptAndResourcePath.Substring(colonPos + 1).Trim();
+//                    tailored[UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode)] = resourcePath;
+//                }
+//            }
+//            cjkAsWords = GetBoolean(args, "cjkAsWords", true);
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//        }
+
+//        public virtual void Inform(IResourceLoader loader)
+//        {
+//            Debug.Assert(tailored != null, "init must be called first!");
+//            if (tailored.Count == 0)
+//            {
+//                config = new DefaultICUTokenizerConfig(cjkAsWords);
+//            }
+//            else
+//            {
+//                config = new DefaultICUTokenizerConfigAnonymousHelper(cjkAsWords, tailored, loader);
+
+//                //BreakIterator[] breakers = new BreakIterator[UScript.CODE_LIMIT];
+//                //foreach (var entry in tailored)
+//                //{
+//                //    int code = entry.Key;
+//                //    string resourcePath = entry.Value;
+//                //    breakers[code] = ParseRules(resourcePath, loader);
+//                //}
+//                //            config = new DefaultICUTokenizerConfig(cjkAsWords)
+//                //            {
+
+//                //    public override BreakIterator GetBreakIterator(int script)
+//                //    {
+//                //        if (breakers[script] != null)
+//                //        {
+//                //            return (BreakIterator)breakers[script].clone();
+//                //        }
+//                //        else
+//                //        {
+//                //            return base.GetBreakIterator(script);
+//                //        }
+//                //    }
+//                //    // TODO: we could also allow codes->types mapping
+//                //};
+//            }
+//        }
+
+//        private class DefaultICUTokenizerConfigAnonymousHelper : DefaultICUTokenizerConfig
+//        {
+//            private readonly Icu.BreakIterator[] breakers;
+//            public DefaultICUTokenizerConfigAnonymousHelper(bool cjkAsWords, IDictionary<int, string> tailored, IResourceLoader loader)
+//                : base(cjkAsWords)
+//            {
+//                breakers = new Icu.BreakIterator[UScript.CODE_LIMIT];
+//                foreach (var entry in tailored)
+//                {
+//                    int code = entry.Key;
+//                    string resourcePath = entry.Value;
+//                    breakers[code] = ParseRules(resourcePath, loader);
+//                }
+//            }
+
+//            public override Icu.BreakIterator GetBreakIterator(int script)
+//            {
+//                if (breakers[script] != null)
+//                {
+//                    return (Icu.BreakIterator)breakers[script].Clone();
+//                }
+//                else
+//                {
+//                    return base.GetBreakIterator(script);
+//                }
+//            }
+
+//            private Icu.BreakIterator ParseRules(string filename, IResourceLoader loader)
+//            {
+//                StringBuilder rules = new StringBuilder();
+//                Stream rulesStream = loader.OpenResource(filename);
+//                using (TextReader reader = IOUtils.GetDecodingReader(rulesStream, Encoding.UTF8))
+//                {
+//                    string line = null;
+//                    while ((line = reader.ReadLine()) != null)
+//                    {
+//                        if (!line.StartsWith("#", StringComparison.Ordinal))
+//                        {
+//                            rules.Append(line);
+//                        }
+//                        rules.Append('\n');
+//                    }
+//                }
+//                return new RuleBasedBreakIterator(rules.ToString());
+//            }
+//        }
+
+//        public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+//        {
+//            Debug.Assert(config != null, "inform must be called first!");
+//            return new ICUTokenizer(factory, input, config);
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs
new file mode 100644
index 0000000..f328851
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs
@@ -0,0 +1,206 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Text.RegularExpressions;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// An iterator that locates ISO 15924 script boundaries in text. 
+//    /// </summary>
+//    /// <remarks>
+//    /// This is not the same as simply looking at the Unicode block, or even the 
+//    /// Script property. Some characters are 'common' across multiple scripts, and
+//    /// some 'inherit' the script value of text surrounding them.
+//    /// <para/>
+//    /// This is similar to ICU (internal-only) UScriptRun, with the following
+//    /// differences:
+//    /// <list type="bullet">
+//    ///     <item><description>
+//    ///         Doesn't attempt to match paired punctuation. For tokenization purposes, this
+//    ///         is not necessary. It's also quite expensive.
+//    ///     </description></item>
+//    ///     <item><description>
+//    ///         Non-spacing marks inherit the script of their base character, following 
+//    ///         recommendations from UTR #24.
+//    ///     </description></item>
+//    /// </list>
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </remarks>
+//    internal sealed class ScriptIterator
+//    {
+//        private char[] text;
+//        private int start;
+//        private int limit;
+//        private int index;
+
+//        private int scriptStart;
+//        private int scriptLimit;
+//        private int scriptCode;
+
+//        private readonly bool combineCJ;
+
+//        /**
+//         * @param combineCJ if true: Han,Hiragana,Katakana will all return as {@link UScript#JAPANESE}
+//         */
+//        internal ScriptIterator(bool combineCJ)
+//        {
+//            this.combineCJ = combineCJ;
+//        }
+
+//        /**
+//         * Get the start of this script run
+//         * 
+//         * @return start position of script run
+//         */
+//        public int ScriptStart
+//        {
+//            get { return scriptStart; }
+//        }
+
+//        /**
+//         * Get the index of the first character after the end of this script run
+//         * 
+//         * @return position of the first character after this script run
+//         */
+//        public int ScriptLimit
+//        {
+//            get { return scriptLimit; }
+//        }
+
+//        /**
+//         * Get the UScript script code for this script run
+//         * 
+//         * @return code for the script of the current run
+//         */
+//        public int ScriptCode
+//        {
+//            get { return scriptCode; }
+//        }
+
+//        /**
+//         * Iterates to the next script run, returning true if one exists.
+//         * 
+//         * @return true if there is another script run, false otherwise.
+//         */
+//        public bool Next()
+//        {
+//            if (scriptLimit >= limit)
+//                return false;
+
+//            scriptCode = UScript.COMMON;
+//            scriptStart = scriptLimit;
+
+//            while (index < limit)
+//            {
+//                //int ch = UTF16.charAt(text, start, limit, index - start);
+//                int ch = Encoding.Unicode.(text, start, limit);
+//                int sc = GetScript(ch);
+
+//                /*
+//                 * From UTR #24: Implementations that determine the boundaries between
+//                 * characters of given scripts should never break between a non-spacing
+//                 * mark and its base character. Thus for boundary determinations and
+//                 * similar sorts of processing, a non-spacing mark — whatever its script
+//                 * value — should inherit the script value of its base character.
+//                 */
+//                if (isSameScript(scriptCode, sc)
+//                    || UCharacter.getType(ch) == ECharacterCategory.NON_SPACING_MARK)
+//                {
+//                    //index += UTF16.getCharCount(ch);
+//                    index += Encoding.Unicode.GetCharCount()
+
+//                    /*
+//                     * Inherited or Common becomes the script code of the surrounding text.
+//                     */
+//                    if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED)
+//                    {
+//                        scriptCode = sc;
+//                    }
+
+//                }
+//                else
+//                {
+//                    break;
+//                }
+//            }
+
+//            scriptLimit = index;
+//            return true;
+//        }
+
+//        /** Determine if two scripts are compatible. */
+//        private static bool IsSameScript(int scriptOne, int scriptTwo)
+//        {
+//            return scriptOne <= UScript.INHERITED || scriptTwo <= UScript.INHERITED
+//                || scriptOne == scriptTwo;
+//        }
+
+//        /**
+//         * Set a new region of text to be examined by this iterator
+//         * 
+//         * @param text text buffer to examine
+//         * @param start offset into buffer
+//         * @param length maximum length to examine
+//         */
+//        public void SetText(char[] text, int start, int length)
+//        {
+//            this.text = text;
+//            this.start = start;
+//            this.index = start;
+//            this.limit = start + length;
+//            this.scriptStart = start;
+//            this.scriptLimit = start;
+//            this.scriptCode = UScript.INVALID_CODE;
+//        }
+
+//        /** linear fast-path for basic latin case */
+//        private static readonly int[] basicLatin = new int[128];
+
+//        static ScriptIterator()
+//        {
+//            for (int i = 0; i < basicLatin.Length; i++)
+//                basicLatin[i] = UScript.GetScript(i);
+//        }
+
+//        /** fast version of UScript.getScript(). Basic Latin is an array lookup */
+//        private int GetScript(int codepoint)
+//        {
+//            if (0 <= codepoint && codepoint < basicLatin.Length)
+//            {
+//                return basicLatin[codepoint];
+//            }
+//            else
+//            {
+//                //int script = UScript.GetScript(codepoint);
+//                if (combineCJ)
+//                {
+//                    if (Regex.IsMatch(new string(Support.Character.ToChars(codepoint)), @"\p{IsHangulCompatibilityJamo}+|\p{IsHiragana}+|\p{IsKatakana}+"))
+//                    //if (script == UScript.HAN || script == UScript.HIRAGANA || script == UScript.KATAKANA)
+//                    {
+//                        return UScript.JAPANESE;
+//                    }
+//                    else if (codepoint >= 0xFF10 && codepoint <= 0xFF19)
+//                    {
+//                        // when using CJK dictionary breaking, don't let full width numbers go to it, otherwise
+//                        // they are treated as punctuation. we currently have no cleaner way to fix this!
+//                        return UScript.LATIN;
+//                    }
+//                    else
+//                    {
+//                        return script;
+//                    }
+//                }
+//                else
+//                {
+//                    return script;
+//                }
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs
new file mode 100644
index 0000000..abc1ae2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs
@@ -0,0 +1,42 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Util;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.TokenAttributes
+//{
+//    /// <summary>
+//    /// This attribute stores the UTR #24 script value for a token of text.
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    public interface IScriptAttribute : IAttribute
+//    {
+//        /**
+//   * Get the numeric code for this script value.
+//   * This is the constant value from {@link UScript}.
+//   * @return numeric code
+//   */
+//        int Code { get; set; }
+//        ///**
+//        // * Set the numeric code for this script value.
+//        // * This is the constant value from {@link UScript}.
+//        // * @param code numeric code
+//        // */
+//        //public void setCode(int code);
+//        /**
+//         * Get the full name.
+//         * @return UTR #24 full name.
+//         */
+//        string GetName();
+//        /**
+//         * Get the abbreviated name.
+//         * @return UTR #24 abbreviated name.
+//         */
+//        string GetShortName();
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs
new file mode 100644
index 0000000..6fa4512
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs
@@ -0,0 +1,80 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Util;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.TokenAttributes
+//{
+//    /// <summary>
+//    /// Implementation of <see cref="IScriptAttribute"/> that stores the script
+//    /// as an integer.
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    public class ScriptAttribute : Attribute, IScriptAttribute
+//    {
+//        private int code = UScript.COMMON;
+
+//        /** Initializes this attribute with <code>UScript.COMMON</code> */
+//        public ScriptAttribute() { }
+
+//        public virtual int Code
+//        {
+//            get { return code; }
+//            set { code = value; }
+//        }
+
+//        public virtual string GetName()
+//        {
+//            return UScript.GetName(code);
+//        }
+
+//        public virtual string GetShortName()
+//        {
+//            return UScript.GetShortName(code);
+//        }
+
+//        public override void Clear()
+//        {
+//            code = UScript.COMMON;
+//        }
+
+//        public override void CopyTo(IAttribute target)
+//        {
+//            ScriptAttribute t = (ScriptAttribute)target;
+//            t.Code = code;
+//        }
+
+//        public override bool Equals(object other)
+//        {
+//            if (this == other)
+//            {
+//                return true;
+//            }
+
+//            if (other is ScriptAttribute)
+//            {
+//                return ((ScriptAttribute)other).code == code;
+//            }
+
+//            return false;
+//        }
+
+//        public override int GetHashCode()
+//        {
+//            return code;
+//        }
+
+//        public override void ReflectWith(IAttributeReflector reflector)
+//        {
+//            // when wordbreaking CJK, we use the 15924 code Japanese (Han+Hiragana+Katakana) to 
+//            // mark runs of Chinese/Japanese. our use is correct (as for chinese Han is a subset), 
+//            // but this is just to help prevent confusion.
+//            string name = code == UScript.JAPANESE ? "Chinese/Japanese" : GetName();
+//            reflector.Reflect<IScriptAttribute>("script", name);
+//        }
+//    }
+//}

[2/4] lucenenet git commit: Squashed commit of the following:

Posted by ni...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationAttributeFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationAttributeFactory.cs b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationAttributeFactory.cs
new file mode 100644
index 0000000..ed4c7f9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationAttributeFactory.cs
@@ -0,0 +1,75 @@
+using Icu.Collation;
+using Lucene.Net.Collation.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System.Reflection;
+
+namespace Lucene.Net.Collation
+{
+    /// <summary>
+    /// Converts each token into its <see cref="System.Globalization.SortKey"/>, and
+    /// then encodes bytes as an index term.
+    /// </summary>
+    /// <remarks>
+    /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
+    /// index and query time -- <see cref="System.Globalization.SortKey"/>s are only comparable when produced by
+    /// the same <see cref="Collator"/>.  <see cref="RuleBasedCollator"/>s are 
+    /// independently versioned, so it is safe to search against stored
+    /// <see cref="System.Globalization.SortKey"/>s if the following are exactly the same (best practice is
+    /// to store this information with the index and check that they remain the
+    /// same at query time):
+    /// <para/>
+    /// <list type="number">
+    ///     <item><description>Collator version - see <see cref="Collator"/> Version</description></item>
+    ///     <item><description>The collation strength used - see <see cref="Collator.Strength"/></description></item>
+    /// </list>
+    /// <para/>
+    /// <see cref="System.Globalization.SortKey"/>s generated by ICU Collators are not compatible with those
+    /// generated by java.text.Collators.  Specifically, if you use 
+    /// <see cref="ICUCollationAttributeFactory"/> to generate index terms, do not use 
+    /// CollationAttributeFactory on the query side, or vice versa.
+    /// <para/>
+    /// <see cref="ICUCollationAttributeFactory"/> is significantly faster and generates significantly
+    /// shorter keys than CollationAttributeFactory.  See
+    /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+    /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+    /// generation timing and key length comparisons between ICU4J and
+    /// java.text.Collator over several languages.
+    /// </remarks>
+    [ExceptionToClassNameConvention]
+    public class ICUCollationAttributeFactory : AttributeSource.AttributeFactory
+    {
+        private readonly AttributeSource.AttributeFactory @delegate;
+        private readonly Collator collator;
+
+        /// <summary>
+        /// Creates a factory backed by <see cref="AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY"/> for all other attributes.
+        /// </summary>
+        /// <param name="collator">Generator of the <see cref="System.Globalization.SortKey"/>s.</param>
+        public ICUCollationAttributeFactory(Collator collator)
+            : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator)
+        {
+        }
+
+        /// <summary>
+        /// Creates a factory backed by <paramref name="delegate"/> for all other attributes.
+        /// </summary>
+        /// <param name="delegate">Attribute factory that creates every other attribute.</param>
+        /// <param name="collator">Generator of the <see cref="System.Globalization.SortKey"/>s.</param>
+        public ICUCollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator)
+        {
+            this.collator = collator;
+            this.@delegate = @delegate;
+        }
+
+        // Hands out the collating term attribute wherever it can serve as a T; the delegate builds the rest.
+        public override Util.Attribute CreateAttributeInstance<T>()
+        {
+            if (typeof(T).GetTypeInfo().IsAssignableFrom(typeof(ICUCollatedTermAttribute)))
+            {
+                return new ICUCollatedTermAttribute(collator);
+            }
+            return @delegate.CreateAttributeInstance<T>();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationDocValuesField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationDocValuesField.cs b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationDocValuesField.cs
new file mode 100644
index 0000000..bddc095
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationDocValuesField.cs
@@ -0,0 +1,62 @@
+using Icu.Collation;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System.Globalization;
+
+namespace Lucene.Net.Collation
+{
+    /// <summary>
+    /// Indexes sort keys as a single-valued <see cref="SortedDocValuesField"/>.
+    /// </summary>
+    /// <remarks>
+    /// This is more efficient than <see cref="ICUCollationKeyAnalyzer"/> if the field
+    /// only has one value: no uninversion is necessary to sort on the field,
+    /// locale-sensitive range queries can still work via <see cref="Search.FieldCacheRangeFilter"/>,
+    /// and the underlying data structures built at index-time are likely more efficient
+    /// and use less memory than FieldCache.
+    /// </remarks>
+    [ExceptionToClassNameConvention]
+    public sealed class ICUCollationDocValuesField : Field
+    {
+        private readonly string name;
+        private readonly Collator collator;
+        private readonly BytesRef bytes = new BytesRef();
+        private SortKey key;
+
+        /// <summary>
+        /// Create a new <see cref="ICUCollationDocValuesField"/>.
+        /// <para/>
+        /// NOTE: you should not create a new one for each document, instead
+        /// just make one and reuse it during your indexing process, setting
+        /// the value via <see cref="SetStringValue(string)"/>.
+        /// </summary>
+        /// <param name="name">Field name.</param>
+        /// <param name="collator">Collator for generating collation keys; the field keeps its own clone.</param>
+        // TODO: can we make this trap-free? maybe just synchronize on the collator
+        // instead?
+        public ICUCollationDocValuesField(string name, Collator collator)
+            : base(name, SortedDocValuesField.TYPE)
+        {
+            this.name = name;
+            this.collator = (Collator)collator.Clone();
+            m_fieldsData = bytes; // so wrong setters cannot be called
+        }
+
+        /// <summary>The field name passed to the constructor.</summary>
+        public override string Name
+        {
+            get { return name; }
+        }
+
+        /// <summary>Stores the sort key of <paramref name="value"/> in the byte reference this field exposes.</summary>
+        public override void SetStringValue(string value)
+        {
+            key = collator.GetSortKey(value);
+            // Read KeyData only once: the property may return a fresh copy of the array on every access.
+            bytes.Bytes = key.KeyData;
+            bytes.Offset = 0;
+            bytes.Length = bytes.Bytes.Length;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyAnalyzer.cs
new file mode 100644
index 0000000..3b9d7c4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyAnalyzer.cs
@@ -0,0 +1,96 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Collation
+{
+    /// <summary>
+    /// Configures <see cref="KeywordTokenizer"/> with <see cref="ICUCollationAttributeFactory"/>.
+    /// </summary>
+    /// <remarks>
+    /// Converts the token into its <see cref="System.Globalization.SortKey"/>, and
+    /// then encodes the <see cref="System.Globalization.SortKey"/> either directly or with 
+    /// <see cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow it to
+    /// be stored as an index term.
+    /// <para/>
+    /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
+    /// index and query time -- CollationKeys are only comparable when produced by
+    /// the same <see cref="Collator"/>.  <see cref="RuleBasedCollator"/>s are 
+    /// independently versioned, so it is safe to search against stored
+    /// <see cref="System.Globalization.SortKey"/>s if the following are exactly the same (best practice is
+    /// to store this information with the index and check that they remain the
+    /// same at query time):
+    /// <list type="number">
+    ///     <item><description>Collator version - see <see cref="Collator"/> Version</description></item>
+    ///     <item><description>The collation strength used - see <see cref="Collator.Strength"/></description></item>
+    /// </list>
+    /// <para/>
+    /// <see cref="System.Globalization.SortKey"/>s generated by ICU Collators are not compatible with those
+    /// generated by java.text.Collators.  Specifically, if you use 
+    /// <see cref="ICUCollationKeyAnalyzer"/> to generate index terms, do not use 
+    /// CollationKeyAnalyzer on the query side, or vice versa.
+    /// <para/>
+    /// ICUCollationKeyAnalyzer is significantly faster and generates significantly
+    /// shorter keys than CollationKeyAnalyzer.  See
+    /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+    /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+    /// generation timing and key length comparisons between ICU4J and
+    /// java.text.Collator over several languages.
+    /// <para/>
+    /// <a name="version"/>
+    /// You must specify the required <see cref="LuceneVersion"/>
+    /// compatibility when creating <see cref="ICUCollationKeyAnalyzer"/>:
+    /// <list type="bullet">
+    ///     <item><description>As of 4.0, <see cref="System.Globalization.SortKey"/>s are directly encoded as bytes. Previous
+    ///     versions will encode the bytes with <see cref="IndexableBinaryStringTools"/>.</description></item>
+    /// </list>
+    /// </remarks>
+    [ExceptionToClassNameConvention]
+    public sealed class ICUCollationKeyAnalyzer : Analyzer
+    {
+        private readonly LuceneVersion matchVersion;
+        private readonly Collator collator;
+        private readonly ICUCollationAttributeFactory factory;
+
+        /// <summary>
+        /// Creates an <see cref="ICUCollationKeyAnalyzer"/> that collates with <paramref name="collator"/>.
+        /// </summary>
+        /// <param name="matchVersion">Compatibility version; see <see cref="ICUCollationKeyAnalyzer"/>.</param>
+        /// <param name="collator">Generator of the <see cref="System.Globalization.SortKey"/>s.</param>
+        public ICUCollationKeyAnalyzer(LuceneVersion matchVersion, Collator collator)
+        {
+            this.collator = collator;
+            this.factory = new ICUCollationAttributeFactory(collator);
+            this.matchVersion = matchVersion;
+        }
+
+        [Obsolete("Use ICUCollationKeyAnalyzer.ICUCollationKeyAnalyzer(LuceneVersion, Collator) and specify a version instead. This ctor will be removed in Lucene 5.0")]
+        public ICUCollationKeyAnalyzer(Collator collator)
+            : this(LuceneVersion.LUCENE_31, collator)
+        {
+        }
+
+        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+#pragma warning disable 612, 618
+            bool encodeDirectly = matchVersion.OnOrAfter(LuceneVersion.LUCENE_40);
+#pragma warning restore 612, 618
+            if (!encodeDirectly)
+            {
+                // Older match versions: run the keyword token through the legacy ICUCollationKeyFilter.
+                KeywordTokenizer legacyTokenizer = new KeywordTokenizer(reader);
+#pragma warning disable 612, 618
+                return new TokenStreamComponents(legacyTokenizer, new ICUCollationKeyFilter(legacyTokenizer, collator));
+#pragma warning restore 612, 618
+            }
+
+            // LUCENE_40 and later: the tokenizer is built with the collating attribute factory and needs no filter.
+            KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+            return new TokenStreamComponents(tokenizer, tokenizer);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs
new file mode 100644
index 0000000..e6c595a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs
@@ -0,0 +1,86 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Collation
+{
+    /// <summary>
+    /// Converts each token into its <see cref="SortKey"/>, and
+    /// then encodes the <see cref="SortKey"/> with <see cref="IndexableBinaryStringTools"/>, to
+    /// allow it to be stored as an index term.
+    /// </summary>
+    /// <remarks>
+    /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
+    /// index and query time -- CollationKeys are only comparable when produced by
+    /// the same <see cref="Collator"/>.  <see cref="RuleBasedCollator"/>s are 
+    /// independently versioned, so it is safe to search against stored
+    /// <see cref="System.Globalization.SortKey"/>s if the following are exactly the same (best practice is
+    /// to store this information with the index and check that they remain the
+    /// same at query time):
+    /// <list type="number">
+    ///     <item><description>Collator version - see <see cref="Collator"/> Version</description></item>
+    ///     <item><description>The collation strength used - see <see cref="Collator.Strength"/></description></item>
+    /// </list>
+    /// <para/>
+    /// <see cref="System.Globalization.SortKey"/>s generated by ICU Collators are not compatible with those
+    /// generated by java.text.Collators.  Specifically, if you use 
+    /// <see cref="ICUCollationKeyAnalyzer"/> to generate index terms, do not use 
+    /// CollationKeyAnalyzer on the query side, or vice versa.
+    /// <para/>
+    /// ICUCollationKeyAnalyzer is significantly faster and generates significantly
+    /// shorter keys than CollationKeyAnalyzer.  See
+    /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+    /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+    /// generation timing and key length comparisons between ICU4J and
+    /// java.text.Collator over several languages.
+    /// </remarks>
+    [Obsolete("Use ICUCollationAttributeFactory instead, which encodes terms directly as bytes. This filter will be removed in Lucene 5.0")]
+    [ExceptionToClassNameConvention]
+    public sealed class ICUCollationKeyFilter : TokenFilter
+    {
+        private readonly Collator collator;
+        private SortKey reusableKey;
+        private readonly ICharTermAttribute termAtt;
+
+        /// <summary>
+        /// Creates a new <see cref="ICUCollationKeyFilter"/>.
+        /// </summary>
+        /// <param name="input">Source token stream.</param>
+        /// <param name="collator"><see cref="SortKey"/> generator.</param>
+        public ICUCollationKeyFilter(TokenStream input, Collator collator)
+            : base(input)
+        {
+            // clone the collator: see http://userguide.icu-project.org/collation/architecture
+            this.collator = (Collator)collator.Clone();
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        /// <summary>Advances the input and replaces the term text with its encoded sort key.</summary>
+        public override bool IncrementToken()
+        {
+            if (!m_input.IncrementToken())
+            {
+                return false;
+            }
+
+            char[] termBuffer = termAtt.Buffer;
+            string termText = new string(termBuffer, 0, termAtt.Length);
+            reusableKey = collator.GetSortKey(termText);
+            // Read KeyData only once: the property may return a fresh copy of the array on every access.
+            byte[] keyData = reusableKey.KeyData;
+            int encodedLength = IndexableBinaryStringTools.GetEncodedLength(keyData, 0, keyData.Length);
+            if (encodedLength > termBuffer.Length)
+            {
+                termAtt.ResizeBuffer(encodedLength);
+            }
+            termAtt.SetLength(encodedLength);
+            // Use termAtt.Buffer rather than termBuffer here, presumably because a resize can swap the array.
+            IndexableBinaryStringTools.Encode(keyData, 0, keyData.Length, termAtt.Buffer, 0, encodedLength);
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilterFactory.cs
new file mode 100644
index 0000000..7ecf357
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilterFactory.cs
@@ -0,0 +1,245 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Collation
+{
+    /// <summary>
+    /// Factory for <see cref="ICUCollationKeyFilter"/>.
+    /// </summary>
+    /// <remarks>
+    /// This factory can be created in two ways: 
+    /// <list type="bullet">
+    ///     <item><description>Based upon a system collator associated with a Locale.</description></item>
+    ///     <item><description>Based upon a tailored ruleset.</description></item>
+    /// </list>
+    /// <para/>
+    /// Using a System collator:
+    /// <list type="bullet">
+    ///     <item><description>locale: RFC 3066 locale ID (mandatory)</description></item>
+    ///     <item><description>strength: 'primary','secondary','tertiary', 'quaternary', or 'identical' (optional)</description></item>
+    ///     <item><description>decomposition: 'no', or 'canonical' (optional)</description></item>
+    /// </list>
+    /// <para/>
+    /// Using a Tailored ruleset:
+    /// <list type="bullet">
+    ///     <item><description>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)</description></item>
+    ///     <item><description>strength: 'primary','secondary','tertiary', 'quaternary', or 'identical' (optional)</description></item>
+    ///     <item><description>decomposition: 'no' or 'canonical' (optional)</description></item>
+    /// </list>
+    /// <para/>
+    /// Expert options:
+    /// <list type="bullet">
+    ///     <item><description>alternate: 'shifted' or 'non-ignorable'. Can be used to ignore punctuation/whitespace.</description></item>
+    ///     <item><description>caseLevel: 'true' or 'false'. Useful with strength=primary to ignore accents but not case.</description></item>
+    ///     <item><description>caseFirst: 'lower' or 'upper'. Useful to control which is sorted first when case is not ignored.</description></item>
+    ///     <item><description>numeric: 'true' or 'false'. Digits are sorted according to numeric value, e.g. foobar-9 sorts before foobar-10</description></item>
+    /// </list>
+    /// </remarks>
+    /// <seealso cref="Collator"/>
+    /// <seealso cref="RuleBasedCollator"/>
+    /// LUCENENET NOTE: variableTop is not supported by icu.net
+    [Obsolete("Use ICUCollationKeyAnalyzer instead.")]
+    [ExceptionToClassNameConvention]
+    public class ICUCollationKeyFilterFactory : TokenFilterFactory, IMultiTermAwareComponent, IResourceLoaderAware
+    {
+        private Collator collator;
+        private readonly string custom;
+        private readonly string localeID;
+        private readonly string strength;
+        private readonly string decomposition;
+
+        private readonly string alternate;
+        private readonly string caseLevel;
+        private readonly string caseFirst;
+        private readonly string numeric;
+        //private readonly string variableTop;
+
+        /// <summary>Creates the factory from its configuration options (listed in the class remarks).</summary>
+        /// <exception cref="ArgumentException">Neither or both of "custom" and "locale" are given, or unrecognized options remain.</exception>
+        public ICUCollationKeyFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            custom = Get(args, "custom");
+            localeID = Get(args, "locale");
+            strength = Get(args, "strength");
+            decomposition = Get(args, "decomposition");
+
+            alternate = Get(args, "alternate");
+            caseLevel = Get(args, "caseLevel");
+            caseFirst = Get(args, "caseFirst");
+            numeric = Get(args, "numeric");
+
+            // LUCENENET TODO: variableTop is not supported by icu.net. Besides this,
+            // it is deprecated as of ICU 53 and has been superseded by maxVariable,
+            // but that feature is also not supported by icu.net at the time of this writing.
+            //variableTop = Get(args, "variableTop");
+
+            if (custom == null && localeID == null)
+                throw new ArgumentException("Either custom or locale is required.");
+
+            if (custom != null && localeID != null)
+                throw new ArgumentException("Cannot specify both locale and custom. "
+                    + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. "
+                    + "Then save the entire customized ruleset to a file, and use with the custom parameter");
+
+            if (args.Count != 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>Builds the collator from the locale or the custom rules, then applies the configured options.</summary>
+        public virtual void Inform(IResourceLoader loader)
+        {
+            if (localeID != null)
+            {
+                // create from a system collator, based on Locale.
+                collator = CreateFromLocale(localeID);
+            }
+            else
+            {
+                // create from a custom ruleset
+                collator = CreateFromRules(custom, loader);
+            }
+
+            // set the strength flag, otherwise it will be the default.
+            if (strength != null)
+            {
+                if (strength.Equals("primary", StringComparison.OrdinalIgnoreCase))
+                    collator.Strength = CollationStrength.Primary;
+                else if (strength.Equals("secondary", StringComparison.OrdinalIgnoreCase))
+                    collator.Strength = CollationStrength.Secondary;
+                else if (strength.Equals("tertiary", StringComparison.OrdinalIgnoreCase))
+                    collator.Strength = CollationStrength.Tertiary;
+                else if (strength.Equals("quaternary", StringComparison.OrdinalIgnoreCase))
+                    collator.Strength = CollationStrength.Quaternary;
+                else if (strength.Equals("identical", StringComparison.OrdinalIgnoreCase))
+                    collator.Strength = CollationStrength.Identical;
+                else
+                    throw new ArgumentException("Invalid strength: " + strength);
+            }
+
+            // set the decomposition flag, otherwise it will be the default.
+            if (decomposition != null)
+            {
+                if (decomposition.Equals("no", StringComparison.OrdinalIgnoreCase))
+                    collator.NormalizationMode = NormalizationMode.Off; // Java: setDecomposition(Collator.NO_DECOMPOSITION)
+                else if (decomposition.Equals("canonical", StringComparison.OrdinalIgnoreCase))
+                    collator.NormalizationMode = NormalizationMode.On; // Java: setDecomposition(Collator.CANONICAL_DECOMPOSITION)
+                else
+                    throw new ArgumentException("Invalid decomposition: " + decomposition);
+            }
+
+            // expert options: concrete subclasses are always a RuleBasedCollator
+            RuleBasedCollator rbc = (RuleBasedCollator)collator;
+            if (alternate != null)
+            {
+                if (alternate.Equals("shifted", StringComparison.OrdinalIgnoreCase))
+                {
+                    rbc.AlternateHandling = AlternateHandling.Shifted; // Java: setAlternateHandlingShifted(true)
+                }
+                else if (alternate.Equals("non-ignorable", StringComparison.OrdinalIgnoreCase))
+                {
+                    rbc.AlternateHandling = AlternateHandling.NonIgnorable; // Java: setAlternateHandlingShifted(false)
+                }
+                else
+                {
+                    throw new ArgumentException("Invalid alternate: " + alternate);
+                }
+            }
+            if (caseLevel != null)
+            {
+                rbc.CaseLevel = bool.Parse(caseLevel) ? CaseLevel.On : CaseLevel.Off; // Java: setCaseLevel(Boolean.parseBoolean(caseLevel))
+            }
+            if (caseFirst != null)
+            {
+                if (caseFirst.Equals("lower", StringComparison.OrdinalIgnoreCase))
+                {
+                    rbc.CaseFirst = CaseFirst.LowerFirst; // Java: setLowerCaseFirst(true)
+                }
+                else if (caseFirst.Equals("upper", StringComparison.OrdinalIgnoreCase))
+                {
+                    rbc.CaseFirst = CaseFirst.UpperFirst; // Java: setUpperCaseFirst(true)
+                }
+                else
+                {
+                    throw new ArgumentException("Invalid caseFirst: " + caseFirst);
+                }
+            }
+            if (numeric != null)
+            {
+                rbc.NumericCollation = bool.Parse(numeric) ? NumericCollation.On : NumericCollation.Off; // Java: setNumericCollation(Boolean.parseBoolean(numeric))
+            }
+
+            // LUCENENET TODO: variableTop is not supported by icu.net. Besides this,
+            // it is deprecated as of ICU 53 and has been superseded by maxVariable,
+            // but that feature is also not supported by icu.net at the time of this writing.
+            //if (variableTop != null)
+            //{
+            //    rbc.setVariableTop(variableTop);
+            //}
+        }
+
+        /// <summary>Wraps <paramref name="input"/> in an <see cref="ICUCollationKeyFilter"/> that uses the configured collator.</summary>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new ICUCollationKeyFilter(input, collator);
+        }
+
+        /// <summary>
+        /// Create a locale from <paramref name="localeID"/>.
+        /// Then return the appropriate collator for the locale.
+        /// </summary>
+        /// <param name="localeID">Locale ID taken from the "locale" option.</param>
+        /// <returns>The appropriate collator for the locale.</returns>
+        private Collator CreateFromLocale(string localeID)
+        {
+            return Collator.Create(localeID);
+        }
+
+        /// <summary>
+        /// Read custom rules from a file, and create a <see cref="RuleBasedCollator"/>.
+        /// The file cannot support comments, as # might be in the rules!
+        /// </summary>
+        private Collator CreateFromRules(string fileName, IResourceLoader loader)
+        {
+            Stream input = null;
+            try
+            {
+                input = loader.OpenResource(fileName);
+                string rules = ToUTF8String(input);
+                return new RuleBasedCollator(rules);
+            }
+            catch (Exception e)
+            {
+                // io error or invalid rules
+                throw new Exception(e.ToString(), e);
+            }
+            finally
+            {
+                IOUtils.DisposeWhileHandlingException(input);
+            }
+        }
+
+        /// <summary>Returns this factory itself.</summary>
+        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+        {
+            return this;
+        }
+
+        /// <summary>Reads <paramref name="input"/> to its end, decoding it as UTF-8.</summary>
+        private string ToUTF8String(Stream input)
+        {
+            // The reader is not disposed here: CreateFromRules disposes the underlying stream itself.
+            TextReader r = IOUtils.GetDecodingReader(input, Encoding.UTF8);
+            return r.ReadToEnd();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Analysis.ICU/Collation/TokenAttributes/ICUCollatedTermAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Collation/TokenAttributes/ICUCollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.ICU/Collation/TokenAttributes/ICUCollatedTermAttributeImpl.cs
new file mode 100644
index 0000000..ac1187e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Collation/TokenAttributes/ICUCollatedTermAttributeImpl.cs
@@ -0,0 +1,39 @@
+using Icu.Collation;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System.Globalization;
+
+namespace Lucene.Net.Collation.TokenAttributes
+{
+    /// <summary>
+    /// Extension of <see cref="CharTermAttribute"/> that encodes the term
+    /// text as a binary Unicode collation key instead of as UTF-8 bytes.
+    /// </summary>
+    [ExceptionToClassNameConvention]
+    public class ICUCollatedTermAttribute : CharTermAttribute
+    {
+        private readonly Collator collator;
+        // LUCENENET NOTE: no key field is kept (was: "private readonly RawCollationKey key"); each call builds a fresh SortKey.
+
+        /// <summary>
+        /// Create a new ICUCollatedTermAttribute
+        /// </summary>
+        /// <param name="collator"><see cref="SortKey"/> generator; it is cloned, so the caller's instance is not retained.</param>
+        public ICUCollatedTermAttribute(Collator collator)
+        {
+            // clone the collator: see http://userguide.icu-project.org/collation/architecture
+            this.collator = (Collator)collator.Clone();
+        }
+
+        public override void FillBytesRef()
+        {
+            // Read KeyData exactly once: a SortKey may hand out a new array copy on every access (TODO confirm for icu.net).
+            byte[] keyData = collator.GetSortKey(ToString()).KeyData;
+            BytesRef bytes = this.BytesRef;
+            bytes.Bytes = keyData;
+            bytes.Offset = 0;
+            bytes.Length = keyData.Length;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj b/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
index b1510b9..079f5c1 100644
--- a/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
+++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
@@ -80,6 +80,24 @@
     <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs">
       <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link>
     </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.ICU\Collation\ICUCollationAttributeFactory.cs">
+      <Link>Collation\ICUCollationAttributeFactory.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.ICU\Collation\ICUCollationDocValuesField.cs">
+      <Link>Collation\ICUCollationDocValuesField.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.ICU\Collation\ICUCollationKeyAnalyzer.cs">
+      <Link>Collation\ICUCollationKeyAnalyzer.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.ICU\Collation\ICUCollationKeyFilter.cs">
+      <Link>Collation\ICUCollationKeyFilter.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.ICU\Collation\ICUCollationKeyFilterFactory.cs">
+      <Link>Collation\ICUCollationKeyFilterFactory.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Analysis.ICU\Collation\TokenAttributes\ICUCollatedTermAttributeImpl.cs">
+      <Link>Collation\TokenAttributes\ICUCollatedTermAttributeImpl.cs</Link>
+    </Compile>
     <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs">
       <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link>
     </Compile>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.TestFramework/Analysis/CollationTestBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Analysis/CollationTestBase.cs b/src/Lucene.Net.TestFramework/Analysis/CollationTestBase.cs
index c079a6f..e8c6cdf 100644
--- a/src/Lucene.Net.TestFramework/Analysis/CollationTestBase.cs
+++ b/src/Lucene.Net.TestFramework/Analysis/CollationTestBase.cs
@@ -1,10 +1,9 @@
-#if FEATURE_COLLATION
 using Icu.Collation;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Documents;
 using Lucene.Net.Index;
 using Lucene.Net.Search;
-using Lucene.Net.Support;
+using Lucene.Net.Support.Threading;
 using Lucene.Net.Util;
 using NUnit.Framework;
 using System;
@@ -55,7 +54,7 @@ namespace Lucene.Net.Analysis
         /// <returns> The encoded collation key for the original String </returns>
         /// @deprecated only for testing deprecated filters
         [Obsolete("only for testing deprecated filters")]
-		protected internal virtual string EncodeCollationKey(sbyte[] keyBits)
+		protected internal virtual string EncodeCollationKey(byte[] keyBits)
 		{
 			// Ensure that the backing char[] array is large enough to hold the encoded
 			// Binary String
@@ -268,7 +267,7 @@ namespace Lucene.Net.Analysis
 				}
 				finally
 				{
-					IOUtils.CloseWhileHandlingException(priorException, ts);
+					IOUtils.DisposeWhileHandlingException(priorException, ts);
 				}
 			}
 
@@ -328,7 +327,7 @@ namespace Lucene.Net.Analysis
 						}
 						finally
 						{
-							IOUtils.CloseWhileHandlingException(priorException, ts);
+							IOUtils.DisposeWhileHandlingException(priorException, ts);
 						}
 					}
 				}
@@ -339,5 +338,4 @@ namespace Lucene.Net.Analysis
 			}
 		}
 	}
-}
-#endif
\ No newline at end of file
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.TestFramework/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/project.json b/src/Lucene.Net.TestFramework/project.json
index 45e8d8a..ef35176 100644
--- a/src/Lucene.Net.TestFramework/project.json
+++ b/src/Lucene.Net.TestFramework/project.json
@@ -27,6 +27,7 @@
     }
   },
   "dependencies": {
+    "icu.net": "54.1.1-alpha",
     "Lucene.Net.Analysis.Common": "4.8.0",
     "Lucene.Net.Codecs": "4.8.0",
     "NUnit": "3.5.0"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/Segmentation/TestCharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/Segmentation/TestCharArrayIterator.cs b/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/Segmentation/TestCharArrayIterator.cs
new file mode 100644
index 0000000..cccd20a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/Segmentation/TestCharArrayIterator.cs
@@ -0,0 +1,110 @@
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Icu.Segmentation
+{
+    public class TestCharArrayIterator : LuceneTestCase
+    {
+        [Test]
+        public void TestBasicUsage()
+        {
+            CharArrayIterator ci = new CharArrayIterator();
+            ci.SetText("testing".toCharArray(), 0, "testing".Length);
+            assertEquals(0, ci.BeginIndex);
+            assertEquals(7, ci.EndIndex);
+            assertEquals(0, ci.Index);
+            assertEquals('t', ci.Current);
+            assertEquals('e', ci.Next());
+            assertEquals('g', ci.Last());
+            assertEquals('n', ci.Previous());
+            assertEquals('t', ci.First());
+            assertEquals(CharacterIterator.DONE, ci.Previous());
+        }
+
+        [Test]
+        public void TestFirst()
+        {
+            CharArrayIterator ci = new CharArrayIterator();
+            ci.SetText("testing".toCharArray(), 0, "testing".Length);
+            ci.Next();
+            // First() sets the position to BeginIndex and returns the character at that position,
+            assertEquals('t', ci.First());
+            assertEquals(ci.BeginIndex, ci.Index);
+            // or DONE if the text is empty
+            ci.SetText(new char[] { }, 0, 0);
+            assertEquals(CharacterIterator.DONE, ci.First());
+        }
+
+        [Test]
+        public void TestLast()
+        {
+            CharArrayIterator ci = new CharArrayIterator();
+            ci.SetText("testing".toCharArray(), 0, "testing".Length);
+            // Last() sets the position to EndIndex - 1 (EndIndex if the text is empty)
+            // and returns the character at that position,
+            assertEquals('g', ci.Last());
+            assertEquals(ci.Index, ci.EndIndex - 1);
+            // or DONE if the text is empty
+            ci.SetText(new char[] { }, 0, 0);
+            assertEquals(CharacterIterator.DONE, ci.Last());
+            assertEquals(ci.EndIndex, ci.Index);
+        }
+
+        [Test]
+        public void TestCurrent()
+        {
+            CharArrayIterator ci = new CharArrayIterator();
+            // Current gets the character at the current position (as reported by Index),
+            ci.SetText("testing".toCharArray(), 0, "testing".Length);
+            assertEquals('t', ci.Current);
+            ci.Last();
+            ci.Next();
+            // or DONE if the current position is off the end of the text.
+            assertEquals(CharacterIterator.DONE, ci.Current);
+        }
+
+        [Test]
+        public void TestNext()
+        {
+            CharArrayIterator ci = new CharArrayIterator();
+            ci.SetText("te".toCharArray(), 0, 2);
+            // Next() increments the iterator's index by one and returns the character at the new index,
+            assertEquals('e', ci.Next());
+            assertEquals(1, ci.Index);
+            // or DONE if the new position is off the end of the text range.
+            assertEquals(CharacterIterator.DONE, ci.Next());
+            assertEquals(ci.EndIndex, ci.Index);
+        }
+
+        [Test]
+        public void TestSetIndex()
+        {
+            CharArrayIterator ci = new CharArrayIterator();
+            ci.SetText("test".toCharArray(), 0, "test".Length);
+            try
+            {
+                ci.SetIndex(5);
+                fail();
+            }
+            catch (ArgumentException)
+            {
+                // expected: index 5 is outside the 4-character text; fail() above is no longer swallowed by this catch
+            }
+        }
+
+        [Test]
+        public void TestClone()
+        {
+            char[] text = "testing".toCharArray();
+            CharArrayIterator ci = new CharArrayIterator();
+            ci.SetText(text, 0, text.Length);
+            ci.Next();
+            CharArrayIterator ci2 = (CharArrayIterator)ci.Clone();
+            assertEquals(ci.Index, ci2.Index);
+            assertEquals(ci.Next(), ci2.Next());
+            assertEquals(ci.Last(), ci2.Last());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2Filter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2Filter.cs b/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2Filter.cs
new file mode 100644
index 0000000..da7cf0f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2Filter.cs
@@ -0,0 +1,92 @@
+// LUCENENET TODO: Port issue - this whole test is commented out because the Normalizer2 dependency is missing from icu.net
+
+//using Lucene.Net.Analysis.Core;
+//using Lucene.Net.Support;
+//using NUnit.Framework;
+//using System;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    /// <summary>
+//    /// Tests the ICUNormalizer2Filter
+//    /// </summary>
+//    public class TestICUNormalizer2Filter : BaseTokenStreamTestCase
+//    {
+//        private readonly Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+//        {
+//            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+//            return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
+//        });
+
+//        [Test]
+//        public void TestDefaults()
+//        {
+//            // case folding
+//            AssertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
+
+//            // case folding
+//            AssertAnalyzesTo(a, "Ruß", new String[] { "russ" });
+
+//            // case folding
+//            AssertAnalyzesTo(a, "ΜΆΪΟΣ", new String[] { "μάϊοσ" });
+//            AssertAnalyzesTo(a, "Μάϊος", new String[] { "μάϊοσ" });
+
+//            // supplementary case folding
+//            AssertAnalyzesTo(a, "𐐖", new String[] { "𐐾" });
+
+//            // normalization
+//            AssertAnalyzesTo(a, "ﴳﴺﰧ", new String[] { "طمطمطم" });
+
+//            // removal of default ignorables
+//            AssertAnalyzesTo(a, "क्‍ष", new String[] { "क्ष" });
+//        }
+
+//        [Test]
+//        public void TestAlternate()
+//        {
+//            //    Analyzer a = new Analyzer()
+//            //{
+//            //    @Override
+//            //      public TokenStreamComponents createComponents(String fieldName, Reader reader)
+//            //{
+//            //    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+//            //    return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
+//            //        tokenizer,
+//            //        /* specify nfc with decompose to get nfd */
+//            //        Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
+//            //}
+//            //    };
+
+//            Analyzer a = Analysis.Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+//            {
+//                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+//                return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
+//                    tokenizer,
+//                    /* specify nfc with decompose to get nfd */
+//                    //Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
+//                    new Normalizer2(global::Icu.Normalizer.UNormalizationMode.UNORM_NFD))); // LUCENENET NOTE: "nfc" + "DECOMPOSE" = "UNORM_NFD"
+//            });
+
+//            // decompose EAcute into E + combining Acute
+//            AssertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
+//        }
+
+//        /** blast some random strings through the analyzer */
+//        [Test]
+//        public void TestRandomStrings()
+//        {
+//            CheckRandomData(Random(), a, 1000 * RANDOM_MULTIPLIER);
+//        }
+
+//        [Test]
+//        public void TestEmptyTerm()
+//        {
+//            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+//            {
+//                Tokenizer tokenizer = new KeywordTokenizer(reader);
+//                return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
+//            });
+//            CheckOneTerm(a, "", "");
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2FilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2FilterFactory.cs b/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2FilterFactory.cs
new file mode 100644
index 0000000..8ee65a1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Analysis/Icu/TestICUNormalizer2FilterFactory.cs
@@ -0,0 +1,45 @@
+// LUCENENET TODO: Port issue - this whole test is commented out because the Normalizer2 dependency is missing from icu.net
+
+//using NUnit.Framework;
+//using System;
+//using System.Collections.Generic;
+//using System.IO;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    /// <summary>
+//    /// basic tests for <see cref="ICUNormalizer2FilterFactory"/>
+//    /// </summary>
+//    public class TestICUNormalizer2FilterFactory : BaseTokenStreamTestCase
+//    {
+//        /** Test nfkc_cf defaults */
+//        [Test]
+//        public void TestDefaults()
+//        {
+//            TextReader reader = new StringReader("This is a Ｔｅｓｔ");
+//            ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory(new Dictionary<String, String>());
+//            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+//            stream = factory.Create(stream);
+//            AssertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
+//        }
+
+//        /** Test that bogus arguments result in exception */
+//        [Test]
+//        public void TestBogusArguments()
+//        {
+//            try
+//            {
+//                new ICUNormalizer2FilterFactory(new Dictionary<String, String>() {
+//                    { "bogusArg", "bogusValue" }
+//                });
+//                fail();
+//            }
+//            catch (ArgumentException expected)
+//            {
+//                assertTrue(expected.Message.Contains("Unknown parameters"));
+//            }
+//        }
+
+//        // TODO: add tests for different forms
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationDocValuesField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationDocValuesField.cs b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationDocValuesField.cs
new file mode 100644
index 0000000..ecfbdf6
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationDocValuesField.cs
@@ -0,0 +1,121 @@
+using Icu.Collation;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Collation
+{
+    /// <summary>
+    /// Trivial test of ICUCollationDocValuesField: sorting and range filtering on the "collated" field.
+    /// </summary>
+    [SuppressCodecs("Lucene3x")]
+    public class TestICUCollationDocValuesField : LuceneTestCase
+    {
+        [Test]
+        public void TestBasic()
+        {
+            Directory dir = NewDirectory();
+            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+            Document doc = new Document();
+            Field field = NewField("field", "", StringField.TYPE_STORED);
+            ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", Collator.Create(new CultureInfo("en")));
+            doc.Add(field);
+            doc.Add(collationField);
+
+            field.SetStringValue("ABC");
+            collationField.SetStringValue("ABC");
+            iw.AddDocument(doc);
+
+            field.SetStringValue("abc");
+            collationField.SetStringValue("abc");
+            iw.AddDocument(doc);
+
+            IndexReader ir = iw.Reader;
+            iw.Dispose();
+
+            IndexSearcher @is = NewSearcher(ir);
+
+            SortField sortField = new SortField("collated", SortFieldType.STRING);
+
+            TopDocs td = @is.Search(new MatchAllDocsQuery(), 5, new Sort(sortField));
+            assertEquals("abc", ir.Document(td.ScoreDocs[0].Doc).Get("field")); // "abc" is expected first although it was added second
+            assertEquals("ABC", ir.Document(td.ScoreDocs[1].Doc).Get("field"));
+            ir.Dispose();
+            dir.Dispose();
+        }
+
+        [Test]
+        public void TestRanges()
+        {
+            Directory dir = NewDirectory();
+            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+            Document doc = new Document();
+            Field field = NewField("field", "", StringField.TYPE_STORED);
+            Collator collator = Collator.Create(CultureInfo.CurrentCulture, Collator.Fallback.FallbackAllowed); // uses the current culture (ported comment: "uses -Dtests.locale")
+            if (Random().nextBoolean())
+            {
+                collator.Strength = CollationStrength.Primary;
+            }
+            ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", collator);
+            doc.Add(field);
+            doc.Add(collationField);
+
+            int numDocs = AtLeast(500);
+            for (int i = 0; i < numDocs; i++)
+            {
+                String value = TestUtil.RandomSimpleString(Random());
+                field.SetStringValue(value);
+                collationField.SetStringValue(value);
+                iw.AddDocument(doc);
+            }
+
+            IndexReader ir = iw.Reader;
+            iw.Dispose();
+            IndexSearcher @is = NewSearcher(ir);
+
+            int numChecks = AtLeast(100);
+            for (int i = 0; i < numChecks; i++)
+            {
+                String start = TestUtil.RandomSimpleString(Random());
+                String end = TestUtil.RandomSimpleString(Random());
+                BytesRef lowerVal = new BytesRef(collator.GetSortKey(start).KeyData);
+                BytesRef upperVal = new BytesRef(collator.GetSortKey(end).KeyData);
+                Query query = new ConstantScoreQuery(FieldCacheRangeFilter.NewBytesRefRange("collated", lowerVal, upperVal, true, true));
+                DoTestRanges(@is, start, end, query, collator);
+            }
+
+            ir.Dispose();
+            dir.Dispose();
+        }
+
+        private void DoTestRanges(IndexSearcher @is, String startPoint, String endPoint, Query query, Collator collator)
+        {
+            QueryUtils.Check(query);
+
+            // positive test: every hit must compare within [startPoint, endPoint] according to the collator
+            TopDocs docs = @is.Search(query, @is.IndexReader.MaxDoc);
+            foreach (ScoreDoc doc in docs.ScoreDocs)
+            {
+                String value = @is.Doc(doc.Doc).Get("field");
+                assertTrue(collator.Compare(value, startPoint) >= 0);
+                assertTrue(collator.Compare(value, endPoint) <= 0);
+            }
+
+            // negative test: every document NOT matched by the query must compare outside [startPoint, endPoint]
+            BooleanQuery bq = new BooleanQuery();
+            bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
+            bq.Add(query, Occur.MUST_NOT);
+            docs = @is.Search(bq, @is.IndexReader.MaxDoc);
+            foreach (ScoreDoc doc in docs.ScoreDocs)
+            {
+                String value = @is.Doc(doc.Doc).Get("field");
+                assertTrue(collator.Compare(value, startPoint) < 0 || collator.Compare(value, endPoint) > 0);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyAnalyzer.cs
new file mode 100644
index 0000000..55b0b3b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyAnalyzer.cs
@@ -0,0 +1,98 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.Globalization;
+
+namespace Lucene.Net.Collation
+{
+    [SuppressCodecs("Lucene3x")]
+    public class TestICUCollationKeyAnalyzer : CollationTestBase
+    {
+        private Collator collator = Collator.Create(new CultureInfo("fa"));
+        private Analyzer analyzer;
+
+        private BytesRef firstRangeBeginning;
+        private BytesRef firstRangeEnd;
+        private BytesRef secondRangeBeginning;
+        private BytesRef secondRangeEnd;
+
+        public override void SetUp()
+        {
+            base.SetUp();
+
+            this.analyzer = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator);
+            this.firstRangeBeginning = new BytesRef
+          (collator.GetSortKey(FirstRangeBeginningOriginal).KeyData);
+            this.firstRangeEnd = new BytesRef
+          (collator.GetSortKey(FirstRangeEndOriginal).KeyData);
+            this.secondRangeBeginning = new BytesRef
+          (collator.GetSortKey(SecondRangeBeginningOriginal).KeyData);
+            this.secondRangeEnd = new BytesRef
+          (collator.GetSortKey(SecondRangeEndOriginal).KeyData);
+        }
+
+        [Test]
+        public void TestFarsiRangeFilterCollating()
+        {
+            TestFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd,
+                                  secondRangeBeginning, secondRangeEnd);
+        }
+
+        [Test]
+        public void TestFarsiRangeQueryCollating()
+        {
+            TestFarsiRangeQueryCollating(analyzer, firstRangeBeginning, firstRangeEnd,
+                                 secondRangeBeginning, secondRangeEnd);
+        }
+
+        [Test]
+        public void TestFarsiTermRangeQuery()
+        {
+            TestFarsiTermRangeQuery
+                (analyzer, firstRangeBeginning, firstRangeEnd,
+                secondRangeBeginning, secondRangeEnd);
+        }
+
+        // Test using various international locales with accented characters (which
+        // sort differently depending on locale)
+        //
+        // Copied (and slightly modified) from
+        // org.apache.lucene.search.TestSort.testInternationalSort()
+        //
+        [Test]
+        public void TestCollationKeySort()
+        {
+            Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
+              (TEST_VERSION_CURRENT, Collator.Create(new CultureInfo("en-us"), Collator.Fallback.FallbackAllowed));
+
+            Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
+              (TEST_VERSION_CURRENT, Collator.Create(new CultureInfo("fr")));
+
+            Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
+              (TEST_VERSION_CURRENT, Collator.Create(new CultureInfo("sv-se"), Collator.Fallback.FallbackAllowed));
+
+            Analyzer denmarkAnalyzer = new ICUCollationKeyAnalyzer
+              (TEST_VERSION_CURRENT, Collator.Create(new CultureInfo("da-dk"), Collator.Fallback.FallbackAllowed));
+
+            // The ICU Collator and java.text.Collator implementations differ in their orderings - "BFJHD" is the
+            // ordering expected for the en-us ICU Collator built above (this ported comment used to say Locale.ROOT).
+            TestCollationKeySort
+                (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
+                "BFJHD", "ECAGI", "BJDFH", "BJDHF");
+        }
+
+        [Test]
+        public void TestThreadSafe()
+        {
+            int iters = 20 * RANDOM_MULTIPLIER;
+            for (int i = 0; i < iters; i++)
+            {
+                CultureInfo locale = new CultureInfo("de");
+                Collator collator = Collator.Create(locale);
+                collator.Strength = CollationStrength.Identical;
+                AssertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilter.cs b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilter.cs
new file mode 100644
index 0000000..a8a8cba
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilter.cs
@@ -0,0 +1,101 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.IO;
+
+namespace Lucene.Net.Collation
+{
+    [Obsolete("remove this when ICUCollationKeyFilter is removed")]
+    public class TestICUCollationKeyFilter : CollationTestBase
+    {
+        private Collator collator = Collator.Create(new CultureInfo("fa"));
+        private Analyzer analyzer;
+
+        private BytesRef firstRangeBeginning;
+        private BytesRef firstRangeEnd;
+        private BytesRef secondRangeBeginning;
+        private BytesRef secondRangeEnd;
+
+
+        public override void SetUp()
+        {
+            base.SetUp();
+
+            this.analyzer = new TestAnalyzer(collator);
+            this.firstRangeBeginning = new BytesRef(EncodeCollationKey
+                (collator.GetSortKey(FirstRangeBeginningOriginal).KeyData));
+            this.firstRangeEnd = new BytesRef(EncodeCollationKey
+                (collator.GetSortKey(FirstRangeEndOriginal).KeyData));
+            this.secondRangeBeginning = new BytesRef(EncodeCollationKey
+                (collator.GetSortKey(SecondRangeBeginningOriginal).KeyData));
+            this.secondRangeEnd = new BytesRef(EncodeCollationKey
+                (collator.GetSortKey(SecondRangeEndOriginal).KeyData));
+        }
+
+        public sealed class TestAnalyzer : Analyzer
+        {
+            private Collator _collator;
+
+            internal TestAnalyzer(Collator collator)
+            {
+                _collator = collator;
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer result = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
+            }
+        }
+
+        [Test]
+        public void TestFarsiRangeFilterCollating()
+        {
+            TestFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd,
+                                      secondRangeBeginning, secondRangeEnd);
+        }
+
+        [Test]
+        public void TestFarsiRangeQueryCollating()
+        {
+            TestFarsiRangeQueryCollating(analyzer, firstRangeBeginning, firstRangeEnd,
+                                     secondRangeBeginning, secondRangeEnd);
+        }
+
+        [Test]
+        public void TestFarsiTermRangeQuery()
+        {
+            TestFarsiTermRangeQuery
+                (analyzer, firstRangeBeginning, firstRangeEnd,
+                secondRangeBeginning, secondRangeEnd);
+        }
+
+        // Test using various international locales with accented characters (which
+        // sort differently depending on locale)
+        //
+        // Copied (and slightly modified) from
+        // org.apache.lucene.search.TestSort.testInternationalSort()
+        //
+        [Test]
+        public void TestCollationKeySort()
+        {
+            Analyzer usAnalyzer = new TestAnalyzer(Collator.Create(new CultureInfo("en-us"), Collator.Fallback.FallbackAllowed));
+            Analyzer franceAnalyzer
+              = new TestAnalyzer(Collator.Create(new CultureInfo("fr")));
+            Analyzer swedenAnalyzer
+              = new TestAnalyzer(Collator.Create(new CultureInfo("sv-se"), Collator.Fallback.FallbackAllowed));
+            Analyzer denmarkAnalyzer
+              = new TestAnalyzer(Collator.Create(new CultureInfo("da-dk"), Collator.Fallback.FallbackAllowed));
+
+            // The ICU Collator and java.text.Collator implementations differ in their orderings - "BFJHD" is the
+            // ordering expected for the en-us (Locale.US in the Java original's comment) ICU Collator built above.
+            TestCollationKeySort
+                (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
+                "BFJHD", "ECAGI", "BJDFH", "BJDHF");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilterFactory.cs
new file mode 100644
index 0000000..80aa910
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.ICU/Collation/TestICUCollationKeyFilterFactory.cs
@@ -0,0 +1,331 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using System.Text;
+
+namespace Lucene.Net.Collation
+{
+    [Obsolete]
+    public class TestICUCollationKeyFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Turkish casing does not round-trip through a plain lower-case filter.
+        /// This test builds the factory with a Turkish locale at primary strength
+        /// (instead of using LowerCaseFilter) and expects the upper-case and
+        /// lower-case inputs to collate to the same key.
+        /// </summary>
+        [Test]
+        public void TestBasicUsage()
+        {
+            const string upperText = "I WİLL USE TURKİSH CASING";
+            const string lowerText = "ı will use turkish casıng";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "tr",
+                "strength", "primary");
+
+            // Each input is fed through a KeywordTokenizer, so the whole string is one token.
+            TokenStream upperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(upperText)));
+            TokenStream lowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(lowerText)));
+
+            assertCollatesToSame(upperStream, lowerStream);
+        }
+
+        /*
+         * Exercises the "decomposition" option (unicode normalization): the upper-case
+         * input spells one dotted capital I as "I" + U+0307 (combining dot above), and
+         * with canonical decomposition it is still expected to collate to the same key
+         * as the lower-case input.
+         */
+        [Test]
+        public void TestNormalization()
+        {
+            const string upperText = "I W\u0049\u0307LL USE TURKİSH CASING";
+            const string lowerText = "ı will use turkish casıng";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "tr",
+                "strength", "primary",
+                "decomposition", "canonical");
+
+            TokenStream upperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(upperText)));
+            TokenStream lowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(lowerText)));
+
+            assertCollatesToSame(upperStream, lowerStream);
+        }
+
+        /*
+         * At secondary strength, case is not significant for English: "TESTING" and
+         * "testing" are expected to collate to the same key.
+         */
+        [Test]
+        public void TestSecondaryStrength()
+        {
+            const string upperText = "TESTING";
+            const string lowerText = "testing";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "en",
+                "strength", "secondary",
+                "decomposition", "no");
+
+            TokenStream upperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(upperText)));
+            TokenStream lowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(lowerText)));
+
+            assertCollatesToSame(upperStream, lowerStream);
+        }
+
+        /*
+         * alternate=shifted moves whitespace, punctuation and symbols to the
+         * quaternary level, so at primary strength "foo-bar" and "foo bar" are
+         * expected to collate to the same key.
+         */
+        [Test]
+        public void TestIgnorePunctuation()
+        {
+            const string hyphenated = "foo-bar";
+            const string spaced = "foo bar";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "en",
+                "strength", "primary",
+                "alternate", "shifted");
+
+            TokenStream hyphenatedStream = collationFactory.Create(new KeywordTokenizer(new StringReader(hyphenated)));
+            TokenStream spacedStream = collationFactory.Create(new KeywordTokenizer(new StringReader(spaced)));
+
+            assertCollatesToSame(hyphenatedStream, spacedStream);
+        }
+
+        // LUCENENET TODO: variableTop is not supported by icu.net. Besides this,
+        // it is deprecated as of ICU 53 and has been superseded by maxVariable,
+        // but that feature is also not supported by icu.net at the time of this writing.
+
+        ///*
+        // * Setting alternate=shifted and variableTop to shift whitespace, but not 
+        // * punctuation or symbols, to quaternary level 
+        // */
+        //[Test]
+        //public void TestIgnoreWhitespace()
+        //{
+        //    String withSpace = "foo bar";
+        //    String withoutSpace = "foobar";
+        //    String withPunctuation = "foo-bar";
+        //    TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+        //        "locale", "en",
+        //        "strength", "primary",
+        //        "alternate", "shifted",
+        //        "variableTop", " ");
+        //    TokenStream tsWithSpace = factory.Create(
+        //        new KeywordTokenizer(new StringReader(withSpace)));
+        //    TokenStream tsWithoutSpace = factory.Create(
+        //        new KeywordTokenizer(new StringReader(withoutSpace)));
+        //    assertCollatesToSame(tsWithSpace, tsWithoutSpace);
+        //    // now assert that punctuation still matters: foo-bar < foo bar
+        //    tsWithSpace = factory.Create(
+        //            new KeywordTokenizer(new StringReader(withSpace)));
+        //    TokenStream tsWithPunctuation = factory.Create(
+        //        new KeywordTokenizer(new StringReader(withPunctuation)));
+        //    assertCollation(tsWithPunctuation, tsWithSpace, -1);
+        //}
+
+        /*
+         * numeric=true encodes digit runs by their numeric value, so "foobar-9"
+         * is expected to sort before "foobar-10".
+         */
+        [Test]
+        public void TestNumerics()
+        {
+            const string nineText = "foobar-9";
+            const string tenText = "foobar-10";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "en",
+                "numeric", "true");
+
+            TokenStream nineStream = collationFactory.Create(new KeywordTokenizer(new StringReader(nineText)));
+            TokenStream tenStream = collationFactory.Create(new KeywordTokenizer(new StringReader(tenText)));
+
+            // -1: the first stream's key must order before the second's.
+            assertCollation(nineStream, tenStream, -1);
+        }
+
+        /*
+         * caseLevel=true adds a case level between secondary and tertiary. Combined
+         * with primary strength, accents are ignored while case still matters.
+         */
+        [Test]
+        public void TestIgnoreAccentsButNotCase()
+        {
+            const string accentedLower = "résumé";
+            const string plainLower = "resume";
+            const string accentedUpper = "Résumé";
+            const string plainUpper = "Resume";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "en",
+                "strength", "primary",
+                "caseLevel", "true");
+
+            // Lower-case pair: accents must not matter.
+            TokenStream accentedLowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(accentedLower)));
+            TokenStream plainLowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(plainLower)));
+            assertCollatesToSame(accentedLowerStream, plainLowerStream);
+
+            // Capitalized pair: accents must not matter either.
+            TokenStream accentedUpperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(accentedUpper)));
+            TokenStream plainUpperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(plainUpper)));
+            assertCollatesToSame(accentedUpperStream, plainUpperStream);
+
+            // now assert that case still matters: resume < Resume
+            TokenStream lowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(plainLower)));
+            TokenStream upperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(plainUpper)));
+            assertCollation(lowerStream, upperStream, -1);
+        }
+
+        /*
+         * caseFirst=upper makes upper-case strings sort before lower-case ones,
+         * so "Resume" is expected to order before "resume".
+         */
+        [Test]
+        public void TestUpperCaseFirst()
+        {
+            const string lowerText = "resume";
+            const string upperText = "Resume";
+
+            TokenFilterFactory collationFactory = tokenFilterFactory(
+                "ICUCollationKey",
+                "locale", "en",
+                "strength", "tertiary",
+                "caseFirst", "upper");
+
+            TokenStream lowerStream = collationFactory.Create(new KeywordTokenizer(new StringReader(lowerText)));
+            TokenStream upperStream = collationFactory.Create(new KeywordTokenizer(new StringReader(upperText)));
+
+            // Note the argument order: the upper-case stream is expected to come first.
+            assertCollation(upperStream, lowerStream, -1);
+        }
+
+        /*
+         * For german, you might want oe to sort and match with o umlaut.
+         * This is not the default, but you can make a customized ruleset to do this.
+         *
+         * The default is DIN 5007-1, this shows how to tailor a collator to get DIN 5007-2 behavior.
+         *  http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
+         */
+        [Test]
+        public void TestCustomRules()
+        {
+            // Tailorings that make ae/oe/ue match a/o/u + U+0308 (combining diaeresis),
+            // in both lower and upper case. The upper-case UE entry must pair with
+            // upper-case U + U+0308, parallel to the AE and OE entries.
+            String DIN5007_2_tailorings =
+              "& ae , a\u0308 & AE , A\u0308" +
+              "& oe , o\u0308 & OE , O\u0308" +
+              "& ue , u\u0308 & UE , U\u0308";
+
+            string baseRules = RuleBasedCollator.GetCollationRules(new Icu.Locale("de-DE"), UColRuleOption.UCOL_TAILORING_ONLY);
+            //RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseRules + DIN5007_2_tailorings);
+
+            string tailoredRules = baseRules + DIN5007_2_tailorings;
+            //
+            // at this point, you would save these tailoredRules to a file, 
+            // and use the custom parameter.
+            //
+            String germanUmlaut = "Töne";
+            String germanOE = "Toene";
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put("custom", "rules.txt");
+            args.Put("strength", "primary");
+            ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory(args);
+            // The mock loader returns the tailored rules for any resource name,
+            // standing in for the "rules.txt" file named above.
+            factory.Inform(new StringMockResourceLoader(tailoredRules));
+            TokenStream tsUmlaut = factory.Create(
+                new KeywordTokenizer(new StringReader(germanUmlaut)));
+            TokenStream tsOE = factory.Create(
+                new KeywordTokenizer(new StringReader(germanOE)));
+
+            assertCollatesToSame(tsUmlaut, tsOE);
+        }
+
+        // Asserts that the single tokens produced by the two streams compare as equal.
+        private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
+        {
+            assertCollation(stream1, stream2, comparison: 0);
+        }
+
+        // Consumes exactly one token from each stream and asserts that the ordinal
+        // comparison of the two term texts has the same sign as "comparison"
+        // (negative, zero or positive). Both streams are then ended and disposed.
+        private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison)
+        {
+            ICharTermAttribute firstTerm = stream1.AddAttribute<ICharTermAttribute>();
+            ICharTermAttribute secondTerm = stream2.AddAttribute<ICharTermAttribute>();
+
+            stream1.Reset();
+            stream2.Reset();
+
+            // Each stream must yield a token...
+            assertTrue(stream1.IncrementToken());
+            assertTrue(stream2.IncrementToken());
+
+            // Only the sign of the comparison is checked, not its magnitude.
+            var expectedSign = Number.Signum(comparison);
+            var ordinalOrder = firstTerm.toString().CompareToOrdinal(secondTerm.toString());
+            var actualSign = Number.Signum(ordinalOrder);
+            assertEquals(expectedSign, actualSign);
+
+            // ...and exactly one token.
+            assertFalse(stream1.IncrementToken());
+            assertFalse(stream2.IncrementToken());
+
+            stream1.End();
+            stream2.End();
+            stream1.Dispose();
+            stream2.Dispose();
+        }
+
+        /// <summary>
+        /// <see cref="IResourceLoader"/> stub that serves one fixed string, UTF-8
+        /// encoded, as the content of every requested resource. Type lookup and
+        /// instance creation are not implemented and return null / default.
+        /// </summary>
+        private class StringMockResourceLoader : IResourceLoader
+        {
+            private readonly String content;
+
+            internal StringMockResourceLoader(String text)
+            {
+                content = text;
+            }
+
+            // Not implemented by this stub: the class name is ignored.
+            public T NewInstance<T>(String cname)
+            {
+                T nothing = default(T);
+                return nothing;
+            }
+
+            // Not implemented by this stub: the class name is ignored.
+            public Type FindType(String cname)
+            {
+                Type none = null;
+                return none;
+            }
+
+            // The resource name is ignored; a fresh stream over the stored text is returned.
+            public Stream OpenResource(String resource)
+            {
+                byte[] utf8Bytes = Encoding.UTF8.GetBytes(content);
+                return new MemoryStream(utf8Bytes);
+            }
+        }
+
+        /// <summary>
+        /// Looks up the token filter factory class registered under <paramref name="name"/>
+        /// and instantiates it with the given arguments plus a "luceneMatchVersion" entry.
+        /// </summary>
+        /// <param name="name">name passed to <c>TokenFilterFactory.LookupClass</c></param>
+        /// <param name="keysAndValues">alternating keys and values; the length must be even and keys must be unique</param>
+        /// <returns>the new factory; if it is <c>IResourceLoaderAware</c> it has been informed with a <c>ClasspathResourceLoader</c></returns>
+        /// <exception cref="ArgumentException">if <paramref name="keysAndValues"/> has odd length, or the factory constructor threw one</exception>
+        private TokenFilterFactory tokenFilterFactory(String name, params String[] keysAndValues)
+        {
+            Type clazz = TokenFilterFactory.LookupClass(name);
+            if (keysAndValues.Length % 2 == 1)
+            {
+                throw new ArgumentException("invalid keysAndValues map");
+            }
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            for (int i = 0; i < keysAndValues.Length; i += 2)
+            {
+                String prev = args.Put(keysAndValues[i], keysAndValues[i + 1]);
+                assertNull("duplicate values for key: " + keysAndValues[i], prev);
+            }
+            String previous = args.Put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
+            assertNull("duplicate values for key: luceneMatchVersion", previous);
+            TokenFilterFactory factory = null;
+            try
+            {
+                //factory = clazz.getConstructor(Map.class).newInstance(args);
+                factory = (TokenFilterFactory)Activator.CreateInstance(clazz, args);
+            }
+            catch (TargetInvocationException e)
+            {
+                // to simplify tests that check for illegal parameters
+                if (e.InnerException is ArgumentException)
+                {
+                    throw (ArgumentException)e.InnerException;
+                }
+
+                // Rethrow the caught exception as-is; "throw e;" would reset its stack trace.
+                throw;
+            }
+            if (factory is IResourceLoaderAware)
+            {
+                ((IResourceLoaderAware)factory).Inform(new ClasspathResourceLoader(GetType()));
+            }
+            return factory;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.ICU/Lucene.Net.Tests.ICU.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.ICU/Lucene.Net.Tests.ICU.csproj b/src/Lucene.Net.Tests.ICU/Lucene.Net.Tests.ICU.csproj
index 84d660a..8f5e312 100644
--- a/src/Lucene.Net.Tests.ICU/Lucene.Net.Tests.ICU.csproj
+++ b/src/Lucene.Net.Tests.ICU/Lucene.Net.Tests.ICU.csproj
@@ -74,6 +74,18 @@
     <Compile Include="..\Lucene.Net.Tests.Analysis.Common\Analysis\Util\TestSegmentingTokenizerBase.cs">
       <Link>Analysis\Util\TestSegmentingTokenizerBase.cs</Link>
     </Compile>
+    <Compile Include="..\Lucene.Net.Tests.Analysis.ICU\Collation\TestICUCollationDocValuesField.cs">
+      <Link>Collation\TestICUCollationDocValuesField.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Tests.Analysis.ICU\Collation\TestICUCollationKeyAnalyzer.cs">
+      <Link>Collation\TestICUCollationKeyAnalyzer.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Tests.Analysis.ICU\Collation\TestICUCollationKeyFilter.cs">
+      <Link>Collation\TestICUCollationKeyFilter.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Tests.Analysis.ICU\Collation\TestICUCollationKeyFilterFactory.cs">
+      <Link>Collation\TestICUCollationKeyFilterFactory.cs</Link>
+    </Compile>
     <Compile Include="..\Lucene.Net.Tests.Highlighter\PostingsHighlight\TestMultiTermHighlighting.cs">
       <Link>Search\PostingsHighlight\TestMultiTermHighlighting.cs</Link>
     </Compile>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net.Tests.ICU/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.ICU/project.json b/src/Lucene.Net.Tests.ICU/project.json
index 4dc5c7a..1c3c0cd 100644
--- a/src/Lucene.Net.Tests.ICU/project.json
+++ b/src/Lucene.Net.Tests.ICU/project.json
@@ -26,6 +26,7 @@
   },
   "dependencies": {
     "dotnet-test-nunit-teamcity": "3.4.0-beta-3",
+    "icu.net": "54.1.1-alpha",
     "Lucene.Net": "4.8.0",
     "Lucene.Net.Analysis.Common": "4.8.0",
     "Lucene.Net.Highlighter": "4.8.0",

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a4989ea1/src/Lucene.Net/Support/StringBuilderExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net/Support/StringBuilderExtensions.cs b/src/Lucene.Net/Support/StringBuilderExtensions.cs
index 5200e02..ae68811 100644
--- a/src/Lucene.Net/Support/StringBuilderExtensions.cs
+++ b/src/Lucene.Net/Support/StringBuilderExtensions.cs
@@ -110,6 +110,29 @@ namespace Lucene.Net.Support
         }
 
         /// <summary>
+        /// Returns the character (Unicode code point) at the specified index. 
+        /// The index refers to char values (Unicode code units) and ranges from 0 to Length - 1.
+        /// <para/>
+        /// If the char value specified at the given index is in the high-surrogate range, 
+        /// the following index is less than the length of this sequence, and the char value 
+        /// at the following index is in the low-surrogate range, then the 
+        /// supplementary code point corresponding to this surrogate pair is returned. 
+        /// Otherwise, the char value at the given index is returned.
+        /// </summary>
+        /// <param name="text">this <see cref="StringBuilder"/></param>
+        /// <param name="index">the index to the char values</param>
+        /// <returns>the code point value of the character at the index</returns>
+        /// <exception cref="IndexOutOfRangeException">if the index argument is negative or not less than the length of this sequence.</exception>
+        public static int CodePointAt(this StringBuilder text, int index)
+        {
+            if ((index < 0) || (index >= text.Length))
+            {
+                throw new IndexOutOfRangeException();
+            }
+
+            // Read the one or two chars involved directly from the builder rather than
+            // copying the entire contents into a new string on every call.
+            char first = text[index];
+            int next = index + 1;
+            if (char.IsHighSurrogate(first) && next < text.Length)
+            {
+                char second = text[next];
+                if (char.IsLowSurrogate(second))
+                {
+                    // Valid surrogate pair: combine into the supplementary code point.
+                    return char.ConvertToUtf32(first, second);
+                }
+            }
+
+            // Not the start of a valid surrogate pair: the char value itself is returned.
+            return first;
+        }
+
+        /// <summary>
         /// Copies the array from the <see cref="StringBuilder"/> into a new array
         /// and returns it.
         /// </summary>

[4/4] lucenenet git commit: Updated CONTRIBUTING.md

Posted by ni...@apache.org.

Updated CONTRIBUTING.md


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/db1f605c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/db1f605c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/db1f605c

Branch: refs/heads/master
Commit: db1f605cd1d1c488ee11a2447f704f61542ba477
Parents: a4989ea
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Jul 25 15:30:41 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Jul 25 15:30:41 2017 +0700

----------------------------------------------------------------------
 CONTRIBUTING.md | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/db1f605c/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fa2942c..bc0c07b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -38,7 +38,6 @@ Note that even though we are currently a port of Lucene 4.8.0, we recommend port
 
 ### Pending being ported from scratch (code + tests) plus have additional dependencies that either need to be sourced from the .NET ecosystem or ported.
 
-* [Lucene.Net.Benchmark](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.1/lucene/benchmark) - See [JIRA issue 564](https://issues.apache.org/jira/browse/LUCENENET-564)
 * [Lucene.Net.Analysis.Morfologik](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.1/lucene/analysis/morfologik) - See [JIRA issue 568](https://issues.apache.org/jira/browse/LUCENENET-568)
 * [Lucene.Net.Analysis.UIMA](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.1/lucene/analysis/uima) - See [JIRA issue 570](https://issues.apache.org/jira/browse/LUCENENET-570)