You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/05/05 20:56:01 UTC
[6/9] lucenenet git commit: Renamed Lucene.Net.Icu > Lucene.Net.ICU,
Lucene.Net.Tests.Icu > Lucene.Net.Tests.ICU
Renamed Lucene.Net.Icu > Lucene.Net.ICU, Lucene.Net.Tests.Icu > Lucene.Net.Tests.ICU
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3c077fb1
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3c077fb1
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3c077fb1
Branch: refs/heads/master
Commit: 3c077fb1b44503c778d9385f3419dae7c02a99f2
Parents: b1a701c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat May 6 03:12:29 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat May 6 03:12:29 2017 +0700
----------------------------------------------------------------------
CONTRIBUTING.md | 2 +-
Lucene.Net.Portable.sln | 4 +-
Lucene.Net.sln | 22 +-
.../Properties/AssemblyInfo.cs | 4 +-
src/Lucene.Net.ICU/Analysis/Th/stopwords.txt | 119 ++++++
src/Lucene.Net.ICU/Lucene.Net.ICU.csproj | 144 +++++++
src/Lucene.Net.ICU/Lucene.Net.ICU.project.json | 11 +
src/Lucene.Net.ICU/Lucene.Net.ICU.xproj | 39 ++
src/Lucene.Net.ICU/Properties/AssemblyInfo.cs | 52 +++
src/Lucene.Net.ICU/Support/BreakIterator.cs | 248 +++++++++++
src/Lucene.Net.ICU/Support/CharacterIterator.cs | 50 +++
src/Lucene.Net.ICU/Support/IcuBreakIterator.cs | 394 +++++++++++++++++
.../Support/StringCharacterIterator.cs | 204 +++++++++
src/Lucene.Net.ICU/project.json | 64 +++
src/Lucene.Net.Icu/Analysis/Th/stopwords.txt | 119 ------
src/Lucene.Net.Icu/Lucene.Net.Icu.csproj | 145 -------
src/Lucene.Net.Icu/Lucene.Net.Icu.project.json | 11 -
src/Lucene.Net.Icu/Lucene.Net.Icu.xproj | 39 --
src/Lucene.Net.Icu/Properties/AssemblyInfo.cs | 52 ---
src/Lucene.Net.Icu/Support/BreakIterator.cs | 248 -----------
src/Lucene.Net.Icu/Support/CharacterIterator.cs | 50 ---
src/Lucene.Net.Icu/Support/IcuBreakIterator.cs | 394 -----------------
.../Support/StringCharacterIterator.cs | 204 ---------
src/Lucene.Net.Icu/project.json | 64 ---
.../Lucene.Net.Tests.ICU.csproj | 141 +++++++
.../Lucene.Net.Tests.ICU.project.json | 12 +
.../Lucene.Net.Tests.ICU.xproj | 42 ++
.../Properties/AssemblyInfo.cs | 42 ++
.../Search/PostingsHighlight/CambridgeMA.utf8 | 1 +
.../Support/TestApiConsistency.cs | 147 +++++++
.../Support/TestExceptionSerialization.cs | 54 +++
.../Support/TestIcuBreakIterator.cs | 420 +++++++++++++++++++
src/Lucene.Net.Tests.ICU/project.json | 83 ++++
.../Lucene.Net.Tests.Icu.csproj | 142 -------
.../Lucene.Net.Tests.Icu.project.json | 12 -
.../Lucene.Net.Tests.Icu.xproj | 42 --
.../Properties/AssemblyInfo.cs | 42 --
.../Search/PostingsHighlight/CambridgeMA.utf8 | 1 -
.../Support/TestApiConsistency.cs | 147 -------
.../Support/TestExceptionSerialization.cs | 54 ---
.../Support/TestIcuBreakIterator.cs | 420 -------------------
src/Lucene.Net.Tests.Icu/project.json | 83 ----
src/Lucene.Net/Properties/AssemblyInfo.cs | 4 +-
43 files changed, 2294 insertions(+), 2278 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index be00288..4c3522b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -66,7 +66,7 @@ See [Documenting Lucene.Net](https://cwiki.apache.org/confluence/display/LUCENEN
* [Lucene.Net.Demo](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/demo) (might be a good learning experience)
* [Lucene.Net.Replicator](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/replicator)
-* [Lucene.Net.Analysis.Icu](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/icu)
+* [Lucene.Net.Analysis.ICU](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/icu) (note that we will be putting this functionality into the Lucene.Net.ICU package)
* [Lucene.Net.Analysis.Kuromoji](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/kuromoji)
* [Lucene.Net.Analysis.SmartCn](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/smartcn)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/Lucene.Net.Portable.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln
index d6affa8..7e71238 100644
--- a/Lucene.Net.Portable.sln
+++ b/Lucene.Net.Portable.sln
@@ -98,9 +98,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{EFA10A77
Version.proj = Version.proj
EndProjectSection
EndProject
-Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Icu", "src\Lucene.Net.Icu\Lucene.Net.Icu.xproj", "{44A5341B-0F52-429D-977A-C35E10ECCADF}"
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.ICU", "src\Lucene.Net.ICU\Lucene.Net.ICU.xproj", "{44A5341B-0F52-429D-977A-C35E10ECCADF}"
EndProject
-Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Icu", "src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.xproj", "{32FD3471-E862-4055-B969-79C12A656366}"
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.ICU", "src\Lucene.Net.Tests.ICU\Lucene.Net.Tests.ICU.xproj", "{32FD3471-E862-4055-B969-79C12A656366}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index e6940c9..b1d2752 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -1,6 +1,24 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net", "src\Lucene.Net\Lucene.Net.csproj", "{5D4AD9BE-1FFB-41AB-9943-25737971BF57}"
@@ -89,9 +107,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{9811D53E
Version.proj = Version.proj
EndProjectSection
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Icu", "src\Lucene.Net.Icu\Lucene.Net.Icu.csproj", "{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.ICU", "src\Lucene.Net.ICU\Lucene.Net.ICU.csproj", "{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Icu", "src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.csproj", "{D5AA1A22-1B28-4DF6-BFDA-02519A189839}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.ICU", "src\Lucene.Net.Tests.ICU\Lucene.Net.Tests.ICU.csproj", "{D5AA1A22-1B28-4DF6-BFDA-02519A189839}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
index 4da6116..8b18901 100644
--- a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
+++ b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
@@ -45,9 +45,9 @@ using System.Runtime.InteropServices;
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("e9e769ea-8504-44bc-8dc9-ccf958765f8f")]
-[assembly: InternalsVisibleTo("Lucene.Net.Icu")]
+[assembly: InternalsVisibleTo("Lucene.Net.ICU")]
// for testing
[assembly: InternalsVisibleTo("Lucene.Net.Tests.Highlighter")]
-[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")]
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.ICU")]
// NOTE: Version information is in CommonAssemblyInfo.cs
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt b/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt
new file mode 100644
index 0000000..07f0fab
--- /dev/null
+++ b/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj b/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
new file mode 100644
index 0000000..b1510b9
--- /dev/null
+++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj
@@ -0,0 +1,144 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net</RootNamespace>
+ <AssemblyName>Lucene.Net.ICU</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DefineConstants>$(DefineConstants);FEATURE_BREAKITERATOR;FEATURE_SERIALIZABLE</DefineConstants>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiAnalyzer.cs">
+ <Link>Analysis\Th\ThaiAnalyzer.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizer.cs">
+ <Link>Analysis\Th\ThaiTokenizer.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizerFactory.cs">
+ <Link>Analysis\Th\ThaiTokenizerFactory.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilter.cs">
+ <Link>Analysis\Th\ThaiWordFilter.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilterFactory.cs">
+ <Link>Analysis\Th\ThaiWordFilterFactory.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\CharArrayIterator.cs">
+ <Link>Analysis\Util\CharArrayIterator.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs">
+ <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs">
+ <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\MultiTermHighlighting.cs">
+ <Link>Search\PostingsHighlight\MultiTermHighlighting.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\Passage.cs">
+ <Link>Search\PostingsHighlight\Passage.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageFormatter.cs">
+ <Link>Search\PostingsHighlight\PassageFormatter.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageScorer.cs">
+ <Link>Search\PostingsHighlight\PassageScorer.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PostingsHighlighter.cs">
+ <Link>Search\PostingsHighlight\PostingsHighlighter.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\WholeBreakIterator.cs">
+ <Link>Search\PostingsHighlight\WholeBreakIterator.cs</Link>
+ </Compile>
+ <Compile Include="..\Lucene.Net.Highlighter\VectorHighlight\BreakIteratorBoundaryScanner.cs">
+ <Link>Search\VectorHighlight\BreakIteratorBoundaryScanner.cs</Link>
+ </Compile>
+ <Compile Include="Support\BreakIterator.cs" />
+ <Compile Include="Support\CharacterIterator.cs" />
+ <Compile Include="Support\IcuBreakIterator.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="..\CommonAssemblyInfo.cs">
+ <Link>Properties\CommonAssemblyInfo.cs</Link>
+ </Compile>
+ <Compile Include="Support\StringCharacterIterator.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+ <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+ <Name>Lucene.Net.Analysis.Common</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj">
+ <Project>{e9e769ea-8504-44bc-8dc9-ccf958765f8f}</Project>
+ <Name>Lucene.Net.Highlighter</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="Lucene.Net.ICU.project.json" /> <!-- casing must match the renamed file on disk (Lucene.Net.ICU.project.json) -->
+ </ItemGroup>
+ <ItemGroup>
+ <EmbeddedResource Include="Analysis\Th\stopwords.txt" />
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json b/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json
new file mode 100644
index 0000000..af28fc8
--- /dev/null
+++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json
@@ -0,0 +1,11 @@
+{
+ "runtimes": {
+ "win": {}
+ },
+ "dependencies": {
+ "icu.net": "54.1.1-alpha"
+ },
+ "frameworks": {
+ "net451": {}
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj b/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj
new file mode 100644
index 0000000..dbc1701
--- /dev/null
+++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup>
+ <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
+ <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+ </PropertyGroup>
+ <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>44a5341b-0f52-429d-977a-c35e10eccadf</ProjectGuid>
+ <RootNamespace>Lucene.Net</RootNamespace>
+ <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+ <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ </PropertyGroup>
+ <PropertyGroup>
+ <SchemaVersion>2.0</SchemaVersion>
+ </PropertyGroup>
+ <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs b/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..4d88887
--- /dev/null
+++ b/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs
@@ -0,0 +1,52 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.ICU")]
+[assembly: AssemblyDescription(
+ "International Components for Unicode-based features including Thai analyzer support, " +
+ "an international postings highlighter, and BreakIterator support for the vector highlighter in Lucene.Net.Highlighter " +
+ "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyDefaultAlias("Lucene.Net.ICU")]
+[assembly: AssemblyCulture("")]
+
+[assembly: CLSCompliant(true)]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("349cb7c9-7534-4e1d-9b0a-5521441af0ae")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.ICU")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/BreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Support/BreakIterator.cs b/src/Lucene.Net.ICU/Support/BreakIterator.cs
new file mode 100644
index 0000000..df4a945
--- /dev/null
+++ b/src/Lucene.Net.ICU/Support/BreakIterator.cs
@@ -0,0 +1,248 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// The <c>BreakIterator</c> class implements methods for finding
+ /// the location of boundaries in text. Instances of <c>BreakIterator</c>
+ /// maintain a current position and scan over text
+ /// returning the index of characters where boundaries occur.
+ /// </summary>
+ public abstract class BreakIterator
+#if FEATURE_CLONEABLE
+ : ICloneable
+#endif
+ {
+ /// <summary>
+ /// Constructor. This abstract base class holds no instance state of its own and has no default behavior.
+ /// </summary>
+ protected BreakIterator()
+ {
+ }
+
+ /// <summary>
+ /// Creates a copy of this iterator.
+ /// </summary>
+ /// <returns>A member-wise (shallow) copy of this instance.</returns>
+ public object Clone()
+ {
+ return MemberwiseClone();
+ }
+
+ /// <summary>
+ /// DONE is returned by Previous(), Next(), Next(int), Preceding(int)
+ /// and Following(int) when either the first or last text boundary has been
+ /// reached.
+ /// </summary>
+ public static readonly int DONE = -1;
+
+ /// <summary>
+ /// Returns the first boundary. The iterator's current position is set
+ /// to the first text boundary.
+ /// </summary>
+ /// <returns>The character index of the first text boundary.</returns>
+ public abstract int First();
+
+ /// <summary>
+ /// Returns the last boundary. The iterator's current position is set
+ /// to the last text boundary.
+ /// </summary>
+ /// <returns>The character index of the last text boundary.</returns>
+ public abstract int Last();
+
+ /// <summary>
+ /// Returns the nth boundary from the current boundary. If either
+ /// the first or last text boundary has been reached, it returns
+ /// <see cref="BreakIterator.DONE"/> and the current position is set to either
+ /// the first or last text boundary depending on which one is reached. Otherwise,
+ /// the iterator's current position is set to the new boundary.
+ /// For example, if the iterator's current position is the mth text boundary
+ /// and three more boundaries exist from the current boundary to the last text
+ /// boundary, the Next(2) call will return m + 2. The new text position is set
+ /// to the (m + 2)th text boundary. A Next(4) call would return
+ /// <see cref="BreakIterator.DONE"/> and the last text boundary would become the
+ /// new text position.
+ /// </summary>
+ /// <param name="n">
+ /// which boundary to return. A value of 0
+ /// does nothing. Negative values move to previous boundaries
+ /// and positive values move to later boundaries.
+ /// </param>
+ /// <returns>
+ /// The character index of the nth boundary from the current position
+ /// or <see cref="BreakIterator.DONE"/> if either first or last text boundary
+ /// has been reached.
+ /// </returns>
+ public abstract int Next(int n);
+
+ /// <summary>
+ /// Returns the boundary following the current boundary. If the current boundary
+ /// is the last text boundary, it returns <see cref="BreakIterator.DONE"/> and
+ /// the iterator's current position is unchanged. Otherwise, the iterator's
+ /// current position is set to the boundary following the current boundary.
+ /// </summary>
+ /// <returns>
+ /// The character index of the next text boundary or
+ /// <see cref="BreakIterator.DONE"/> if the current boundary is the last text
+ /// boundary.
+ /// Equivalent to Next(1).
+ /// </returns>
+ /// <seealso cref="Next(int)"/>
+ public abstract int Next();
+
+ /// <summary>
+ /// Returns the boundary preceding the current boundary. If the current boundary
+ /// is the first text boundary, it returns <see cref="BreakIterator.DONE"/> and
+ /// the iterator's current position is unchanged. Otherwise, the iterator's
+ /// current position is set to the boundary preceding the current boundary.
+ /// </summary>
+ /// <returns>
+ /// The character index of the previous text boundary or
+ /// <see cref="BreakIterator.DONE"/> if the current boundary is the first text
+ /// boundary.
+ /// </returns>
+ public abstract int Previous();
+
+ /// <summary>
+ /// Returns the first boundary following the specified character offset. If the
+ /// specified offset equals the last text boundary, it returns
+ /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
+ /// Otherwise, the iterator's current position is set to the returned boundary.
+ /// The value returned is always greater than the offset or the value
+ /// <see cref="BreakIterator.DONE"/>.
+ /// </summary>
+ /// <param name="offset">the character offset to begin scanning.</param>
+ /// <returns>
+ /// The first boundary after the specified offset or
+ /// <see cref="BreakIterator.DONE"/> if the last text boundary is passed in
+ /// as the offset.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// if the specified offset is less than
+ /// the first text boundary or greater than the last text boundary.
+ /// </exception>
+ public abstract int Following(int offset);
+
+ /// <summary>
+ /// Returns the last boundary preceding the specified character offset. If the
+ /// specified offset equals the first text boundary, it returns
+ /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
+ /// Otherwise, the iterator's current position is set to the returned boundary.
+ /// The value returned is always less than the offset or the value
+ /// <see cref="BreakIterator.DONE"/>.
+ /// </summary>
+ /// <param name="offset">the character offset to begin scanning.</param>
+ /// <returns>
+ /// The last boundary before the specified offset or
+ /// <see cref="BreakIterator.DONE"/> if the first text boundary is passed in
+ /// as the offset.
+ /// </returns>
+ public abstract int Preceding(int offset);
+ //{
+ // // NOTE: This implementation is here solely because we can't add new
+ // // abstract methods to an existing class. There is almost ALWAYS a
+ // // better, faster way to do this.
+ // int pos = Following(offset);
+ // while (pos >= offset && pos != DONE)
+ // {
+ // pos = Previous();
+ // }
+ // return pos;
+ //}
+
+ /// <summary>
+ /// Returns true if the specified character offset is a text boundary.
+ /// </summary>
+ /// <param name="offset">the character offset to check.</param>
+ /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+ /// <exception cref="ArgumentException">
+ /// if the specified offset is less than
+ /// the first text boundary or greater than the last text boundary.
+ /// </exception>
+ public abstract bool IsBoundary(int offset);
+ //{
+ // // NOTE: This implementation probably is wrong for most situations
+ // // because it fails to take into account the possibility that a
+ // // CharacterIterator passed to setText() may not have a begin offset
+ // // of 0. But since the abstract BreakIterator doesn't have that
+ // // knowledge, it assumes the begin offset is 0. If you subclass
+ // // BreakIterator, copy the SimpleTextBoundary implementation of this
+ // // function into your subclass. [This should have been abstract at
+ // // this level, but it's too late to fix that now.]
+ // if (offset == 0)
+ // {
+ // return true;
+ // }
+ // int boundary = Following(offset - 1);
+ // if (boundary == DONE)
+ // {
+ // throw new ArgumentException();
+ // }
+ // return boundary == offset;
+ //}
+
+ /// <summary>
+ /// Returns character index of the text boundary that was most
+ /// recently returned by Next(), Next(int), Previous(), First(), Last(),
+ /// Following(int) or Preceding(int). If any of these methods returns
+ /// <see cref="BreakIterator.DONE"/> because either first or last text boundary
+ /// has been reached, it returns the first or last text boundary depending on
+ /// which one is reached.
+ /// </summary>
+ /// <returns>
+ /// The text boundary returned from the above methods, first or last
+ /// text boundary.
+ /// </returns>
+ /// <seealso cref="Next()"/>
+ /// <seealso cref="Next(int)"/>
+ /// <seealso cref="Previous()"/>
+ /// <seealso cref="First()"/>
+ /// <seealso cref="Last()"/>
+ /// <seealso cref="Following(int)"/>
+ /// <seealso cref="Preceding(int)"/>
+ public abstract int Current { get; }
+
+ /// <summary>
+ /// Gets the text being scanned.
+ /// </summary>
+ /// <value>The text being scanned.</value>
+ //public abstract CharacterIterator GetText();
+ public abstract string Text { get; }
+
+ /// <summary>
+ /// Sets a new text string to be scanned (wrapped in a <see cref="StringCharacterIterator"/>).
+ /// The current scan position is reset to First().
+ /// </summary>
+ /// <param name="newText">new text to scan.</param>
+ public virtual void SetText(string newText)
+ {
+ SetText(new StringCharacterIterator(newText));
+ }
+
+ /// <summary>
+ /// Sets new text to be scanned, supplied as a <see cref="CharacterIterator"/>.
+ /// The current scan position is reset to First().
+ /// </summary>
+ /// <param name="newText">character iterator over the new text to scan.</param>
+ public abstract void SetText(CharacterIterator newText);
+ }
+}
+#endif
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/CharacterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Support/CharacterIterator.cs b/src/Lucene.Net.ICU/Support/CharacterIterator.cs
new file mode 100644
index 0000000..0c81629
--- /dev/null
+++ b/src/Lucene.Net.ICU/Support/CharacterIterator.cs
@@ -0,0 +1,50 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Abstract protocol for stepping forwards and backwards over the characters
+ /// of a piece of text. Every member except <see cref="DONE"/> is abstract, so the
+ /// exact behavior is defined by the implementer; <see cref="StringCharacterIterator"/>
+ /// is the string-backed implementation.
+ /// </summary>
+ public abstract class CharacterIterator
+ {
+ /// <summary>
+ /// Sentinel character ('\uFFFF'). <see cref="StringCharacterIterator"/> returns it
+ /// when there is no character at the requested position.
+ /// </summary>
+ public static readonly char DONE = '\uFFFF';
+
+ /// <summary>
+ /// Gets the character at the current position.
+ /// </summary>
+ public abstract char Current { get; }
+
+ /// <summary>
+ /// Moves to the first position and returns the character there.
+ /// </summary>
+ public abstract char First();
+
+ /// <summary>
+ /// Moves to the last position and returns the character there.
+ /// </summary>
+ public abstract char Last();
+
+ /// <summary>
+ /// Moves one position forward and returns the character there.
+ /// </summary>
+ public abstract char Next();
+
+ /// <summary>
+ /// Moves one position backward and returns the character there.
+ /// </summary>
+ public abstract char Previous();
+
+ /// <summary>
+ /// Moves to the specified position and returns the character there.
+ /// </summary>
+ /// <param name="position">The position to move to.</param>
+ public abstract char SetIndex(int position);
+
+ /// <summary>
+ /// Gets the index at which the iterated range begins.
+ /// </summary>
+ public abstract int BeginIndex { get; }
+
+ /// <summary>
+ /// Gets the index at which the iterated range ends.
+ /// NOTE(review): <see cref="StringCharacterIterator"/> treats this as exclusive - confirm for other implementers.
+ /// </summary>
+ public abstract int EndIndex { get; }
+
+ /// <summary>
+ /// Gets the current position.
+ /// </summary>
+ public abstract int Index { get; }
+
+ /// <summary>
+ /// Creates a copy of this iterator.
+ /// </summary>
+ public abstract object Clone();
+
+ /// <summary>
+ /// Gets the underlying text as a <see cref="string"/>.
+ /// NOTE(review): <see cref="StringCharacterIterator"/> returns the whole backing string,
+ /// not only the part between <see cref="BeginIndex"/> and <see cref="EndIndex"/> - confirm this is the intended contract.
+ /// </summary>
+ public abstract string GetTextAsString();
+ }
+}
+#endif
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs b/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs
new file mode 100644
index 0000000..79819ed
--- /dev/null
+++ b/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs
@@ -0,0 +1,394 @@
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
+ /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
+ /// provides methods to move forward, reverse, and randomly through a set of text breaks
+ /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
+ /// </summary>
+ // LUCENENET specific type
+ public class IcuBreakIterator : BreakIterator
+ {
+ private readonly Icu.Locale locale;
+ private readonly Icu.BreakIterator.UBreakIteratorType type;
+
+ private List<int> boundaries = new List<int>();
+ private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
+ private string text;
+
+ /// <summary>
+ /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
+ /// </summary>
+ protected int m_start;
+
+ /// <summary>
+ /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
+ /// </summary>
+ protected int m_end;
+
+ private bool enableHacks = false;
+
+        /// <summary>
+        /// Creates a break iterator of the given type that uses
+        /// <see cref="CultureInfo.CurrentCulture"/> as its locale.
+        /// </summary>
+        /// <param name="type">The kind of boundaries to iterate over.</param>
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
+            : this(type, CultureInfo.CurrentCulture)
+        {
+        }
+
+        /// <summary>
+        /// Creates a break iterator of the given type for the given culture.
+        /// The culture's <see cref="CultureInfo.Name"/> is used to build the ICU locale.
+        /// </summary>
+        /// <param name="type">The kind of boundaries to iterate over.</param>
+        /// <param name="locale">The culture whose rules are used. Must not be <c>null</c>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="locale"/> is <c>null</c>.</exception>
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
+        {
+            if (locale == null)
+            {
+                throw new ArgumentNullException("locale");
+            }
+
+            string cultureName = locale.Name;
+            this.locale = new Icu.Locale(cultureName);
+            this.type = type;
+        }
+
+
+        /// <summary>
+        /// Gets or sets whether the text is pre-processed before it is handed to ICU.
+        /// When enabled, word iteration replaces "-" with "a", and sentence iteration
+        /// replaces "\n" with " " and capitalizes the first letter of each sentence.
+        /// The flag is read when boundaries are loaded, so it affects text set afterwards.
+        /// </summary>
+        public virtual bool EnableHacks
+        {
+            get
+            {
+                return enableHacks;
+            }
+            set
+            {
+                enableHacks = value;
+            }
+        }
+
+        /// <summary>
+        /// Positions the iterator on the first boundary.
+        /// </summary>
+        /// <returns>The first boundary, or the start offset when no boundaries are loaded.</returns>
+        public override int First()
+        {
+            const int firstIndex = 0;
+            currentBoundaryIndex = firstIndex;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Positions the iterator on the last boundary.
+        /// </summary>
+        /// <returns>The last boundary, or the start offset when no boundaries are loaded.</returns>
+        public override int Last()
+        {
+            int lastIndex = boundaries.Count - 1;
+            currentBoundaryIndex = lastIndex;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Moves the iterator the given number of boundaries forward (positive
+        /// <paramref name="n"/>) or backward (negative <paramref name="n"/>) by calling
+        /// <see cref="Next()"/> or <see cref="Previous()"/> once per step.
+        /// </summary>
+        /// <param name="n">The number of steps to move. The sign indicates the direction
+        /// (negative is backwards, and positive is forwards).</param>
+        /// <returns>The result of the last step taken, or the current position
+        /// when <paramref name="n"/> is zero.</returns>
+        public override int Next(int n)
+        {
+            int position = Current;
+
+            for (int forward = n; forward > 0; forward--)
+            {
+                position = Next();
+            }
+
+            for (int backward = n; backward < 0; backward++)
+            {
+                position = Previous();
+            }
+
+            return position;
+        }
+
+        /// <summary>
+        /// Moves the iterator to the boundary after the current one.
+        /// </summary>
+        /// <returns>The next boundary, or <see cref="BreakIterator.DONE"/> when the iterator
+        /// is already on the last boundary or no boundaries are loaded.</returns>
+        public override int Next()
+        {
+            bool hasNext = boundaries.Count != 0
+                && currentBoundaryIndex < boundaries.Count - 1;
+            if (!hasNext)
+            {
+                return DONE;
+            }
+
+            currentBoundaryIndex += 1;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Moves the iterator to the boundary before the current one.
+        /// </summary>
+        /// <returns>The previous boundary, or <see cref="BreakIterator.DONE"/> when the iterator
+        /// is already on the first boundary or no boundaries are loaded.</returns>
+        public override int Previous()
+        {
+            bool hasPrevious = boundaries.Count != 0 && currentBoundaryIndex != 0;
+            if (!hasPrevious)
+            {
+                return DONE;
+            }
+
+            currentBoundaryIndex -= 1;
+            return ReturnCurrent();
+        }
+
+ /// <summary>
+ /// Throws <see cref="ArgumentException"/> unless
+ /// <see cref="m_start"/> &lt;= <paramref name="offset"/> &lt;= <see cref="m_end"/>.
+ /// Note that the end offset itself is accepted.
+ /// </summary>
+ /// <param name="offset">The character offset to validate.</param>
+ /// <exception cref="ArgumentException"><paramref name="offset"/> lies outside the range above.</exception>
+ private void CheckOffset(int offset)
+ {
+ if (offset < m_start || offset > m_end)
+ {
+ throw new ArgumentException("offset out of bounds");
+ }
+ }
+
+        /// <summary>
+        /// Moves the iterator to the first boundary located after the specified offset.
+        /// </summary>
+        /// <param name="offset">The position from which to begin searching for a break position.</param>
+        /// <returns>The first boundary after <paramref name="offset"/>, or
+        /// <see cref="BreakIterator.DONE"/> when no boundaries are loaded or the lookup
+        /// reports that none follows the offset.</returns>
+        /// <exception cref="ArgumentException"><paramref name="offset"/> is outside the text range.</exception>
+        public override int Following(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int nextIndex = GetLowestIndexGreaterThan(offset);
+            bool found = nextIndex != -1;
+
+            // When nothing follows, park the iterator on the last boundary.
+            currentBoundaryIndex = found ? nextIndex : boundaries.Count - 1;
+            return found ? ReturnCurrent() : DONE;
+        }
+
+        /// <summary>
+        /// Finds the index, within the boundary list, of the lowest boundary that is
+        /// strictly greater than <paramref name="offset"/>. The list is searched with
+        /// a binary search, so it must be in ascending order.
+        /// </summary>
+        /// <param name="offset">The character offset to search from.</param>
+        /// <returns>The index of the first boundary after <paramref name="offset"/>,
+        /// or -1 when every boundary is less than or equal to it.</returns>
+        private int GetLowestIndexGreaterThan(int offset)
+        {
+            int index = boundaries.BinarySearch(offset);
+
+            // On a miss, BinarySearch returns the bitwise complement of the index of the
+            // first larger element, or of Count when there is no larger element.
+            int candidate = index < 0 ? ~index : index + 1;
+
+            // candidate == Count means nothing follows the offset. Report -1 in that case
+            // too, so callers never store an out-of-range boundary index.
+            return candidate < boundaries.Count ? candidate : -1;
+        }
+
+        /// <summary>
+        /// Moves the iterator to the last boundary located before the specified offset.
+        /// </summary>
+        /// <param name="offset">The position to begin searching for a break from.</param>
+        /// <returns>The last boundary before <paramref name="offset"/>, or
+        /// <see cref="BreakIterator.DONE"/> when no boundaries are loaded or none precedes
+        /// the offset (the iterator is then left on the first boundary).</returns>
+        /// <exception cref="ArgumentException"><paramref name="offset"/> is outside the text range.</exception>
+        public override int Preceding(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int priorIndex = GetHighestIndexLessThan(offset);
+            if (priorIndex != -1)
+            {
+                currentBoundaryIndex = priorIndex;
+                return ReturnCurrent();
+            }
+
+            currentBoundaryIndex = 0;
+            return DONE;
+        }
+
+        /// <summary>
+        /// Finds the index, within the boundary list, of the highest boundary that is
+        /// strictly less than <paramref name="offset"/>.
+        /// </summary>
+        /// <param name="offset">The character offset to search from.</param>
+        /// <returns>The index of the last boundary before <paramref name="offset"/>,
+        /// or -1 when the offset is at or before the first boundary.</returns>
+        private int GetHighestIndexLessThan(int offset)
+        {
+            int searchResult = boundaries.BinarySearch(offset);
+
+            // A negative result is the bitwise complement of the insertion point.
+            int insertionPoint = searchResult < 0 ? ~searchResult : searchResult;
+
+            // NOTE: This is intentionally allowed to return -1, which indicates
+            // we are before the first boundary.
+            return insertionPoint - 1;
+        }
+
+        /// <summary>
+        /// Gets the boundary the iterator is currently positioned on.
+        /// </summary>
+        public override int Current
+        {
+            get
+            {
+                return ReturnCurrent();
+            }
+        }
+
+        /// <summary>
+        /// Gets the text most recently supplied for analysis.
+        /// </summary>
+        public override string Text
+        {
+            get { return text; }
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze a new piece of text. This function resets
+        /// the current iteration position to the beginning of the text and reloads
+        /// the boundaries for the whole string.
+        /// </summary>
+        /// <param name="newText">The text to analyze. Must not be <c>null</c>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="newText"/> is <c>null</c>.</exception>
+        public override void SetText(string newText)
+        {
+            // Validate before touching any state so a bad argument cannot leave the
+            // iterator with a null text and stale boundaries.
+            if (newText == null)
+            {
+                throw new ArgumentNullException("newText");
+            }
+
+            text = newText;
+            currentBoundaryIndex = 0;
+            m_start = 0;
+            m_end = newText.Length;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze the text exposed by a <see cref="CharacterIterator"/>.
+        /// Only the range between the iterator's <see cref="CharacterIterator.BeginIndex"/>
+        /// and <see cref="CharacterIterator.EndIndex"/> is analyzed, and the current
+        /// iteration position is reset to the first boundary.
+        /// </summary>
+        /// <param name="newText">Iterator over the text to analyze. Must not be <c>null</c>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="newText"/> is <c>null</c>.</exception>
+        public override void SetText(CharacterIterator newText)
+        {
+            // Validate before touching any state so a bad argument cannot leave the
+            // iterator half-updated.
+            if (newText == null)
+            {
+                throw new ArgumentNullException("newText");
+            }
+
+            text = newText.GetTextAsString();
+            currentBoundaryIndex = 0;
+            m_start = newText.BeginIndex;
+            m_end = newText.EndIndex;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+ /// <summary>
+ /// Asks ICU for the boundaries of the text between <paramref name="start"/> and
+ /// <paramref name="end"/> and stores them, expressed as offsets into the full text,
+ /// in the boundary list.
+ /// </summary>
+ /// <param name="start">Offset in the text at which analysis begins.</param>
+ /// <param name="end">Offset in the text at which analysis ends (exclusive).</param>
+ private void LoadBoundaries(int start, int end)
+ {
+ IEnumerable<Icu.Boundary> icuBoundaries;
+ // ICU only sees the requested slice, so the offsets it reports are relative to 'start'.
+ string offsetText = text.Substring(start, end - start);
+
+#if !NETSTANDARD
+ try
+ {
+#endif
+ if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
+ {
+ if (enableHacks)
+ {
+ // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+ // (a one-for-one character replacement, so the text length is unchanged)
+ offsetText = offsetText.Replace("-", "a");
+ }
+
+ icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
+ }
+ else
+ {
+ if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+ {
+ // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
+ // (a one-for-one character replacement, so the text length is unchanged)
+ offsetText = offsetText.Replace("\n", " ");
+ // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
+ // begin with capital letters.
+ offsetText = CapitalizeFirst(offsetText);
+ }
+
+ icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
+ }
+#if !NETSTANDARD
+ }
+ catch (AccessViolationException ace)
+ {
+ // LUCENENET TODO: Find a reliable way to reproduce and report the
+ // AccessViolationException that happens here to the icu-dotnet project team
+ throw new Exception("Hit AccessViolationException: " + ace.ToString(), ace);
+ }
+#endif
+
+ // Flatten each boundary's Start/End pair into one list, shift the values by 'start'
+ // to turn them back into offsets in the full text, and drop duplicates.
+ // NOTE(review): there is no explicit sort here, yet the lookups use BinarySearch -
+ // presumably ICU yields boundaries in ascending order; confirm.
+ boundaries = icuBoundaries
+ .Select(t => new[] { t.Start + start, t.End + start })
+ .SelectMany(b => b)
+ .Distinct()
+ .ToList();
+ }
+
+        /// <summary>
+        /// Tells whether the given character offset is one of the loaded boundaries.
+        /// </summary>
+        /// <param name="offset">the character offset to check.</param>
+        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentException"><paramref name="offset"/> is outside the text range.</exception>
+        public override bool IsBoundary(int offset)
+        {
+            CheckOffset(offset);
+
+            int position = boundaries.IndexOf(offset);
+            return position >= 0;
+        }
+
+        /// <summary>
+        /// Resolves the current boundary index to a character offset.
+        /// </summary>
+        /// <returns>The start offset when no boundaries are loaded; otherwise the boundary
+        /// at the current index, or <see cref="BreakIterator.DONE"/> when that index is
+        /// outside the boundary list.</returns>
+        private int ReturnCurrent()
+        {
+            // If there are no boundaries, we must return the start offset
+            if (boundaries.Count <= 0)
+            {
+                return m_start;
+            }
+
+            bool inRange = currentBoundaryIndex > -1
+                && currentBoundaryIndex < boundaries.Count;
+            if (inRange)
+            {
+                return boundaries[currentBoundaryIndex];
+            }
+
+            return DONE;
+        }
+
+        /// <summary>
+        /// LUCENENET TODO: Temporary workaround for icu-dotnet not breaking sentences
+        /// correctly unless they start with a capital letter. Upper-cases the first
+        /// letter of the string and the first letter that follows each '!', '?' or '.'.
+        /// If/when ICU is fixed, this method and the calls to it should be removed.
+        /// </summary>
+        /// <param name="s">The text to process.</param>
+        /// <returns>A string of the same length with sentence-initial letters upper-cased.</returns>
+        public static string CapitalizeFirst(string s)
+        {
+            var builder = new StringBuilder(s.Length);
+            bool atSentenceStart = true;
+
+            foreach (char c in s)
+            {
+                bool capitalize = atSentenceStart && char.IsLetter(c);
+                builder.Append(capitalize ? char.ToUpper(c) : c);
+
+                if (capitalize)
+                {
+                    atSentenceStart = false;
+                }
+
+                // Any sentence terminator re-arms capitalization for the next letter.
+                if (c == '!' || c == '?' || c == '.')
+                {
+                    atSentenceStart = true;
+                }
+            }
+
+            return builder.ToString();
+        }
+ }
+}
+#endif
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs b/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs
new file mode 100644
index 0000000..156f81e
--- /dev/null
+++ b/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs
@@ -0,0 +1,204 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// <see cref="StringCharacterIterator"/> implements the
+ /// <see cref="CharacterIterator"/> protocol for a <see cref="string"/>.
+ /// The <see cref="StringCharacterIterator"/> class iterates over the
+ /// entire <see cref="string"/>.
+ /// </summary>
+ /// <seealso cref="CharacterIterator"/>
+ public class StringCharacterIterator : CharacterIterator
+ {
+ private string text;
+ private int begin;
+ private int end;
+ // invariant: begin <= pos <= end
+ private int pos;
+
+
+        /// <summary>
+        /// Creates an iterator over the whole of <paramref name="text"/>, positioned at index 0.
+        /// </summary>
+        /// <param name="text">The text to iterate over. Must not be <c>null</c>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        public StringCharacterIterator(string text)
+            : this(text, 0)
+        {
+        }
+
+        /// <summary>
+        /// Creates an iterator over the whole of <paramref name="text"/>, positioned at
+        /// <paramref name="pos"/>.
+        /// </summary>
+        /// <param name="text">The text to iterate over. Must not be <c>null</c>.</param>
+        /// <param name="pos">The initial position, between 0 and the length of the text inclusive.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="pos"/> is out of range.</exception>
+        // The length is computed null-safely so that a null text reaches the full
+        // constructor's ArgumentNullException check instead of failing here with a
+        // NullReferenceException.
+        public StringCharacterIterator(string text, int pos)
+            : this(text, 0, text == null ? 0 : text.Length, pos)
+        {
+        }
+
+        /// <summary>
+        /// Creates an iterator over the part of <paramref name="text"/> between
+        /// <paramref name="begin"/> and <paramref name="end"/>, positioned at <paramref name="pos"/>.
+        /// </summary>
+        /// <param name="text">The text to iterate over. Must not be <c>null</c>.</param>
+        /// <param name="begin">Index of the first character of the range.</param>
+        /// <param name="end">Index just past the last character of the range.</param>
+        /// <param name="pos">The initial position, between <paramref name="begin"/> and
+        /// <paramref name="end"/> inclusive.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The range or the position is invalid.</exception>
+        public StringCharacterIterator(string text, int begin, int end, int pos)
+        {
+            if (text == null)
+            {
+                throw new ArgumentNullException("text");
+            }
+            this.text = text;
+
+            bool validRange = begin >= 0 && begin <= end && end <= text.Length;
+            if (!validRange)
+            {
+                throw new ArgumentException("Invalid substring range");
+            }
+
+            bool validPosition = pos >= begin && pos <= end;
+            if (!validPosition)
+            {
+                throw new ArgumentException("Invalid position");
+            }
+
+            this.begin = begin;
+            this.end = end;
+            this.pos = pos;
+        }
+
+        /// <summary>
+        /// Replaces the text being iterated. The range becomes the whole of the new
+        /// text and the position is reset to index 0.
+        /// </summary>
+        /// <param name="text">The new text. Must not be <c>null</c>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        public void SetText(string text)
+        {
+            if (text == null)
+            {
+                throw new ArgumentNullException("text");
+            }
+
+            this.text = text;
+            begin = 0;
+            end = text.Length;
+            pos = 0;
+        }
+
+        /// <summary>
+        /// Moves to the beginning of the range.
+        /// </summary>
+        /// <returns>The character at the new position, or <see cref="CharacterIterator.DONE"/>
+        /// when the range is empty.</returns>
+        public override char First()
+        {
+            pos = begin;
+            char result = Current;
+            return result;
+        }
+
+        /// <summary>
+        /// Moves to the last character of the range, or to the end index when the
+        /// range is empty.
+        /// </summary>
+        /// <returns>The character at the new position, or <see cref="CharacterIterator.DONE"/>
+        /// when the range is empty.</returns>
+        public override char Last()
+        {
+            pos = (end != begin) ? end - 1 : end;
+            return Current;
+        }
+
+        /// <summary>
+        /// Moves to the given position.
+        /// </summary>
+        /// <param name="position">The new position, between the begin and end index inclusive.</param>
+        /// <returns>The character at the new position, or <see cref="CharacterIterator.DONE"/>
+        /// when the position is the end index.</returns>
+        /// <exception cref="ArgumentException"><paramref name="position"/> is outside the range.</exception>
+        public override char SetIndex(int position)
+        {
+            bool withinRange = position >= begin && position <= end;
+            if (!withinRange)
+            {
+                throw new ArgumentException("Invalid index");
+            }
+
+            pos = position;
+            return Current;
+        }
+
+        /// <summary>
+        /// Gets the character at the current position, or
+        /// <see cref="CharacterIterator.DONE"/> when the position is not inside the range.
+        /// </summary>
+        public override char Current
+        {
+            get
+            {
+                bool insideRange = pos >= begin && pos < end;
+                return insideRange ? text[pos] : DONE;
+            }
+        }
+
+        /// <summary>
+        /// Moves one position forward.
+        /// </summary>
+        /// <returns>The character at the new position. When there is no further character,
+        /// the position is set to the end index and <see cref="CharacterIterator.DONE"/> is returned.</returns>
+        public override char Next()
+        {
+            if (pos >= end - 1)
+            {
+                pos = end;
+                return DONE;
+            }
+
+            pos++;
+            return text[pos];
+        }
+
+        /// <summary>
+        /// Moves one position backward.
+        /// </summary>
+        /// <returns>The character at the new position, or <see cref="CharacterIterator.DONE"/>
+        /// (leaving the position unchanged) when already at the begin index.</returns>
+        public override char Previous()
+        {
+            if (pos <= begin)
+            {
+                return DONE;
+            }
+
+            pos--;
+            return text[pos];
+        }
+
+
+        /// <summary>
+        /// Gets the index at which the iterated range begins.
+        /// </summary>
+        public override int BeginIndex
+        {
+            get { return begin; }
+        }
+
+        /// <summary>
+        /// Gets the index just past the end of the iterated range.
+        /// </summary>
+        public override int EndIndex
+        {
+            get { return end; }
+        }
+
+        /// <summary>
+        /// Gets the current position.
+        /// </summary>
+        public override int Index
+        {
+            get { return pos; }
+        }
+
+        /// <summary>
+        /// Returns the whole backing string, not only the part inside the iterated range.
+        /// </summary>
+        public override string GetTextAsString()
+        {
+            string backingText = text;
+            return backingText;
+        }
+
+        /// <summary>
+        /// Compares two iterators for value equality. Two iterators are equal when they
+        /// iterate over the same text (ordinal comparison) with the same range and the
+        /// same current position.
+        /// </summary>
+        /// <param name="obj">The object to compare with.</param>
+        /// <returns><c>true</c> if <paramref name="obj"/> is an equal
+        /// <see cref="StringCharacterIterator"/>; otherwise <c>false</c>.</returns>
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj))
+            {
+                return true;
+            }
+
+            StringCharacterIterator that = obj as StringCharacterIterator;
+            if (that == null)
+            {
+                return false;
+            }
+
+            // Compare the cheap integer state first, then the text itself.
+            return pos == that.pos
+                && begin == that.begin
+                && end == that.end
+                && text.Equals(that.text, StringComparison.Ordinal);
+        }
+
+        /// <summary>
+        /// Computes a hash code from the text, range and position, so that iterators
+        /// which are equal according to <see cref="Equals(object)"/> hash alike.
+        /// </summary>
+        /// <returns>The hash code for this iterator.</returns>
+        public override int GetHashCode()
+        {
+            // The hash must be based on the text, not on base.GetHashCode(): the base
+            // implementation is identity-based and would give equal iterators
+            // different hash codes.
+            return text.GetHashCode() ^ pos ^ begin ^ end;
+        }
+
+        /// <summary>
+        /// Creates a shallow copy of this iterator via <see cref="object.MemberwiseClone"/>.
+        /// </summary>
+        /// <returns>The copy.</returns>
+        public override object Clone()
+        {
+            object copy = MemberwiseClone();
+            return copy;
+        }
+ }
+}
+#endif
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.ICU/project.json b/src/Lucene.Net.ICU/project.json
new file mode 100644
index 0000000..f0889b8
--- /dev/null
+++ b/src/Lucene.Net.ICU/project.json
@@ -0,0 +1,64 @@
+{
+ "version": "4.8.0",
+ "title": "Lucene.Net.ICU",
+ "description": "International Components for Unicode-based features including Thai analyzer support, an international postings highlighter, and BreakIterator support for the vector highlighter for the Lucene.Net full-text search engine library from The Apache Software Foundation.",
+ "authors": [ "The Apache Software Foundation" ],
+ "packOptions": {
+ "projectUrl": "http://lucenenet.apache.org/",
+ "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt",
+ "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true",
+ "owners": [ "The Apache Software Foundation" ],
+ "repository": { "url": "https://github.com/apache/lucenenet" },
+ "tags": [ "lucene.net", "core", "text", "search", "information", "retrieval", "lucene", "apache", "analysis", "index", "query" ]
+ },
+ "buildOptions": {
+ "define": [ "FEATURE_BREAKITERATOR" ],
+ "compile": {
+ "includeFiles": [
+ "../CommonAssemblyInfo.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs",
+ "../Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/Passage.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs",
+ "../Lucene.Net.Highlighter/PostingsHighlight/WholeBreakIterator.cs",
+ "../Lucene.Net.Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs"
+ ]
+ },
+ "embed": {
+ "includeFiles": [ "Analysis/Th/stopwords.txt" ]
+ }
+ },
+ "dependencies": {
+ "icu.net": "54.1.1-alpha",
+ "Lucene.Net": "4.8.0",
+ "Lucene.Net.Analysis.Common": "4.8.0",
+ "Lucene.Net.Highlighter": "4.8.0"
+ },
+ "frameworks": {
+ "netstandard1.5": {
+ "imports": "dnxcore50",
+ "buildOptions": {
+ "debugType": "portable",
+ "define": [ "NETSTANDARD" ]
+ },
+ "dependencies": {
+ "NETStandard.Library": "1.6.0"
+ }
+ },
+ "net451": {
+ "buildOptions": {
+ "debugType": "full",
+ "define": [ "FEATURE_SERIALIZABLE" ]
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt b/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
deleted file mode 100644
index 07f0fab..0000000
--- a/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj b/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
deleted file mode 100644
index e2ccbc0..0000000
--- a/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
+++ /dev/null
@@ -1,145 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
-
--->
-
-<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
- <PropertyGroup>
- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
- <ProjectGuid>{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}</ProjectGuid>
- <OutputType>Library</OutputType>
- <AppDesignerFolder>Properties</AppDesignerFolder>
- <RootNamespace>Lucene.Net</RootNamespace>
- <AssemblyName>Lucene.Net.Icu</AssemblyName>
- <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
- <FileAlignment>512</FileAlignment>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
- <DebugSymbols>true</DebugSymbols>
- <DebugType>full</DebugType>
- <Optimize>false</Optimize>
- <OutputPath>bin\Debug\</OutputPath>
- <DefineConstants>DEBUG;TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
- <DebugType>pdbonly</DebugType>
- <Optimize>true</Optimize>
- <OutputPath>bin\Release\</OutputPath>
- <DefineConstants>TRACE</DefineConstants>
- <ErrorReport>prompt</ErrorReport>
- <WarningLevel>4</WarningLevel>
- </PropertyGroup>
- <PropertyGroup>
- <DefineConstants>$(DefineConstants);FEATURE_BREAKITERATOR;FEATURE_SERIALIZABLE</DefineConstants>
- </PropertyGroup>
- <ItemGroup>
- <Reference Include="System" />
- <Reference Include="System.Core" />
- <Reference Include="Microsoft.CSharp" />
- <Reference Include="System.Data" />
- </ItemGroup>
- <ItemGroup>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiAnalyzer.cs">
- <Link>Analysis\Th\ThaiAnalyzer.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizer.cs">
- <Link>Analysis\Th\ThaiTokenizer.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizerFactory.cs">
- <Link>Analysis\Th\ThaiTokenizerFactory.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilter.cs">
- <Link>Analysis\Th\ThaiWordFilter.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilterFactory.cs">
- <Link>Analysis\Th\ThaiWordFilterFactory.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\CharArrayIterator.cs">
- <Link>Analysis\Util\CharArrayIterator.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs">
- <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs">
- <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\MultiTermHighlighting.cs">
- <Link>Search\PostingsHighlight\MultiTermHighlighting.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\Passage.cs">
- <Link>Search\PostingsHighlight\Passage.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageFormatter.cs">
- <Link>Search\PostingsHighlight\PassageFormatter.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageScorer.cs">
- <Link>Search\PostingsHighlight\PassageScorer.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PostingsHighlighter.cs">
- <Link>Search\PostingsHighlight\PostingsHighlighter.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\WholeBreakIterator.cs">
- <Link>Search\PostingsHighlight\WholeBreakIterator.cs</Link>
- </Compile>
- <Compile Include="..\Lucene.Net.Highlighter\VectorHighlight\BreakIteratorBoundaryScanner.cs">
- <Link>Search\VectorHighlight\BreakIteratorBoundaryScanner.cs</Link>
- </Compile>
- <Compile Include="Support\BreakIterator.cs" />
- <Compile Include="Support\CharacterIterator.cs" />
- <Compile Include="Support\IcuBreakIterator.cs" />
- <Compile Include="Properties\AssemblyInfo.cs" />
- <Compile Include="..\CommonAssemblyInfo.cs">
- <Link>Properties\CommonAssemblyInfo.cs</Link>
- </Compile>
- <Compile Include="Support\StringCharacterIterator.cs" />
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
- <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
- <Name>Lucene.Net.Analysis.Common</Name>
- </ProjectReference>
- <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj">
- <Project>{e9e769ea-8504-44bc-8dc9-ccf958765f8f}</Project>
- <Name>Lucene.Net.Highlighter</Name>
- </ProjectReference>
- <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
- <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
- <Name>Lucene.Net</Name>
- </ProjectReference>
- </ItemGroup>
- <ItemGroup>
- <None Include="Lucene.Net.Icu.project.json" />
- </ItemGroup>
- <ItemGroup>
- <EmbeddedResource Include="Analysis\Th\stopwords.txt" />
- </ItemGroup>
- <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
- <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
- Other similar extension points exist, see Microsoft.Common.targets.
- <Target Name="BeforeBuild">
- </Target>
- <Target Name="AfterBuild">
- </Target>
- -->
-</Project>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json b/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
deleted file mode 100644
index af28fc8..0000000
--- a/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
- "runtimes": {
- "win": {}
- },
- "dependencies": {
- "icu.net": "54.1.1-alpha"
- },
- "frameworks": {
- "net451": {}
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj b/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
deleted file mode 100644
index 4b09c43..0000000
--- a/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
+++ /dev/null
@@ -1,39 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
-
--->
-<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <PropertyGroup>
- <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
- <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
- </PropertyGroup>
- <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
- <PropertyGroup Label="Globals">
- <ProjectGuid>44a5341b-0f52-429d-977a-c35e10eccadf</ProjectGuid>
- <RootNamespace>Lucene.Net.Search</RootNamespace>
- <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
- <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
- <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
- </PropertyGroup>
- <PropertyGroup>
- <SchemaVersion>2.0</SchemaVersion>
- </PropertyGroup>
- <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
-</Project>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs b/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
deleted file mode 100644
index cfe269c..0000000
--- a/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Reflection;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-// General Information about an assembly is controlled through the following
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Icu")]
-[assembly: AssemblyDescription(
- "International Components for Unicode-based features including Thai analyzer support, " +
- "an international postings highlighter, and BreakIterator support for the vector highlighter in Lucene.Net.Highlighter " +
- "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyDefaultAlias("Lucene.Net.Icu")]
-[assembly: AssemblyCulture("")]
-
-[assembly: CLSCompliant(true)]
-
-// Setting ComVisible to false makes the types in this assembly not visible
-// to COM components. If you need to access a type in this assembly from
-// COM, set the ComVisible attribute to true on that type.
-[assembly: ComVisible(false)]
-
-// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("349cb7c9-7534-4e1d-9b0a-5521441af0ae")]
-
-// for testing
-[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")]
-
-// NOTE: Version information is in CommonAssemblyInfo.cs
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Support/BreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/BreakIterator.cs b/src/Lucene.Net.Icu/Support/BreakIterator.cs
deleted file mode 100644
index df4a945..0000000
--- a/src/Lucene.Net.Icu/Support/BreakIterator.cs
+++ /dev/null
@@ -1,248 +0,0 @@
-#if FEATURE_BREAKITERATOR
-using System;
-
-namespace Lucene.Net.Support
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// The <c>BreakIterator</c> class declares the abstract API for finding
- /// the location of boundaries in text. Instances of <c>BreakIterator</c>
- /// maintain a current position and scan over text
- /// returning the index of characters where boundaries occur.
- /// </summary>
- public abstract class BreakIterator
-#if FEATURE_CLONEABLE
- : ICloneable
-#endif
- {
- /// <summary>
- /// Constructor. This base class declares no instance fields and has no default behavior.
- /// </summary>
- protected BreakIterator()
- {
- }
-
- /// <summary>
- /// Creates a shallow copy of this iterator using <see cref="object.MemberwiseClone"/>.
- /// </summary>
- /// <returns>A member-wise (shallow) copy of this instance.</returns>
- public object Clone()
- {
- return MemberwiseClone();
- }
-
- /// <summary>
- /// DONE (-1) is returned by Previous(), Next(), Next(int), Preceding(int)
- /// and Following(int) when either the first or last text boundary has been
- /// reached.
- /// </summary>
- public static readonly int DONE = -1;
-
- /// <summary>
- /// Returns the first boundary. The iterator's current position is set
- /// to the first text boundary.
- /// </summary>
- /// <returns>The character index of the first text boundary.</returns>
- public abstract int First();
-
- /// <summary>
- /// Returns the last boundary. The iterator's current position is set
- /// to the last text boundary.
- /// </summary>
- /// <returns>The character index of the last text boundary.</returns>
- public abstract int Last();
-
- /// <summary>
- /// Returns the nth boundary from the current boundary. If either
- /// the first or last text boundary has been reached, it returns
- /// <see cref="BreakIterator.DONE"/> and the current position is set to either
- /// the first or last text boundary depending on which one is reached. Otherwise,
- /// the iterator's current position is set to the new boundary.
- /// For example, if the iterator's current position is the mth text boundary
- /// and three more boundaries exist from the current boundary to the last text
- /// boundary, the Next(2) call will return m + 2. The new text position is set
- /// to the (m + 2)th text boundary. A Next(4) call would return
- /// <see cref="BreakIterator.DONE"/> and the last text boundary would become the
- /// new text position.
- /// </summary>
- /// <param name="n">
- /// Which boundary to return. A value of 0
- /// does nothing. Negative values move to previous boundaries
- /// and positive values move to later boundaries.
- /// </param>
- /// <returns>
- /// The character index of the nth boundary from the current position
- /// or <see cref="BreakIterator.DONE"/> if either first or last text boundary
- /// has been reached.
- /// </returns>
- public abstract int Next(int n);
-
- /// <summary>
- /// Returns the boundary following the current boundary. If the current boundary
- /// is the last text boundary, it returns <see cref="BreakIterator.DONE"/> and
- /// the iterator's current position is unchanged. Otherwise, the iterator's
- /// current position is set to the boundary following the current boundary.
- /// </summary>
- /// <returns>
- /// The character index of the next text boundary or
- /// <see cref="BreakIterator.DONE"/> if the current boundary is the last text
- /// boundary.
- /// Equivalent to Next(1).
- /// </returns>
- /// <seealso cref="Next(int)"/>
- public abstract int Next();
-
- /// <summary>
- /// Returns the boundary preceding the current boundary. If the current boundary
- /// is the first text boundary, it returns <see cref="BreakIterator.DONE"/> and
- /// the iterator's current position is unchanged. Otherwise, the iterator's
- /// current position is set to the boundary preceding the current boundary.
- /// </summary>
- /// <returns>
- /// The character index of the previous text boundary or
- /// <see cref="BreakIterator.DONE"/> if the current boundary is the first text
- /// boundary.
- /// </returns>
- public abstract int Previous();
-
- /// <summary>
- /// Returns the first boundary following the specified character offset. If the
- /// specified offset is equal to the last text boundary, it returns
- /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
- /// Otherwise, the iterator's current position is set to the returned boundary.
- /// The value returned is always greater than the offset or the value
- /// <see cref="BreakIterator.DONE"/>.
- /// </summary>
- /// <param name="offset">The character offset to begin scanning.</param>
- /// <returns>
- /// The first boundary after the specified offset or
- /// <see cref="BreakIterator.DONE"/> if the last text boundary is passed in
- /// as the offset.
- /// </returns>
- /// <exception cref="ArgumentException">
- /// If the specified offset is less than
- /// the first text boundary or greater than the last text boundary.
- /// </exception>
- public abstract int Following(int offset);
-
- /// <summary>
- /// Returns the last boundary preceding the specified character offset. If the
- /// specified offset is equal to the first text boundary, it returns
- /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged.
- /// Otherwise, the iterator's current position is set to the returned boundary.
- /// The value returned is always less than the offset or the value
- /// <see cref="BreakIterator.DONE"/>.
- /// </summary>
- /// <param name="offset">The character offset to begin scanning.</param>
- /// <returns>
- /// The last boundary before the specified offset or
- /// <see cref="BreakIterator.DONE"/> if the first text boundary is passed in
- /// as the offset.
- /// </returns>
- public abstract int Preceding(int offset);
- //{
- // // NOTE: This implementation is here solely because we can't add new
- // // abstract methods to an existing class. There is almost ALWAYS a
- // // better, faster way to do this.
- // int pos = Following(offset);
- // while (pos >= offset && pos != DONE)
- // {
- // pos = Previous();
- // }
- // return pos;
- //}
-
- /// <summary>
- /// Returns true if the specified character offset is a text boundary.
- /// </summary>
- /// <param name="offset">The character offset to check.</param>
- /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
- /// <exception cref="ArgumentException">
- /// If the specified offset is less than
- /// the first text boundary or greater than the last text boundary.
- /// </exception>
- public abstract bool IsBoundary(int offset);
- //{
- // // NOTE: This implementation probably is wrong for most situations
- // // because it fails to take into account the possibility that a
- // // CharacterIterator passed to setText() may not have a begin offset
- // // of 0. But since the abstract BreakIterator doesn't have that
- // // knowledge, it assumes the begin offset is 0. If you subclass
- // // BreakIterator, copy the SimpleTextBoundary implementation of this
- // // function into your subclass. [This should have been abstract at
- // // this level, but it's too late to fix that now.]
- // if (offset == 0)
- // {
- // return true;
- // }
- // int boundary = Following(offset - 1);
- // if (boundary == DONE)
- // {
- // throw new ArgumentException();
- // }
- // return boundary == offset;
- //}
-
- /// <summary>
- /// Returns the character index of the text boundary that was most
- /// recently returned by Next(), Next(int), Previous(), First(), Last(),
- /// Following(int) or Preceding(int). If any of these methods returns
- /// <see cref="BreakIterator.DONE"/> because either first or last text boundary
- /// has been reached, it returns the first or last text boundary depending on
- /// which one is reached.
- /// </summary>
- /// <returns>
- /// The text boundary returned from the above methods, first or last
- /// text boundary.
- /// </returns>
- /// <seealso cref="Next()"/>
- /// <seealso cref="Next(int)"/>
- /// <seealso cref="Previous()"/>
- /// <seealso cref="First()"/>
- /// <seealso cref="Last()"/>
- /// <seealso cref="Following(int)"/>
- /// <seealso cref="Preceding(int)"/>
- public abstract int Current { get; }
-
- /// <summary>
- /// Gets the text being scanned, exposed as a <see cref="string"/>.
- /// </summary>
- /// <value>The text being scanned.</value>
- //public abstract CharacterIterator GetText();
- public abstract string Text { get; }
-
- /// <summary>
- /// Set a new text string to be scanned by wrapping it in a <see cref="StringCharacterIterator"/>
- /// and passing it to <see cref="SetText(CharacterIterator)"/>. The current scan position is reset to First().
- /// </summary>
- /// <param name="newText">New text to scan.</param>
- public virtual void SetText(string newText)
- {
- SetText(new StringCharacterIterator(newText));
- }
-
- /// <summary>
- /// Set new text to be scanned, supplied as a <see cref="CharacterIterator"/>. The current scan
- /// position is reset to First().
- /// </summary>
- /// <param name="newText">Iterator over the new text to scan.</param>
- public abstract void SetText(CharacterIterator newText);
- }
-}
-#endif