You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2007/02/02 02:57:13 UTC
svn commit: r502439 - in /incubator/lucene.net/trunk/C#/contrib: ./
WordNet.Net/ WordNet.Net/WordNet.Net/ WordNet.Net/WordNet.Net/SynExpand/
WordNet.Net/WordNet.Net/SynLookup/ WordNet.Net/WordNet.Net/Syns2Index/
Author: aroush
Date: Thu Feb 1 17:57:12 2007
New Revision: 502439
URL: http://svn.apache.org/viewvc?view=rev&rev=502439
Log:
WordNet.Net 2.0.0 build 001 release
Added:
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/ABOUT.txt
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/HISTORY.txt
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/LICENSE.txt
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Build.xml
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Package.html
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/README.txt
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/App.ico (with props)
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/AssemblyInfo.cs
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.cs
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.csproj
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/App.ico (with props)
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/AssemblyInfo.cs
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.cs
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.csproj
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/App.ico (with props)
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/AssemblyInfo.cs
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.cs
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.csproj
incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/WordNet.Net-2.0.0.sln
Modified:
incubator/lucene.net/trunk/C#/contrib/README.txt
Modified: incubator/lucene.net/trunk/C#/contrib/README.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/README.txt?view=diff&rev=502439&r1=502438&r2=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/README.txt (original)
+++ incubator/lucene.net/trunk/C#/contrib/README.txt Thu Feb 1 17:57:12 2007
@@ -6,4 +6,5 @@
Highlighter.Net
Snowball.Net
-SpellChecker.Net
\ No newline at end of file
+SpellChecker.Net
+WordNet.Net
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/ABOUT.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/ABOUT.txt?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/ABOUT.txt (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/ABOUT.txt Thu Feb 1 17:57:12 2007
@@ -0,0 +1 @@
+WordNet.Net is a port of Java WordNet to C#. The port from Java to C# of version 2.0.0 was done by George Aroush. To contact George Aroush, please visit http://www.aroush.net/
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/HISTORY.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/HISTORY.txt?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/HISTORY.txt (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/HISTORY.txt Thu Feb 1 17:57:12 2007
@@ -0,0 +1,6 @@
+WordNet.Net History
+-------------------
+
+
+01Feb07:
+ - Release: WordNet.Net.2.0.0 build 001
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/LICENSE.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/LICENSE.txt?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/LICENSE.txt (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/LICENSE.txt Thu Feb 1 17:57:12 2007
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Build.xml
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/Build.xml?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Build.xml (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Build.xml Thu Feb 1 17:57:12 2007
@@ -0,0 +1,73 @@
+<?xml version="1.0"?>
+
+<project name="WordNet.Net" default="default">
+
+ <description>
+ WordNet.Net
+ </description>
+
+ <property name="prolog.file" location="prologwn/wn_s.pl"/>
+ <property name="synindex.dir" location="index"/>
+
+ <available property="synindex.exists" file="${synindex.dir}" type="dir"/>
+
+ <import file="../contrib-build.xml"/>
+
+ <target name="index" depends="compile" description="Build WordNet.Net index">
+ <fail if="synindex.exists">
+ Index already exists - must remove first.
+ </fail>
+
+ <java classname="WordNet.Net.Syns2Index">
+ <classpath>
+ <path refid="compile.classpath"/>
+ <pathelement location="${build.dir}/classes"/>
+ </classpath>
+
+ <arg file="${prolog.file}"/>
+ <arg file="${synindex.dir}"/>
+ </java>
+ </target>
+
+
+ <target name="synonym" description="Find synonyms for word">
+ <fail unless="synindex.exists">
+ Index does not exist.
+ </fail>
+
+ <fail unless="word">
+ Must specify 'word' property.
+ </fail>
+
+ <java classname="WordNet.Net.SynLookup">
+ <classpath>
+ <path refid="compile.classpath"/>
+ <pathelement location="${build.dir}/classes"/>
+ </classpath>
+
+ <arg file="${synindex.dir}"/>
+ <arg value="${word}"/>
+ </java>
+ </target>
+
+ <target name="expand" description="Perform synonym expansion on a query">
+ <fail unless="synindex.exists">
+ Index does not exist.
+ </fail>
+
+ <fail unless="query">
+ Must specify 'query' property.
+ </fail>
+
+ <java classname="WordNet.Net.SynExpand">
+ <classpath>
+ <path refid="compile.classpath"/>
+ <pathelement location="${build.dir}/classes"/>
+ </classpath>
+
+ <arg file="${synindex.dir}"/>
+ <arg value="${query}"/>
+ </java>
+ </target>
+
+</project>
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Package.html
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/Package.html?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Package.html (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Package.html Thu Feb 1 17:57:12 2007
@@ -0,0 +1,28 @@
+<html>
+ <head>
+ <title>WordNet Lucene.Net Synonyms Integration</title>
+ </head>
+ <body>
+ This package uses synonyms defined by <a href="http://www.cogsci.princeton.edu/~wn/">
+ WordNet</a> to build a Lucene.Net index storing them, which in turn can be
+ used for query expansion. You normally run {@link WordNet.Net.Syns2Index} once
+ to build the query index/"database", and then call {@link
+ WordNet.Net.SynExpand#Expand SynExpand.Expand(...)} to expand a query.
+ <p>
+ <h3>
+ Instructions
+ </h3>
+ <ol>
+ <li>
+ Download the <a href="http://wordnet.princeton.edu/3.0/WNprolog-3.0.tar.gz">WordNet
+ prolog database</a>
+ , gunzip, untar etc.</li>
+ <li>
+ Invoke Syns2Index as appropriate to build a synonym index. It takes 2
+ arguments: the path to wn_s.pl from that WordNet download, and the index name.</li>
+ <li>
+ Update your UI so that as appropriate you call SynExpand.Expand(...) to expand
+ user queries with synonyms.</li>
+ </ol>
+ </body>
+</html>
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/README.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/README.txt?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/README.txt (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/README.txt Thu Feb 1 17:57:12 2007
@@ -0,0 +1,5 @@
+As of 2002-11-13 WordNet Lucene contribution contains a single Java/C# class:
+ WordNet.Net.Syns2Index.
+
+This class creates a Lucene index with synonyms for English words from
+a Prolog file, which is a part of WordNet database.
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/App.ico
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynExpand/App.ico?view=auto&rev=502439
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/App.ico
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynExpand/AssemblyInfo.cs?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/AssemblyInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/AssemblyInfo.cs Thu Feb 1 17:57:12 2007
@@ -0,0 +1,58 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+//
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+//
+[assembly: AssemblyTitle("")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("")]
+[assembly: AssemblyCopyright("")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+//
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Revision and Build Numbers
+// by using the '*' (for example "2.0.*"); an explicit version is specified below:
+
+[assembly: AssemblyVersion("2.0.0.1")]
+
+//
+// In order to sign your assembly you must specify a key to use. Refer to the
+// Microsoft .NET Framework documentation for more information on assembly signing.
+//
+// Use the attributes below to control which key is used for signing.
+//
+// Notes:
+// (*) If no key is specified, the assembly is not signed.
+// (*) KeyName refers to a key that has been installed in the Crypto Service
+// Provider (CSP) on your machine. KeyFile refers to a file which contains
+// a key.
+// (*) If the KeyFile and the KeyName values are both specified, the
+// following processing occurs:
+// (1) If the KeyName can be found in the CSP, that key is used.
+// (2) If the KeyName does not exist and the KeyFile does exist, the key
+// in the KeyFile is installed into the CSP and used.
+// (*) In order to create a KeyFile, you can use the sn.exe (Strong Name) utility.
+// When specifying the KeyFile, the location of the KeyFile should be
+// relative to the project output directory which is
+// %Project Directory%\obj\<configuration>. For example, if your KeyFile is
+// located in the project directory, you would specify the AssemblyKeyFile
+// attribute as [assembly: AssemblyKeyFile("..\\..\\mykey.snk")]
+// (*) Delay Signing is an advanced option - see the Microsoft .NET Framework
+// documentation for more information on this.
+//
+[assembly: AssemblyDelaySign(false)]
+[assembly: AssemblyKeyFile("")]
+[assembly: AssemblyKeyName("")]
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.cs?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.cs Thu Feb 1 17:57:12 2007
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Lucene.Net.Store;
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Documents;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+
+namespace WorldNet.Net
+{
+
+
+    /// <summary> Expands a query by looking up synonyms for every term.
+    /// You need to run <see cref="Syns2Index"/> first to build the synonym index.
+    /// </summary>
+    /// <seealso cref="Syns2Index">
+    /// </seealso>
+    public sealed class SynExpand
+    {
+        /// <summary> Test driver for synonym expansion.
+        /// Uses boost factor of 0.9 for illustrative purposes.
+        ///
+        /// If you pass in the query "big dog" then it prints out:
+        ///
+        /// <code><pre>
+        /// Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9 bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9 large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9 vainglorious^0.9 vauntingly^0.9
+        /// dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9 dogtooth^0.9 firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9 pawl^0.9 tag^0.9 tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
+        /// </pre></code>
+        /// </summary>
+        /// <param name="args">the index path followed by the query text</param>
+        [STAThread]
+        public static void Main(System.String[] args)
+        {
+            if (args.Length != 2)
+            {
+                System.Console.Out.WriteLine(typeof(SynExpand) + " <index path> <query>");
+                return;
+            }
+
+            System.String query = args[1];
+            System.String field = "contents";
+
+            // Close the directory and the searcher even when expansion throws.
+            FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
+            try
+            {
+                IndexSearcher searcher = new IndexSearcher(directory);
+                try
+                {
+                    Query q = Expand(query, searcher, new StandardAnalyzer(), field, 0.9f);
+                    System.Console.Out.WriteLine("Query: " + q.ToString(field));
+                }
+                finally
+                {
+                    searcher.Close();
+                }
+            }
+            finally
+            {
+                directory.Close();
+            }
+        }
+
+        /// <summary> Perform synonym expansion on a query.
+        /// </summary>
+        /// <param name="query">user's query, assumed to contain no "special" query syntax: plain words such as "big dog" make sense, but a query like "title:foo^1.2" does not and should presumably be passed directly to the default query parser; must not be null
+        /// </param>
+        /// <param name="syns">an opened searcher over the Lucene index previously created with Syns2Index; it is not closed by this method
+        /// </param>
+        /// <param name="a">optional analyzer used to parse the user's query; when null a StandardAnalyzer is used
+        /// </param>
+        /// <param name="field">optional field name to search in, or null for the default of "contents"
+        /// </param>
+        /// <param name="boost">optional boost applied to synonyms; a value of 0 or less leaves the default boost untouched
+        /// </param>
+        /// <returns> the expanded Query </returns>
+        /// <exception cref="System.ArgumentNullException">if query is null</exception>
+        public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String field, float boost)
+        {
+            if (query == null)
+                throw new System.ArgumentNullException("query");
+            if (field == null)
+                field = "contents";
+            if (a == null)
+                a = new StandardAnalyzer();
+
+            System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups
+            System.Collections.IList top = new System.Collections.ArrayList(); // top level words, in query order
+
+            // [1] Parse query into separate words so that when we expand we can avoid dups
+            TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
+            try
+            {
+                Lucene.Net.Analysis.Token t;
+                while ((t = ts.Next()) != null)
+                {
+                    System.String word = t.TermText();
+                    if (!already.Contains(word))
+                    {
+                        already.Add(word, word);
+                        top.Add(word);
+                    }
+                }
+            }
+            finally
+            {
+                ts.Close(); // release the token stream even if tokenizing throws
+            }
+
+            // [2] form query
+            BooleanQuery tmp = new BooleanQuery();
+            foreach (System.String word in top)
+            {
+                // [2a] add top level words in
+                tmp.Add(new TermQuery(new Term(field, word)), BooleanClause.Occur.SHOULD);
+
+                // [2b] add in unique synonyms
+                Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
+                for (int i = 0; i < hits.Length(); i++)
+                {
+                    foreach (System.String syn in hits.Doc(i).GetValues(Syns2Index.F_SYN))
+                    {
+                        if (already.Contains(syn))
+                            continue; // avoid dups of top level words and synonyms
+                        already.Add(syn, syn);
+                        TermQuery tq = new TermQuery(new Term(field, syn));
+                        if (boost > 0) // else keep normal 1.0
+                            tq.SetBoost(boost);
+                        tmp.Add(tq, BooleanClause.Occur.SHOULD);
+                    }
+                }
+            }
+            return tmp;
+        }
+    }
+
+
+    /// <summary>
+    /// Field names of the synonym index; taken from project WordNet.Net.Syns2Index.
+    /// </summary>
+    public class Syns2Index
+    {
+        /// <summary> Name of the field read for a word's synonyms. </summary>
+        public const string F_SYN = "syn";
+
+        /// <summary> Name of the field searched to find a word's entry. </summary>
+        public const string F_WORD = "word";
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.csproj?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.csproj (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynExpand/SynExpand.csproj Thu Feb 1 17:57:12 2007
@@ -0,0 +1,109 @@
+<VisualStudioProject>
+ <CSHARP
+ ProjectType = "Local"
+ ProductVersion = "7.10.3077"
+ SchemaVersion = "2.0"
+ ProjectGuid = "{1407C9BA-337C-4C6C-B065-68328D3871B3}"
+ >
+ <Build>
+ <Settings
+ ApplicationIcon = "App.ico"
+ AssemblyKeyContainerName = ""
+ AssemblyName = "SynExpand"
+ AssemblyOriginatorKeyFile = ""
+ DefaultClientScript = "JScript"
+ DefaultHTMLPageLayout = "Grid"
+ DefaultTargetSchema = "IE50"
+ DelaySign = "false"
+ OutputType = "Exe"
+ PreBuildEvent = ""
+ PostBuildEvent = ""
+ RootNamespace = "SynExpand"
+ RunPostBuildEvent = "OnBuildSuccess"
+ StartupObject = ""
+ >
+ <Config
+ Name = "Debug"
+ AllowUnsafeBlocks = "false"
+ BaseAddress = "285212672"
+ CheckForOverflowUnderflow = "false"
+ ConfigurationOverrideFile = ""
+ DefineConstants = "DEBUG;TRACE"
+ DocumentationFile = ""
+ DebugSymbols = "true"
+ FileAlignment = "4096"
+ IncrementalBuild = "false"
+ NoStdLib = "false"
+ NoWarn = ""
+ Optimize = "false"
+ OutputPath = "bin\Debug\"
+ RegisterForComInterop = "false"
+ RemoveIntegerChecks = "false"
+ TreatWarningsAsErrors = "false"
+ WarningLevel = "4"
+ />
+ <Config
+ Name = "Release"
+ AllowUnsafeBlocks = "false"
+ BaseAddress = "285212672"
+ CheckForOverflowUnderflow = "false"
+ ConfigurationOverrideFile = ""
+ DefineConstants = "TRACE"
+ DocumentationFile = ""
+ DebugSymbols = "false"
+ FileAlignment = "4096"
+ IncrementalBuild = "false"
+ NoStdLib = "false"
+ NoWarn = ""
+ Optimize = "true"
+ OutputPath = "bin\Release\"
+ RegisterForComInterop = "false"
+ RemoveIntegerChecks = "false"
+ TreatWarningsAsErrors = "false"
+ WarningLevel = "4"
+ />
+ </Settings>
+ <References>
+ <Reference
+ Name = "System"
+ AssemblyName = "System"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.dll"
+ />
+ <Reference
+ Name = "System.Data"
+ AssemblyName = "System.Data"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.dll"
+ />
+ <Reference
+ Name = "System.XML"
+ AssemblyName = "System.XML"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.XML.dll"
+ />
+ <Reference
+ Name = "Lucene.Net"
+ AssemblyName = "Lucene.Net"
+ HintPath = "..\..\Lucene.Net-2.0.1-001.src\Lucene.Net\bin\Debug\Lucene.Net.dll"
+ />
+ </References>
+ </Build>
+ <Files>
+ <Include>
+ <File
+ RelPath = "App.ico"
+ BuildAction = "Content"
+ />
+ <File
+ RelPath = "AssemblyInfo.cs"
+ SubType = "Code"
+ BuildAction = "Compile"
+ />
+ <File
+ RelPath = "SynExpand.cs"
+ SubType = "Code"
+ BuildAction = "Compile"
+ />
+ </Include>
+ </Files>
+ </CSHARP>
+</VisualStudioProject>
+
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/App.ico
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynLookup/App.ico?view=auto&rev=502439
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/App.ico
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynLookup/AssemblyInfo.cs?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/AssemblyInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/AssemblyInfo.cs Thu Feb 1 17:57:12 2007
@@ -0,0 +1,58 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+//
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+//
+[assembly: AssemblyTitle("")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("")]
+[assembly: AssemblyCopyright("")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+//
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values, or you can default the Revision and Build Numbers
+// by using '*' (for example "2.0.*"). All four values are specified explicitly below:
+
+[assembly: AssemblyVersion("2.0.0.1")]
+
+//
+// In order to sign your assembly you must specify a key to use. Refer to the
+// Microsoft .NET Framework documentation for more information on assembly signing.
+//
+// Use the attributes below to control which key is used for signing.
+//
+// Notes:
+// (*) If no key is specified, the assembly is not signed.
+// (*) KeyName refers to a key that has been installed in the Crypto Service
+// Provider (CSP) on your machine. KeyFile refers to a file which contains
+// a key.
+// (*) If the KeyFile and the KeyName values are both specified, the
+// following processing occurs:
+// (1) If the KeyName can be found in the CSP, that key is used.
+// (2) If the KeyName does not exist and the KeyFile does exist, the key
+// in the KeyFile is installed into the CSP and used.
+// (*) In order to create a KeyFile, you can use the sn.exe (Strong Name) utility.
+// When specifying the KeyFile, the location of the KeyFile should be
+// relative to the project output directory which is
+// %Project Directory%\obj\<configuration>. For example, if your KeyFile is
+// located in the project directory, you would specify the AssemblyKeyFile
+// attribute as [assembly: AssemblyKeyFile("..\\..\\mykey.snk")]
+// (*) Delay Signing is an advanced option - see the Microsoft .NET Framework
+// documentation for more information on this.
+//
+[assembly: AssemblyDelaySign(false)]
+[assembly: AssemblyKeyFile("")]
+[assembly: AssemblyKeyName("")]
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.cs?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.cs Thu Feb 1 17:57:12 2007
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Lucene.Net.Store;
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Documents;
+using Lucene.Net.Analysis;
+
+namespace WorldNet.Net
+{
+
+
+ /// <summary> Test program to look up synonyms.</summary>
+ public class SynLookup
+ {
+
+ /// <summary> Command-line entry point: prints every synonym stored in the index for one word.</summary>
+ /// <param name="args">exactly two values: the path of the synonym index and the word to look up;
+ /// with any other argument count a usage line is printed and nothing is searched
+ /// </param>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+     if (args.Length != 2)
+     {
+         System.Console.Out.WriteLine(typeof(SynLookup) + " <index path> <word>");
+         return;
+     }
+
+     System.String word = args[1];
+
+     FSDirectory directory = FSDirectory.GetDirectory(args[0], false);
+     try
+     {
+         IndexSearcher searcher = new IndexSearcher(directory);
+         try
+         {
+             // exact-match lookup on the "word" field
+             Hits hits = searcher.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
+
+             if (hits.Length() == 0)
+             {
+                 System.Console.Out.WriteLine("No synonyms found for " + word);
+             }
+             else
+             {
+                 System.Console.Out.WriteLine("Synonyms found for \"" + word + "\":");
+             }
+
+             for (int i = 0; i < hits.Length(); i++)
+             {
+                 Document doc = hits.Doc(i);
+
+                 // a document may carry several "syn" values, one per synonym
+                 System.String[] values = doc.GetValues(Syns2Index.F_SYN);
+
+                 for (int j = 0; j < values.Length; j++)
+                 {
+                     System.Console.Out.WriteLine(values[j]);
+                 }
+             }
+         }
+         finally
+         {
+             // release the searcher even when the search or the output fails
+             searcher.Close();
+         }
+     }
+     finally
+     {
+         directory.Close();
+     }
+ }
+
+
+ /// <summary> Perform synonym expansion on a query.
+ /// </summary>
+ /// <param name="query">text to expand; it is split into words by <paramref name="a"/>
+ /// </param>
+ /// <param name="syns">searcher that is queried for each word's synonyms (documents are matched on
+ /// the "word" field and the synonyms are read from their "syn" values)
+ /// </param>
+ /// <param name="a">analyzer used to tokenize the query
+ /// </param>
+ /// <param name="field">name of the field that every generated term query targets
+ /// </param>
+ /// <param name="boost">boost set on each synonym clause when greater than 0; otherwise the
+ /// clause keeps its default boost
+ /// </param>
+ /// <returns>a BooleanQuery holding one SHOULD clause per distinct query word and one per
+ /// distinct synonym that was not already added
+ /// </returns>
+ public static Query Expand(System.String query, Searcher syns, Analyzer a, System.String field, float boost)
+ {
+     System.Collections.Hashtable already = new System.Collections.Hashtable(); // avoid dups
+     System.Collections.IList top = new System.Collections.ArrayList(); // needs to be separately listed..
+
+     // [1] Parse query into separate words so that when we expand we can avoid dups
+     TokenStream ts = a.TokenStream(field, new System.IO.StringReader(query));
+     try
+     {
+         Lucene.Net.Analysis.Token t;
+         while ((t = ts.Next()) != null)
+         {
+             System.String word = t.TermText();
+             if (!already.Contains(word))
+             {
+                 already.Add(word, word);
+                 top.Add(word);
+             }
+         }
+     }
+     finally
+     {
+         // the stream is only needed for tokenizing; release it once the words are collected
+         ts.Close();
+     }
+
+     BooleanQuery tmp = new BooleanQuery();
+
+     // [2] form query
+     foreach (System.String word in top)
+     {
+         // [2a] add top level words in
+         tmp.Add(new TermQuery(new Term(field, word)), BooleanClause.Occur.SHOULD);
+
+         // [2b] add in unique synonyms
+         Hits hits = syns.Search(new TermQuery(new Term(Syns2Index.F_WORD, word)));
+         for (int i = 0; i < hits.Length(); i++)
+         {
+             Document doc = hits.Doc(i);
+             System.String[] values = doc.GetValues(Syns2Index.F_SYN);
+             for (int j = 0; j < values.Length; j++)
+             {
+                 System.String syn = values[j];
+                 if (already.Contains(syn))
+                 {
+                     continue; // already present as a query word or an earlier synonym
+                 }
+
+                 already.Add(syn, syn);
+                 TermQuery tq = new TermQuery(new Term(field, syn));
+                 if (boost > 0)
+                 {
+                     // else keep the default boost
+                     tq.SetBoost(boost);
+                 }
+                 tmp.Add(tq, BooleanClause.Occur.SHOULD);
+             }
+         }
+     }
+
+     return tmp;
+ }
+ }
+
+
+ /// <summary>
+ /// Field-name constants, from project WordNet.Net.Syns2Index.
+ /// </summary>
+ public class Syns2Index
+ {
+     /// <summary> Name of the field that is matched against the looked-up word ("word").</summary>
+     public const string F_WORD = "word";
+
+     /// <summary> Name of the field whose values are the synonyms ("syn").</summary>
+     public const string F_SYN = "syn";
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.csproj?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.csproj (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/SynLookup/SynLookup.csproj Thu Feb 1 17:57:12 2007
@@ -0,0 +1,109 @@
+<VisualStudioProject>
+ <CSHARP
+ ProjectType = "Local"
+ ProductVersion = "7.10.3077"
+ SchemaVersion = "2.0"
+ ProjectGuid = "{2CA12E3F-76E1-4FA6-9E87-37079A7B7C69}"
+ >
+ <Build>
+ <Settings
+ ApplicationIcon = "App.ico"
+ AssemblyKeyContainerName = ""
+ AssemblyName = "SynLookup"
+ AssemblyOriginatorKeyFile = ""
+ DefaultClientScript = "JScript"
+ DefaultHTMLPageLayout = "Grid"
+ DefaultTargetSchema = "IE50"
+ DelaySign = "false"
+ OutputType = "Exe"
+ PreBuildEvent = ""
+ PostBuildEvent = ""
+ RootNamespace = "SynLookup"
+ RunPostBuildEvent = "OnBuildSuccess"
+ StartupObject = ""
+ >
+ <Config
+ Name = "Debug"
+ AllowUnsafeBlocks = "false"
+ BaseAddress = "285212672"
+ CheckForOverflowUnderflow = "false"
+ ConfigurationOverrideFile = ""
+ DefineConstants = "DEBUG;TRACE"
+ DocumentationFile = ""
+ DebugSymbols = "true"
+ FileAlignment = "4096"
+ IncrementalBuild = "false"
+ NoStdLib = "false"
+ NoWarn = ""
+ Optimize = "false"
+ OutputPath = "bin\Debug\"
+ RegisterForComInterop = "false"
+ RemoveIntegerChecks = "false"
+ TreatWarningsAsErrors = "false"
+ WarningLevel = "4"
+ />
+ <Config
+ Name = "Release"
+ AllowUnsafeBlocks = "false"
+ BaseAddress = "285212672"
+ CheckForOverflowUnderflow = "false"
+ ConfigurationOverrideFile = ""
+ DefineConstants = "TRACE"
+ DocumentationFile = ""
+ DebugSymbols = "false"
+ FileAlignment = "4096"
+ IncrementalBuild = "false"
+ NoStdLib = "false"
+ NoWarn = ""
+ Optimize = "true"
+ OutputPath = "bin\Release\"
+ RegisterForComInterop = "false"
+ RemoveIntegerChecks = "false"
+ TreatWarningsAsErrors = "false"
+ WarningLevel = "4"
+ />
+ </Settings>
+ <References>
+ <Reference
+ Name = "System"
+ AssemblyName = "System"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.dll"
+ />
+ <Reference
+ Name = "System.Data"
+ AssemblyName = "System.Data"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.dll"
+ />
+ <Reference
+ Name = "System.XML"
+ AssemblyName = "System.XML"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.XML.dll"
+ />
+ <Reference
+ Name = "Lucene.Net"
+ AssemblyName = "Lucene.Net"
+ HintPath = "..\..\Lucene.Net-2.0.1-001.src\Lucene.Net\bin\Debug\Lucene.Net.dll"
+ />
+ </References>
+ </Build>
+ <Files>
+ <Include>
+ <File
+ RelPath = "App.ico"
+ BuildAction = "Content"
+ />
+ <File
+ RelPath = "AssemblyInfo.cs"
+ SubType = "Code"
+ BuildAction = "Compile"
+ />
+ <File
+ RelPath = "SynLookup.cs"
+ SubType = "Code"
+ BuildAction = "Compile"
+ />
+ </Include>
+ </Files>
+ </CSHARP>
+</VisualStudioProject>
+
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/App.ico
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/Syns2Index/App.ico?view=auto&rev=502439
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/App.ico
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/Syns2Index/AssemblyInfo.cs?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/AssemblyInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/AssemblyInfo.cs Thu Feb 1 17:57:12 2007
@@ -0,0 +1,58 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+//
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+//
+[assembly: AssemblyTitle("")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("")]
+[assembly: AssemblyCopyright("")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+//
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values, or you can default the Revision and Build Numbers
+// by using '*' (for example "2.0.*"). All four values are specified explicitly below:
+
+[assembly: AssemblyVersion("2.0.0.1")]
+
+//
+// In order to sign your assembly you must specify a key to use. Refer to the
+// Microsoft .NET Framework documentation for more information on assembly signing.
+//
+// Use the attributes below to control which key is used for signing.
+//
+// Notes:
+// (*) If no key is specified, the assembly is not signed.
+// (*) KeyName refers to a key that has been installed in the Crypto Service
+// Provider (CSP) on your machine. KeyFile refers to a file which contains
+// a key.
+// (*) If the KeyFile and the KeyName values are both specified, the
+// following processing occurs:
+// (1) If the KeyName can be found in the CSP, that key is used.
+// (2) If the KeyName does not exist and the KeyFile does exist, the key
+// in the KeyFile is installed into the CSP and used.
+// (*) In order to create a KeyFile, you can use the sn.exe (Strong Name) utility.
+// When specifying the KeyFile, the location of the KeyFile should be
+// relative to the project output directory which is
+// %Project Directory%\obj\<configuration>. For example, if your KeyFile is
+// located in the project directory, you would specify the AssemblyKeyFile
+// attribute as [assembly: AssemblyKeyFile("..\\..\\mykey.snk")]
+// (*) Delay Signing is an advanced option - see the Microsoft .NET Framework
+// documentation for more information on this.
+//
+[assembly: AssemblyDelaySign(false)]
+[assembly: AssemblyKeyFile("")]
+[assembly: AssemblyKeyName("")]
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.cs?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.cs Thu Feb 1 17:57:12 2007
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
+using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+
+namespace WorldNet.Net
+{
+
+ /// <summary> Convert the prolog file wn_s.pl from the <a href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">WordNet prolog download</a>
+ /// into a Lucene index suitable for looking up synonyms and performing query expansion (see SynExpand.Expand(...)).
+ ///
+ /// This has been tested with WordNet 2.0.
+ ///
+ /// The index has fields named "word" (<see cref="F_WORD"/>)
+ /// and "syn" (<see cref="F_SYN"/>).
+ /// <p>
+ /// The source word (such as 'big') can be looked up in the
+ /// "word" field, and if present there will be fields named "syn"
+ /// for every synonym. What's tricky here is that there could be <b>multiple</b>
+ /// fields with the same name, in the general case for words that have multiple synonyms.
+ /// That's not a problem with Lucene, you just use <see cref="Lucene.Net.Documents.Document.GetValues"/>.
+ /// </p>
+ /// <p>
+ /// While the WordNet file distinguishes groups of synonyms with
+ /// related meanings we don't do that here.
+ /// </p>
+ ///
+ /// This can take 4 minutes to execute and build an index on a "fast" system and the index takes up almost 3 MB.
+ ///
+ /// </summary>
+ /// <author> Dave Spencer, dave@searchmorph.com
+ /// </author>
+ /// <seealso href="http://www.cogsci.princeton.edu/~wn/">WordNet home page</seealso>
+ /// <seealso href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb man page</seealso>
+ /// <seealso href="http://www.hostmon.com/rfc/advanced.jsp">sample site that uses it</seealso>
+ public class Syns2Index
+ {
+ /// <summary> Writer over the console's standard output stream; assigned in the static constructor with auto-flush enabled.</summary>
+ private static readonly System.IO.StreamWriter o;
+
+ /// <summary> Writer over the console's standard error stream; assigned in the static constructor with auto-flush enabled.</summary>
+ private static readonly System.IO.StreamWriter err;
+
+ /// <summary> Name of the index field that stores one synonym ("syn").</summary>
+ public const System.String F_SYN = "syn";
+
+ /// <summary> Name of the index field that stores the source word ("word").</summary>
+ public const System.String F_WORD = "word";
+
+ /// <summary> Analyzer handed to the IndexWriter when the index is built.</summary>
+ private static readonly Analyzer ana = new StandardAnalyzer();
+
+ /// <summary> Reads a WordNet Prolog synonym file and builds a Lucene synonym index from it.
+ /// Takes arg of prolog file name and index directory.</summary>
+ /// <param name="args">exactly two values: the Prolog file to read and the index directory to
+ /// create (it must not exist yet); otherwise the usage text is printed and the process exits with code 1
+ /// </param>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+     // get command line arguments
+     System.String prologFilename = null; // name of file "wn_s.pl"
+     System.String indexDir = null;
+     if (args.Length == 2)
+     {
+         prologFilename = args[0];
+         indexDir = args[1];
+     }
+     else
+     {
+         Usage();
+         System.Environment.Exit(1);
+     }
+
+     // ensure that the prolog file is readable
+     if (!(new System.IO.FileInfo(prologFilename)).Exists)
+     {
+         err.WriteLine("Error: cannot read Prolog file: " + prologFilename);
+         System.Environment.Exit(1);
+     }
+     // exit if the target index directory already exists
+     if (System.IO.Directory.Exists((new System.IO.FileInfo(indexDir)).FullName))
+     {
+         err.WriteLine("Error: index directory already exists: " + indexDir);
+         err.WriteLine("Please specify a name of a non-existent directory");
+         System.Environment.Exit(1);
+     }
+
+     // maps a word to all the "groups" it's in
+     System.Collections.IDictionary word2Nums = new System.Collections.SortedList();
+     // maps a group to all the words in it
+     System.Collections.IDictionary num2Words = new System.Collections.SortedList();
+     // number of rejected words
+     int ndecent = 0;
+
+     o.WriteLine("Opening Prolog file " + prologFilename);
+     System.IO.FileStream fis = new System.IO.FileStream(prologFilename, System.IO.FileMode.Open, System.IO.FileAccess.Read);
+     try
+     {
+         // a single reader over the file is all that is needed
+         System.IO.StreamReader br = new System.IO.StreamReader(fis, System.Text.Encoding.Default);
+         System.String line;
+
+         // status output
+         int mod = 1;
+         int row = 1;
+         // parse prolog file
+         o.WriteLine("[1/2] Parsing " + prologFilename);
+         while ((line = br.ReadLine()) != null)
+         {
+             // occasional progress: the reporting interval doubles after every report
+             if ((++row) % mod == 0)
+             {
+                 mod *= 2;
+                 o.WriteLine("\t" + row + " " + line + " " + word2Nums.Count + " " + num2Words.Count + " ndecent=" + ndecent);
+             }
+
+             // syntax check
+             if (!line.StartsWith("s("))
+             {
+                 err.WriteLine("OUCH: " + line);
+                 System.Environment.Exit(1);
+             }
+
+             // parse line: the group number runs up to the first comma, the word sits
+             // between the first two single quotes
+             // NOTE(review): a quote character embedded in the word itself would end the word
+             // early - confirm how the Prolog file escapes such words.
+             System.String entry = line.Substring(2);
+             int comma = entry.IndexOf(',');
+             int q1 = entry.IndexOf('\'');
+             int q2 = (q1 < 0) ? -1 : entry.IndexOf('\'', q1 + 1);
+             if (comma < 0 || q1 < 0 || q2 < 0)
+             {
+                 // malformed entry: report it like any other syntax error instead of
+                 // failing inside Substring with an ArgumentOutOfRangeException
+                 err.WriteLine("OUCH: " + line);
+                 System.Environment.Exit(1);
+             }
+             System.String num = entry.Substring(0, comma);
+             // lower-case with the invariant culture so the indexed terms do not depend on
+             // the locale of the machine that builds the index
+             System.String word = entry.Substring(q1 + 1, q2 - q1 - 1).ToLower(System.Globalization.CultureInfo.InvariantCulture);
+
+             // make sure is a normal word
+             if (!IsDecent(word))
+             {
+                 ndecent++;
+                 continue; // don't store words w/ spaces
+             }
+
+             // 1/2: word2Nums map
+             // append to entry or add new one
+             System.Collections.IList lis = (System.Collections.IList) word2Nums[word];
+             if (lis == null)
+             {
+                 lis = new System.Collections.ArrayList();
+                 word2Nums[word] = lis;
+             }
+             lis.Add(num);
+
+             // 2/2: num2Words map
+             lis = (System.Collections.IList) num2Words[num];
+             if (lis == null)
+             {
+                 lis = new System.Collections.ArrayList();
+                 num2Words[num] = lis;
+             }
+             lis.Add(word);
+         }
+     }
+     finally
+     {
+         // close the file even when reading or parsing fails
+         fis.Close();
+     }
+
+     // create the index
+     o.WriteLine("[2/2] Building index to store synonyms, " + " map sizes are " + word2Nums.Count + " and " + num2Words.Count);
+     Index(indexDir, word2Nums, num2Words);
+ }
+
+ /// <summary> Tells whether a word is made up solely of letters.
+ ///
+ /// </summary>
+ /// <param name="s">string to check
+ /// </param>
+ /// <returns> <code>true</code> if no character of the string fails System.Char.IsLetter
+ /// (this includes the empty string), <code>false</code> otherwise
+ /// </returns>
+ private static bool IsDecent(System.String s)
+ {
+     foreach (char c in s)
+     {
+         if (System.Char.IsLetter(c))
+         {
+             continue;
+         }
+         return false; // first non-letter settles it
+     }
+     return true;
+ }
+
+ /// <summary> Forms a Lucene index based on the 2 maps.
+ ///
+ /// </summary>
+ /// <param name="indexDir">the directory where the index should be created
+ /// </param>
+ /// <param name="word2Nums">maps each word to the list of group numbers it belongs to
+ /// </param>
+ /// <param name="num2Words">maps each group number to the list of words in that group
+ /// </param>
+ private static void Index(System.String indexDir, System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words)
+ {
+     int row = 0;
+     int mod = 1;
+
+     // overwrite the specified index if it already exists
+     IndexWriter writer = new IndexWriter(indexDir, ana, true);
+     try
+     {
+         writer.SetUseCompoundFile(true); // why?
+         // blindly up these parameters for speed
+         writer.SetMergeFactor(writer.GetMergeFactor() * 2);
+         writer.SetMaxBufferedDocs(writer.GetMaxBufferedDocs() * 2);
+
+         // for each word
+         foreach (System.String g in word2Nums.Keys)
+         {
+             Document doc = new Document();
+
+             int n = Index(word2Nums, num2Words, g, doc);
+             if (n > 0)
+             {
+                 doc.Add(new Field(F_WORD, g, Field.Store.YES, Field.Index.UN_TOKENIZED));
+                 // occasional progress: the reporting interval doubles after every report
+                 if ((++row % mod) == 0)
+                 {
+                     o.WriteLine("\trow=" + row + "/" + word2Nums.Count + " doc= " + doc);
+                     mod *= 2;
+                 }
+                 writer.AddDocument(doc);
+             } // else degenerate: the word has no synonyms, so nothing is stored for it
+         }
+         o.WriteLine("Optimizing..");
+         writer.Optimize();
+     }
+     finally
+     {
+         // close the writer even when adding a document or optimizing fails
+         writer.Close();
+     }
+ }
+
+ /// <summary> Given the 2 maps fills a document for 1 word.</summary>
+ /// <param name="word2Nums">map from a word to the list of group numbers it appears in
+ /// </param>
+ /// <param name="num2Words">map from a group number to the list of words in that group
+ /// </param>
+ /// <param name="g">the word whose synonyms are collected
+ /// </param>
+ /// <param name="doc">document that receives one "syn" field per synonym
+ /// </param>
+ /// <returns>the number of "syn" fields added to the document
+ /// </returns>
+ private static int Index(System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words, System.String g, Document doc)
+ {
+     // sorted set of every word sharing at least one group with g
+     System.Collections.SortedList synonyms = new System.Collections.SortedList();
+
+     foreach (object groupNum in (System.Collections.IList) word2Nums[g])
+     {
+         foreach (object member in (System.Collections.IList) num2Words[groupNum])
+         {
+             if (!synonyms.Contains(member))
+             {
+                 synonyms.Add(member, member);
+             }
+         }
+     }
+
+     // a word is trivially its own synonym; leave it out
+     synonyms.Remove(g);
+
+     int added = 0;
+     foreach (System.Collections.DictionaryEntry entry in synonyms)
+     {
+         System.String candidate = (System.String) entry.Key;
+         // don't store things like 'pit bull' -> 'american pit bull'
+         if (IsDecent(candidate))
+         {
+             added++;
+             doc.Add(new Field(F_SYN, candidate, Field.Store.YES, Field.Index.NO));
+         }
+     }
+     return added;
+ }
+
+ /// <summary> Writes the command-line usage text to the standard-output writer.</summary>
+ private static void Usage()
+ {
+     System.String usageText = "\n\n" + typeof(Syns2Index) + " <prolog file> <index dir>\n\n";
+     o.WriteLine(usageText);
+ }
+
+ /// <summary> Wraps the console's standard output and standard error streams in auto-flushing writers.</summary>
+ static Syns2Index()
+ {
+     // standard output, using the console's output encoding
+     o = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
+     o.AutoFlush = true;
+
+     // standard error, using the console's error encoding
+     err = new System.IO.StreamWriter(System.Console.OpenStandardError(), System.Console.Error.Encoding);
+     err.AutoFlush = true;
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.csproj?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.csproj (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/Syns2Index/Syns2Index.csproj Thu Feb 1 17:57:12 2007
@@ -0,0 +1,109 @@
+<VisualStudioProject>
+ <CSHARP
+ ProjectType = "Local"
+ ProductVersion = "7.10.3077"
+ SchemaVersion = "2.0"
+ ProjectGuid = "{7563D4D9-AE91-42BA-A270-1D264660F6DF}"
+ >
+ <Build>
+ <Settings
+ ApplicationIcon = "App.ico"
+ AssemblyKeyContainerName = ""
+ AssemblyName = "Syns2Index"
+ AssemblyOriginatorKeyFile = ""
+ DefaultClientScript = "JScript"
+ DefaultHTMLPageLayout = "Grid"
+ DefaultTargetSchema = "IE50"
+ DelaySign = "false"
+ OutputType = "Exe"
+ PreBuildEvent = ""
+ PostBuildEvent = ""
+ RootNamespace = "Syns2Index"
+ RunPostBuildEvent = "OnBuildSuccess"
+ StartupObject = ""
+ >
+ <Config
+ Name = "Debug"
+ AllowUnsafeBlocks = "false"
+ BaseAddress = "285212672"
+ CheckForOverflowUnderflow = "false"
+ ConfigurationOverrideFile = ""
+ DefineConstants = "DEBUG;TRACE"
+ DocumentationFile = ""
+ DebugSymbols = "true"
+ FileAlignment = "4096"
+ IncrementalBuild = "false"
+ NoStdLib = "false"
+ NoWarn = ""
+ Optimize = "false"
+ OutputPath = "bin\Debug\"
+ RegisterForComInterop = "false"
+ RemoveIntegerChecks = "false"
+ TreatWarningsAsErrors = "false"
+ WarningLevel = "4"
+ />
+ <Config
+ Name = "Release"
+ AllowUnsafeBlocks = "false"
+ BaseAddress = "285212672"
+ CheckForOverflowUnderflow = "false"
+ ConfigurationOverrideFile = ""
+ DefineConstants = "TRACE"
+ DocumentationFile = ""
+ DebugSymbols = "false"
+ FileAlignment = "4096"
+ IncrementalBuild = "false"
+ NoStdLib = "false"
+ NoWarn = ""
+ Optimize = "true"
+ OutputPath = "bin\Release\"
+ RegisterForComInterop = "false"
+ RemoveIntegerChecks = "false"
+ TreatWarningsAsErrors = "false"
+ WarningLevel = "4"
+ />
+ </Settings>
+ <References>
+ <Reference
+ Name = "System"
+ AssemblyName = "System"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.dll"
+ />
+ <Reference
+ Name = "System.Data"
+ AssemblyName = "System.Data"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.dll"
+ />
+ <Reference
+ Name = "System.XML"
+ AssemblyName = "System.XML"
+ HintPath = "C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.XML.dll"
+ />
+ <Reference
+ Name = "Lucene.Net"
+ AssemblyName = "Lucene.Net"
+ HintPath = "..\..\Lucene.Net-2.0.1-001.src\Lucene.Net\bin\Debug\Lucene.Net.dll"
+ />
+ </References>
+ </Build>
+ <Files>
+ <Include>
+ <File
+ RelPath = "App.ico"
+ BuildAction = "Content"
+ />
+ <File
+ RelPath = "AssemblyInfo.cs"
+ SubType = "Code"
+ BuildAction = "Compile"
+ />
+ <File
+ RelPath = "Syns2Index.cs"
+ SubType = "Code"
+ BuildAction = "Compile"
+ />
+ </Include>
+ </Files>
+ </CSHARP>
+</VisualStudioProject>
+
Added: incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/WordNet.Net-2.0.0.sln
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/WordNet.Net/WordNet.Net/WordNet.Net-2.0.0.sln?view=auto&rev=502439
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/WordNet.Net-2.0.0.sln (added)
+++ incubator/lucene.net/trunk/C#/contrib/WordNet.Net/WordNet.Net/WordNet.Net-2.0.0.sln Thu Feb 1 17:57:12 2007
@@ -0,0 +1,45 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SynExpand", "SynExpand\SynExpand.csproj", "{1407C9BA-337C-4C6C-B065-68328D3871B3}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Syns2Index", "Syns2Index\Syns2Index.csproj", "{7563D4D9-AE91-42BA-A270-1D264660F6DF}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SynLookup", "SynLookup\SynLookup.csproj", "{2CA12E3F-76E1-4FA6-9E87-37079A7B7C69}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(DPCodeReviewSolutionGUID) = preSolution
+ DPCodeReviewSolutionGUID = {00000000-0000-0000-0000-000000000000}
+ EndGlobalSection
+ GlobalSection(SolutionConfiguration) = preSolution
+ Debug = Debug
+ Release = Release
+ EndGlobalSection
+ GlobalSection(ProjectConfiguration) = postSolution
+ {1407C9BA-337C-4C6C-B065-68328D3871B3}.Debug.ActiveCfg = Debug|.NET
+ {1407C9BA-337C-4C6C-B065-68328D3871B3}.Debug.Build.0 = Debug|.NET
+ {1407C9BA-337C-4C6C-B065-68328D3871B3}.Release.ActiveCfg = Release|.NET
+ {1407C9BA-337C-4C6C-B065-68328D3871B3}.Release.Build.0 = Release|.NET
+ {7563D4D9-AE91-42BA-A270-1D264660F6DF}.Debug.ActiveCfg = Debug|.NET
+ {7563D4D9-AE91-42BA-A270-1D264660F6DF}.Debug.Build.0 = Debug|.NET
+ {7563D4D9-AE91-42BA-A270-1D264660F6DF}.Release.ActiveCfg = Release|.NET
+ {7563D4D9-AE91-42BA-A270-1D264660F6DF}.Release.Build.0 = Release|.NET
+ {2CA12E3F-76E1-4FA6-9E87-37079A7B7C69}.Debug.ActiveCfg = Debug|.NET
+ {2CA12E3F-76E1-4FA6-9E87-37079A7B7C69}.Debug.Build.0 = Debug|.NET
+ {2CA12E3F-76E1-4FA6-9E87-37079A7B7C69}.Release.ActiveCfg = Release|.NET
+ {2CA12E3F-76E1-4FA6-9E87-37079A7B7C69}.Release.Build.0 = Release|.NET
+ EndGlobalSection
+ GlobalSection(SolutionItems) = postSolution
+ Build.xml = Build.xml
+ Package.html = Package.html
+ README.txt = README.txt
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ EndGlobalSection
+ GlobalSection(ExtensibilityAddIns) = postSolution
+ EndGlobalSection
+EndGlobal