You are viewing a plain text version of this content. The canonical link for it is here.

Posted to user@lucenenet.apache.org by Digy <di...@gmail.com> on 2009/04/24 20:25:28 UTC

RE: Luke-0.9.x cannot open index files

I think I found the bug. Here is the dump of the original index:

 

NUMDOCS: 3

MAXDOCS: 7

DELETED(0): True

DELETED(1): True

DELETED(2): False

DELETED(3): True

DELETED(4): True

DELETED(5): False

DELETED(6): False

TERM(0): _l_activationdatetime:552877632000000000

TERM(1): _l_author:admin

TERM(2): _l_bookmarkcount:0

TERM(3): _l_clix:0

TERM(4): _l_clix:1

TERM(5): _l_creationdatetime:633427319866778624

TERM(6): _l_creationdatetime:633427324812559872

TERM(7): _l_creationdatetime:633760609388437504

TERM(8): _l_deactivationdatetime:155377824000000000

TERM(9): _l_deactivationdatetime:155378687999969792

TERM(10): _l_document_class:1

TERM(11): _l_document_class:98305

TERM(12): _l_folder:163841

TERM(13): _l_folder:163843

TERM(14): _l_hidden:aaa

TERM(15): _l_last_modified_datetime:633427319866778624

TERM(16): _l_last_modified_datetime:633427324812559872

TERM(17): _l_last_modified_datetime:633760609388437504

TERM(18): _l_meta:abc

TERM(19): _l_meta:abc.ppt

TERM(20): _l_meta:ddx

TERM(21): _l_meta:doc

TERM(22): _l_meta:xyz

TERM(23): _l_meta:名

TERM(24): _l_meta:問

TERM(25): _l_meta:有

TERM(26): _l_meta:檔

TERM(27): _l_meta:測

TERM(28): _l_meta:看

TERM(29): _l_meta:試

TERM(30): _l_meta:還

TERM(31): _l_meta:題

TERM(32): _l_parentdocument:196609

TERM(33): _l_parentdocument:327681

TERM(34): _l_parentdocument:557057

TERM(35): _l_ratingavg:0

TERM(36): _l_ratingmedian:0

TERM(37): _l_ratingstdev:0

TERM(38): _l_ratingsum:0

TERM(39): _l_read_permission:admin

TERM(40): _l_rootdocument:196609

TERM(41): _l_rootdocument:327681

TERM(42): _l_rootdocument:557057

TERM(43): _l_state:0

TERM(44): _l_state:2

TERM(45): _l_summary:2123456789

TERM(46): _l_summary:abc

TERM(47): _l_summary:abc.ppt

TERM(48): _l_summary:ddx

TERM(49): _l_summary:doc

TERM(50): _l_summary:xyz

TERM(51): _l_summary:有

TERM(52): _l_summary:還

TERM(53): _l_title:123

TERM(54): _l_title:class

TERM(55): _l_title:default

TERM(56): _l_title:document

TERM(57): _l_title:名

TERM(58): _l_title:問

TERM(59): _l_title:檔

TERM(60): _l_title:測

TERM(61): _l_title:看

TERM(62): _l_title:試

TERM(63): _l_title:題

TERM(64): _l_unique_key:196609

TERM(65): _l_unique_key:327681

TERM(66): _l_unique_key:557057

TERM(67): _l_version:1

TERM(68): 作者:123

TERM(69): 摘要:2123456789

TERM(70): 摘要:abc

TERM(71): 摘要:abc.ppt

TERM(72): 摘要:ddx

TERM(73): 摘要:doc

TERM(74): 摘要:xyz

TERM(75): 摘要:有

TERM(76): 摘要:還

TERM(77): 標題:123

TERM(78): 標題:class

TERM(79): 標題:default

TERM(80): 標題:document

TERM(81): 標題:名

TERM(82): 標題:問

TERM(83): 標題:檔

TERM(84): 標題:測

TERM(85): 標題:看

TERM(86): 標題:試

TERM(87): 標題:題

TERM(88): 關鍵詞:123

 

 

 

And here is some sample code: it reads the docs from the original index and then writes them to a new one.

 

/// <summary>
/// Copies every non-deleted document of an existing index into a new index
/// written to the "Floyd" directory. Each field is re-added under a name that is
/// the Base64 encoding of the UTF-8 bytes of its original name, so the new index
/// contains only ASCII field names (the workaround being demonstrated).
/// </summary>
/// <param name="OrgIndex">Location of the original index, as accepted by IndexReader.Open.</param>
void CreateNewIndex(string OrgIndex)
{
    IndexReader reader = IndexReader.Open(OrgIndex);
    try
    {
        IndexWriter writer = new IndexWriter("Floyd", new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
        try
        {
            // Document numbers run from 0 to MaxDoc() - 1 and include deleted slots,
            // so deleted entries have to be skipped explicitly.
            for (int i = 0; i < reader.MaxDoc(); i++)
            {
                if (reader.IsDeleted(i))
                {
                    continue;
                }

                Lucene.Net.Documents.Document orgDoc = reader.Document(i);
                System.Collections.IList fields = orgDoc.GetFields();

                Lucene.Net.Documents.Document newDoc = new Document();
                foreach (Lucene.Net.Documents.Field field in fields)
                {
                    // NOTE(review): a field that is not tokenized is always re-added as
                    // UN_TOKENIZED; whether the source field was indexed at all is not checked.
                    Lucene.Net.Documents.Field newField = new Field(
                        System.Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(field.Name())), // <-- ASCII-only replacement name
                        //field.Name(), // <-- original name; use this line instead to keep the Chinese field names
                        field.StringValue(),
                        field.IsStored() ? Lucene.Net.Documents.Field.Store.YES : Lucene.Net.Documents.Field.Store.NO,
                        field.IsTokenized() ? Lucene.Net.Documents.Field.Index.TOKENIZED : Lucene.Net.Documents.Field.Index.UN_TOKENIZED);

                    newDoc.Add(newField);
                }

                writer.AddDocument(newDoc);
            }
        }
        finally
        {
            // Always close the writer, even when copying fails part-way through.
            writer.Close();
        }
    }
    finally
    {
        // Closed last, matching the original order (writer first, then reader).
        reader.Close();
    }
}

 

 

If some field names are Chinese, then Luke returns “read past EOF”. But if those field names are replaced with non-Chinese names, then it works.

 

DIGY

 

 

 

 

 

-----Original Message-----
From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com] 
Sent: Friday, April 24, 2009 8:53 PM
To: lucene-net-dev@incubator.apache.org
Subject: Luke-0.9.x cannot open index files

 

 

Digy,

 

Some additional information from the discussion on the lucene-net-user list with Floyd Wu.

 

 

I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.

 

The Java equivalents of the two small test applications I use to inspect an index and compact it, function identically to the .NET versions (that were built with VS2005 and Lucene.NET 2.3.1).

 

That Luke cannot open the index appears to be a problem within Luke.

Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does not flag the index as corrupt; and both the Java and .NET versions report the same index content before and after the optimize operation.

 

 

-- Neal

 

**************************************************************

Neal Granroth

Software Engineer, Molecular Spectroscopy

Thermo Fisher Scientific

5225 Verona Road, Madison, WI 53711

 

neal.granroth@thermofisher.com

Tel: 608-276-5645

Fax: 608-276-6328

 

www.thermofisher.com

 

WORLDWIDE CONFIDENTIALITY NOTE: Dissemination, distribution or copying of this e-mail or the information herein by anyone other than the intended recipient, or an employee or agent of a system responsible for delivering the message to the intended recipient, is prohibited. If you are not the intended recipient, please inform the sender and delete all copies.

 

-----Original Message-----

From: Digy (JIRA) [mailto:jira@apache.org]

Sent: Wednesday, April 08, 2009 6:28 PM

To: lucene-net-dev@incubator.apache.org

Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET compatible with ASP.NET Medium Trust Level, in hosting environments (like GoDaddy...)

 

 

    [ https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335 ]

 

Digy commented on LUCENENET-169:

--------------------------------

 

Although you can overcome all of them somehow;

 

* controlling the lifetime of IndexWriter/IndexReader in a natural manner,

* reopening the IndexReader only when needed using (for ex) FileSystemWatcher,

* providing a separation between data & business layer,

* providing an interface to other apps that may want to write their own user interface,

* accessing a single search service from different web apps/from load balanced web servers

* controlling the lifetime of searching/indexing code (without being affected by the restart of the IIS processes automatically when some memory limit is exceeded (for ex.) )

* Ability to access some system resources that can be restricted by IIS

etc.

make me think a separate search service is a better idea. But in the end, it is your design decision.

(Think, A WebApp+Solr in Java world)

 

 

DIGY

 

> Changes to make Lucene.NET compatible with ASP.NET  Medium Trust Level, in hosting environments (like GoDaddy...)

> -----------------------------------------------------------------------------------------------------------------

> 

>                 Key: LUCENENET-169

>                 URL: https://issues.apache.org/jira/browse/LUCENENET-169

>             Project: Lucene.Net

>          Issue Type: Improvement

>         Environment: ASP.NET

>            Reporter: Corey Trager

>         Attachments: FSDirectory.patch

> 

> 

> Microsoft has a configuration file for shared hosting for what they call "Medium Trust".   There are a couple places in FSDirectory.cs  that violate the restrictions of Medium Trust, but I coded workarounds, shown below.

> #1)

> // Corey Trager, Oct 2008: Commented call to GetTempPath to workaround permission restrictions at shared host.

> // LOCK_DIR isn't used anyway.

> public static readonly System.String LOCK_DIR = null; // SupportClass.AppSettings.Get("Lucene.Net.lockDir", System.IO.Path.GetTempPath());

> #2)

>               /// <summary>Returns an array of strings, one for each Lucene index file in the directory. </summary>

>               public override System.String[] List()

>               {

> /* Changes by Corey Trager, Oct 2008, to workaround permission restrictions at shared host */

>                System.IO.DirectoryInfo dir = new System.IO.DirectoryInfo(directory.FullName);

>               System.IO.FileInfo[] files = dir.GetFiles();

>                 string[] list = new string[files.Length];

>                 for (int i = 0; i < files.Length; i++)

>                 {

>                     list[i] = files[i].Name;

>                 }

>                 return list;

> /* end of changes */

> //            System.String[] files = SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName, IndexFileNameFilter.GetFilter());

> //            for (int i = 0; i < files.Length; i++)

> //            {

> //                System.IO.FileInfo fi = new System.IO.FileInfo(files[i]);

> //                files[i] = fi.Name;

> //            }

> //                      return files;

>               }

 

--

This message is automatically generated by JIRA.

-

You can reply to this email to add a comment to the issue online.

Re: Luke-0.9.x cannot open index files

Posted by Floyd Wu <fl...@gmail.com>.

Hi Digy,

Will this bug affect the normal use of Lucene.Net?
Or does this bug exist only in Luke? It seems that my system works fine
with an index containing Chinese field names.
Is there any potential problem if I continue using Chinese field names?
Thanks

Floyd



2009/4/25 Digy <di...@gmail.com>

> I think, I found the bug. Here is the dump of the original index:
>
>
>
> NUMDOCS: 3
>
> MAXDOCS: 7
>
> DELETED(0): True
>
> DELETED(1): True
>
> DELETED(2): False
>
> DELETED(3): True
>
> DELETED(4): True
>
> DELETED(5): False
>
> DELETED(6): False
>
> TERM(0): _l_activationdatetime:552877632000000000
>
> TERM(1): _l_author:admin
>
> TERM(2): _l_bookmarkcount:0
>
> TERM(3): _l_clix:0
>
> TERM(4): _l_clix:1
>
> TERM(5): _l_creationdatetime:633427319866778624
>
> TERM(6): _l_creationdatetime:633427324812559872
>
> TERM(7): _l_creationdatetime:633760609388437504
>
> TERM(8): _l_deactivationdatetime:155377824000000000
>
> TERM(9): _l_deactivationdatetime:155378687999969792
>
> TERM(10): _l_document_class:1
>
> TERM(11): _l_document_class:98305
>
> TERM(12): _l_folder:163841
>
> TERM(13): _l_folder:163843
>
> TERM(14): _l_hidden:aaa
>
> TERM(15): _l_last_modified_datetime:633427319866778624
>
> TERM(16): _l_last_modified_datetime:633427324812559872
>
> TERM(17): _l_last_modified_datetime:633760609388437504
>
> TERM(18): _l_meta:abc
>
> TERM(19): _l_meta:abc.ppt
>
> TERM(20): _l_meta:ddx
>
> TERM(21): _l_meta:doc
>
> TERM(22): _l_meta:xyz
>
> TERM(23): _l_meta:名
>
> TERM(24): _l_meta:問
>
> TERM(25): _l_meta:有
>
> TERM(26): _l_meta:檔
>
> TERM(27): _l_meta:測
>
> TERM(28): _l_meta:看
>
> TERM(29): _l_meta:試
>
> TERM(30): _l_meta:還
>
> TERM(31): _l_meta:題
>
> TERM(32): _l_parentdocument:196609
>
> TERM(33): _l_parentdocument:327681
>
> TERM(34): _l_parentdocument:557057
>
> TERM(35): _l_ratingavg:0
>
> TERM(36): _l_ratingmedian:0
>
> TERM(37): _l_ratingstdev:0
>
> TERM(38): _l_ratingsum:0
>
> TERM(39): _l_read_permission:admin
>
> TERM(40): _l_rootdocument:196609
>
> TERM(41): _l_rootdocument:327681
>
> TERM(42): _l_rootdocument:557057
>
> TERM(43): _l_state:0
>
> TERM(44): _l_state:2
>
> TERM(45): _l_summary:2123456789
>
> TERM(46): _l_summary:abc
>
> TERM(47): _l_summary:abc.ppt
>
> TERM(48): _l_summary:ddx
>
> TERM(49): _l_summary:doc
>
> TERM(50): _l_summary:xyz
>
> TERM(51): _l_summary:有
>
> TERM(52): _l_summary:還
>
> TERM(53): _l_title:123
>
> TERM(54): _l_title:class
>
> TERM(55): _l_title:default
>
> TERM(56): _l_title:document
>
> TERM(57): _l_title:名
>
> TERM(58): _l_title:問
>
> TERM(59): _l_title:檔
>
> TERM(60): _l_title:測
>
> TERM(61): _l_title:看
>
> TERM(62): _l_title:試
>
> TERM(63): _l_title:題
>
> TERM(64): _l_unique_key:196609
>
> TERM(65): _l_unique_key:327681
>
> TERM(66): _l_unique_key:557057
>
> TERM(67): _l_version:1
>
> TERM(68): 作者:123
>
> TERM(69): 摘要:2123456789
>
> TERM(70): 摘要:abc
>
> TERM(71): 摘要:abc.ppt
>
> TERM(72): 摘要:ddx
>
> TERM(73): 摘要:doc
>
> TERM(74): 摘要:xyz
>
> TERM(75): 摘要:有
>
> TERM(76): 摘要:還
>
> TERM(77): 標題:123
>
> TERM(78): 標題:class
>
> TERM(79): 標題:default
>
> TERM(80): 標題:document
>
> TERM(81): 標題:名
>
> TERM(82): 標題:問
>
> TERM(83): 標題:檔
>
> TERM(84): 標題:測
>
> TERM(85): 標題:看
>
> TERM(86): 標題:試
>
> TERM(87): 標題:題
>
> TERM(88): 關鍵詞:123
>
>
>
>
>
>
>
> And here is a sample code: read docs from original index and then write to
> an new one.
>
>
>
> void CreateNewIndex(string OrgIndex)
>
>        {
>
>            IndexReader reader = IndexReader.Open(OrgIndex);
>
>            IndexWriter writer = new IndexWriter("Floyd", new
> Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
>
>
>
>            for (int i = 0; i < reader.MaxDoc(); i++)
>
>            {
>
>                if (reader.IsDeleted(i) == true) continue;
>
>
>
>                Lucene.Net.Documents.Document orgDoc =  reader.Document(i);
>
>                System.Collections.IList fields = orgDoc.GetFields();
>
>
>
>                Lucene.Net.Documents.Document newDoc = new Document();
>
>                foreach (Lucene.Net.Documents.Field field in fields)
>
>                {
>
>                    Lucene.Net.Documents.Field newField = new Field(
>
>                        System.Convert.ToBase64String(
> System.Text.Encoding.UTF8.GetBytes(field.Name())), //ç
>
>                        //field.Name(), //ç
>
>                        field.StringValue(),
>
>                        field.IsStored() ?
> Lucene.Net.Documents.Field.Store.YES : Lucene.Net.Documents.Field.Store.NO<http://lucene.net.documents.field.store.no/>
> ,
>
>                        field.IsTokenized() ?
> Lucene.Net.Documents.Field.Index.TOKENIZED :
> Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
>
>
>
>                    newDoc.Add(newField);
>
>                }
>
>                writer.AddDocument(newDoc);
>
>            }
>
>
>
>            writer.Close();
>
>            reader.Close();
>
>        }
>
>
>
>
>
> If some field names are chinese, then Luke returns “read past EOF”. But if
> those field names are replaced with non-chinese names, then it works.
>
>
>
> DIGY
>
>
>
>
>
>
>
>
>
>
>
> -----Original Message-----
> From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> Sent: Friday, April 24, 2009 8:53 PM
> To: lucene-net-dev@incubator.apache.org
>  Subject: Luke-0.9.x cannot open index files
>
>
>
>
>
> Digy,
>
>
>
> Some additional information from the discussion on the lucene-net-user list
> with Floyd Wu.
>
>
>
>
>
> I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
>
>
>
> The Java equivalents of the two small test applications I use to inspect an
> index and compact it, function identically to the .NET versions (that were
> built with VS2005 and Lucene.NET 2.3.1).
>
>
>
> That Luke cannot open the index appears to be a problem within Luke.
>
> Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does not
> flag the index as corrupt; and both the Java and .NET versions report the
> same index content before and after the optimize operation.
>
>
>
>
>
> -- Neal
>
>
>
> **************************************************************
>
> Neal Granroth
>
> Software Engineer, Molecular Spectroscopy
>
> Thermo Fisher Scientific
>
> 5225 Verona Road, Madison, WI 53711
>
>
>
> neal.granroth@thermofisher.com
>
> Tel: 608-276-5645
>
> Fax: 608-276-6328
>
>
>
> www.thermofisher.com
>
>
>
> WORLDWIDE CONFIDENTIALITY NOTE: Dissemination, distribution or copying of
> this e-mail or the information herein by anyone other than the intended
> recipient, or an employee or agent of a system responsible for delivering
> the message to the intended recipient, is prohibited. If you are not the
> intended recipient, please inform the sender and delete all copies.
>
>
>
> -----Original Message-----
>
> From: Digy (JIRA) [mailto:jira@apache.org]
>
> Sent: Wednesday, April 08, 2009 6:28 PM
>
> To: lucene-net-dev@incubator.apache.org
>
> Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
> compatible with ASP.NET <http://asp.net/> Medium Trust Level, in hosting
> environments (like GoDaddy...)
>
>
>
>
>
>    [
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335]
>
>
>
> Digy commented on LUCENENET-169:
>
> --------------------------------
>
>
>
> Although you can overcome all of them somehow;
>
>
>
> * controlling the the lifetime of IndexWriter/IndexReader in a naturally
> manner,
>
> * reopening the IndexReader only when needed using (for ex)
> FileSystemWatcher,
>
> * providing a separation between data & bussiness layer,
>
> * providing other apps an interface that may want to write its own user
> interface,
>
> * accessing a single search service from different web apps/from load
> balanced web servers
>
> * controlling the lifetime of searching/indexing code (without being
> effected by the restart of the IIS processes automatically when some memory
> limit is exceeded (for ex.) )
>
> * Ability to access some system resources that can be restricted by IIS
>
> etc.
>
> make me think a separete search service is a better idea.But at last, it is
> a design decision of you.
>
> (Think, A WebApp+Solr in Java world)
>
>
>
>
>
> DIGY
>
>
>
> > Changes to make Lucene.NET compatible with ASP.NET <http://asp.net/> Medium Trust Level, in hosting environments (like GoDaddy...)
>
> >
> -----------------------------------------------------------------------------------------------------------------
>
> >
>
> >                 Key: LUCENENET-169
>
> >                 URL: https://issues.apache.org/jira/browse/LUCENENET-169
>
> >             Project: Lucene.Net
>
> >          Issue Type: Improvement
>
> >         Environment: ASP.NET <http://asp.net/>
>
> >            Reporter: Corey Trager
>
> >         Attachments: FSDirectory.patch
>
> >
>
> >
>
> > Microsoft has a configuration file for shared hosting for what they call
> "Medium Trust".   There are a couple places in FSDirectory.cs  that violate
> the restrictions of Medium Trust, but I coded workarounds, shown below.
>
> > #1)
>
> > // Corey Trager, Oct 2008: Commented call to GetTempPath to workaround
> permission restrictions at shared host.
>
> > // LOCK_DIR isn't used anyway.
>
> > public static readonly System.String LOCK_DIR = null; //
> SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> System.IO.Path.GetTempPath());
>
> > #2)
>
> >               /// <summary>Returns an array of strings, one for each
> Lucene index file in the directory. </summary>
>
> >               public override System.String[] List()
>
> >               {
>
> > /* Changes by Corey Trager, Oct 2008, to workaround permission
> restrictions at shared host */
>
> >                System.IO.DirectoryInfo dir = new
> System.IO.DirectoryInfo(directory.FullName);
>
> >               System.IO.FileInfo[] files = dir.GetFiles();
>
> >                 string[] list = new string[files.Length];
>
> >                 for (int i = 0; i < files.Length; i++)
>
> >                 {
>
> >                     list[i] = files[i].Name;
>
> >                 }
>
> >                 return list;
>
> > /* end of changes */
>
> > //            System.String[] files =
> SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> IndexFileNameFilter.GetFilter());
>
> > //            for (int i = 0; i < files.Length; i++)
>
> > //            {
>
> > //                System.IO.FileInfo fi = new
> System.IO.FileInfo(files[i]);
>
> > //                files[i] = fi.Name;
>
> > //            }
>
> > //                      return files;
>
> >               }
>
>
>
> --
>
> This message is automatically generated by JIRA.
>
> -
>
> You can reply to this email to add a comment to the issue online.
>
>

Re: Luke-0.9.x cannot open index files

Posted by Floyd Wu <fl...@gmail.com>.

Hi Neal

Thanks for your help.

2009/4/27 Granroth, Neal V. <ne...@thermofisher.com>

> Floyd,
>
> Here is a simple Java command-line application that displays the content of
> an index.  It successfully opens your "documents" index using Java Lucene
> version 2.3.2 (the closest version to current Lucene.NET).
>
> That this program works properly with your index, but Luke does not, shows
> that the problem is within Luke.  You can provide this program with your
> index and bug report to the people supporting Luke.
>
> -----------------------------------------------------------------
> package app1;
> import org.apache.lucene.index.*;
> import org.apache.lucene.document.*;
>
> public class Main
> {
>    public Main() { }
>
>    public static void main(String[] args)
>    {
>        if (args.length > 0)
>        {
>            System.out.println("Index Folder: " + args[0]);
>
>            IndexReader ird = null;
>            try
>            {
>                ird = IndexReader.open( args[0] );
>                System.out.println("Index Version: " + ird.getVersion());
>                System.out.println("    isCurrent: " + ird.isCurrent() );
>                System.out.println("  isOptimized: " + ird.isOptimized());
>                System.out.println("     num Docs: " + ird.numDocs());
>                System.out.println("     max Docs: " + ird.maxDoc());
>
>                int numDOCs = ird.maxDoc();
>                if (numDOCs > 0)
>                {
>                    for(int ii=0; ii<numDOCs; ++ii)
>                    {
>
>  System.out.println("------------------------------------------");
>                        System.out.println("Doc: " + ii );
>                        System.out.println("  Deleted: " +
> ird.isDeleted(ii));
>                        if ( !ird.isDeleted(ii) )
>                        {
>                            Document dx = ird.document(ii);
>                            java.util.List fldList = dx.getFields();
>                            int numFields = fldList.size();
>                            for (int nn=0; nn<numFields; ++nn)
>                            {
>                                Field xf = (Field) fldList.get(nn);
>                                String zname = xf.name();
>                                boolean isStored = xf.isStored();
>                                boolean isIndexed = xf.isIndexed();
>                                String zvalue = xf.stringValue();
>
>                                String scode = (isIndexed ? "I":"N") + "," +
> ( isStored ? "S":"U" );
>
>                                if (zvalue != null)
>                                     System.out.println("  Field: " + zname
> + ", " + scode + ", Value: " + zvalue);
>                                else
>                                     System.out.println("  Field: " + zname
> + ", " + scode + ", Value: (none)");
>                            }
>                        }
>                    }
>                }
>
>
>  System.out.println("------------------------------------------");
>                System.out.println("Terms");
>
>  System.out.println("------------------------------------------");
>
>                TermEnum enuTRM = ird.terms();
>                while( enuTRM.next() )
>                {
>                    int freq = enuTRM.docFreq();
>                    Term oT = enuTRM.term();
>                    if (oT != null)
>                    {
>                        System.out.println("  " + freq + ", " + oT.field() +
> ", \"" + oT.text() + "\"");
>                    }
>                }
>
>                ird.close();
>            }
>            catch(Exception ex)
>            {
>                System.out.println("Open failed, exception: " +
> ex.toString() );
>            }
>        }
>    }
> }
>
> -----------------------------------------------------------------
>
>
> -- Neal
>
>
> -----Original Message-----
> From: Floyd Wu [mailto:floyd.wu@gmail.com]
> Sent: Monday, April 27, 2009 3:14 AM
> To: lucene-net-dev@incubator.apache.org
>  Subject: Re: Luke-0.9.x cannot open index files
>
> Thanks DIGY
> So will you report this to Luke author or Java Lucene group?
> I've been reported this situation to Luke author and he( or she) doesn't
> think this is a bug of Luke.
>
> floyd
>
> 2009/4/27 digy digy <di...@gmail.com>
>
> > It is not a bug of Lucene.Net and as my sample code shows, Lucene.Net
> works
> > well with chinese field names.
> > I think, it is a bug in Luke.
> >
> > DIGY
> >
> >
> >
> > On Mon, Apr 27, 2009 at 8:49 AM, Floyd Wu <fl...@gmail.com> wrote:
> >
> > > Hi Digy,
> > > Thanks for your help.
> > > But if chinese field name is the problem, will it be "fix" in
> Lucene.Net
> > or
> > > how can I avoid this problem.
> > >
> > > Chinese field name is by design and probably not avoidable.
> > >
> > > Floyd
> > >
> > > 2009/4/25 Digy <di...@gmail.com>
> > >
> > > > I think, I found the bug. Here is the dump of the original index:
> > > >
> > > >
> > > >
> > > > NUMDOCS: 3
> > > >
> > > > MAXDOCS: 7
> > > >
> > > > DELETED(0): True
> > > >
> > > > DELETED(1): True
> > > >
> > > > DELETED(2): False
> > > >
> > > > DELETED(3): True
> > > >
> > > > DELETED(4): True
> > > >
> > > > DELETED(5): False
> > > >
> > > > DELETED(6): False
> > > >
> > > > TERM(0): _l_activationdatetime:552877632000000000
> > > >
> > > > TERM(1): _l_author:admin
> > > >
> > > > TERM(2): _l_bookmarkcount:0
> > > >
> > > > TERM(3): _l_clix:0
> > > >
> > > > TERM(4): _l_clix:1
> > > >
> > > > TERM(5): _l_creationdatetime:633427319866778624
> > > >
> > > > TERM(6): _l_creationdatetime:633427324812559872
> > > >
> > > > TERM(7): _l_creationdatetime:633760609388437504
> > > >
> > > > TERM(8): _l_deactivationdatetime:155377824000000000
> > > >
> > > > TERM(9): _l_deactivationdatetime:155378687999969792
> > > >
> > > > TERM(10): _l_document_class:1
> > > >
> > > > TERM(11): _l_document_class:98305
> > > >
> > > > TERM(12): _l_folder:163841
> > > >
> > > > TERM(13): _l_folder:163843
> > > >
> > > > TERM(14): _l_hidden:aaa
> > > >
> > > > TERM(15): _l_last_modified_datetime:633427319866778624
> > > >
> > > > TERM(16): _l_last_modified_datetime:633427324812559872
> > > >
> > > > TERM(17): _l_last_modified_datetime:633760609388437504
> > > >
> > > > TERM(18): _l_meta:abc
> > > >
> > > > TERM(19): _l_meta:abc.ppt
> > > >
> > > > TERM(20): _l_meta:ddx
> > > >
> > > > TERM(21): _l_meta:doc
> > > >
> > > > TERM(22): _l_meta:xyz
> > > >
> > > > TERM(23): _l_meta:名
> > > >
> > > > TERM(24): _l_meta:問
> > > >
> > > > TERM(25): _l_meta:有
> > > >
> > > > TERM(26): _l_meta:檔
> > > >
> > > > TERM(27): _l_meta:測
> > > >
> > > > TERM(28): _l_meta:看
> > > >
> > > > TERM(29): _l_meta:試
> > > >
> > > > TERM(30): _l_meta:還
> > > >
> > > > TERM(31): _l_meta:題
> > > >
> > > > TERM(32): _l_parentdocument:196609
> > > >
> > > > TERM(33): _l_parentdocument:327681
> > > >
> > > > TERM(34): _l_parentdocument:557057
> > > >
> > > > TERM(35): _l_ratingavg:0
> > > >
> > > > TERM(36): _l_ratingmedian:0
> > > >
> > > > TERM(37): _l_ratingstdev:0
> > > >
> > > > TERM(38): _l_ratingsum:0
> > > >
> > > > TERM(39): _l_read_permission:admin
> > > >
> > > > TERM(40): _l_rootdocument:196609
> > > >
> > > > TERM(41): _l_rootdocument:327681
> > > >
> > > > TERM(42): _l_rootdocument:557057
> > > >
> > > > TERM(43): _l_state:0
> > > >
> > > > TERM(44): _l_state:2
> > > >
> > > > TERM(45): _l_summary:2123456789
> > > >
> > > > TERM(46): _l_summary:abc
> > > >
> > > > TERM(47): _l_summary:abc.ppt
> > > >
> > > > TERM(48): _l_summary:ddx
> > > >
> > > > TERM(49): _l_summary:doc
> > > >
> > > > TERM(50): _l_summary:xyz
> > > >
> > > > TERM(51): _l_summary:有
> > > >
> > > > TERM(52): _l_summary:還
> > > >
> > > > TERM(53): _l_title:123
> > > >
> > > > TERM(54): _l_title:class
> > > >
> > > > TERM(55): _l_title:default
> > > >
> > > > TERM(56): _l_title:document
> > > >
> > > > TERM(57): _l_title:名
> > > >
> > > > TERM(58): _l_title:問
> > > >
> > > > TERM(59): _l_title:檔
> > > >
> > > > TERM(60): _l_title:測
> > > >
> > > > TERM(61): _l_title:看
> > > >
> > > > TERM(62): _l_title:試
> > > >
> > > > TERM(63): _l_title:題
> > > >
> > > > TERM(64): _l_unique_key:196609
> > > >
> > > > TERM(65): _l_unique_key:327681
> > > >
> > > > TERM(66): _l_unique_key:557057
> > > >
> > > > TERM(67): _l_version:1
> > > >
> > > > TERM(68): 作者:123
> > > >
> > > > TERM(69): 摘要:2123456789
> > > >
> > > > TERM(70): 摘要:abc
> > > >
> > > > TERM(71): 摘要:abc.ppt
> > > >
> > > > TERM(72): 摘要:ddx
> > > >
> > > > TERM(73): 摘要:doc
> > > >
> > > > TERM(74): 摘要:xyz
> > > >
> > > > TERM(75): 摘要:有
> > > >
> > > > TERM(76): 摘要:還
> > > >
> > > > TERM(77): 標題:123
> > > >
> > > > TERM(78): 標題:class
> > > >
> > > > TERM(79): 標題:default
> > > >
> > > > TERM(80): 標題:document
> > > >
> > > > TERM(81): 標題:名
> > > >
> > > > TERM(82): 標題:問
> > > >
> > > > TERM(83): 標題:檔
> > > >
> > > > TERM(84): 標題:測
> > > >
> > > > TERM(85): 標題:看
> > > >
> > > > TERM(86): 標題:試
> > > >
> > > > TERM(87): 標題:題
> > > >
> > > > TERM(88): 關鍵詞:123
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > And here is a sample code: read docs from original index and then
> write
> > > to
> > > > an new one.
> > > >
> > > >
> > > >
> > > > void CreateNewIndex(string OrgIndex)
> > > >
> > > >        {
> > > >
> > > >            IndexReader reader = IndexReader.Open(OrgIndex);
> > > >
> > > >            IndexWriter writer = new IndexWriter("Floyd", new
> > > > Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
> > > >
> > > >
> > > >
> > > >            for (int i = 0; i < reader.MaxDoc(); i++)
> > > >
> > > >            {
> > > >
> > > >                if (reader.IsDeleted(i) == true) continue;
> > > >
> > > >
> > > >
> > > >                Lucene.Net.Documents.Document orgDoc =
> > >  reader.Document(i);
> > > >
> > > >                System.Collections.IList fields = orgDoc.GetFields();
> > > >
> > > >
> > > >
> > > >                Lucene.Net.Documents.Document newDoc = new Document();
> > > >
> > > >                foreach (Lucene.Net.Documents.Field field in fields)
> > > >
> > > >                {
> > > >
> > > >                    Lucene.Net.Documents.Field newField = new Field(
> > > >
> > > >                        System.Convert.ToBase64String(
> > > > System.Text.Encoding.UTF8.GetBytes(field.Name())), //ç
> > > >
> > > >                        //field.Name(), //ç
> > > >
> > > >                        field.StringValue(),
> > > >
> > > >                        field.IsStored() ?
> > > > Lucene.Net.Documents.Field.Store.YES :
> > > Lucene.Net.Documents.Field.Store.NO<http://lucene.net.documents.field.store.no/>
> <http://lucene.net.documents.field.store.no/>
> > <
> > > http://lucene.net.documents.field.store.no/>
> >  > > ,
> > > >
> > > >                        field.IsTokenized() ?
> > > > Lucene.Net.Documents.Field.Index.TOKENIZED :
> > > > Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
> > > >
> > > >
> > > >
> > > >                    newDoc.Add(newField);
> > > >
> > > >                }
> > > >
> > > >                writer.AddDocument(newDoc);
> > > >
> > > >            }
> > > >
> > > >
> > > >
> > > >            writer.Close();
> > > >
> > > >            reader.Close();
> > > >
> > > >        }
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > If some field names are chinese, then Luke returns “read past EOF”.
> But
> > > if
> > > > those field names are replaced with non-chinese names, then it works.
> > > >
> > > >
> > > >
> > > > DIGY
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > -----Original Message-----
> > > > From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> > > > Sent: Friday, April 24, 2009 8:53 PM
> > > > To: lucene-net-dev@incubator.apache.org
> > > >  Subject: Luke-0.9.x cannot open index files
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > Digy,
> > > >
> > > >
> > > >
> > > > Some additional information from the discussion on the
> lucene-net-user
> > > list
> > > > with Floyd Wu.
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
> > > >
> > > >
> > > >
> > > > The Java equivalents of the two small test applications I use to
> > inspect
> > > an
> > > > index and compact it, function identically to the .NET versions (that
> > > were
> > > > built with VS2005 and Lucene.NET 2.3.1).
> > > >
> > > >
> > > >
> > > > That Luke cannot open the index appears to be a problem within Luke.
> > > >
> > > > Even if Floyd's index contains some odd entries, Java Lucene 2.3.2
> does
> > > not
> > > > flag the index as corrupt; and both the Java and .NET versions report
> > the
> > > > same index content before and after the optimize operation.
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > -- Neal
> > > >
> > > >
> > > >
> > > > -----Original Message-----
> > > >
> > > > From: Digy (JIRA) [mailto:jira@apache.org]
> > > >
> > > > Sent: Wednesday, April 08, 2009 6:28 PM
> > > >
> > > > To: lucene-net-dev@incubator.apache.org
> > > >
> > > > Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
> > > > compatible with ASP.NET Medium Trust Level, in hosting
> > > > environments (like GoDaddy...)
> > > >
> > > >
> > > >
> > > >
> > > >
> > > >    [
> > > >
> > >
> >
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335
> > > ]
> > > >
> > > >
> > > >
> > > > Digy commented on LUCENENET-169:
> > > >
> > > > --------------------------------
> > > >
> > > >
> > > >
> > > > Although you can overcome all of them somehow;
> > > >
> > > >
> > > >
> > > > * controlling the lifetime of IndexWriter/IndexReader in a natural
> > > > manner,
> > > >
> > > > * reopening the IndexReader only when needed using (for ex)
> > > > FileSystemWatcher,
> > > >
> > > > * providing a separation between data & business layer,
> > > >
> > > > * providing other apps an interface that may want to write its own
> user
> > > > interface,
> > > >
> > > > * accessing a single search service from different web apps/from load
> > > > balanced web servers
> > > >
> > > > * controlling the lifetime of searching/indexing code (without being
> > > > affected by the automatic restart of the IIS processes when some
> > > > memory limit is exceeded (for ex.) )
> > > >
> > > > * Ability to access some system resources that can be restricted by
> IIS
> > > >
> > > > etc.
> > > >
> > > > make me think a separate search service is a better idea. But in the
> > > > end, it is your design decision.
> > > >
> > > > (Think, A WebApp+Solr in Java world)
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > DIGY
> > > >
> > > >
> > > >
> > > > > Changes to make Lucene.NET compatible with ASP.NET
> > > > > Medium Trust Level, in hosting environments (like GoDaddy...)
> > > >
> > > > >
> > > >
> > >
> >
> -----------------------------------------------------------------------------------------------------------------
> > > >
> > > > >
> > > >
> > > > >                 Key: LUCENENET-169
> > > >
> > > > >                 URL:
> > > https://issues.apache.org/jira/browse/LUCENENET-169
> > > >
> > > > >             Project: Lucene.Net
> > > >
> > > > >          Issue Type: Improvement
> > > >
> > > > >         Environment: ASP.NET
> > > >
> > > > >            Reporter: Corey Trager
> > > >
> > > > >         Attachments: FSDirectory.patch
> > > >
> > > > >
> > > >
> > > > >
> > > >
> > > > > Microsoft has a configuration file for shared hosting for what they
> > > call
> > > > "Medium Trust".   There are a couple places in FSDirectory.cs  that
> > > violate
> > > > the restrictions of Medium Trust, but I coded workarounds, shown
> below.
> > > >
> > > > > #1)
> > > >
> > > > > // Corey Trager, Oct 2008: Commented call to GetTempPath to
> > workaround
> > > > permission restrictions at shared host.
> > > >
> > > > > // LOCK_DIR isn't used anyway.
> > > >
> > > > > public static readonly System.String LOCK_DIR = null; //
> > > > SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> > > > System.IO.Path.GetTempPath());
> > > >
> > > > > #2)
> > > >
> > > > >               /// <summary>Returns an array of strings, one for
> each
> > > > Lucene index file in the directory. </summary>
> > > >
> > > > >               public override System.String[] List()
> > > >
> > > > >               {
> > > >
> > > > > /* Changes by Corey Trager, Oct 2008, to workaround permission
> > > > restrictions at shared host */
> > > >
> > > > >                System.IO.DirectoryInfo dir = new
> > > > System.IO.DirectoryInfo(directory.FullName);
> > > >
> > > > >               System.IO.FileInfo[] files = dir.GetFiles();
> > > >
> > > > >                 string[] list = new string[files.Length];
> > > >
> > > > >                 for (int i = 0; i < files.Length; i++)
> > > >
> > > > >                 {
> > > >
> > > > >                     list[i] = files[i].Name;
> > > >
> > > > >                 }
> > > >
> > > > >                 return list;
> > > >
> > > > > /* end of changes */
> > > >
> > > > > //            System.String[] files =
> > > > SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> > > > IndexFileNameFilter.GetFilter());
> > > >
> > > > > //            for (int i = 0; i < files.Length; i++)
> > > >
> > > > > //            {
> > > >
> > > > > //                System.IO.FileInfo fi = new
> > > > System.IO.FileInfo(files[i]);
> > > >
> > > > > //                files[i] = fi.Name;
> > > >
> > > > > //            }
> > > >
> > > > > //                      return files;
> > > >
> > > > >               }
> > > >
> > > >
> > > >
> > > > --
> > > >
> > > > This message is automatically generated by JIRA.
> > > >
> > > > -
> > > >
> > > > You can reply to this email to add a comment to the issue online.
> > > >
> > > >
> > >
> >
>

RE: Luke-0.9.x cannot open index files

Posted by Digy <di...@gmail.com>.

https://issues.apache.org/jira/browse/LUCENE-1623

DIGY

-----Original Message-----
From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com] 
Sent: Monday, April 27, 2009 5:58 PM
To: lucene-net-dev@incubator.apache.org
Subject: RE: Luke-0.9.x cannot open index files

Floyd,

Here is a simple Java command-line application that displays the content of an index.  It successfully opens your "documents" index using Java Lucene version 2.3.2 (the closest version to current Lucene.NET).

That this program works properly with your index, but Luke does not, shows that the problem is within Luke.  You can provide this program with your index and bug report to the people supporting Luke.

-----------------------------------------------------------------
package app1;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;

/**
 * Command-line utility that dumps the content of a Lucene index to standard output:
 * index-level statistics, every non-deleted document with its fields, and the
 * complete term dictionary with document frequencies.
 *
 * Usage: <code>java app1.Main &lt;index-folder&gt;</code>
 */
public class Main
{
    public Main() { }

    /**
     * Opens the index named by the first argument and prints its content.
     * Any failure while opening or reading the index is reported on standard
     * output rather than propagated.
     *
     * @param args command-line arguments; <code>args[0]</code> is the index folder.
     *             When no argument is supplied the program does nothing.
     */
    public static void main(String[] args)
    {
        if (args.length == 0)
        {
            return;
        }

        System.out.println("Index Folder: " + args[0]);

        IndexReader ird = null;
        try
        {
            ird = IndexReader.open( args[0] );
            System.out.println("Index Version: " + ird.getVersion());
            System.out.println("    isCurrent: " + ird.isCurrent() );
            System.out.println("  isOptimized: " + ird.isOptimized());
            System.out.println("     num Docs: " + ird.numDocs());
            System.out.println("     max Docs: " + ird.maxDoc());

            // maxDoc() is one greater than the largest document number, so the
            // loop also visits the slots of deleted documents.
            int numDOCs = ird.maxDoc();
            for (int ii = 0; ii < numDOCs; ++ii)
            {
                boolean deleted = ird.isDeleted(ii);

                System.out.println("------------------------------------------");
                System.out.println("Doc: " + ii );
                System.out.println("  Deleted: " + deleted);

                // The original skips stored-field retrieval for deleted documents.
                if (deleted)
                {
                    continue;
                }

                Document dx = ird.document(ii);
                java.util.List fldList = dx.getFields();
                int numFields = fldList.size();
                for (int nn = 0; nn < numFields; ++nn)
                {
                    Field xf = (Field) fldList.get(nn);
                    String zname = xf.name();
                    String zvalue = xf.stringValue();

                    // "I"/"N" = indexed / not indexed, "S"/"U" = stored / unstored.
                    String scode = (xf.isIndexed() ? "I":"N") + "," + ( xf.isStored() ? "S":"U" );

                    System.out.println("  Field: " + zname + ", " + scode + ", Value: "
                                       + (zvalue != null ? zvalue : "(none)"));
                }
            }

            System.out.println("------------------------------------------");
            System.out.println("Terms");
            System.out.println("------------------------------------------");

            TermEnum enuTRM = ird.terms();
            while( enuTRM.next() )
            {
                int freq = enuTRM.docFreq();
                Term oT = enuTRM.term();
                if (oT != null)
                {
                    System.out.println("  " + freq + ", " + oT.field() + ", \"" + oT.text() + "\"");
                }
            }
        }
        catch(Exception ex)
        {
            System.out.println("Open failed, exception: " + ex.toString() );
        }
        finally
        {
            // Release the reader even when dumping documents or terms failed;
            // previously close() was only reached on the success path.
            if (ird != null)
            {
                try
                {
                    ird.close();
                }
                catch(Exception ex)
                {
                    System.out.println("Open failed, exception: " + ex.toString() );
                }
            }
        }
    }
}

-----------------------------------------------------------------


-- Neal


-----Original Message-----
From: Floyd Wu [mailto:floyd.wu@gmail.com]
Sent: Monday, April 27, 2009 3:14 AM
To: lucene-net-dev@incubator.apache.org
Subject: Re: Luke-0.9.x cannot open index files

Thanks DIGY
So will you report this to Luke author or Java Lucene group?
I've already reported this situation to the Luke author, and he (or she) doesn't
think this is a bug in Luke.

floyd

2009/4/27 digy digy <di...@gmail.com>

> It is not a bug of Lucene.Net and as my sample code shows, Lucene.Net works
> well with chinese field names.
> I think, it is a bug in Luke.
>
> DIGY
>
>
>
> On Mon, Apr 27, 2009 at 8:49 AM, Floyd Wu <fl...@gmail.com> wrote:
>
> > Hi Digy,
> > Thanks for your help.
> > But if chinese field name is the problem, will it be "fix" in Lucene.Net
> or
> > how can I avoid this problem.
> >
> > Chinese field name is by design and probably not avoidable.
> >
> > Floyd
> >
> > 2009/4/25 Digy <di...@gmail.com>
> >
> > > I think, I found the bug. Here is the dump of the original index:
> > >
> > >
> > >
> > > NUMDOCS: 3
> > >
> > > MAXDOCS: 7
> > >
> > > DELETED(0): True
> > >
> > > DELETED(1): True
> > >
> > > DELETED(2): False
> > >
> > > DELETED(3): True
> > >
> > > DELETED(4): True
> > >
> > > DELETED(5): False
> > >
> > > DELETED(6): False
> > >
> > > TERM(0): _l_activationdatetime:552877632000000000
> > >
> > > TERM(1): _l_author:admin
> > >
> > > TERM(2): _l_bookmarkcount:0
> > >
> > > TERM(3): _l_clix:0
> > >
> > > TERM(4): _l_clix:1
> > >
> > > TERM(5): _l_creationdatetime:633427319866778624
> > >
> > > TERM(6): _l_creationdatetime:633427324812559872
> > >
> > > TERM(7): _l_creationdatetime:633760609388437504
> > >
> > > TERM(8): _l_deactivationdatetime:155377824000000000
> > >
> > > TERM(9): _l_deactivationdatetime:155378687999969792
> > >
> > > TERM(10): _l_document_class:1
> > >
> > > TERM(11): _l_document_class:98305
> > >
> > > TERM(12): _l_folder:163841
> > >
> > > TERM(13): _l_folder:163843
> > >
> > > TERM(14): _l_hidden:aaa
> > >
> > > TERM(15): _l_last_modified_datetime:633427319866778624
> > >
> > > TERM(16): _l_last_modified_datetime:633427324812559872
> > >
> > > TERM(17): _l_last_modified_datetime:633760609388437504
> > >
> > > TERM(18): _l_meta:abc
> > >
> > > TERM(19): _l_meta:abc.ppt
> > >
> > > TERM(20): _l_meta:ddx
> > >
> > > TERM(21): _l_meta:doc
> > >
> > > TERM(22): _l_meta:xyz
> > >
> > > TERM(23): _l_meta:名
> > >
> > > TERM(24): _l_meta:問
> > >
> > > TERM(25): _l_meta:有
> > >
> > > TERM(26): _l_meta:檔
> > >
> > > TERM(27): _l_meta:測
> > >
> > > TERM(28): _l_meta:看
> > >
> > > TERM(29): _l_meta:試
> > >
> > > TERM(30): _l_meta:還
> > >
> > > TERM(31): _l_meta:題
> > >
> > > TERM(32): _l_parentdocument:196609
> > >
> > > TERM(33): _l_parentdocument:327681
> > >
> > > TERM(34): _l_parentdocument:557057
> > >
> > > TERM(35): _l_ratingavg:0
> > >
> > > TERM(36): _l_ratingmedian:0
> > >
> > > TERM(37): _l_ratingstdev:0
> > >
> > > TERM(38): _l_ratingsum:0
> > >
> > > TERM(39): _l_read_permission:admin
> > >
> > > TERM(40): _l_rootdocument:196609
> > >
> > > TERM(41): _l_rootdocument:327681
> > >
> > > TERM(42): _l_rootdocument:557057
> > >
> > > TERM(43): _l_state:0
> > >
> > > TERM(44): _l_state:2
> > >
> > > TERM(45): _l_summary:2123456789
> > >
> > > TERM(46): _l_summary:abc
> > >
> > > TERM(47): _l_summary:abc.ppt
> > >
> > > TERM(48): _l_summary:ddx
> > >
> > > TERM(49): _l_summary:doc
> > >
> > > TERM(50): _l_summary:xyz
> > >
> > > TERM(51): _l_summary:有
> > >
> > > TERM(52): _l_summary:還
> > >
> > > TERM(53): _l_title:123
> > >
> > > TERM(54): _l_title:class
> > >
> > > TERM(55): _l_title:default
> > >
> > > TERM(56): _l_title:document
> > >
> > > TERM(57): _l_title:名
> > >
> > > TERM(58): _l_title:問
> > >
> > > TERM(59): _l_title:檔
> > >
> > > TERM(60): _l_title:測
> > >
> > > TERM(61): _l_title:看
> > >
> > > TERM(62): _l_title:試
> > >
> > > TERM(63): _l_title:題
> > >
> > > TERM(64): _l_unique_key:196609
> > >
> > > TERM(65): _l_unique_key:327681
> > >
> > > TERM(66): _l_unique_key:557057
> > >
> > > TERM(67): _l_version:1
> > >
> > > TERM(68): 作者:123
> > >
> > > TERM(69): 摘要:2123456789
> > >
> > > TERM(70): 摘要:abc
> > >
> > > TERM(71): 摘要:abc.ppt
> > >
> > > TERM(72): 摘要:ddx
> > >
> > > TERM(73): 摘要:doc
> > >
> > > TERM(74): 摘要:xyz
> > >
> > > TERM(75): 摘要:有
> > >
> > > TERM(76): 摘要:還
> > >
> > > TERM(77): 標題:123
> > >
> > > TERM(78): 標題:class
> > >
> > > TERM(79): 標題:default
> > >
> > > TERM(80): 標題:document
> > >
> > > TERM(81): 標題:名
> > >
> > > TERM(82): 標題:問
> > >
> > > TERM(83): 標題:檔
> > >
> > > TERM(84): 標題:測
> > >
> > > TERM(85): 標題:看
> > >
> > > TERM(86): 標題:試
> > >
> > > TERM(87): 標題:題
> > >
> > > TERM(88): 關鍵詞:123
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > And here is a sample code: read docs from original index and then write
> > to
> > > an new one.
> > >
> > >
> > >
> > > void CreateNewIndex(string OrgIndex)
> > >
> > >        {
> > >
> > >            IndexReader reader = IndexReader.Open(OrgIndex);
> > >
> > >            IndexWriter writer = new IndexWriter("Floyd", new
> > > Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
> > >
> > >
> > >
> > >            for (int i = 0; i < reader.MaxDoc(); i++)
> > >
> > >            {
> > >
> > >                if (reader.IsDeleted(i) == true) continue;
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document orgDoc =
> >  reader.Document(i);
> > >
> > >                System.Collections.IList fields = orgDoc.GetFields();
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document newDoc = new Document();
> > >
> > >                foreach (Lucene.Net.Documents.Field field in fields)
> > >
> > >                {
> > >
> > >                    Lucene.Net.Documents.Field newField = new Field(
> > >
> > >                        System.Convert.ToBase64String(
> > > System.Text.Encoding.UTF8.GetBytes(field.Name())), //ç
> > >
> > >                        //field.Name(), //ç
> > >
> > >                        field.StringValue(),
> > >
> > >                        field.IsStored() ?
> > > Lucene.Net.Documents.Field.Store.YES :
> > > Lucene.Net.Documents.Field.Store.NO,
> > >
> > >                        field.IsTokenized() ?
> > > Lucene.Net.Documents.Field.Index.TOKENIZED :
> > > Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
> > >
> > >
> > >
> > >                    newDoc.Add(newField);
> > >
> > >                }
> > >
> > >                writer.AddDocument(newDoc);
> > >
> > >            }
> > >
> > >
> > >
> > >            writer.Close();
> > >
> > >            reader.Close();
> > >
> > >        }
> > >
> > >
> > >
> > >
> > >
> > > If some field names are chinese, then Luke returns “read past EOF”. But
> > if
> > > those field names are replaced with non-chinese names, then it works.
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > -----Original Message-----
> > > From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> > > Sent: Friday, April 24, 2009 8:53 PM
> > > To: lucene-net-dev@incubator.apache.org
> > >  Subject: Luke-0.9.x cannot open index files
> > >
> > >
> > >
> > >
> > >
> > > Digy,
> > >
> > >
> > >
> > > Some additional information from the discussion on the lucene-net-user
> > list
> > > with Floyd Wu.
> > >
> > >
> > >
> > >
> > >
> > > I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
> > >
> > >
> > >
> > > The Java equivalents of the two small test applications I use to
> inspect
> > an
> > > index and compact it, function identically to the .NET versions (that
> > were
> > > built with VS2005 and Lucene.NET 2.3.1).
> > >
> > >
> > >
> > > That Luke cannot open the index appears to be a problem within Luke.
> > >
> > > Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does
> > not
> > > flag the index as corrupt; and both the Java and .NET versions report
> the
> > > same index content before and after the optimize operation.
> > >
> > >
> > >
> > >
> > >
> > > -- Neal
> > >
> > >
> > >
> > > -----Original Message-----
> > >
> > > From: Digy (JIRA) [mailto:jira@apache.org]
> > >
> > > Sent: Wednesday, April 08, 2009 6:28 PM
> > >
> > > To: lucene-net-dev@incubator.apache.org
> > >
> > > Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
> > > compatible with ASP.NET Medium Trust Level, in hosting
> > > environments (like GoDaddy...)
> > >
> > >
> > >
> > >
> > >
> > >    [
> > >
> >
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335
> > ]
> > >
> > >
> > >
> > > Digy commented on LUCENENET-169:
> > >
> > > --------------------------------
> > >
> > >
> > >
> > > Although you can overcome all of them somehow;
> > >
> > >
> > >
> > > * controlling the lifetime of IndexWriter/IndexReader in a natural
> > > manner,
> > >
> > > * reopening the IndexReader only when needed using (for ex)
> > > FileSystemWatcher,
> > >
> > > * providing a separation between data & business layer,
> > >
> > > * providing other apps an interface that may want to write its own user
> > > interface,
> > >
> > > * accessing a single search service from different web apps/from load
> > > balanced web servers
> > >
> > > * controlling the lifetime of searching/indexing code (without being
> > > affected by the automatic restart of the IIS processes when some
> > > memory limit is exceeded (for ex.) )
> > >
> > > * Ability to access some system resources that can be restricted by IIS
> > >
> > > etc.
> > >
> > > make me think a separate search service is a better idea. But in the
> > > end, it is your design decision.
> > >
> > > (Think, A WebApp+Solr in Java world)
> > >
> > >
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > > > Changes to make Lucene.NET compatible with ASP.NET
> > > > Medium Trust Level, in hosting environments (like GoDaddy...)
> > >
> > > >
> > >
> >
> -----------------------------------------------------------------------------------------------------------------
> > >
> > > >
> > >
> > > >                 Key: LUCENENET-169
> > >
> > > >                 URL:
> > https://issues.apache.org/jira/browse/LUCENENET-169
> > >
> > > >             Project: Lucene.Net
> > >
> > > >          Issue Type: Improvement
> > >
> > > >         Environment: ASP.NET
> > >
> > > >            Reporter: Corey Trager
> > >
> > > >         Attachments: FSDirectory.patch
> > >
> > > >
> > >
> > > >
> > >
> > > > Microsoft has a configuration file for shared hosting for what they
> > call
> > > "Medium Trust".   There are a couple places in FSDirectory.cs  that
> > violate
> > > the restrictions of Medium Trust, but I coded workarounds, shown below.
> > >
> > > > #1)
> > >
> > > > // Corey Trager, Oct 2008: Commented call to GetTempPath to
> workaround
> > > permission restrictions at shared host.
> > >
> > > > // LOCK_DIR isn't used anyway.
> > >
> > > > public static readonly System.String LOCK_DIR = null; //
> > > SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> > > System.IO.Path.GetTempPath());
> > >
> > > > #2)
> > >
> > > >               /// <summary>Returns an array of strings, one for each
> > > Lucene index file in the directory. </summary>
> > >
> > > >               public override System.String[] List()
> > >
> > > >               {
> > >
> > > > /* Changes by Corey Trager, Oct 2008, to workaround permission
> > > restrictions at shared host */
> > >
> > > >                System.IO.DirectoryInfo dir = new
> > > System.IO.DirectoryInfo(directory.FullName);
> > >
> > > >               System.IO.FileInfo[] files = dir.GetFiles();
> > >
> > > >                 string[] list = new string[files.Length];
> > >
> > > >                 for (int i = 0; i < files.Length; i++)
> > >
> > > >                 {
> > >
> > > >                     list[i] = files[i].Name;
> > >
> > > >                 }
> > >
> > > >                 return list;
> > >
> > > > /* end of changes */
> > >
> > > > //            System.String[] files =
> > > SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> > > IndexFileNameFilter.GetFilter());
> > >
> > > > //            for (int i = 0; i < files.Length; i++)
> > >
> > > > //            {
> > >
> > > > //                System.IO.FileInfo fi = new
> > > System.IO.FileInfo(files[i]);
> > >
> > > > //                files[i] = fi.Name;
> > >
> > > > //            }
> > >
> > > > //                      return files;
> > >
> > > >               }
> > >
> > >
> > >
> > > --
> > >
> > > This message is automatically generated by JIRA.
> > >
> > > -
> > >
> > > You can reply to this email to add a comment to the issue online.
> > >
> > >
> >
>

RE: Luke-0.9.x cannot open index files

Posted by Digy <di...@gmail.com>.

Please see the thread "Read past EOF"

http://mail-archives.apache.org/mod_mbox/lucene-java-user/200904.mbox/%3C9ac0c6aa0904280715m6f762016id7c2de7acb627d8@mail.gmail.com%3E

DIGY

-----Original Message-----
From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com] 
Sent: Monday, April 27, 2009 5:58 PM
To: lucene-net-dev@incubator.apache.org
Subject: RE: Luke-0.9.x cannot open index files

Floyd,

Here is a simple Java command-line application that displays the content of an index.  It successfully opens your "documents" index using Java Lucene version 2.3.2 (the closest version to current Lucene.NET).

That this program works properly with your index, but Luke does not, shows that the problem is within Luke.  You can provide this program with your index and bug report to the people supporting Luke.

-----------------------------------------------------------------
package app1;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;

/**
 * Command-line utility that dumps the content of a Lucene index to standard
 * output: index-level information, every document with its fields, and the
 * complete term dictionary.
 *
 * Usage: {@code java app1.Main <index folder>}
 */
public class Main
{
    public Main() { }

    /**
     * Opens the index found in {@code args[0]} and prints its content.
     *
     * Failures are reported on standard output instead of being propagated.
     * The reader and the term enumeration are always released, even when
     * reading the index fails half-way through.
     *
     * @param args command-line arguments; {@code args[0]} is the index folder
     */
    public static void main(String[] args)
    {
        if (args.length == 0)
        {
            // Nothing to inspect: say so instead of exiting silently.
            System.out.println("Usage: app1.Main <index folder>");
            return;
        }

        System.out.println("Index Folder: " + args[0]);

        IndexReader ird = null;
        TermEnum enuTRM = null;
        try
        {
            ird = IndexReader.open( args[0] );
            System.out.println("Index Version: " + ird.getVersion());
            System.out.println("    isCurrent: " + ird.isCurrent() );
            System.out.println("  isOptimized: " + ird.isOptimized());
            System.out.println("     num Docs: " + ird.numDocs());
            System.out.println("     max Docs: " + ird.maxDoc());

            // maxDoc() is one greater than the largest document number, so
            // the loop also visits deleted slots; those are only flagged.
            int numDOCs = ird.maxDoc();
            for (int ii=0; ii<numDOCs; ++ii)
            {
                boolean deleted = ird.isDeleted(ii);

                System.out.println("------------------------------------------");
                System.out.println("Doc: " + ii );
                System.out.println("  Deleted: " + deleted);
                if ( deleted )
                {
                    // A deleted document cannot be loaded.
                    continue;
                }

                Document dx = ird.document(ii);
                java.util.List fldList = dx.getFields();
                int numFields = fldList.size();
                for (int nn=0; nn<numFields; ++nn)
                {
                    Field xf = (Field) fldList.get(nn);
                    String zname = xf.name();
                    String zvalue = xf.stringValue();

                    // I/N = indexed / not indexed, S/U = stored / unstored
                    String scode = (xf.isIndexed() ? "I":"N") + "," + ( xf.isStored() ? "S":"U" );

                    System.out.println("  Field: " + zname + ", " + scode + ", Value: "
                                       + (zvalue != null ? zvalue : "(none)"));
                }
            }

            System.out.println("------------------------------------------");
            System.out.println("Terms");
            System.out.println("------------------------------------------");

            enuTRM = ird.terms();
            while( enuTRM.next() )
            {
                int freq = enuTRM.docFreq();
                Term oT = enuTRM.term();
                if (oT != null)
                {
                    System.out.println("  " + freq + ", " + oT.field() + ", \"" + oT.text() + "\"");
                }
            }
        }
        catch(Exception ex)
        {
            // Distinguish a failed open from a failure while reading an
            // index that was opened successfully.
            String what = (ird == null) ? "Open failed" : "Read failed";
            System.out.println(what + ", exception: " + ex.toString() );
        }
        finally
        {
            // Release resources on every path; a failure while closing must
            // not hide the original error nor prevent the other close.
            if (enuTRM != null)
            {
                try
                {
                    enuTRM.close();
                }
                catch(Exception ex)
                {
                    System.out.println("Closing term enumeration failed, exception: " + ex.toString() );
                }
            }
            if (ird != null)
            {
                try
                {
                    ird.close();
                }
                catch(Exception ex)
                {
                    System.out.println("Closing index failed, exception: " + ex.toString() );
                }
            }
        }
    }
}

-----------------------------------------------------------------


-- Neal


-----Original Message-----
From: Floyd Wu [mailto:floyd.wu@gmail.com]
Sent: Monday, April 27, 2009 3:14 AM
To: lucene-net-dev@incubator.apache.org
Subject: Re: Luke-0.9.x cannot open index files

Thanks DIGY
So will you report this to the Luke author or the Java Lucene group?
I have already reported this situation to the Luke author, and he (or she)
doesn't think this is a bug in Luke.

floyd

2009/4/27 digy digy <di...@gmail.com>

> It is not a bug of Lucene.Net and as my sample code shows, Lucene.Net works
> well with chinese field names.
> I think, it is a bug in Luke.
>
> DIGY
>
>
>
> On Mon, Apr 27, 2009 at 8:49 AM, Floyd Wu <fl...@gmail.com> wrote:
>
> > Hi Digy,
> > Thanks for your help.
> > But if chinese field name is the problem, will it be "fix" in Lucene.Net
> or
> > how can I avoid this problem.
> >
> > Chinese field name is by design and probably not avoidable.
> >
> > Floyd
> >
> > 2009/4/25 Digy <di...@gmail.com>
> >
> > > I think, I found the bug. Here is the dump of the original index:
> > >
> > >
> > >
> > > NUMDOCS: 3
> > >
> > > MAXDOCS: 7
> > >
> > > DELETED(0): True
> > >
> > > DELETED(1): True
> > >
> > > DELETED(2): False
> > >
> > > DELETED(3): True
> > >
> > > DELETED(4): True
> > >
> > > DELETED(5): False
> > >
> > > DELETED(6): False
> > >
> > > TERM(0): _l_activationdatetime:552877632000000000
> > >
> > > TERM(1): _l_author:admin
> > >
> > > TERM(2): _l_bookmarkcount:0
> > >
> > > TERM(3): _l_clix:0
> > >
> > > TERM(4): _l_clix:1
> > >
> > > TERM(5): _l_creationdatetime:633427319866778624
> > >
> > > TERM(6): _l_creationdatetime:633427324812559872
> > >
> > > TERM(7): _l_creationdatetime:633760609388437504
> > >
> > > TERM(8): _l_deactivationdatetime:155377824000000000
> > >
> > > TERM(9): _l_deactivationdatetime:155378687999969792
> > >
> > > TERM(10): _l_document_class:1
> > >
> > > TERM(11): _l_document_class:98305
> > >
> > > TERM(12): _l_folder:163841
> > >
> > > TERM(13): _l_folder:163843
> > >
> > > TERM(14): _l_hidden:aaa
> > >
> > > TERM(15): _l_last_modified_datetime:633427319866778624
> > >
> > > TERM(16): _l_last_modified_datetime:633427324812559872
> > >
> > > TERM(17): _l_last_modified_datetime:633760609388437504
> > >
> > > TERM(18): _l_meta:abc
> > >
> > > TERM(19): _l_meta:abc.ppt
> > >
> > > TERM(20): _l_meta:ddx
> > >
> > > TERM(21): _l_meta:doc
> > >
> > > TERM(22): _l_meta:xyz
> > >
> > > TERM(23): _l_meta:名
> > >
> > > TERM(24): _l_meta:問
> > >
> > > TERM(25): _l_meta:有
> > >
> > > TERM(26): _l_meta:檔
> > >
> > > TERM(27): _l_meta:測
> > >
> > > TERM(28): _l_meta:看
> > >
> > > TERM(29): _l_meta:試
> > >
> > > TERM(30): _l_meta:還
> > >
> > > TERM(31): _l_meta:題
> > >
> > > TERM(32): _l_parentdocument:196609
> > >
> > > TERM(33): _l_parentdocument:327681
> > >
> > > TERM(34): _l_parentdocument:557057
> > >
> > > TERM(35): _l_ratingavg:0
> > >
> > > TERM(36): _l_ratingmedian:0
> > >
> > > TERM(37): _l_ratingstdev:0
> > >
> > > TERM(38): _l_ratingsum:0
> > >
> > > TERM(39): _l_read_permission:admin
> > >
> > > TERM(40): _l_rootdocument:196609
> > >
> > > TERM(41): _l_rootdocument:327681
> > >
> > > TERM(42): _l_rootdocument:557057
> > >
> > > TERM(43): _l_state:0
> > >
> > > TERM(44): _l_state:2
> > >
> > > TERM(45): _l_summary:2123456789
> > >
> > > TERM(46): _l_summary:abc
> > >
> > > TERM(47): _l_summary:abc.ppt
> > >
> > > TERM(48): _l_summary:ddx
> > >
> > > TERM(49): _l_summary:doc
> > >
> > > TERM(50): _l_summary:xyz
> > >
> > > TERM(51): _l_summary:有
> > >
> > > TERM(52): _l_summary:還
> > >
> > > TERM(53): _l_title:123
> > >
> > > TERM(54): _l_title:class
> > >
> > > TERM(55): _l_title:default
> > >
> > > TERM(56): _l_title:document
> > >
> > > TERM(57): _l_title:名
> > >
> > > TERM(58): _l_title:問
> > >
> > > TERM(59): _l_title:檔
> > >
> > > TERM(60): _l_title:測
> > >
> > > TERM(61): _l_title:看
> > >
> > > TERM(62): _l_title:試
> > >
> > > TERM(63): _l_title:題
> > >
> > > TERM(64): _l_unique_key:196609
> > >
> > > TERM(65): _l_unique_key:327681
> > >
> > > TERM(66): _l_unique_key:557057
> > >
> > > TERM(67): _l_version:1
> > >
> > > TERM(68): 作者:123
> > >
> > > TERM(69): 摘要:2123456789
> > >
> > > TERM(70): 摘要:abc
> > >
> > > TERM(71): 摘要:abc.ppt
> > >
> > > TERM(72): 摘要:ddx
> > >
> > > TERM(73): 摘要:doc
> > >
> > > TERM(74): 摘要:xyz
> > >
> > > TERM(75): 摘要:有
> > >
> > > TERM(76): 摘要:還
> > >
> > > TERM(77): 標題:123
> > >
> > > TERM(78): 標題:class
> > >
> > > TERM(79): 標題:default
> > >
> > > TERM(80): 標題:document
> > >
> > > TERM(81): 標題:名
> > >
> > > TERM(82): 標題:問
> > >
> > > TERM(83): 標題:檔
> > >
> > > TERM(84): 標題:測
> > >
> > > TERM(85): 標題:看
> > >
> > > TERM(86): 標題:試
> > >
> > > TERM(87): 標題:題
> > >
> > > TERM(88): 關鍵詞:123
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > And here is a sample code: read docs from original index and then write
> > to
> > > an new one.
> > >
> > >
> > >
> > > void CreateNewIndex(string OrgIndex)
> > >
> > >        {
> > >
> > >            IndexReader reader = IndexReader.Open(OrgIndex);
> > >
> > >            IndexWriter writer = new IndexWriter("Floyd", new
> > > Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
> > >
> > >
> > >
> > >            for (int i = 0; i < reader.MaxDoc(); i++)
> > >
> > >            {
> > >
> > >                if (reader.IsDeleted(i) == true) continue;
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document orgDoc =
> >  reader.Document(i);
> > >
> > >                System.Collections.IList fields = orgDoc.GetFields();
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document newDoc = new Document();
> > >
> > >                foreach (Lucene.Net.Documents.Field field in fields)
> > >
> > >                {
> > >
> > >                    Lucene.Net.Documents.Field newField = new Field(
> > >
> > >                        System.Convert.ToBase64String(
> > > System.Text.Encoding.UTF8.GetBytes(field.Name())), //<--
> > >
> > >                        //field.Name(), //<--
> > >
> > >                        field.StringValue(),
> > >
> > >                        field.IsStored() ?
> > > Lucene.Net.Documents.Field.Store.YES :
> > > Lucene.Net.Documents.Field.Store.NO,
> > >
> > >                        field.IsTokenized() ?
> > > Lucene.Net.Documents.Field.Index.TOKENIZED :
> > > Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
> > >
> > >
> > >
> > >                    newDoc.Add(newField);
> > >
> > >                }
> > >
> > >                writer.AddDocument(newDoc);
> > >
> > >            }
> > >
> > >
> > >
> > >            writer.Close();
> > >
> > >            reader.Close();
> > >
> > >        }
> > >
> > >
> > >
> > >
> > >
> > > If some field names are chinese, then Luke returns “read past EOF”. But
> > if
> > > those field names are replaced with non-chinese names, then it works.
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > -----Original Message-----
> > > From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> > > Sent: Friday, April 24, 2009 8:53 PM
> > > To: lucene-net-dev@incubator.apache.org
> > >  Subject: Luke-0.9.x cannot open index files
> > >
> > >
> > >
> > >
> > >
> > > Digy,
> > >
> > >
> > >
> > > Some additional information from the discussion on the lucene-net-user
> > list
> > > with Floyd Wu.
> > >
> > >
> > >
> > >
> > >
> > > I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
> > >
> > >
> > >
> > > The Java equivalents of the two small test applications I use to
> inspect
> > an
> > > index and compact it, function identically to the .NET versions (that
> > were
> > > built with VS2005 and Lucene.NET 2.3.1).
> > >
> > >
> > >
> > > That Luke cannot open the index appears to be a problem within Luke.
> > >
> > > Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does
> > not
> > > flag the index as corrupt; and both the Java and .NET versions report
> the
> > > same index content before and after the optimize operation.
> > >
> > >
> > >
> > >
> > >
> > > -- Neal
> > >
> > >
> > >
> > > -----Original Message-----
> > >
> > > From: Digy (JIRA) [mailto:jira@apache.org]
> > >
> > > Sent: Wednesday, April 08, 2009 6:28 PM
> > >
> > > To: lucene-net-dev@incubator.apache.org
> > >
> > > Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
>  > > compatible with ASP.NET <http://asp.net/> <http://asp.net/> Medium
> Trust Level, in hosting
> > > environments (like GoDaddy...)
> > >
> > >
> > >
> > >
> > >
> > >    [
> > >
> >
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335
> > ]
> > >
> > >
> > >
> > > Digy commented on LUCENENET-169:
> > >
> > > --------------------------------
> > >
> > >
> > >
> > > Although you can overcome all of them somehow;
> > >
> > >
> > >
> > > * controlling the lifetime of IndexWriter/IndexReader in a natural
> > > manner,
> > >
> > > * reopening the IndexReader only when needed using (for ex)
> > > FileSystemWatcher,
> > >
> > > * providing a separation between data & business layer,
> > >
> > > * providing other apps an interface that may want to write its own user
> > > interface,
> > >
> > > * accessing a single search service from different web apps/from load
> > > balanced web servers
> > >
> > > * controlling the lifetime of searching/indexing code (without being
> > > effected by the restart of the IIS processes automatically when some
> > memory
> > > limit is exceeded (for ex.) )
> > >
> > > * Ability to access some system resources that can be restricted by IIS
> > >
> > > etc.
> > >
> > > make me think a separate search service is a better idea. But in the end,
> > > it is your design decision.
> > >
> > > (Think, A WebApp+Solr in Java world)
> > >
> > >
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > > > Changes to make Lucene.NET compatible with ASP.NET <http://asp.net/><
> http://asp.net/>
> > Medium Trust Level, in hosting environments (like GoDaddy...)
> > >
> > > >
> > >
> >
> -----------------------------------------------------------------------------------------------------------------
> > >
> > > >
> > >
> > > >                 Key: LUCENENET-169
> > >
> > > >                 URL:
> > https://issues.apache.org/jira/browse/LUCENENET-169
> > >
> > > >             Project: Lucene.Net
> > >
> > > >          Issue Type: Improvement
> > >
> > > >         Environment: ASP.NET <http://asp.net/> <http://asp.net/>
>  > >
> > > >            Reporter: Corey Trager
> > >
> > > >         Attachments: FSDirectory.patch
> > >
> > > >
> > >
> > > >
> > >
> > > > Microsoft has a configuration file for shared hosting for what they
> > call
> > > "Medium Trust".   There are a couple places in FSDirectory.cs  that
> > violate
> > > the restrictions of Medium Trust, but I coded workarounds, shown below.
> > >
> > > > #1)
> > >
> > > > // Corey Trager, Oct 2008: Commented call to GetTempPath to
> workaround
> > > permission restrictions at shared host.
> > >
> > > > // LOCK_DIR isn't used anyway.
> > >
> > > > public static readonly System.String LOCK_DIR = null; //
> > > SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> > > System.IO.Path.GetTempPath());
> > >
> > > > #2)
> > >
> > > >               /// <summary>Returns an array of strings, one for each
> > > Lucene index file in the directory. </summary>
> > >
> > > >               public override System.String[] List()
> > >
> > > >               {
> > >
> > > > /* Changes by Corey Trager, Oct 2008, to workaround permission
> > > restrictions at shared host */
> > >
> > > >                System.IO.DirectoryInfo dir = new
> > > System.IO.DirectoryInfo(directory.FullName);
> > >
> > > >               System.IO.FileInfo[] files = dir.GetFiles();
> > >
> > > >                 string[] list = new string[files.Length];
> > >
> > > >                 for (int i = 0; i < files.Length; i++)
> > >
> > > >                 {
> > >
> > > >                     list[i] = files[i].Name;
> > >
> > > >                 }
> > >
> > > >                 return list;
> > >
> > > > /* end of changes */
> > >
> > > > //            System.String[] files =
> > > SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> > > IndexFileNameFilter.GetFilter());
> > >
> > > > //            for (int i = 0; i < files.Length; i++)
> > >
> > > > //            {
> > >
> > > > //                System.IO.FileInfo fi = new
> > > System.IO.FileInfo(files[i]);
> > >
> > > > //                files[i] = fi.Name;
> > >
> > > > //            }
> > >
> > > > //                      return files;
> > >
> > > >               }
> > >
> > >
> > >
> > > --
> > >
> > > This message is automatically generated by JIRA.
> > >
> > > -
> > >
> > > You can reply to this email to add a comment to the issue online.
> > >
> > >
> >
>

RE: Luke-0.9.x cannot open index files

Posted by "Granroth, Neal V." <ne...@thermofisher.com>.

Floyd,

Here is a simple Java command-line application that displays the content of an index.  It successfully opens your "documents" index using Java Lucene version 2.3.2 (the closest version to current Lucene.NET).

That this program works properly with your index, but Luke does not, shows that the problem is within Luke.  You can provide this program with your index and bug report to the people supporting Luke.

-----------------------------------------------------------------
package app1;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;

/**
 * Command-line utility that dumps the content of a Lucene index to standard
 * output: index-level information, every document with its fields, and the
 * complete term dictionary.
 *
 * Usage: {@code java app1.Main <index folder>}
 */
public class Main
{
    public Main() { }

    /**
     * Opens the index found in {@code args[0]} and prints its content.
     *
     * Failures are reported on standard output instead of being propagated.
     * The reader and the term enumeration are always released, even when
     * reading the index fails half-way through.
     *
     * @param args command-line arguments; {@code args[0]} is the index folder
     */
    public static void main(String[] args)
    {
        if (args.length == 0)
        {
            // Nothing to inspect: say so instead of exiting silently.
            System.out.println("Usage: app1.Main <index folder>");
            return;
        }

        System.out.println("Index Folder: " + args[0]);

        IndexReader ird = null;
        TermEnum enuTRM = null;
        try
        {
            ird = IndexReader.open( args[0] );
            System.out.println("Index Version: " + ird.getVersion());
            System.out.println("    isCurrent: " + ird.isCurrent() );
            System.out.println("  isOptimized: " + ird.isOptimized());
            System.out.println("     num Docs: " + ird.numDocs());
            System.out.println("     max Docs: " + ird.maxDoc());

            // maxDoc() is one greater than the largest document number, so
            // the loop also visits deleted slots; those are only flagged.
            int numDOCs = ird.maxDoc();
            for (int ii=0; ii<numDOCs; ++ii)
            {
                boolean deleted = ird.isDeleted(ii);

                System.out.println("------------------------------------------");
                System.out.println("Doc: " + ii );
                System.out.println("  Deleted: " + deleted);
                if ( deleted )
                {
                    // A deleted document cannot be loaded.
                    continue;
                }

                Document dx = ird.document(ii);
                java.util.List fldList = dx.getFields();
                int numFields = fldList.size();
                for (int nn=0; nn<numFields; ++nn)
                {
                    Field xf = (Field) fldList.get(nn);
                    String zname = xf.name();
                    String zvalue = xf.stringValue();

                    // I/N = indexed / not indexed, S/U = stored / unstored
                    String scode = (xf.isIndexed() ? "I":"N") + "," + ( xf.isStored() ? "S":"U" );

                    System.out.println("  Field: " + zname + ", " + scode + ", Value: "
                                       + (zvalue != null ? zvalue : "(none)"));
                }
            }

            System.out.println("------------------------------------------");
            System.out.println("Terms");
            System.out.println("------------------------------------------");

            enuTRM = ird.terms();
            while( enuTRM.next() )
            {
                int freq = enuTRM.docFreq();
                Term oT = enuTRM.term();
                if (oT != null)
                {
                    System.out.println("  " + freq + ", " + oT.field() + ", \"" + oT.text() + "\"");
                }
            }
        }
        catch(Exception ex)
        {
            // Distinguish a failed open from a failure while reading an
            // index that was opened successfully.
            String what = (ird == null) ? "Open failed" : "Read failed";
            System.out.println(what + ", exception: " + ex.toString() );
        }
        finally
        {
            // Release resources on every path; a failure while closing must
            // not hide the original error nor prevent the other close.
            if (enuTRM != null)
            {
                try
                {
                    enuTRM.close();
                }
                catch(Exception ex)
                {
                    System.out.println("Closing term enumeration failed, exception: " + ex.toString() );
                }
            }
            if (ird != null)
            {
                try
                {
                    ird.close();
                }
                catch(Exception ex)
                {
                    System.out.println("Closing index failed, exception: " + ex.toString() );
                }
            }
        }
    }
}

-----------------------------------------------------------------


-- Neal


-----Original Message-----
From: Floyd Wu [mailto:floyd.wu@gmail.com]
Sent: Monday, April 27, 2009 3:14 AM
To: lucene-net-dev@incubator.apache.org
Subject: Re: Luke-0.9.x cannot open index files

Thanks DIGY
So will you report this to the Luke author or the Java Lucene group?
I have already reported this situation to the Luke author, and he (or she)
doesn't think this is a bug in Luke.

floyd

2009/4/27 digy digy <di...@gmail.com>

> It is not a bug of Lucene.Net and as my sample code shows, Lucene.Net works
> well with chinese field names.
> I think, it is a bug in Luke.
>
> DIGY
>
>
>
> On Mon, Apr 27, 2009 at 8:49 AM, Floyd Wu <fl...@gmail.com> wrote:
>
> > Hi Digy,
> > Thanks for your help.
> > But if chinese field name is the problem, will it be "fix" in Lucene.Net
> or
> > how can I avoid this problem.
> >
> > Chinese field name is by design and probably not avoidable.
> >
> > Floyd
> >
> > 2009/4/25 Digy <di...@gmail.com>
> >
> > > I think, I found the bug. Here is the dump of the original index:
> > >
> > >
> > >
> > > NUMDOCS: 3
> > >
> > > MAXDOCS: 7
> > >
> > > DELETED(0): True
> > >
> > > DELETED(1): True
> > >
> > > DELETED(2): False
> > >
> > > DELETED(3): True
> > >
> > > DELETED(4): True
> > >
> > > DELETED(5): False
> > >
> > > DELETED(6): False
> > >
> > > TERM(0): _l_activationdatetime:552877632000000000
> > >
> > > TERM(1): _l_author:admin
> > >
> > > TERM(2): _l_bookmarkcount:0
> > >
> > > TERM(3): _l_clix:0
> > >
> > > TERM(4): _l_clix:1
> > >
> > > TERM(5): _l_creationdatetime:633427319866778624
> > >
> > > TERM(6): _l_creationdatetime:633427324812559872
> > >
> > > TERM(7): _l_creationdatetime:633760609388437504
> > >
> > > TERM(8): _l_deactivationdatetime:155377824000000000
> > >
> > > TERM(9): _l_deactivationdatetime:155378687999969792
> > >
> > > TERM(10): _l_document_class:1
> > >
> > > TERM(11): _l_document_class:98305
> > >
> > > TERM(12): _l_folder:163841
> > >
> > > TERM(13): _l_folder:163843
> > >
> > > TERM(14): _l_hidden:aaa
> > >
> > > TERM(15): _l_last_modified_datetime:633427319866778624
> > >
> > > TERM(16): _l_last_modified_datetime:633427324812559872
> > >
> > > TERM(17): _l_last_modified_datetime:633760609388437504
> > >
> > > TERM(18): _l_meta:abc
> > >
> > > TERM(19): _l_meta:abc.ppt
> > >
> > > TERM(20): _l_meta:ddx
> > >
> > > TERM(21): _l_meta:doc
> > >
> > > TERM(22): _l_meta:xyz
> > >
> > > TERM(23): _l_meta:名
> > >
> > > TERM(24): _l_meta:問
> > >
> > > TERM(25): _l_meta:有
> > >
> > > TERM(26): _l_meta:檔
> > >
> > > TERM(27): _l_meta:測
> > >
> > > TERM(28): _l_meta:看
> > >
> > > TERM(29): _l_meta:試
> > >
> > > TERM(30): _l_meta:還
> > >
> > > TERM(31): _l_meta:題
> > >
> > > TERM(32): _l_parentdocument:196609
> > >
> > > TERM(33): _l_parentdocument:327681
> > >
> > > TERM(34): _l_parentdocument:557057
> > >
> > > TERM(35): _l_ratingavg:0
> > >
> > > TERM(36): _l_ratingmedian:0
> > >
> > > TERM(37): _l_ratingstdev:0
> > >
> > > TERM(38): _l_ratingsum:0
> > >
> > > TERM(39): _l_read_permission:admin
> > >
> > > TERM(40): _l_rootdocument:196609
> > >
> > > TERM(41): _l_rootdocument:327681
> > >
> > > TERM(42): _l_rootdocument:557057
> > >
> > > TERM(43): _l_state:0
> > >
> > > TERM(44): _l_state:2
> > >
> > > TERM(45): _l_summary:2123456789
> > >
> > > TERM(46): _l_summary:abc
> > >
> > > TERM(47): _l_summary:abc.ppt
> > >
> > > TERM(48): _l_summary:ddx
> > >
> > > TERM(49): _l_summary:doc
> > >
> > > TERM(50): _l_summary:xyz
> > >
> > > TERM(51): _l_summary:有
> > >
> > > TERM(52): _l_summary:還
> > >
> > > TERM(53): _l_title:123
> > >
> > > TERM(54): _l_title:class
> > >
> > > TERM(55): _l_title:default
> > >
> > > TERM(56): _l_title:document
> > >
> > > TERM(57): _l_title:名
> > >
> > > TERM(58): _l_title:問
> > >
> > > TERM(59): _l_title:檔
> > >
> > > TERM(60): _l_title:測
> > >
> > > TERM(61): _l_title:看
> > >
> > > TERM(62): _l_title:試
> > >
> > > TERM(63): _l_title:題
> > >
> > > TERM(64): _l_unique_key:196609
> > >
> > > TERM(65): _l_unique_key:327681
> > >
> > > TERM(66): _l_unique_key:557057
> > >
> > > TERM(67): _l_version:1
> > >
> > > TERM(68): 作者:123
> > >
> > > TERM(69): 摘要:2123456789
> > >
> > > TERM(70): 摘要:abc
> > >
> > > TERM(71): 摘要:abc.ppt
> > >
> > > TERM(72): 摘要:ddx
> > >
> > > TERM(73): 摘要:doc
> > >
> > > TERM(74): 摘要:xyz
> > >
> > > TERM(75): 摘要:有
> > >
> > > TERM(76): 摘要:還
> > >
> > > TERM(77): 標題:123
> > >
> > > TERM(78): 標題:class
> > >
> > > TERM(79): 標題:default
> > >
> > > TERM(80): 標題:document
> > >
> > > TERM(81): 標題:名
> > >
> > > TERM(82): 標題:問
> > >
> > > TERM(83): 標題:檔
> > >
> > > TERM(84): 標題:測
> > >
> > > TERM(85): 標題:看
> > >
> > > TERM(86): 標題:試
> > >
> > > TERM(87): 標題:題
> > >
> > > TERM(88): 關鍵詞:123
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > And here is a sample code: read docs from original index and then write
> > to
> > > an new one.
> > >
> > >
> > >
> > > void CreateNewIndex(string OrgIndex)
> > >
> > >        {
> > >
> > >            IndexReader reader = IndexReader.Open(OrgIndex);
> > >
> > >            IndexWriter writer = new IndexWriter("Floyd", new
> > > Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
> > >
> > >
> > >
> > >            for (int i = 0; i < reader.MaxDoc(); i++)
> > >
> > >            {
> > >
> > >                if (reader.IsDeleted(i) == true) continue;
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document orgDoc =
> >  reader.Document(i);
> > >
> > >                System.Collections.IList fields = orgDoc.GetFields();
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document newDoc = new Document();
> > >
> > >                foreach (Lucene.Net.Documents.Field field in fields)
> > >
> > >                {
> > >
> > >                    Lucene.Net.Documents.Field newField = new Field(
> > >
> > >                        System.Convert.ToBase64String(
> > > System.Text.Encoding.UTF8.GetBytes(field.Name())), //<--
> > >
> > >                        //field.Name(), //<--
> > >
> > >                        field.StringValue(),
> > >
> > >                        field.IsStored() ?
> > > Lucene.Net.Documents.Field.Store.YES :
> > > Lucene.Net.Documents.Field.Store.NO,
> > >
> > >                        field.IsTokenized() ?
> > > Lucene.Net.Documents.Field.Index.TOKENIZED :
> > > Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
> > >
> > >
> > >
> > >                    newDoc.Add(newField);
> > >
> > >                }
> > >
> > >                writer.AddDocument(newDoc);
> > >
> > >            }
> > >
> > >
> > >
> > >            writer.Close();
> > >
> > >            reader.Close();
> > >
> > >        }
> > >
> > >
> > >
> > >
> > >
> > > If some field names are chinese, then Luke returns “read past EOF”. But
> > if
> > > those field names are replaced with non-chinese names, then it works.
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > -----Original Message-----
> > > From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> > > Sent: Friday, April 24, 2009 8:53 PM
> > > To: lucene-net-dev@incubator.apache.org
> > >  Subject: Luke-0.9.x cannot open index files
> > >
> > >
> > >
> > >
> > >
> > > Digy,
> > >
> > >
> > >
> > > Some additional information from the discussion on the lucene-net-user
> > list
> > > with Floyd Wu.
> > >
> > >
> > >
> > >
> > >
> > > I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
> > >
> > >
> > >
> > > The Java equivalents of the two small test applications I use to
> inspect
> > an
> > > index and compact it, function identically to the .NET versions (that
> > were
> > > built with VS2005 and Lucene.NET 2.3.1).
> > >
> > >
> > >
> > > That Luke cannot open the index appears to be a problem within Luke.
> > >
> > > Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does
> > not
> > > flag the index as corrupt; and both the Java and .NET versions report
> the
> > > same index content before and after the optimize operation.
> > >
> > >
> > >
> > >
> > >
> > > -- Neal
> > >
> > >
> > >
> > > -----Original Message-----
> > >
> > > From: Digy (JIRA) [mailto:jira@apache.org]
> > >
> > > Sent: Wednesday, April 08, 2009 6:28 PM
> > >
> > > To: lucene-net-dev@incubator.apache.org
> > >
> > > Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
>  > > compatible with ASP.NET <http://asp.net/> <http://asp.net/> Medium
> Trust Level, in hosting
> > > environments (like GoDaddy...)
> > >
> > >
> > >
> > >
> > >
> > >    [
> > >
> >
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335
> > ]
> > >
> > >
> > >
> > > Digy commented on LUCENENET-169:
> > >
> > > --------------------------------
> > >
> > >
> > >
> > > Although you can overcome all of them somehow;
> > >
> > >
> > >
> > > * controlling the lifetime of IndexWriter/IndexReader in a natural
> > > manner,
> > >
> > > * reopening the IndexReader only when needed using (for ex)
> > > FileSystemWatcher,
> > >
> > > * providing a separation between data & business layer,
> > >
> > > * providing other apps an interface that may want to write its own user
> > > interface,
> > >
> > > * accessing a single search service from different web apps/from load
> > > balanced web servers
> > >
> > > * controlling the lifetime of searching/indexing code (without being
> > > effected by the restart of the IIS processes automatically when some
> > memory
> > > limit is exceeded (for ex.) )
> > >
> > > * Ability to access some system resources that can be restricted by IIS
> > >
> > > etc.
> > >
> > > make me think a separate search service is a better idea. But in the end,
> > > it is your design decision.
> > >
> > > (Think, A WebApp+Solr in Java world)
> > >
> > >
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > > > Changes to make Lucene.NET compatible with ASP.NET <http://asp.net/><
> http://asp.net/>
> > Medium Trust Level, in hosting environments (like GoDaddy...)
> > >
> > > >
> > >
> >
> -----------------------------------------------------------------------------------------------------------------
> > >
> > > >
> > >
> > > >                 Key: LUCENENET-169
> > >
> > > >                 URL:
> > https://issues.apache.org/jira/browse/LUCENENET-169
> > >
> > > >             Project: Lucene.Net
> > >
> > > >          Issue Type: Improvement
> > >
> > > >         Environment: ASP.NET <http://asp.net/> <http://asp.net/>
>  > >
> > > >            Reporter: Corey Trager
> > >
> > > >         Attachments: FSDirectory.patch
> > >
> > > >
> > >
> > > >
> > >
> > > > Microsoft has a configuration file for shared hosting for what they
> > call
> > > "Medium Trust".   There are a couple places in FSDirectory.cs  that
> > violate
> > > the restrictions of Medium Trust, but I coded workarounds, shown below.
> > >
> > > > #1)
> > >
> > > > // Corey Trager, Oct 2008: Commented call to GetTempPath to
> workaround
> > > permission restrictions at shared host.
> > >
> > > > // LOCK_DIR isn't used anyway.
> > >
> > > > public static readonly System.String LOCK_DIR = null; //
> > > SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> > > System.IO.Path.GetTempPath());
> > >
> > > > #2)
> > >
> > > >               /// <summary>Returns an array of strings, one for each
> > > Lucene index file in the directory. </summary>
> > >
> > > >               public override System.String[] List()
> > >
> > > >               {
> > >
> > > > /* Changes by Corey Trager, Oct 2008, to workaround permission
> > > restrictions at shared host */
> > >
> > > >                System.IO.DirectoryInfo dir = new
> > > System.IO.DirectoryInfo(directory.FullName);
> > >
> > > >               System.IO.FileInfo[] files = dir.GetFiles();
> > >
> > > >                 string[] list = new string[files.Length];
> > >
> > > >                 for (int i = 0; i < files.Length; i++)
> > >
> > > >                 {
> > >
> > > >                     list[i] = files[i].Name;
> > >
> > > >                 }
> > >
> > > >                 return list;
> > >
> > > > /* end of changes */
> > >
> > > > //            System.String[] files =
> > > SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> > > IndexFileNameFilter.GetFilter());
> > >
> > > > //            for (int i = 0; i < files.Length; i++)
> > >
> > > > //            {
> > >
> > > > //                System.IO.FileInfo fi = new
> > > System.IO.FileInfo(files[i]);
> > >
> > > > //                files[i] = fi.Name;
> > >
> > > > //            }
> > >
> > > > //                      return files;
> > >
> > > >               }
> > >
> > >
> > >
> > > --
> > >
> > > This message is automatically generated by JIRA.
> > >
> > > -
> > >
> > > You can reply to this email to add a comment to the issue online.
> > >
> > >
> >
>

Re: Luke-0.9.x cannot open index files

Posted by Floyd Wu <fl...@gmail.com>.

Thanks DIGY
So will you report this to the Luke author or the Java Lucene group?
I've already reported this situation to the Luke author, and he (or she)
doesn't think this is a bug in Luke.

floyd

2009/4/27 digy digy <di...@gmail.com>

> It is not a bug of Lucene.Net and as my sample code shows, Lucene.Net works
> well with chinese field names.
> I think, it is a bug in Luke.
>
> DIGY
>
>
>
> On Mon, Apr 27, 2009 at 8:49 AM, Floyd Wu <fl...@gmail.com> wrote:
>
> > Hi Digy,
> > Thanks for your help.
> > But if chinese field name is the problem, will it be "fix" in Lucene.Net
> or
> > how can I avoid this problem.
> >
> > Chinese field name is by design and probably not avoidable.
> >
> > Floyd
> >
> > 2009/4/25 Digy <di...@gmail.com>
> >
> > > I think, I found the bug. Here is the dump of the original index:
> > >
> > >
> > >
> > > NUMDOCS: 3
> > >
> > > MAXDOCS: 7
> > >
> > > DELETED(0): True
> > >
> > > DELETED(1): True
> > >
> > > DELETED(2): False
> > >
> > > DELETED(3): True
> > >
> > > DELETED(4): True
> > >
> > > DELETED(5): False
> > >
> > > DELETED(6): False
> > >
> > > TERM(0): _l_activationdatetime:552877632000000000
> > >
> > > TERM(1): _l_author:admin
> > >
> > > TERM(2): _l_bookmarkcount:0
> > >
> > > TERM(3): _l_clix:0
> > >
> > > TERM(4): _l_clix:1
> > >
> > > TERM(5): _l_creationdatetime:633427319866778624
> > >
> > > TERM(6): _l_creationdatetime:633427324812559872
> > >
> > > TERM(7): _l_creationdatetime:633760609388437504
> > >
> > > TERM(8): _l_deactivationdatetime:155377824000000000
> > >
> > > TERM(9): _l_deactivationdatetime:155378687999969792
> > >
> > > TERM(10): _l_document_class:1
> > >
> > > TERM(11): _l_document_class:98305
> > >
> > > TERM(12): _l_folder:163841
> > >
> > > TERM(13): _l_folder:163843
> > >
> > > TERM(14): _l_hidden:aaa
> > >
> > > TERM(15): _l_last_modified_datetime:633427319866778624
> > >
> > > TERM(16): _l_last_modified_datetime:633427324812559872
> > >
> > > TERM(17): _l_last_modified_datetime:633760609388437504
> > >
> > > TERM(18): _l_meta:abc
> > >
> > > TERM(19): _l_meta:abc.ppt
> > >
> > > TERM(20): _l_meta:ddx
> > >
> > > TERM(21): _l_meta:doc
> > >
> > > TERM(22): _l_meta:xyz
> > >
> > > TERM(23): _l_meta:名
> > >
> > > TERM(24): _l_meta:問
> > >
> > > TERM(25): _l_meta:有
> > >
> > > TERM(26): _l_meta:檔
> > >
> > > TERM(27): _l_meta:測
> > >
> > > TERM(28): _l_meta:看
> > >
> > > TERM(29): _l_meta:試
> > >
> > > TERM(30): _l_meta:還
> > >
> > > TERM(31): _l_meta:題
> > >
> > > TERM(32): _l_parentdocument:196609
> > >
> > > TERM(33): _l_parentdocument:327681
> > >
> > > TERM(34): _l_parentdocument:557057
> > >
> > > TERM(35): _l_ratingavg:0
> > >
> > > TERM(36): _l_ratingmedian:0
> > >
> > > TERM(37): _l_ratingstdev:0
> > >
> > > TERM(38): _l_ratingsum:0
> > >
> > > TERM(39): _l_read_permission:admin
> > >
> > > TERM(40): _l_rootdocument:196609
> > >
> > > TERM(41): _l_rootdocument:327681
> > >
> > > TERM(42): _l_rootdocument:557057
> > >
> > > TERM(43): _l_state:0
> > >
> > > TERM(44): _l_state:2
> > >
> > > TERM(45): _l_summary:2123456789
> > >
> > > TERM(46): _l_summary:abc
> > >
> > > TERM(47): _l_summary:abc.ppt
> > >
> > > TERM(48): _l_summary:ddx
> > >
> > > TERM(49): _l_summary:doc
> > >
> > > TERM(50): _l_summary:xyz
> > >
> > > TERM(51): _l_summary:有
> > >
> > > TERM(52): _l_summary:還
> > >
> > > TERM(53): _l_title:123
> > >
> > > TERM(54): _l_title:class
> > >
> > > TERM(55): _l_title:default
> > >
> > > TERM(56): _l_title:document
> > >
> > > TERM(57): _l_title:名
> > >
> > > TERM(58): _l_title:問
> > >
> > > TERM(59): _l_title:檔
> > >
> > > TERM(60): _l_title:測
> > >
> > > TERM(61): _l_title:看
> > >
> > > TERM(62): _l_title:試
> > >
> > > TERM(63): _l_title:題
> > >
> > > TERM(64): _l_unique_key:196609
> > >
> > > TERM(65): _l_unique_key:327681
> > >
> > > TERM(66): _l_unique_key:557057
> > >
> > > TERM(67): _l_version:1
> > >
> > > TERM(68): 作者:123
> > >
> > > TERM(69): 摘要:2123456789
> > >
> > > TERM(70): 摘要:abc
> > >
> > > TERM(71): 摘要:abc.ppt
> > >
> > > TERM(72): 摘要:ddx
> > >
> > > TERM(73): 摘要:doc
> > >
> > > TERM(74): 摘要:xyz
> > >
> > > TERM(75): 摘要:有
> > >
> > > TERM(76): 摘要:還
> > >
> > > TERM(77): 標題:123
> > >
> > > TERM(78): 標題:class
> > >
> > > TERM(79): 標題:default
> > >
> > > TERM(80): 標題:document
> > >
> > > TERM(81): 標題:名
> > >
> > > TERM(82): 標題:問
> > >
> > > TERM(83): 標題:檔
> > >
> > > TERM(84): 標題:測
> > >
> > > TERM(85): 標題:看
> > >
> > > TERM(86): 標題:試
> > >
> > > TERM(87): 標題:題
> > >
> > > TERM(88): 關鍵詞:123
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > And here is a sample code: read docs from original index and then write
> > to
> > > an new one.
> > >
> > >
> > >
> > > void CreateNewIndex(string OrgIndex)
> > >
> > >        {
> > >
> > >            IndexReader reader = IndexReader.Open(OrgIndex);
> > >
> > >            IndexWriter writer = new IndexWriter("Floyd", new
> > > Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
> > >
> > >
> > >
> > >            for (int i = 0; i < reader.MaxDoc(); i++)
> > >
> > >            {
> > >
> > >                if (reader.IsDeleted(i) == true) continue;
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document orgDoc =
> >  reader.Document(i);
> > >
> > >                System.Collections.IList fields = orgDoc.GetFields();
> > >
> > >
> > >
> > >                Lucene.Net.Documents.Document newDoc = new Document();
> > >
> > >                foreach (Lucene.Net.Documents.Field field in fields)
> > >
> > >                {
> > >
> > >                    Lucene.Net.Documents.Field newField = new Field(
> > >
> > >                        System.Convert.ToBase64String(
> > > System.Text.Encoding.UTF8.GetBytes(field.Name())), //ç
> > >
> > >                        //field.Name(), //ç
> > >
> > >                        field.StringValue(),
> > >
> > >                        field.IsStored() ?
> > > Lucene.Net.Documents.Field.Store.YES :
> > Lucene.Net.Documents.Field.Store.NO<http://lucene.net.documents.field.store.no/>
> <
> > http://lucene.net.documents.field.store.no/>
>  > > ,
> > >
> > >                        field.IsTokenized() ?
> > > Lucene.Net.Documents.Field.Index.TOKENIZED :
> > > Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
> > >
> > >
> > >
> > >                    newDoc.Add(newField);
> > >
> > >                }
> > >
> > >                writer.AddDocument(newDoc);
> > >
> > >            }
> > >
> > >
> > >
> > >            writer.Close();
> > >
> > >            reader.Close();
> > >
> > >        }
> > >
> > >
> > >
> > >
> > >
> > > If some field names are chinese, then Luke returns “read past EOF”. But
> > if
> > > those field names are replaced with non-chinese names, then it works.
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > >
> > > -----Original Message-----
> > > From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> > > Sent: Friday, April 24, 2009 8:53 PM
> > > To: lucene-net-dev@incubator.apache.org
> > >  Subject: Luke-0.9.x cannot open index files
> > >
> > >
> > >
> > >
> > >
> > > Digy,
> > >
> > >
> > >
> > > Some additional information from the discussion on the lucene-net-user
> > list
> > > with Floyd Wu.
> > >
> > >
> > >
> > >
> > >
> > > I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
> > >
> > >
> > >
> > > The Java equivalents of the two small test applications I use to
> inspect
> > an
> > > index and compact it, function identically to the .NET versions (that
> > were
> > > built with VS2005 and Lucene.NET 2.3.1).
> > >
> > >
> > >
> > > That Luke cannot open the index appears to be a problem within Luke.
> > >
> > > Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does
> > not
> > > flag the index as corrupt; and both the Java and .NET versions report
> the
> > > same index content before and after the optimize operation.
> > >
> > >
> > >
> > >
> > >
> > > -- Neal
> > >
> > >
> > >
> > > **************************************************************
> > >
> > > Neal Granroth
> > >
> > > Software Engineer, Molecular Spectroscopy
> > >
> > > Thermo Fisher Scientific
> > >
> > > 5225 Verona Road, Madison, WI 53711
> > >
> > >
> > >
> > > neal.granroth@thermofisher.com
> > >
> > > Tel: 608-276-5645
> > >
> > > Fax: 608-276-6328
> > >
> > >
> > >
> > > www.thermofisher.com
> > >
> > >
> > >
> > > WORLDWIDE CONFIDENTIALITY NOTE: Dissemination, distribution or copying
> of
> > > this e-mail or the information herein by anyone other than the intended
> > > recipient, or an employee or agent of a system responsible for
> delivering
> > > the message to the intended recipient, is prohibited. If you are not
> the
> > > intended recipient, please inform the sender and delete all copies.
> > >
> > >
> > >
> > > -----Original Message-----
> > >
> > > From: Digy (JIRA) [mailto:jira@apache.org]
> > >
> > > Sent: Wednesday, April 08, 2009 6:28 PM
> > >
> > > To: lucene-net-dev@incubator.apache.org
> > >
> > > Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
>  > > compatible with ASP.NET <http://asp.net/> <http://asp.net/> Medium
> Trust Level, in hosting
> > > environments (like GoDaddy...)
> > >
> > >
> > >
> > >
> > >
> > >    [
> > >
> >
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335
> > ]
> > >
> > >
> > >
> > > Digy commented on LUCENENET-169:
> > >
> > > --------------------------------
> > >
> > >
> > >
> > > Although you can overcome all of them somehow;
> > >
> > >
> > >
> > > * controlling the the lifetime of IndexWriter/IndexReader in a
> naturally
> > > manner,
> > >
> > > * reopening the IndexReader only when needed using (for ex)
> > > FileSystemWatcher,
> > >
> > > * providing a separation between data & bussiness layer,
> > >
> > > * providing other apps an interface that may want to write its own user
> > > interface,
> > >
> > > * accessing a single search service from different web apps/from load
> > > balanced web servers
> > >
> > > * controlling the lifetime of searching/indexing code (without being
> > > effected by the restart of the IIS processes automatically when some
> > memory
> > > limit is exceeded (for ex.) )
> > >
> > > * Ability to access some system resources that can be restricted by IIS
> > >
> > > etc.
> > >
> > > make me think a separete search service is a better idea.But at last,
> it
> > is
> > > a design decision of you.
> > >
> > > (Think, A WebApp+Solr in Java world)
> > >
> > >
> > >
> > >
> > >
> > > DIGY
> > >
> > >
> > >
> > > > Changes to make Lucene.NET compatible with ASP.NET <http://asp.net/><
> http://asp.net/>
> > Medium Trust Level, in hosting environments (like GoDaddy...)
> > >
> > > >
> > >
> >
> -----------------------------------------------------------------------------------------------------------------
> > >
> > > >
> > >
> > > >                 Key: LUCENENET-169
> > >
> > > >                 URL:
> > https://issues.apache.org/jira/browse/LUCENENET-169
> > >
> > > >             Project: Lucene.Net
> > >
> > > >          Issue Type: Improvement
> > >
> > > >         Environment: ASP.NET <http://asp.net/> <http://asp.net/>
>  > >
> > > >            Reporter: Corey Trager
> > >
> > > >         Attachments: FSDirectory.patch
> > >
> > > >
> > >
> > > >
> > >
> > > > Microsoft has a configuration file for shared hosting for what they
> > call
> > > "Medium Trust".   There are a couple places in FSDirectory.cs  that
> > violate
> > > the restrictions of Medium Trust, but I coded workarounds, shown below.
> > >
> > > > #1)
> > >
> > > > // Corey Trager, Oct 2008: Commented call to GetTempPath to
> workaround
> > > permission restrictions at shared host.
> > >
> > > > // LOCK_DIR isn't used anyway.
> > >
> > > > public static readonly System.String LOCK_DIR = null; //
> > > SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> > > System.IO.Path.GetTempPath());
> > >
> > > > #2)
> > >
> > > >               /// <summary>Returns an array of strings, one for each
> > > Lucene index file in the directory. </summary>
> > >
> > > >               public override System.String[] List()
> > >
> > > >               {
> > >
> > > > /* Changes by Corey Trager, Oct 2008, to workaround permission
> > > restrictions at shared host */
> > >
> > > >                System.IO.DirectoryInfo dir = new
> > > System.IO.DirectoryInfo(directory.FullName);
> > >
> > > >               System.IO.FileInfo[] files = dir.GetFiles();
> > >
> > > >                 string[] list = new string[files.Length];
> > >
> > > >                 for (int i = 0; i < files.Length; i++)
> > >
> > > >                 {
> > >
> > > >                     list[i] = files[i].Name;
> > >
> > > >                 }
> > >
> > > >                 return list;
> > >
> > > > /* end of changes */
> > >
> > > > //            System.String[] files =
> > > SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> > > IndexFileNameFilter.GetFilter());
> > >
> > > > //            for (int i = 0; i < files.Length; i++)
> > >
> > > > //            {
> > >
> > > > //                System.IO.FileInfo fi = new
> > > System.IO.FileInfo(files[i]);
> > >
> > > > //                files[i] = fi.Name;
> > >
> > > > //            }
> > >
> > > > //                      return files;
> > >
> > > >               }
> > >
> > >
> > >
> > > --
> > >
> > > This message is automatically generated by JIRA.
> > >
> > > -
> > >
> > > You can reply to this email to add a comment to the issue online.
> > >
> > >
> >
>

Re: Luke-0.9.x cannot open index files

Posted by digy digy <di...@gmail.com>.

It is not a bug in Lucene.Net and, as my sample code shows, Lucene.Net works
well with Chinese field names.
I think it is a bug in Luke.

DIGY



On Mon, Apr 27, 2009 at 8:49 AM, Floyd Wu <fl...@gmail.com> wrote:

> Hi Digy,
> Thanks for your help.
> But if chinese field name is the problem, will it be "fix" in Lucene.Net or
> how can I avoid this problem.
>
> Chinese field name is by design and probably not avoidable.
>
> Floyd
>
> 2009/4/25 Digy <di...@gmail.com>
>
> > I think, I found the bug. Here is the dump of the original index:
> >
> >
> >
> > NUMDOCS: 3
> >
> > MAXDOCS: 7
> >
> > DELETED(0): True
> >
> > DELETED(1): True
> >
> > DELETED(2): False
> >
> > DELETED(3): True
> >
> > DELETED(4): True
> >
> > DELETED(5): False
> >
> > DELETED(6): False
> >
> > TERM(0): _l_activationdatetime:552877632000000000
> >
> > TERM(1): _l_author:admin
> >
> > TERM(2): _l_bookmarkcount:0
> >
> > TERM(3): _l_clix:0
> >
> > TERM(4): _l_clix:1
> >
> > TERM(5): _l_creationdatetime:633427319866778624
> >
> > TERM(6): _l_creationdatetime:633427324812559872
> >
> > TERM(7): _l_creationdatetime:633760609388437504
> >
> > TERM(8): _l_deactivationdatetime:155377824000000000
> >
> > TERM(9): _l_deactivationdatetime:155378687999969792
> >
> > TERM(10): _l_document_class:1
> >
> > TERM(11): _l_document_class:98305
> >
> > TERM(12): _l_folder:163841
> >
> > TERM(13): _l_folder:163843
> >
> > TERM(14): _l_hidden:aaa
> >
> > TERM(15): _l_last_modified_datetime:633427319866778624
> >
> > TERM(16): _l_last_modified_datetime:633427324812559872
> >
> > TERM(17): _l_last_modified_datetime:633760609388437504
> >
> > TERM(18): _l_meta:abc
> >
> > TERM(19): _l_meta:abc.ppt
> >
> > TERM(20): _l_meta:ddx
> >
> > TERM(21): _l_meta:doc
> >
> > TERM(22): _l_meta:xyz
> >
> > TERM(23): _l_meta:名
> >
> > TERM(24): _l_meta:問
> >
> > TERM(25): _l_meta:有
> >
> > TERM(26): _l_meta:檔
> >
> > TERM(27): _l_meta:測
> >
> > TERM(28): _l_meta:看
> >
> > TERM(29): _l_meta:試
> >
> > TERM(30): _l_meta:還
> >
> > TERM(31): _l_meta:題
> >
> > TERM(32): _l_parentdocument:196609
> >
> > TERM(33): _l_parentdocument:327681
> >
> > TERM(34): _l_parentdocument:557057
> >
> > TERM(35): _l_ratingavg:0
> >
> > TERM(36): _l_ratingmedian:0
> >
> > TERM(37): _l_ratingstdev:0
> >
> > TERM(38): _l_ratingsum:0
> >
> > TERM(39): _l_read_permission:admin
> >
> > TERM(40): _l_rootdocument:196609
> >
> > TERM(41): _l_rootdocument:327681
> >
> > TERM(42): _l_rootdocument:557057
> >
> > TERM(43): _l_state:0
> >
> > TERM(44): _l_state:2
> >
> > TERM(45): _l_summary:2123456789
> >
> > TERM(46): _l_summary:abc
> >
> > TERM(47): _l_summary:abc.ppt
> >
> > TERM(48): _l_summary:ddx
> >
> > TERM(49): _l_summary:doc
> >
> > TERM(50): _l_summary:xyz
> >
> > TERM(51): _l_summary:有
> >
> > TERM(52): _l_summary:還
> >
> > TERM(53): _l_title:123
> >
> > TERM(54): _l_title:class
> >
> > TERM(55): _l_title:default
> >
> > TERM(56): _l_title:document
> >
> > TERM(57): _l_title:名
> >
> > TERM(58): _l_title:問
> >
> > TERM(59): _l_title:檔
> >
> > TERM(60): _l_title:測
> >
> > TERM(61): _l_title:看
> >
> > TERM(62): _l_title:試
> >
> > TERM(63): _l_title:題
> >
> > TERM(64): _l_unique_key:196609
> >
> > TERM(65): _l_unique_key:327681
> >
> > TERM(66): _l_unique_key:557057
> >
> > TERM(67): _l_version:1
> >
> > TERM(68): 作者:123
> >
> > TERM(69): 摘要:2123456789
> >
> > TERM(70): 摘要:abc
> >
> > TERM(71): 摘要:abc.ppt
> >
> > TERM(72): 摘要:ddx
> >
> > TERM(73): 摘要:doc
> >
> > TERM(74): 摘要:xyz
> >
> > TERM(75): 摘要:有
> >
> > TERM(76): 摘要:還
> >
> > TERM(77): 標題:123
> >
> > TERM(78): 標題:class
> >
> > TERM(79): 標題:default
> >
> > TERM(80): 標題:document
> >
> > TERM(81): 標題:名
> >
> > TERM(82): 標題:問
> >
> > TERM(83): 標題:檔
> >
> > TERM(84): 標題:測
> >
> > TERM(85): 標題:看
> >
> > TERM(86): 標題:試
> >
> > TERM(87): 標題:題
> >
> > TERM(88): 關鍵詞:123
> >
> >
> >
> >
> >
> >
> >
> > And here is a sample code: read docs from original index and then write
> to
> > an new one.
> >
> >
> >
> > void CreateNewIndex(string OrgIndex)
> >
> >        {
> >
> >            IndexReader reader = IndexReader.Open(OrgIndex);
> >
> >            IndexWriter writer = new IndexWriter("Floyd", new
> > Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
> >
> >
> >
> >            for (int i = 0; i < reader.MaxDoc(); i++)
> >
> >            {
> >
> >                if (reader.IsDeleted(i) == true) continue;
> >
> >
> >
> >                Lucene.Net.Documents.Document orgDoc =
>  reader.Document(i);
> >
> >                System.Collections.IList fields = orgDoc.GetFields();
> >
> >
> >
> >                Lucene.Net.Documents.Document newDoc = new Document();
> >
> >                foreach (Lucene.Net.Documents.Field field in fields)
> >
> >                {
> >
> >                    Lucene.Net.Documents.Field newField = new Field(
> >
> >                        System.Convert.ToBase64String(
> > System.Text.Encoding.UTF8.GetBytes(field.Name())), //ç
> >
> >                        //field.Name(), //ç
> >
> >                        field.StringValue(),
> >
> >                        field.IsStored() ?
> > Lucene.Net.Documents.Field.Store.YES :
> Lucene.Net.Documents.Field.Store.NO<
> http://lucene.net.documents.field.store.no/>
> > ,
> >
> >                        field.IsTokenized() ?
> > Lucene.Net.Documents.Field.Index.TOKENIZED :
> > Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
> >
> >
> >
> >                    newDoc.Add(newField);
> >
> >                }
> >
> >                writer.AddDocument(newDoc);
> >
> >            }
> >
> >
> >
> >            writer.Close();
> >
> >            reader.Close();
> >
> >        }
> >
> >
> >
> >
> >
> > If some field names are chinese, then Luke returns “read past EOF”. But
> if
> > those field names are replaced with non-chinese names, then it works.
> >
> >
> >
> > DIGY
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> > -----Original Message-----
> > From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> > Sent: Friday, April 24, 2009 8:53 PM
> > To: lucene-net-dev@incubator.apache.org
> >  Subject: Luke-0.9.x cannot open index files
> >
> >
> >
> >
> >
> > Digy,
> >
> >
> >
> > Some additional information from the discussion on the lucene-net-user
> list
> > with Floyd Wu.
> >
> >
> >
> >
> >
> > I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
> >
> >
> >
> > The Java equivalents of the two small test applications I use to inspect
> an
> > index and compact it, function identically to the .NET versions (that
> were
> > built with VS2005 and Lucene.NET 2.3.1).
> >
> >
> >
> > That Luke cannot open the index appears to be a problem within Luke.
> >
> > Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does
> not
> > flag the index as corrupt; and both the Java and .NET versions report the
> > same index content before and after the optimize operation.
> >
> >
> >
> >
> >
> > -- Neal
> >
> >
> >
> > **************************************************************
> >
> > Neal Granroth
> >
> > Software Engineer, Molecular Spectroscopy
> >
> > Thermo Fisher Scientific
> >
> > 5225 Verona Road, Madison, WI 53711
> >
> >
> >
> > neal.granroth@thermofisher.com
> >
> > Tel: 608-276-5645
> >
> > Fax: 608-276-6328
> >
> >
> >
> > www.thermofisher.com
> >
> >
> >
> > WORLDWIDE CONFIDENTIALITY NOTE: Dissemination, distribution or copying of
> > this e-mail or the information herein by anyone other than the intended
> > recipient, or an employee or agent of a system responsible for delivering
> > the message to the intended recipient, is prohibited. If you are not the
> > intended recipient, please inform the sender and delete all copies.
> >
> >
> >
> > -----Original Message-----
> >
> > From: Digy (JIRA) [mailto:jira@apache.org]
> >
> > Sent: Wednesday, April 08, 2009 6:28 PM
> >
> > To: lucene-net-dev@incubator.apache.org
> >
> > Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
> > compatible with ASP.NET <http://asp.net/> Medium Trust Level, in hosting
> > environments (like GoDaddy...)
> >
> >
> >
> >
> >
> >    [
> >
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335
> ]
> >
> >
> >
> > Digy commented on LUCENENET-169:
> >
> > --------------------------------
> >
> >
> >
> > Although you can overcome all of them somehow;
> >
> >
> >
> > * controlling the the lifetime of IndexWriter/IndexReader in a naturally
> > manner,
> >
> > * reopening the IndexReader only when needed using (for ex)
> > FileSystemWatcher,
> >
> > * providing a separation between data & bussiness layer,
> >
> > * providing other apps an interface that may want to write its own user
> > interface,
> >
> > * accessing a single search service from different web apps/from load
> > balanced web servers
> >
> > * controlling the lifetime of searching/indexing code (without being
> > effected by the restart of the IIS processes automatically when some
> memory
> > limit is exceeded (for ex.) )
> >
> > * Ability to access some system resources that can be restricted by IIS
> >
> > etc.
> >
> > make me think a separate search service is a better idea. But in the end,
> > it is your design decision.
> >
> > (Think, A WebApp+Solr in Java world)
> >
> >
> >
> >
> >
> > DIGY
> >
> >
> >
> > > Changes to make Lucene.NET compatible with ASP.NET <http://asp.net/>
> Medium Trust Level, in hosting environments (like GoDaddy...)
> >
> > >
> >
> -----------------------------------------------------------------------------------------------------------------
> >
> > >
> >
> > >                 Key: LUCENENET-169
> >
> > >                 URL:
> https://issues.apache.org/jira/browse/LUCENENET-169
> >
> > >             Project: Lucene.Net
> >
> > >          Issue Type: Improvement
> >
> > >         Environment: ASP.NET <http://asp.net/>
> >
> > >            Reporter: Corey Trager
> >
> > >         Attachments: FSDirectory.patch
> >
> > >
> >
> > >
> >
> > > Microsoft has a configuration file for shared hosting for what they
> call
> > "Medium Trust".   There are a couple places in FSDirectory.cs  that
> violate
> > the restrictions of Medium Trust, but I coded workarounds, shown below.
> >
> > > #1)
> >
> > > // Corey Trager, Oct 2008: Commented call to GetTempPath to workaround
> > permission restrictions at shared host.
> >
> > > // LOCK_DIR isn't used anyway.
> >
> > > public static readonly System.String LOCK_DIR = null; //
> > SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> > System.IO.Path.GetTempPath());
> >
> > > #2)
> >
> > >               /// <summary>Returns an array of strings, one for each
> > Lucene index file in the directory. </summary>
> >
> > >               public override System.String[] List()
> >
> > >               {
> >
> > > /* Changes by Corey Trager, Oct 2008, to workaround permission
> > restrictions at shared host */
> >
> > >                System.IO.DirectoryInfo dir = new
> > System.IO.DirectoryInfo(directory.FullName);
> >
> > >               System.IO.FileInfo[] files = dir.GetFiles();
> >
> > >                 string[] list = new string[files.Length];
> >
> > >                 for (int i = 0; i < files.Length; i++)
> >
> > >                 {
> >
> > >                     list[i] = files[i].Name;
> >
> > >                 }
> >
> > >                 return list;
> >
> > > /* end of changes */
> >
> > > //            System.String[] files =
> > SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> > IndexFileNameFilter.GetFilter());
> >
> > > //            for (int i = 0; i < files.Length; i++)
> >
> > > //            {
> >
> > > //                System.IO.FileInfo fi = new
> > System.IO.FileInfo(files[i]);
> >
> > > //                files[i] = fi.Name;
> >
> > > //            }
> >
> > > //                      return files;
> >
> > >               }
> >
> >
> >
> > --
> >
> > This message is automatically generated by JIRA.
> >
> > -
> >
> > You can reply to this email to add a comment to the issue online.
> >
> >
>

Re: Luke-0.9.x cannot open index files

Posted by Floyd Wu <fl...@gmail.com>.

Hi Digy,
Thanks for your help.
But if the Chinese field names are the problem, will this be fixed in
Lucene.Net, or how can I avoid the problem?

The Chinese field names are by design and probably cannot be avoided.

Floyd

2009/4/25 Digy <di...@gmail.com>

> I think, I found the bug. Here is the dump of the original index:
>
>
>
> NUMDOCS: 3
>
> MAXDOCS: 7
>
> DELETED(0): True
>
> DELETED(1): True
>
> DELETED(2): False
>
> DELETED(3): True
>
> DELETED(4): True
>
> DELETED(5): False
>
> DELETED(6): False
>
> TERM(0): _l_activationdatetime:552877632000000000
>
> TERM(1): _l_author:admin
>
> TERM(2): _l_bookmarkcount:0
>
> TERM(3): _l_clix:0
>
> TERM(4): _l_clix:1
>
> TERM(5): _l_creationdatetime:633427319866778624
>
> TERM(6): _l_creationdatetime:633427324812559872
>
> TERM(7): _l_creationdatetime:633760609388437504
>
> TERM(8): _l_deactivationdatetime:155377824000000000
>
> TERM(9): _l_deactivationdatetime:155378687999969792
>
> TERM(10): _l_document_class:1
>
> TERM(11): _l_document_class:98305
>
> TERM(12): _l_folder:163841
>
> TERM(13): _l_folder:163843
>
> TERM(14): _l_hidden:aaa
>
> TERM(15): _l_last_modified_datetime:633427319866778624
>
> TERM(16): _l_last_modified_datetime:633427324812559872
>
> TERM(17): _l_last_modified_datetime:633760609388437504
>
> TERM(18): _l_meta:abc
>
> TERM(19): _l_meta:abc.ppt
>
> TERM(20): _l_meta:ddx
>
> TERM(21): _l_meta:doc
>
> TERM(22): _l_meta:xyz
>
> TERM(23): _l_meta:名
>
> TERM(24): _l_meta:問
>
> TERM(25): _l_meta:有
>
> TERM(26): _l_meta:檔
>
> TERM(27): _l_meta:測
>
> TERM(28): _l_meta:看
>
> TERM(29): _l_meta:試
>
> TERM(30): _l_meta:還
>
> TERM(31): _l_meta:題
>
> TERM(32): _l_parentdocument:196609
>
> TERM(33): _l_parentdocument:327681
>
> TERM(34): _l_parentdocument:557057
>
> TERM(35): _l_ratingavg:0
>
> TERM(36): _l_ratingmedian:0
>
> TERM(37): _l_ratingstdev:0
>
> TERM(38): _l_ratingsum:0
>
> TERM(39): _l_read_permission:admin
>
> TERM(40): _l_rootdocument:196609
>
> TERM(41): _l_rootdocument:327681
>
> TERM(42): _l_rootdocument:557057
>
> TERM(43): _l_state:0
>
> TERM(44): _l_state:2
>
> TERM(45): _l_summary:2123456789
>
> TERM(46): _l_summary:abc
>
> TERM(47): _l_summary:abc.ppt
>
> TERM(48): _l_summary:ddx
>
> TERM(49): _l_summary:doc
>
> TERM(50): _l_summary:xyz
>
> TERM(51): _l_summary:有
>
> TERM(52): _l_summary:還
>
> TERM(53): _l_title:123
>
> TERM(54): _l_title:class
>
> TERM(55): _l_title:default
>
> TERM(56): _l_title:document
>
> TERM(57): _l_title:名
>
> TERM(58): _l_title:問
>
> TERM(59): _l_title:檔
>
> TERM(60): _l_title:測
>
> TERM(61): _l_title:看
>
> TERM(62): _l_title:試
>
> TERM(63): _l_title:題
>
> TERM(64): _l_unique_key:196609
>
> TERM(65): _l_unique_key:327681
>
> TERM(66): _l_unique_key:557057
>
> TERM(67): _l_version:1
>
> TERM(68): 作者:123
>
> TERM(69): 摘要:2123456789
>
> TERM(70): 摘要:abc
>
> TERM(71): 摘要:abc.ppt
>
> TERM(72): 摘要:ddx
>
> TERM(73): 摘要:doc
>
> TERM(74): 摘要:xyz
>
> TERM(75): 摘要:有
>
> TERM(76): 摘要:還
>
> TERM(77): 標題:123
>
> TERM(78): 標題:class
>
> TERM(79): 標題:default
>
> TERM(80): 標題:document
>
> TERM(81): 標題:名
>
> TERM(82): 標題:問
>
> TERM(83): 標題:檔
>
> TERM(84): 標題:測
>
> TERM(85): 標題:看
>
> TERM(86): 標題:試
>
> TERM(87): 標題:題
>
> TERM(88): 關鍵詞:123
>
>
>
>
>
>
>
> And here is some sample code: read the docs from the original index and then
> write them to a new one.
>
>
>
> void CreateNewIndex(string OrgIndex)
>
>        {
>
>            IndexReader reader = IndexReader.Open(OrgIndex);
>
>            IndexWriter writer = new IndexWriter("Floyd", new
> Lucene.Net.Analysis.WhitespaceAnalyzer(),true);
>
>
>
>            for (int i = 0; i < reader.MaxDoc(); i++)
>
>            {
>
>                if (reader.IsDeleted(i) == true) continue;
>
>
>
>                Lucene.Net.Documents.Document orgDoc =  reader.Document(i);
>
>                System.Collections.IList fields = orgDoc.GetFields();
>
>
>
>                Lucene.Net.Documents.Document newDoc = new Document();
>
>                foreach (Lucene.Net.Documents.Field field in fields)
>
>                {
>
>                    Lucene.Net.Documents.Field newField = new Field(
>
>                        System.Convert.ToBase64String(
> System.Text.Encoding.UTF8.GetBytes(field.Name())), // <--
>
>                        //field.Name(), // <--
>
>                        field.StringValue(),
>
>                        field.IsStored() ?
> Lucene.Net.Documents.Field.Store.YES : Lucene.Net.Documents.Field.Store.NO
> ,
>
>                        field.IsTokenized() ?
> Lucene.Net.Documents.Field.Index.TOKENIZED :
> Lucene.Net.Documents.Field.Index.UN_TOKENIZED);
>
>
>
>                    newDoc.Add(newField);
>
>                }
>
>                writer.AddDocument(newDoc);
>
>            }
>
>
>
>            writer.Close();
>
>            reader.Close();
>
>        }
>
>
>
>
>
> If some field names are Chinese, then Luke returns “read past EOF”. But if
> those field names are replaced with non-Chinese names, then it works.
>
>
>
> DIGY
>
>
>
>
>
>
>
>
>
>
>
> -----Original Message-----
> From: Granroth, Neal V. [mailto:neal.granroth@thermofisher.com]
> Sent: Friday, April 24, 2009 8:53 PM
> To: lucene-net-dev@incubator.apache.org
>  Subject: Luke-0.9.x cannot open index files
>
>
>
>
>
> Digy,
>
>
>
> Some additional information from the discussion on the lucene-net-user list
> with Floyd Wu.
>
>
>
>
>
> I ran some further tests using Java Lucene 2.3.2 and JDK 1.5.
>
>
>
> The Java equivalents of the two small test applications I use to inspect an
> index and compact it, function identically to the .NET versions (that were
> built with VS2005 and Lucene.NET 2.3.1).
>
>
>
> That Luke cannot open the index appears to be a problem within Luke.
>
> Even if Floyd's index contains some odd entries, Java Lucene 2.3.2 does not
> flag the index as corrupt; and both the Java and .NET versions report the
> same index content before and after the optimize operation.
>
>
>
>
>
> -- Neal
>
>
>
> **************************************************************
>
> Neal Granroth
>
> Software Engineer, Molecular Spectroscopy
>
> Thermo Fisher Scientific
>
> 5225 Verona Road, Madison, WI 53711
>
>
>
> neal.granroth@thermofisher.com
>
> Tel: 608-276-5645
>
> Fax: 608-276-6328
>
>
>
> www.thermofisher.com
>
>
>
> WORLDWIDE CONFIDENTIALITY NOTE: Dissemination, distribution or copying of
> this e-mail or the information herein by anyone other than the intended
> recipient, or an employee or agent of a system responsible for delivering
> the message to the intended recipient, is prohibited. If you are not the
> intended recipient, please inform the sender and delete all copies.
>
>
>
> -----Original Message-----
>
> From: Digy (JIRA) [mailto:jira@apache.org]
>
> Sent: Wednesday, April 08, 2009 6:28 PM
>
> To: lucene-net-dev@incubator.apache.org
>
> Subject: [jira] Commented: (LUCENENET-169) Changes to make Lucene.NET
> compatible with ASP.NET <http://asp.net/> Medium Trust Level, in hosting
> environments (like GoDaddy...)
>
>
>
>
>
>    [
> https://issues.apache.org/jira/browse/LUCENENET-169?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12697335#action_12697335]
>
>
>
> Digy commented on LUCENENET-169:
>
> --------------------------------
>
>
>
> Although you can overcome all of them somehow;
>
>
>
> * controlling the lifetime of IndexWriter/IndexReader in a natural
> manner,
>
> * reopening the IndexReader only when needed using (for ex)
> FileSystemWatcher,
>
> * providing a separation between data & business layer,
>
> * providing other apps an interface that may want to write its own user
> interface,
>
> * accessing a single search service from different web apps/from load
> balanced web servers
>
> * controlling the lifetime of searching/indexing code (without being
> affected by the restart of the IIS processes automatically when some memory
> limit is exceeded (for ex.) )
>
> * Ability to access some system resources that can be restricted by IIS
>
> etc.
>
> make me think a separate search service is a better idea. But in the end, it
> is your design decision.
>
> (Think, A WebApp+Solr in Java world)
>
>
>
>
>
> DIGY
>
>
>
> > Changes to make Lucene.NET compatible with ASP.NET <http://asp.net/> Medium Trust Level, in hosting environments (like GoDaddy...)
>
> >
> -----------------------------------------------------------------------------------------------------------------
>
> >
>
> >                 Key: LUCENENET-169
>
> >                 URL: https://issues.apache.org/jira/browse/LUCENENET-169
>
> >             Project: Lucene.Net
>
> >          Issue Type: Improvement
>
> >         Environment: ASP.NET <http://asp.net/>
>
> >            Reporter: Corey Trager
>
> >         Attachments: FSDirectory.patch
>
> >
>
> >
>
> > Microsoft has a configuration file for shared hosting for what they call
> "Medium Trust".   There are a couple places in FSDirectory.cs  that violate
> the restrictions of Medium Trust, but I coded workarounds, shown below.
>
> > #1)
>
> > // Corey Trager, Oct 2008: Commented call to GetTempPath to workaround
> permission restrictions at shared host.
>
> > // LOCK_DIR isn't used anyway.
>
> > public static readonly System.String LOCK_DIR = null; //
> SupportClass.AppSettings.Get("Lucene.Net.lockDir",
> System.IO.Path.GetTempPath());
>
> > #2)
>
> >               /// <summary>Returns an array of strings, one for each
> Lucene index file in the directory. </summary>
>
> >               public override System.String[] List()
>
> >               {
>
> > /* Changes by Corey Trager, Oct 2008, to workaround permission
> restrictions at shared host */
>
> >                System.IO.DirectoryInfo dir = new
> System.IO.DirectoryInfo(directory.FullName);
>
> >               System.IO.FileInfo[] files = dir.GetFiles();
>
> >                 string[] list = new string[files.Length];
>
> >                 for (int i = 0; i < files.Length; i++)
>
> >                 {
>
> >                     list[i] = files[i].Name;
>
> >                 }
>
> >                 return list;
>
> > /* end of changes */
>
> > //            System.String[] files =
> SupportClass.FileSupport.GetLuceneIndexFiles(directory.FullName,
> IndexFileNameFilter.GetFilter());
>
> > //            for (int i = 0; i < files.Length; i++)
>
> > //            {
>
> > //                System.IO.FileInfo fi = new
> System.IO.FileInfo(files[i]);
>
> > //                files[i] = fi.Name;
>
> > //            }
>
> > //                      return files;
>
> >               }
>
>
>
> --
>
> This message is automatically generated by JIRA.
>
> -
>
> You can reply to this email to add a comment to the issue online.
>
>