You are viewing a plain text version of this content. The canonical link for it is here.

Posted to java-user@lucene.apache.org by manjula wijewickrema <ma...@gmail.com> on 2010/05/17 13:23:28 UTC

Problem of getTermFrequencies()

Hi,

I wrote some code to display the indexed terms of a single document and get
their term frequencies. Although it displays the terms in the
index, it does not give the term frequencies. Instead it displays ' frequencies
are:[I@80fa6f '. What is the reason for this? The code I have written and the
output it displays are given below.

Code:

 *

import* org.apache.lucene.analysis.standard.StandardAnalyzer;
*

import* org.apache.lucene.document.Document;
*

import* org.apache.lucene.document.Field;
*

import* org.apache.lucene.index.IndexWriter;
*

import* org.apache.lucene.index.IndexReader;
*

import* org.apache.lucene.queryParser.ParseException;
*

import* org.apache.lucene.queryParser.QueryParser;
*

import* org.apache.lucene.search.*;
*

import* org.apache.lucene.store.Directory;
*

import* org.apache.lucene.store.RAMDirectory;
*

import* org.apache.lucene.util.Version;
*

import* org.apache.lucene.index.TermFreqVector;

*

import* java.io.BufferedReader;
*

import* java.io.FileReader;
*

import* java.io.IOException;
*

import* org.apache.lucene.analysis.StopAnalyzer;
*

import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;


*

public* *class* Testing{

*

public* *static* *void* main(String[] args) *throws* IOException,
ParseException {

//StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English", StopAnalyzer.
ENGLISH_STOP_WORDS);

*try*{

Directory directory=*new* RAMDirectory();

IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,

IndexWriter.MaxFieldLength.*UNLIMITED*);

Document doc = *new* Document();

String text="This is a sample codes code for testing lucene's capabilities
over lucene term frequencies";

doc.add(*new* Field("title", text, Field.Store.*YES*, Field.Index.*ANALYZED*
,Field.TermVector.*YES*));

w.addDocument(doc);

w.close();

IndexReader ir=IndexReader.open(directory);

TermFreqVector[] tfv=ir.getTermFreqVectors(0);

// for (int xy = 0; xy < tfv.length; xy++) {

String[] terms = tfv[0].getTerms();

*int*[] freqs=tfv[0].getTermFrequencies();

//System.out.println("terms are:"+tfv[xy]);

//System.out.println("length is:"+terms.length);

System.*out*.println("array terms are:"+tfv[0]);

System.*out*.println("terms are:"+terms);

System.*out*.println("frequencies are:"+freqs);

// }

 }*catch*(Exception ex){

ex.printStackTrace();

}

}

}



Display:

array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
sampl/1, term/1, test/1}

terms are:[Ljava.lang.String;@1e13d52

frequencies are:[I@80fa6f



Could somebody please help me to get the desired output?

Thanx,

Manjula.

Re: Problem of getTermFrequencies()

Posted by manjula wijewickrema <ma...@gmail.com>.

Thanx

On Mon, May 17, 2010 at 10:19 PM, Grant Ingersoll <gs...@apache.org> wrote:

> Note, depending on your downstream use, you may consider using a
> TermVectorMapper that allows you to construct your own data structures as
> needed.
>
> -Grant
>
> On May 17, 2010, at 3:16 PM, Ian Lea wrote:
>
> > terms and freqs are arrays.  Try terms[i] and freqs[i].
> >
> >
> > --
> > Ian.
> >
> >
> > On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
> > <ma...@gmail.com> wrote:
> >> Hi,
> >>
> >> I wrote a code with a view to display the indexed terms and get their
> term
> > >> frequencies of a single document. Although it displays those terms in the
> >> index, it does not give the term frequencies. Instead it displays '
> frequencies
> >> are:[I@80fa6f '. What's the reason for this. The code I have written
> and the
> >> display, can be given as follows.
> >>
> >> Code:
> >>
> >>  *
> >>
> >> import* org.apache.lucene.analysis.standard.StandardAnalyzer;
> >> *
> >>
> >> import* org.apache.lucene.document.Document;
> >> *
> >>
> >> import* org.apache.lucene.document.Field;
> >> *
> >>
> >> import* org.apache.lucene.index.IndexWriter;
> >> *
> >>
> >> import* org.apache.lucene.index.IndexReader;
> >> *
> >>
> >> import* org.apache.lucene.queryParser.ParseException;
> >> *
> >>
> >> import* org.apache.lucene.queryParser.QueryParser;
> >> *
> >>
> >> import* org.apache.lucene.search.*;
> >> *
> >>
> >> import* org.apache.lucene.store.Directory;
> >> *
> >>
> >> import* org.apache.lucene.store.RAMDirectory;
> >> *
> >>
> >> import* org.apache.lucene.util.Version;
> >> *
> >>
> >> import* org.apache.lucene.index.TermFreqVector;
> >>
> >> *
> >>
> >> import* java.io.BufferedReader;
> >> *
> >>
> >> import* java.io.FileReader;
> >> *
> >>
> >> import* java.io.IOException;
> >> *
> >>
> >> import* org.apache.lucene.analysis.StopAnalyzer;
> >> *
> >>
> >> import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
> >>
> >>
> >> *
> >>
> >> public* *class* Testing{
> >>
> >> *
> >>
> >> public* *static* *void* main(String[] args) *throws* IOException,
> >> ParseException {
> >>
> >> //StandardAnalyzer analyzer = new
> StandardAnalyzer(Version.LUCENE_CURRENT);
> >>
> >> SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English",
> StopAnalyzer.
> >> ENGLISH_STOP_WORDS);
> >>
> >> *try*{
> >>
> >> Directory directory=*new* RAMDirectory();
> >>
> >> IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
> >>
> >> IndexWriter.MaxFieldLength.*UNLIMITED*);
> >>
> >> Document doc = *new* Document();
> >>
> >> String text="This is a sample codes code for testing lucene's
> capabilities
> >> over lucene term frequencies";
> >>
> >> doc.add(*new* Field("title", text, Field.Store.*YES*,
> Field.Index.*ANALYZED*
> >> ,Field.TermVector.*YES*));
> >>
> >> w.addDocument(doc);
> >>
> >> w.close();
> >>
> >> IndexReader ir=IndexReader.open(directory);
> >>
> >> TermFreqVector[] tfv=ir.getTermFreqVectors(0);
> >>
> >> // for (int xy = 0; xy < tfv.length; xy++) {
> >>
> >> String[] terms = tfv[0].getTerms();
> >>
> >> *int*[] freqs=tfv[0].getTermFrequencies();
> >>
> >> //System.out.println("terms are:"+tfv[xy]);
> >>
> >> //System.out.println("length is:"+terms.length);
> >>
> >> System.*out*.println("array terms are:"+tfv[0]);
> >>
> >> System.*out*.println("terms are:"+terms);
> >>
> >> System.*out*.println("frequencies are:"+freqs);
> >>
> >> // }
> >>
> >>  }*catch*(Exception ex){
> >>
> >> ex.printStackTrace();
> >>
> >> }
> >>
> >> }
> >>
> >> }
> >>
> >>
> >>
> >> Display:
> >>
> >> array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
> >> sampl/1, term/1, test/1}
> >>
> >> terms are:[Ljava.lang.String;@1e13d52
> >>
> >> frequencies are:[I@80fa6f
> >>
> >>
> >>
> >> If some body can pls. help me to get the desired output.
> >>
> >> Thanx,
> >>
> >> Manjula.
> >>
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> > For additional commands, e-mail: java-user-help@lucene.apache.org
> >
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>

Re: Problem of getTermFrequencies()

Posted by Grant Ingersoll <gs...@apache.org>.

Note, depending on your downstream use, you may consider using a TermVectorMapper that allows you to construct your own data structures as needed.

-Grant

On May 17, 2010, at 3:16 PM, Ian Lea wrote:

> terms and freqs are arrays.  Try terms[i] and freqs[i].
> 
> 
> --
> Ian.
> 
> 
> On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
> <ma...@gmail.com> wrote:
>> Hi,
>> 
>> I wrote a code with a view to display the indexed terms and get their term
>> frequencies of a single document. Although it displays those terms in the
>> index, it does not give the term frequencies. Instead it displays ' frequencies
>> are:[I@80fa6f '. What's the reason for this. The code I have written and the
>> display, can be given as follows.
>> 
>> Code:
>> 
>>  *
>> 
>> import* org.apache.lucene.analysis.standard.StandardAnalyzer;
>> *
>> 
>> import* org.apache.lucene.document.Document;
>> *
>> 
>> import* org.apache.lucene.document.Field;
>> *
>> 
>> import* org.apache.lucene.index.IndexWriter;
>> *
>> 
>> import* org.apache.lucene.index.IndexReader;
>> *
>> 
>> import* org.apache.lucene.queryParser.ParseException;
>> *
>> 
>> import* org.apache.lucene.queryParser.QueryParser;
>> *
>> 
>> import* org.apache.lucene.search.*;
>> *
>> 
>> import* org.apache.lucene.store.Directory;
>> *
>> 
>> import* org.apache.lucene.store.RAMDirectory;
>> *
>> 
>> import* org.apache.lucene.util.Version;
>> *
>> 
>> import* org.apache.lucene.index.TermFreqVector;
>> 
>> *
>> 
>> import* java.io.BufferedReader;
>> *
>> 
>> import* java.io.FileReader;
>> *
>> 
>> import* java.io.IOException;
>> *
>> 
>> import* org.apache.lucene.analysis.StopAnalyzer;
>> *
>> 
>> import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
>> 
>> 
>> *
>> 
>> public* *class* Testing{
>> 
>> *
>> 
>> public* *static* *void* main(String[] args) *throws* IOException,
>> ParseException {
>> 
>> //StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
>> 
>> SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English", StopAnalyzer.
>> ENGLISH_STOP_WORDS);
>> 
>> *try*{
>> 
>> Directory directory=*new* RAMDirectory();
>> 
>> IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
>> 
>> IndexWriter.MaxFieldLength.*UNLIMITED*);
>> 
>> Document doc = *new* Document();
>> 
>> String text="This is a sample codes code for testing lucene's capabilities
>> over lucene term frequencies";
>> 
>> doc.add(*new* Field("title", text, Field.Store.*YES*, Field.Index.*ANALYZED*
>> ,Field.TermVector.*YES*));
>> 
>> w.addDocument(doc);
>> 
>> w.close();
>> 
>> IndexReader ir=IndexReader.open(directory);
>> 
>> TermFreqVector[] tfv=ir.getTermFreqVectors(0);
>> 
>> // for (int xy = 0; xy < tfv.length; xy++) {
>> 
>> String[] terms = tfv[0].getTerms();
>> 
>> *int*[] freqs=tfv[0].getTermFrequencies();
>> 
>> //System.out.println("terms are:"+tfv[xy]);
>> 
>> //System.out.println("length is:"+terms.length);
>> 
>> System.*out*.println("array terms are:"+tfv[0]);
>> 
>> System.*out*.println("terms are:"+terms);
>> 
>> System.*out*.println("frequencies are:"+freqs);
>> 
>> // }
>> 
>>  }*catch*(Exception ex){
>> 
>> ex.printStackTrace();
>> 
>> }
>> 
>> }
>> 
>> }
>> 
>> 
>> 
>> Display:
>> 
>> array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
>> sampl/1, term/1, test/1}
>> 
>> terms are:[Ljava.lang.String;@1e13d52
>> 
>> frequencies are:[I@80fa6f
>> 
>> 
>> 
>> If some body can pls. help me to get the desired output.
>> 
>> Thanx,
>> 
>> Manjula.
>> 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org

Re: Problem of getTermFrequencies()

Posted by manjula wijewickrema <ma...@gmail.com>.

Dear Ian,

I changed it as you said and now it is working nicely. Thanks a lot for your
kind help.

Manjula

On Mon, May 17, 2010 at 6:46 PM, Ian Lea <ia...@gmail.com> wrote:

> terms and freqs are arrays.  Try terms[i] and freqs[i].
>
>
> --
> Ian.
>
>
> On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
> <ma...@gmail.com> wrote:
> > Hi,
> >
> > I wrote a code with a view to display the indexed terms and get their
> term
> > frequencies of a single document. Although it displays those terms in the
> > index, it does not give the term frequencies. Instead it displays '
> frequencies
> > are:[I@80fa6f '. What's the reason for this. The code I have written and
> the
> > display, can be given as follows.
> >
> > Code:
> >
> >  *
> >
> > import* org.apache.lucene.analysis.standard.StandardAnalyzer;
> > *
> >
> > import* org.apache.lucene.document.Document;
> > *
> >
> > import* org.apache.lucene.document.Field;
> > *
> >
> > import* org.apache.lucene.index.IndexWriter;
> > *
> >
> > import* org.apache.lucene.index.IndexReader;
> > *
> >
> > import* org.apache.lucene.queryParser.ParseException;
> > *
> >
> > import* org.apache.lucene.queryParser.QueryParser;
> > *
> >
> > import* org.apache.lucene.search.*;
> > *
> >
> > import* org.apache.lucene.store.Directory;
> > *
> >
> > import* org.apache.lucene.store.RAMDirectory;
> > *
> >
> > import* org.apache.lucene.util.Version;
> > *
> >
> > import* org.apache.lucene.index.TermFreqVector;
> >
> > *
> >
> > import* java.io.BufferedReader;
> > *
> >
> > import* java.io.FileReader;
> > *
> >
> > import* java.io.IOException;
> > *
> >
> > import* org.apache.lucene.analysis.StopAnalyzer;
> > *
> >
> > import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
> >
> >
> > *
> >
> > public* *class* Testing{
> >
> > *
> >
> > public* *static* *void* main(String[] args) *throws* IOException,
> > ParseException {
> >
> > //StandardAnalyzer analyzer = new
> StandardAnalyzer(Version.LUCENE_CURRENT);
> >
> > SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English",
> StopAnalyzer.
> > ENGLISH_STOP_WORDS);
> >
> > *try*{
> >
> > Directory directory=*new* RAMDirectory();
> >
> > IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
> >
> > IndexWriter.MaxFieldLength.*UNLIMITED*);
> >
> > Document doc = *new* Document();
> >
> > String text="This is a sample codes code for testing lucene's
> capabilities
> > over lucene term frequencies";
> >
> > doc.add(*new* Field("title", text, Field.Store.*YES*,
> Field.Index.*ANALYZED*
> > ,Field.TermVector.*YES*));
> >
> > w.addDocument(doc);
> >
> > w.close();
> >
> > IndexReader ir=IndexReader.open(directory);
> >
> > TermFreqVector[] tfv=ir.getTermFreqVectors(0);
> >
> > // for (int xy = 0; xy < tfv.length; xy++) {
> >
> > String[] terms = tfv[0].getTerms();
> >
> > *int*[] freqs=tfv[0].getTermFrequencies();
> >
> > //System.out.println("terms are:"+tfv[xy]);
> >
> > //System.out.println("length is:"+terms.length);
> >
> > System.*out*.println("array terms are:"+tfv[0]);
> >
> > System.*out*.println("terms are:"+terms);
> >
> > System.*out*.println("frequencies are:"+freqs);
> >
> > // }
> >
> >  }*catch*(Exception ex){
> >
> > ex.printStackTrace();
> >
> > }
> >
> > }
> >
> > }
> >
> >
> >
> > Display:
> >
> > array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
> > sampl/1, term/1, test/1}
> >
> > terms are:[Ljava.lang.String;@1e13d52
> >
> > frequencies are:[I@80fa6f
> >
> >
> >
> > If some body can pls. help me to get the desired output.
> >
> > Thanx,
> >
> > Manjula.
> >
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>

Re: Problem of getTermFrequencies()

Posted by Ian Lea <ia...@gmail.com>.

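terms and freqs are arrays, so concatenating them to a String prints the
array's type and hash code (e.g. [I@80fa6f) rather than its contents.
Try looping over them and printing terms[i] and freqs[i].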


--
Ian.


On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
<ma...@gmail.com> wrote:
> Hi,
>
> I wrote a code with a view to display the indexed terms and get their term
> frequencies of a single document. Although it displays those terms in the
> index, it does not give the term frequencies. Instead it displays ' frequencies
> are:[I@80fa6f '. What's the reason for this. The code I have written and the
> display, can be given as follows.
>
> Code:
>
>  *
>
> import* org.apache.lucene.analysis.standard.StandardAnalyzer;
> *
>
> import* org.apache.lucene.document.Document;
> *
>
> import* org.apache.lucene.document.Field;
> *
>
> import* org.apache.lucene.index.IndexWriter;
> *
>
> import* org.apache.lucene.index.IndexReader;
> *
>
> import* org.apache.lucene.queryParser.ParseException;
> *
>
> import* org.apache.lucene.queryParser.QueryParser;
> *
>
> import* org.apache.lucene.search.*;
> *
>
> import* org.apache.lucene.store.Directory;
> *
>
> import* org.apache.lucene.store.RAMDirectory;
> *
>
> import* org.apache.lucene.util.Version;
> *
>
> import* org.apache.lucene.index.TermFreqVector;
>
> *
>
> import* java.io.BufferedReader;
> *
>
> import* java.io.FileReader;
> *
>
> import* java.io.IOException;
> *
>
> import* org.apache.lucene.analysis.StopAnalyzer;
> *
>
> import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
>
>
> *
>
> public* *class* Testing{
>
> *
>
> public* *static* *void* main(String[] args) *throws* IOException,
> ParseException {
>
> //StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
>
> SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English", StopAnalyzer.
> ENGLISH_STOP_WORDS);
>
> *try*{
>
> Directory directory=*new* RAMDirectory();
>
> IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
>
> IndexWriter.MaxFieldLength.*UNLIMITED*);
>
> Document doc = *new* Document();
>
> String text="This is a sample codes code for testing lucene's capabilities
> over lucene term frequencies";
>
> doc.add(*new* Field("title", text, Field.Store.*YES*, Field.Index.*ANALYZED*
> ,Field.TermVector.*YES*));
>
> w.addDocument(doc);
>
> w.close();
>
> IndexReader ir=IndexReader.open(directory);
>
> TermFreqVector[] tfv=ir.getTermFreqVectors(0);
>
> // for (int xy = 0; xy < tfv.length; xy++) {
>
> String[] terms = tfv[0].getTerms();
>
> *int*[] freqs=tfv[0].getTermFrequencies();
>
> //System.out.println("terms are:"+tfv[xy]);
>
> //System.out.println("length is:"+terms.length);
>
> System.*out*.println("array terms are:"+tfv[0]);
>
> System.*out*.println("terms are:"+terms);
>
> System.*out*.println("frequencies are:"+freqs);
>
> // }
>
>  }*catch*(Exception ex){
>
> ex.printStackTrace();
>
> }
>
> }
>
> }
>
>
>
> Display:
>
> array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
> sampl/1, term/1, test/1}
>
> terms are:[Ljava.lang.String;@1e13d52
>
> frequencies are:[I@80fa6f
>
>
>
> If some body can pls. help me to get the desired output.
>
> Thanx,
>
> Manjula.
>

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org