You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by manjula wijewickrema <ma...@gmail.com> on 2010/05/17 13:23:28 UTC
Problem of getTermFrequencies()
Hi,
I wrote some code to display the indexed terms of a single document and get
their term frequencies. Although it displays the terms in the
index, it does not give the term frequencies. Instead it displays ' frequencies
are:[I@80fa6f '. What is the reason for this? The code I have written and the
output it produces are given below.
Code:
*
import* org.apache.lucene.analysis.standard.StandardAnalyzer;
*
import* org.apache.lucene.document.Document;
*
import* org.apache.lucene.document.Field;
*
import* org.apache.lucene.index.IndexWriter;
*
import* org.apache.lucene.index.IndexReader;
*
import* org.apache.lucene.queryParser.ParseException;
*
import* org.apache.lucene.queryParser.QueryParser;
*
import* org.apache.lucene.search.*;
*
import* org.apache.lucene.store.Directory;
*
import* org.apache.lucene.store.RAMDirectory;
*
import* org.apache.lucene.util.Version;
*
import* org.apache.lucene.index.TermFreqVector;
*
import* java.io.BufferedReader;
*
import* java.io.FileReader;
*
import* java.io.IOException;
*
import* org.apache.lucene.analysis.StopAnalyzer;
*
import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
*
public* *class* Testing{
*
public* *static* *void* main(String[] args) *throws* IOException,
ParseException {
//StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English", StopAnalyzer.
ENGLISH_STOP_WORDS);
*try*{
Directory directory=*new* RAMDirectory();
IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
IndexWriter.MaxFieldLength.*UNLIMITED*);
Document doc = *new* Document();
String text="This is a sample codes code for testing lucene's capabilities
over lucene term frequencies";
doc.add(*new* Field("title", text, Field.Store.*YES*, Field.Index.*ANALYZED*
,Field.TermVector.*YES*));
w.addDocument(doc);
w.close();
IndexReader ir=IndexReader.open(directory);
TermFreqVector[] tfv=ir.getTermFreqVectors(0);
// for (int xy = 0; xy < tfv.length; xy++) {
String[] terms = tfv[0].getTerms();
*int*[] freqs=tfv[0].getTermFrequencies();
//System.out.println("terms are:"+tfv[xy]);
//System.out.println("length is:"+terms.length);
System.*out*.println("array terms are:"+tfv[0]);
System.*out*.println("terms are:"+terms);
System.*out*.println("frequencies are:"+freqs);
// }
}*catch*(Exception ex){
ex.printStackTrace();
}
}
}
Display:
array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
sampl/1, term/1, test/1}
terms are:[Ljava.lang.String;@1e13d52
frequencies are:[I@80fa6f
Could somebody please help me get the desired output?
Thanx,
Manjula.
Re: Problem of getTermFrequencies()
Posted by manjula wijewickrema <ma...@gmail.com>.
Thanx
On Mon, May 17, 2010 at 10:19 PM, Grant Ingersoll <gs...@apache.org>wrote:
> Note, depending on your downstream use, you may consider using a
> TermVectorMapper that allows you to construct your own data structures as
> needed.
>
> -Grant
>
> On May 17, 2010, at 3:16 PM, Ian Lea wrote:
>
> > terms and freqs are arrays. Try terms[i] and freqs[i].
> >
> >
> > --
> > Ian.
> >
> >
> > On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
> > <ma...@gmail.com> wrote:
> >> Hi,
> >>
> >> I wrote a code with a view to display the indexed terms and get their
> term
> >> frequencies of a single document. Although it displys those terms in the
> >> index, it does not give the term frequencies. Instead it displays '
> frequencies
> >> are:[I@80fa6f '. What's the reason for this. The code I have written
> and the
> >> display, can be given as follows.
> >>
> >> Code:
> >>
> >> *
> >>
> >> import* org.apache.lucene.analysis.standard.StandardAnalyzer;
> >> *
> >>
> >> import* org.apache.lucene.document.Document;
> >> *
> >>
> >> import* org.apache.lucene.document.Field;
> >> *
> >>
> >> import* org.apache.lucene.index.IndexWriter;
> >> *
> >>
> >> import* org.apache.lucene.index.IndexReader;
> >> *
> >>
> >> import* org.apache.lucene.queryParser.ParseException;
> >> *
> >>
> >> import* org.apache.lucene.queryParser.QueryParser;
> >> *
> >>
> >> import* org.apache.lucene.search.*;
> >> *
> >>
> >> import* org.apache.lucene.store.Directory;
> >> *
> >>
> >> import* org.apache.lucene.store.RAMDirectory;
> >> *
> >>
> >> import* org.apache.lucene.util.Version;
> >> *
> >>
> >> import* org.apache.lucene.index.TermFreqVector;
> >>
> >> *
> >>
> >> import* java.io.BufferedReader;
> >> *
> >>
> >> import* java.io.FileReader;
> >> *
> >>
> >> import* java.io.IOException;
> >> *
> >>
> >> import* org.apache.lucene.analysis.StopAnalyzer;
> >> *
> >>
> >> import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
> >>
> >>
> >> *
> >>
> >> public* *class* Testing{
> >>
> >> *
> >>
> >> public* *static* *void* main(String[] args) *throws* IOException,
> >> ParseException {
> >>
> >> //StandardAnalyzer analyzer = new
> StandardAnalyzer(Version.LUCENE_CURRENT);
> >>
> >> SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English",
> StopAnalyzer.
> >> ENGLISH_STOP_WORDS);
> >>
> >> *try*{
> >>
> >> Directory directory=*new* RAMDirectory();
> >>
> >> IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
> >>
> >> IndexWriter.MaxFieldLength.*UNLIMITED*);
> >>
> >> Document doc = *new* Document();
> >>
> >> String text="This is a sample codes code for testing lucene's
> capabilities
> >> over lucene term frequencies";
> >>
> >> doc.add(*new* Field("title", text, Field.Store.*YES*,
> Field.Index.*ANALYZED*
> >> ,Field.TermVector.*YES*));
> >>
> >> w.addDocument(doc);
> >>
> >> w.close();
> >>
> >> IndexReader ir=IndexReader.open(directory);
> >>
> >> TermFreqVector[] tfv=ir.getTermFreqVectors(0);
> >>
> >> // for (int xy = 0; xy < tfv.length; xy++) {
> >>
> >> String[] terms = tfv[0].getTerms();
> >>
> >> *int*[] freqs=tfv[0].getTermFrequencies();
> >>
> >> //System.out.println("terms are:"+tfv[xy]);
> >>
> >> //System.out.println("length is:"+terms.length);
> >>
> >> System.*out*.println("array terms are:"+tfv[0]);
> >>
> >> System.*out*.println("terms are:"+terms);
> >>
> >> System.*out*.println("frequencies are:"+freqs);
> >>
> >> // }
> >>
> >> }*catch*(Exception ex){
> >>
> >> ex.printStackTrace();
> >>
> >> }
> >>
> >> }
> >>
> >> }
> >>
> >>
> >>
> >> Display:
> >>
> >> array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
> >> sampl/1, term/1, test/1}
> >>
> >> terms are:[Ljava.lang.String;@1e13d52
> >>
> >> frequencies are:[I@80fa6f
> >>
> >>
> >>
> >> If some body can pls. help me to get the desired output.
> >>
> >> Thanx,
> >>
> >> Manjula.
> >>
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> > For additional commands, e-mail: java-user-help@lucene.apache.org
> >
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>
Re: Problem of getTermFrequencies()
Posted by Grant Ingersoll <gs...@apache.org>.
Note, depending on your downstream use, you may consider using a TermVectorMapper that allows you to construct your own data structures as needed.
-Grant
On May 17, 2010, at 3:16 PM, Ian Lea wrote:
> terms and freqs are arrays. Try terms[i] and freqs[i].
>
>
> --
> Ian.
>
>
> On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
> <ma...@gmail.com> wrote:
>> Hi,
>>
>> I wrote a code with a view to display the indexed terms and get their term
>> frequencies of a single document. Although it displys those terms in the
>> index, it does not give the term frequencies. Instead it displays ' frequencies
>> are:[I@80fa6f '. What's the reason for this. The code I have written and the
>> display, can be given as follows.
>>
>> Code:
>>
>> *
>>
>> import* org.apache.lucene.analysis.standard.StandardAnalyzer;
>> *
>>
>> import* org.apache.lucene.document.Document;
>> *
>>
>> import* org.apache.lucene.document.Field;
>> *
>>
>> import* org.apache.lucene.index.IndexWriter;
>> *
>>
>> import* org.apache.lucene.index.IndexReader;
>> *
>>
>> import* org.apache.lucene.queryParser.ParseException;
>> *
>>
>> import* org.apache.lucene.queryParser.QueryParser;
>> *
>>
>> import* org.apache.lucene.search.*;
>> *
>>
>> import* org.apache.lucene.store.Directory;
>> *
>>
>> import* org.apache.lucene.store.RAMDirectory;
>> *
>>
>> import* org.apache.lucene.util.Version;
>> *
>>
>> import* org.apache.lucene.index.TermFreqVector;
>>
>> *
>>
>> import* java.io.BufferedReader;
>> *
>>
>> import* java.io.FileReader;
>> *
>>
>> import* java.io.IOException;
>> *
>>
>> import* org.apache.lucene.analysis.StopAnalyzer;
>> *
>>
>> import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
>>
>>
>> *
>>
>> public* *class* Testing{
>>
>> *
>>
>> public* *static* *void* main(String[] args) *throws* IOException,
>> ParseException {
>>
>> //StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
>>
>> SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English", StopAnalyzer.
>> ENGLISH_STOP_WORDS);
>>
>> *try*{
>>
>> Directory directory=*new* RAMDirectory();
>>
>> IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
>>
>> IndexWriter.MaxFieldLength.*UNLIMITED*);
>>
>> Document doc = *new* Document();
>>
>> String text="This is a sample codes code for testing lucene's capabilities
>> over lucene term frequencies";
>>
>> doc.add(*new* Field("title", text, Field.Store.*YES*, Field.Index.*ANALYZED*
>> ,Field.TermVector.*YES*));
>>
>> w.addDocument(doc);
>>
>> w.close();
>>
>> IndexReader ir=IndexReader.open(directory);
>>
>> TermFreqVector[] tfv=ir.getTermFreqVectors(0);
>>
>> // for (int xy = 0; xy < tfv.length; xy++) {
>>
>> String[] terms = tfv[0].getTerms();
>>
>> *int*[] freqs=tfv[0].getTermFrequencies();
>>
>> //System.out.println("terms are:"+tfv[xy]);
>>
>> //System.out.println("length is:"+terms.length);
>>
>> System.*out*.println("array terms are:"+tfv[0]);
>>
>> System.*out*.println("terms are:"+terms);
>>
>> System.*out*.println("frequencies are:"+freqs);
>>
>> // }
>>
>> }*catch*(Exception ex){
>>
>> ex.printStackTrace();
>>
>> }
>>
>> }
>>
>> }
>>
>>
>>
>> Display:
>>
>> array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
>> sampl/1, term/1, test/1}
>>
>> terms are:[Ljava.lang.String;@1e13d52
>>
>> frequencies are:[I@80fa6f
>>
>>
>>
>> If some body can pls. help me to get the desired output.
>>
>> Thanx,
>>
>> Manjula.
>>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org
Re: Problem of getTermFrequencies()
Posted by manjula wijewickrema <ma...@gmail.com>.
Dear Ian,
I changed it as you said and now it is working nicely. Thanks a lot for your
kind help.
Manjula
On Mon, May 17, 2010 at 6:46 PM, Ian Lea <ia...@gmail.com> wrote:
> terms and freqs are arrays. Try terms[i] and freqs[i].
>
>
> --
> Ian.
>
>
> On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
> <ma...@gmail.com> wrote:
> > Hi,
> >
> > I wrote a code with a view to display the indexed terms and get their
> term
> > frequencies of a single document. Although it displys those terms in the
> > index, it does not give the term frequencies. Instead it displays '
> frequencies
> > are:[I@80fa6f '. What's the reason for this. The code I have written and
> the
> > display, can be given as follows.
> >
> > Code:
> >
> > *
> >
> > import* org.apache.lucene.analysis.standard.StandardAnalyzer;
> > *
> >
> > import* org.apache.lucene.document.Document;
> > *
> >
> > import* org.apache.lucene.document.Field;
> > *
> >
> > import* org.apache.lucene.index.IndexWriter;
> > *
> >
> > import* org.apache.lucene.index.IndexReader;
> > *
> >
> > import* org.apache.lucene.queryParser.ParseException;
> > *
> >
> > import* org.apache.lucene.queryParser.QueryParser;
> > *
> >
> > import* org.apache.lucene.search.*;
> > *
> >
> > import* org.apache.lucene.store.Directory;
> > *
> >
> > import* org.apache.lucene.store.RAMDirectory;
> > *
> >
> > import* org.apache.lucene.util.Version;
> > *
> >
> > import* org.apache.lucene.index.TermFreqVector;
> >
> > *
> >
> > import* java.io.BufferedReader;
> > *
> >
> > import* java.io.FileReader;
> > *
> >
> > import* java.io.IOException;
> > *
> >
> > import* org.apache.lucene.analysis.StopAnalyzer;
> > *
> >
> > import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
> >
> >
> > *
> >
> > public* *class* Testing{
> >
> > *
> >
> > public* *static* *void* main(String[] args) *throws* IOException,
> > ParseException {
> >
> > //StandardAnalyzer analyzer = new
> StandardAnalyzer(Version.LUCENE_CURRENT);
> >
> > SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English",
> StopAnalyzer.
> > ENGLISH_STOP_WORDS);
> >
> > *try*{
> >
> > Directory directory=*new* RAMDirectory();
> >
> > IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
> >
> > IndexWriter.MaxFieldLength.*UNLIMITED*);
> >
> > Document doc = *new* Document();
> >
> > String text="This is a sample codes code for testing lucene's
> capabilities
> > over lucene term frequencies";
> >
> > doc.add(*new* Field("title", text, Field.Store.*YES*,
> Field.Index.*ANALYZED*
> > ,Field.TermVector.*YES*));
> >
> > w.addDocument(doc);
> >
> > w.close();
> >
> > IndexReader ir=IndexReader.open(directory);
> >
> > TermFreqVector[] tfv=ir.getTermFreqVectors(0);
> >
> > // for (int xy = 0; xy < tfv.length; xy++) {
> >
> > String[] terms = tfv[0].getTerms();
> >
> > *int*[] freqs=tfv[0].getTermFrequencies();
> >
> > //System.out.println("terms are:"+tfv[xy]);
> >
> > //System.out.println("length is:"+terms.length);
> >
> > System.*out*.println("array terms are:"+tfv[0]);
> >
> > System.*out*.println("terms are:"+terms);
> >
> > System.*out*.println("frequencies are:"+freqs);
> >
> > // }
> >
> > }*catch*(Exception ex){
> >
> > ex.printStackTrace();
> >
> > }
> >
> > }
> >
> > }
> >
> >
> >
> > Display:
> >
> > array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
> > sampl/1, term/1, test/1}
> >
> > terms are:[Ljava.lang.String;@1e13d52
> >
> > frequencies are:[I@80fa6f
> >
> >
> >
> > If some body can pls. help me to get the desired output.
> >
> > Thanx,
> >
> > Manjula.
> >
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>
Re: Problem of getTermFrequencies()
Posted by Ian Lea <ia...@gmail.com>.
terms and freqs are arrays. Try terms[i] and freqs[i].
--
Ian.
On Mon, May 17, 2010 at 12:23 PM, manjula wijewickrema
<ma...@gmail.com> wrote:
> Hi,
>
> I wrote a code with a view to display the indexed terms and get their term
> frequencies of a single document. Although it displys those terms in the
> index, it does not give the term frequencies. Instead it displays ' frequencies
> are:[I@80fa6f '. What's the reason for this. The code I have written and the
> display, can be given as follows.
>
> Code:
>
> *
>
> import* org.apache.lucene.analysis.standard.StandardAnalyzer;
> *
>
> import* org.apache.lucene.document.Document;
> *
>
> import* org.apache.lucene.document.Field;
> *
>
> import* org.apache.lucene.index.IndexWriter;
> *
>
> import* org.apache.lucene.index.IndexReader;
> *
>
> import* org.apache.lucene.queryParser.ParseException;
> *
>
> import* org.apache.lucene.queryParser.QueryParser;
> *
>
> import* org.apache.lucene.search.*;
> *
>
> import* org.apache.lucene.store.Directory;
> *
>
> import* org.apache.lucene.store.RAMDirectory;
> *
>
> import* org.apache.lucene.util.Version;
> *
>
> import* org.apache.lucene.index.TermFreqVector;
>
> *
>
> import* java.io.BufferedReader;
> *
>
> import* java.io.FileReader;
> *
>
> import* java.io.IOException;
> *
>
> import* org.apache.lucene.analysis.StopAnalyzer;
> *
>
> import* org.apache.lucene.analysis.snowball.SnowballAnalyzer;
>
>
> *
>
> public* *class* Testing{
>
> *
>
> public* *static* *void* main(String[] args) *throws* IOException,
> ParseException {
>
> //StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
>
> SnowballAnalyzer analyzer = *new* SnowballAnalyzer("English", StopAnalyzer.
> ENGLISH_STOP_WORDS);
>
> *try*{
>
> Directory directory=*new* RAMDirectory();
>
> IndexWriter w = *new* IndexWriter(directory, analyzer, *true*,
>
> IndexWriter.MaxFieldLength.*UNLIMITED*);
>
> Document doc = *new* Document();
>
> String text="This is a sample codes code for testing lucene's capabilities
> over lucene term frequencies";
>
> doc.add(*new* Field("title", text, Field.Store.*YES*, Field.Index.*ANALYZED*
> ,Field.TermVector.*YES*));
>
> w.addDocument(doc);
>
> w.close();
>
> IndexReader ir=IndexReader.open(directory);
>
> TermFreqVector[] tfv=ir.getTermFreqVectors(0);
>
> // for (int xy = 0; xy < tfv.length; xy++) {
>
> String[] terms = tfv[0].getTerms();
>
> *int*[] freqs=tfv[0].getTermFrequencies();
>
> //System.out.println("terms are:"+tfv[xy]);
>
> //System.out.println("length is:"+terms.length);
>
> System.*out*.println("array terms are:"+tfv[0]);
>
> System.*out*.println("terms are:"+terms);
>
> System.*out*.println("frequencies are:"+freqs);
>
> // }
>
> }*catch*(Exception ex){
>
> ex.printStackTrace();
>
> }
>
> }
>
> }
>
>
>
> Display:
>
> array terms are:{title: capabl/1, code/2, frequenc/1, lucen/2, over/1,
> sampl/1, term/1, test/1}
>
> terms are:[Ljava.lang.String;@1e13d52
>
> frequencies are:[I@80fa6f
>
>
>
> If some body can pls. help me to get the desired output.
>
> Thanx,
>
> Manjula.
>
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org