You are viewing a plain text version of this content. The canonical link for it is here.

Posted to java-user@lucene.apache.org by manjula wijewickrema <ma...@gmail.com> on 2010/06/11 12:20:12 UTC

How to get file names instead of paths?

Hi,

Using the following programme I was able to get the full path of each
indexed file that matched the given queries. But my intention is to get
only the file names, without the .txt extension, as I need to send these
file names as labels to another application. So, please let me know how I
can get only the file names in the following code.

Thanx in advance!
Manjula.


My code:

*

public* *class* LuceneDemo {

*public* *static* *final* String *FILES_TO_INDEX_DIRECTORY* = "filesToIndex"
;

*public* *static* *final* String *INDEX_DIRECTORY* = "indexDirectory";

*public* *static* *final* String *FIELD_PATH* = "path";

*public* *static* *final* String *FIELD_CONTENTS* = "contents";

*public* *static* *void* main(String[] args) *throws* Exception {

*createIndex*();

*searchIndex*("rice");

*searchIndex*("milk");

*searchIndex*("banana");

*searchIndex*("foo");

 }

*public* *static* *void* createIndex() *throws* CorruptIndexException,
LockObtainFailedException, IOException {

 SnowballAnalyzer analyzer = *new* SnowballAnalyzer( "English",
StopAnalyzer.ENGLISH_STOP_WORDS);

*boolean* recreateIndexIfExists = *true*;

IndexWriter indexWriter = *new* IndexWriter(*INDEX_DIRECTORY*, analyzer,
recreateIndexIfExists);

File dir = *new* File(*FILES_TO_INDEX_DIRECTORY*);

File[] files = dir.listFiles();

*for* (File file : files) {

Document document = *new* Document();

String path = file.getCanonicalPath();

document.add(*new* Field(*FIELD_PATH*, path, Field.Store.*YES*, Field.Index.
UN_TOKENIZED,Field.TermVector.*YES*));

Reader reader = *new* FileReader(file);

document.add(*new* Field(*FIELD_CONTENTS*, reader));

indexWriter.addDocument(document);

 }

indexWriter.optimize();

indexWriter.close();

}

*public* *static* *void* searchIndex(String searchString)
*throws*IOException, ParseException {

System.*out*.println("Searching for '" + searchString + "'");

Directory directory = FSDirectory.getDirectory(*INDEX_DIRECTORY*);

IndexReader indexReader = IndexReader.open(directory);

IndexSearcher indexSearcher = *new* IndexSearcher(indexReader);

 SnowballAnalyzer analyzer = *new* SnowballAnalyzer( "English",
StopAnalyzer.ENGLISH_STOP_WORDS);

QueryParser queryParser = *new* QueryParser(*FIELD_CONTENTS*, analyzer);

Query query = queryParser.parse(searchString);

Hits hits = indexSearcher.search(query);

System.*out*.println("Number of hits: " + hits.length());

TopDocs results = indexSearcher.search(query,10);

ScoreDoc[] hits1 = results.scoreDocs;

*for* (ScoreDoc hit : hits1) {

Document doc = indexSearcher.doc(hit.doc);

System.*out*.printf("%5.3f %s\n",hit.score,doc.get(*FIELD_CONTENTS*));

}

 Iterator<Hit> it = hits.iterator();

*while* (it.hasNext()) {

Hit hit = it.next();

Document document = hit.getDocument();

String path = document.get(*FIELD_PATH*);

System.*out*.println("Hit: " + path);

}

}

}

Re: How to get file names instead of paths?

Posted by manjula wijewickrema <ma...@gmail.com>.

Dear Ian,

The segment you suggested is working nicely. Thanks a lot for your kind
help.

Manjula.

On Fri, Jun 11, 2010 at 4:00 PM, Ian Lea <ia...@gmail.com> wrote:

> Something like this
>
> File f = new File(path);
> String fn = f.getName();
> return fn.substring(0, fn.lastIndexOf("."));
>
>
> --
> Ian.
>
>
> On Fri, Jun 11, 2010 at 11:20 AM, manjula wijewickrema
> <ma...@gmail.com> wrote:
> > Hi,
> >
> > Using the following programme I was able to get the entire file path of
> > indexed files which matched with the given queries. But my intention is
> to
> > get only the file names even without .txt extention as I need to send
> these
> > file names as labels to another application. So, pls. let me know how can
> I
> > get only the file names in the following code.
> >
> > Thanx in advance!
> > Manjula.
> >
> >
> > My code:
> >
> > *
> >
> > public* *class* LuceneDemo {
> >
> > *public* *static* *final* String *FILES_TO_INDEX_DIRECTORY* =
> "filesToIndex"
> > ;
> >
> > *public* *static* *final* String *INDEX_DIRECTORY* = "indexDirectory";
> >
> > *public* *static* *final* String *FIELD_PATH* = "path";
> >
> > *public* *static* *final* String *FIELD_CONTENTS* = "contents";
> >
> > *public* *static* *void* main(String[] args) *throws* Exception {
> >
> > *createIndex*();
> >
> > *searchIndex*("rice");
> >
> > *searchIndex*("milk");
> >
> > *searchIndex*("banana");
> >
> > *searchIndex*("foo");
> >
> >  }
> >
> > *public* *static* *void* createIndex() *throws* CorruptIndexException,
> > LockObtainFailedException, IOException {
> >
> >  SnowballAnalyzer analyzer = *new* SnowballAnalyzer( "English",
> > StopAnalyzer.ENGLISH_STOP_WORDS);
> >
> > *boolean* recreateIndexIfExists = *true*;
> >
> > IndexWriter indexWriter = *new* IndexWriter(*INDEX_DIRECTORY*, analyzer,
> > recreateIndexIfExists);
> >
> > File dir = *new* File(*FILES_TO_INDEX_DIRECTORY*);
> >
> > File[] files = dir.listFiles();
> >
> > *for* (File file : files) {
> >
> > Document document = *new* Document();
> >
> > String path = file.getCanonicalPath();
> >
> > document.add(*new* Field(*FIELD_PATH*, path, Field.Store.*YES*,
> Field.Index.
> > UN_TOKENIZED,Field.TermVector.*YES*));
> >
> > Reader reader = *new* FileReader(file);
> >
> > document.add(*new* Field(*FIELD_CONTENTS*, reader));
> >
> > indexWriter.addDocument(document);
> >
> >  }
> >
> > indexWriter.optimize();
> >
> > indexWriter.close();
> >
> > }
> >
> > *public* *static* *void* searchIndex(String searchString)
> > *throws*IOException, ParseException {
> >
> > System.*out*.println("Searching for '" + searchString + "'");
> >
> > Directory directory = FSDirectory.getDirectory(*INDEX_DIRECTORY*);
> >
> > IndexReader indexReader = IndexReader.open(directory);
> >
> > IndexSearcher indexSearcher = *new* IndexSearcher(indexReader);
> >
> >  SnowballAnalyzer analyzer = *new* SnowballAnalyzer( "English",
> > StopAnalyzer.ENGLISH_STOP_WORDS);
> >
> > QueryParser queryParser = *new* QueryParser(*FIELD_CONTENTS*, analyzer);
> >
> > Query query = queryParser.parse(searchString);
> >
> > Hits hits = indexSearcher.search(query);
> >
> > System.*out*.println("Number of hits: " + hits.length());
> >
> > TopDocs results = indexSearcher.search(query,10);
> >
> > ScoreDoc[] hits1 = results.scoreDocs;
> >
> > *for* (ScoreDoc hit : hits1) {
> >
> > Document doc = indexSearcher.doc(hit.doc);
> >
> > System.*out*.printf("%5.3f %s\n",hit.score,doc.get(*FIELD_CONTENTS*));
> >
> > }
> >
> >  Iterator<Hit> it = hits.iterator();
> >
> > *while* (it.hasNext()) {
> >
> > Hit hit = it.next();
> >
> > Document document = hit.getDocument();
> >
> > String path = document.get(*FIELD_PATH*);
> >
> > System.*out*.println("Hit: " + path);
> >
> > }
> >
> > }
> >
> > }
> >
>
>  ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>

Re: How to get file names instead of paths?

Posted by Ian Lea <ia...@gmail.com>.

Something like this

File f = new File(path);
String fn = f.getName();
// lastIndexOf returns -1 when the name has no '.', and 0 for dot-files such
// as ".profile"; in both cases there is no extension to strip, so return the
// name unchanged instead of throwing or producing an empty label.
int dot = fn.lastIndexOf('.');
return dot > 0 ? fn.substring(0, dot) : fn;


--
Ian.


On Fri, Jun 11, 2010 at 11:20 AM, manjula wijewickrema
<ma...@gmail.com> wrote:
> Hi,
>
> Using the following programme I was able to get the entire file path of
> indexed files which matched with the given queries. But my intention is to
> get only the file names even without .txt extention as I need to send these
> file names as labels to another application. So, pls. let me know how can I
> get only the file names in the following code.
>
> Thanx in advance!
> Manjula.
>
>
> My code:
>
> *
>
> public* *class* LuceneDemo {
>
> *public* *static* *final* String *FILES_TO_INDEX_DIRECTORY* = "filesToIndex"
> ;
>
> *public* *static* *final* String *INDEX_DIRECTORY* = "indexDirectory";
>
> *public* *static* *final* String *FIELD_PATH* = "path";
>
> *public* *static* *final* String *FIELD_CONTENTS* = "contents";
>
> *public* *static* *void* main(String[] args) *throws* Exception {
>
> *createIndex*();
>
> *searchIndex*("rice");
>
> *searchIndex*("milk");
>
> *searchIndex*("banana");
>
> *searchIndex*("foo");
>
>  }
>
> *public* *static* *void* createIndex() *throws* CorruptIndexException,
> LockObtainFailedException, IOException {
>
>  SnowballAnalyzer analyzer = *new* SnowballAnalyzer( "English",
> StopAnalyzer.ENGLISH_STOP_WORDS);
>
> *boolean* recreateIndexIfExists = *true*;
>
> IndexWriter indexWriter = *new* IndexWriter(*INDEX_DIRECTORY*, analyzer,
> recreateIndexIfExists);
>
> File dir = *new* File(*FILES_TO_INDEX_DIRECTORY*);
>
> File[] files = dir.listFiles();
>
> *for* (File file : files) {
>
> Document document = *new* Document();
>
> String path = file.getCanonicalPath();
>
> document.add(*new* Field(*FIELD_PATH*, path, Field.Store.*YES*, Field.Index.
> UN_TOKENIZED,Field.TermVector.*YES*));
>
> Reader reader = *new* FileReader(file);
>
> document.add(*new* Field(*FIELD_CONTENTS*, reader));
>
> indexWriter.addDocument(document);
>
>  }
>
> indexWriter.optimize();
>
> indexWriter.close();
>
> }
>
> *public* *static* *void* searchIndex(String searchString)
> *throws*IOException, ParseException {
>
> System.*out*.println("Searching for '" + searchString + "'");
>
> Directory directory = FSDirectory.getDirectory(*INDEX_DIRECTORY*);
>
> IndexReader indexReader = IndexReader.open(directory);
>
> IndexSearcher indexSearcher = *new* IndexSearcher(indexReader);
>
>  SnowballAnalyzer analyzer = *new* SnowballAnalyzer( "English",
> StopAnalyzer.ENGLISH_STOP_WORDS);
>
> QueryParser queryParser = *new* QueryParser(*FIELD_CONTENTS*, analyzer);
>
> Query query = queryParser.parse(searchString);
>
> Hits hits = indexSearcher.search(query);
>
> System.*out*.println("Number of hits: " + hits.length());
>
> TopDocs results = indexSearcher.search(query,10);
>
> ScoreDoc[] hits1 = results.scoreDocs;
>
> *for* (ScoreDoc hit : hits1) {
>
> Document doc = indexSearcher.doc(hit.doc);
>
> System.*out*.printf("%5.3f %s\n",hit.score,doc.get(*FIELD_CONTENTS*));
>
> }
>
>  Iterator<Hit> it = hits.iterator();
>
> *while* (it.hasNext()) {
>
> Hit hit = it.next();
>
> Document document = hit.getDocument();
>
> String path = document.get(*FIELD_PATH*);
>
> System.*out*.println("Hit: " + path);
>
> }
>
> }
>
> }
>

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org