You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by pd...@apache.org on 2021/09/10 11:51:00 UTC
[zeppelin] branch branch-0.10 updated: [ZEPPELIN-5507] Polish
LuceneSerach
This is an automated email from the ASF dual-hosted git repository.
pdallig pushed a commit to branch branch-0.10
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/branch-0.10 by this push:
new 4dbcf38 [ZEPPELIN-5507] Polish LuceneSerach
4dbcf38 is described below
commit 4dbcf3830750db6738de3c442f780316a0fd0f10
Author: Philipp Dallig <ph...@gmail.com>
AuthorDate: Thu Sep 2 16:00:14 2021 +0200
[ZEPPELIN-5507] Polish LuceneSerach
### What is this PR for?
- This PR polish the LuceneSearch class.
- RAMDirectory is deprecated, we are switching to ByteBuffersDirectory
- Re-throw IOException instead of catching it so we get a better stacktrace
### What type of PR is it?
- Refactoring
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-5507
### How should this be tested?
* via CI
### Questions:
* Does the licenses files need update? No
* Is there breaking changes for older versions? No
* Does this needs documentation? No
Author: Philipp Dallig <ph...@gmail.com>
Closes #4215 from Reamer/search_cleanup and squashes the following commits:
12baa5d27 [Philipp Dallig] Polish LuceneSerach
(cherry picked from commit 27609b9ae17372215f48c91c6cea546625372ae7)
Signed-off-by: Philipp Dallig <ph...@gmail.com>
---
.../org/apache/zeppelin/search/LuceneSearch.java | 101 +++++++++++----------
1 file changed, 52 insertions(+), 49 deletions(-)
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
index 586b5d9..5c2110c 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
@@ -16,12 +16,11 @@
*/
package org.apache.zeppelin.search;
-import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
@@ -30,6 +29,7 @@ import java.util.stream.Stream;
import javax.annotation.PreDestroy;
import javax.inject.Inject;
+import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -55,9 +55,9 @@ import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
+import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.RAMDirectory;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.Paragraph;
@@ -77,34 +77,30 @@ public class LuceneSearch extends SearchService {
private static final String PARAGRAPH = "paragraph";
private static final String ID_FIELD = "id";
- private Path indexPath;
- private Directory indexDirectory;
- private Analyzer analyzer;
- private IndexWriterConfig indexWriterConfig;
- private IndexWriter indexWriter;
+ private final Directory indexDirectory;
+ private final IndexWriter indexWriter;
@Inject
- public LuceneSearch(ZeppelinConfiguration conf) {
+ public LuceneSearch(ZeppelinConfiguration conf) throws IOException {
super("LuceneSearch");
if (conf.isZeppelinSearchUseDisk()) {
try {
- this.indexPath = Paths.get(conf.getZeppelinSearchIndexPath());
+ final Path indexPath = Paths.get(conf.getZeppelinSearchIndexPath());
this.indexDirectory = FSDirectory.open(indexPath);
- LOGGER.info("Use {} for storing lucene search index", this.indexPath);
+ LOGGER.info("Use {} for storing lucene search index", indexPath);
} catch (IOException e) {
- throw new RuntimeException(
- "Failed to create index directory for search service. Use memory instead", e);
+ throw new IOException("Failed to create index directory for search service.", e);
}
} else {
- this.indexDirectory = new RAMDirectory();
+ this.indexDirectory = new ByteBuffersDirectory();
}
- this.analyzer = new StandardAnalyzer();
- this.indexWriterConfig = new IndexWriterConfig(analyzer);
+ final Analyzer analyzer = new StandardAnalyzer();
+ final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
try {
this.indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
} catch (IOException e) {
- LOGGER.error("Failed to create new IndexWriter", e);
+ throw new IOException("Failed to create new IndexWriter", e);
}
}
@@ -125,7 +121,10 @@ public class LuceneSearch extends SearchService {
new MultiFieldQueryParser(new String[] {SEARCH_FIELD_TEXT, SEARCH_FIELD_TITLE}, analyzer);
Query query = parser.parse(queryStr);
- LOGGER.debug("Searching for: {}", query.toString(SEARCH_FIELD_TEXT));
+
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Searching for: {}", query.toString(SEARCH_FIELD_TEXT));
+ }
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
@@ -141,7 +140,7 @@ public class LuceneSearch extends SearchService {
private List<Map<String, String>> doSearch(
IndexSearcher searcher, Query query, Analyzer analyzer, Highlighter highlighter) {
- List<Map<String, String>> matchingParagraphs = Lists.newArrayList();
+ List<Map<String, String>> matchingParagraphs = new ArrayList<>();
ScoreDoc[] hits;
try {
hits = searcher.search(query, 20).scoreDocs;
@@ -166,14 +165,14 @@ public class LuceneSearch extends SearchService {
TokenStream tokenStream =
TokenSources.getTokenStream(
searcher.getIndexReader(), id, SEARCH_FIELD_TEXT, analyzer);
- TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, true, 3);
- LOGGER.debug(" {} fragments found for query '{}'", frag.length, query);
- for (int j = 0; j < frag.length; j++) {
- if ((frag[j] != null) && (frag[j].getScore() > 0)) {
- LOGGER.debug(" Fragment: {}", frag[j].toString());
+ TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, text, true, 3);
+ LOGGER.debug(" {} fragments found for query '{}'", frags.length, query);
+ for (TextFragment frag : frags) {
+ if ((frag != null) && (frag.getScore() > 0)) {
+ LOGGER.debug(" Fragment: {}", frag);
}
}
- fragment = (frag != null && frag.length > 0) ? frag[0].toString() : "";
+ fragment = (frags != null && frags.length > 0) ? frags[0].toString() : "";
}
if (header != null) {
@@ -228,10 +227,10 @@ public class LuceneSearch extends SearchService {
* Updates index for the given note: either note.name or a paragraph If paragraph is <code>null
* </code> - updates only for the note.name
*
- * @param noteId
- * @param noteName
- * @param p
- * @throws IOException
+ * @param noteId id of the note
+ * @param noteName name of the note
+ * @param p paragraph
+ * @throws IOException if there is a low-level IO error
*/
private void updateDoc(String noteId, String noteName, Paragraph p) throws IOException {
String id = formatId(noteId, p);
@@ -240,7 +239,7 @@ public class LuceneSearch extends SearchService {
indexWriter.updateDocument(new Term(ID_FIELD, id), doc);
indexWriter.commit();
} catch (IOException e) {
- LOGGER.error("Failed to update index of notebook {}", noteId, e);
+ throw new IOException("Failed to update index of notebook " + noteId, e);
}
}
@@ -251,7 +250,7 @@ public class LuceneSearch extends SearchService {
static String formatId(String noteId, Paragraph p) {
String id = noteId;
if (null != p) {
- id = Joiner.on('/').join(id, PARAGRAPH, p.getId());
+ id = String.join("/", id, PARAGRAPH, p.getId());
}
return id;
}
@@ -259,7 +258,7 @@ public class LuceneSearch extends SearchService {
static String formatDeleteId(String noteId, Paragraph p) {
String id = noteId;
if (null != p) {
- id = Joiner.on('/').join(id, PARAGRAPH, p.getId());
+ id = String.join("/", id, PARAGRAPH, p.getId());
} else {
id = id + "*";
}
@@ -272,7 +271,7 @@ public class LuceneSearch extends SearchService {
* @param id id of the document, different for Note name and paragraph
* @param noteName name of the note
* @param p paragraph
- * @return
+ * @return a document
*/
private Document newDocument(String id, String noteName, Paragraph p) {
Document doc = new Document();
@@ -300,12 +299,12 @@ public class LuceneSearch extends SearchService {
* @see org.apache.zeppelin.search.Search#addIndexDoc(org.apache.zeppelin.notebook.Note)
*/
@Override
- public void addNoteIndex(Note note) {
+ public void addNoteIndex(Note note) throws IOException {
try {
addIndexDocAsync(note);
indexWriter.commit();
} catch (IOException e) {
- LOGGER.error("Failed to add note {} to index", note, e);
+ throw new IOException("Failed to add note " + note + " to index", e);
}
}
@@ -317,11 +316,11 @@ public class LuceneSearch extends SearchService {
/**
* Indexes the given notebook, but does not commit changes.
*
- * @param note
- * @throws IOException
+ * @param note Note to add the index
+ * @throws IOException if there is a low-level IO error
*/
private void addIndexDocAsync(Note note) throws IOException {
- indexNoteName(indexWriter, note.getId(), note.getName());
+ indexNoteName(note.getId(), note.getName());
for (Paragraph paragraph : note.getParagraphs()) {
updateDoc(note.getId(), note.getName(), paragraph);
}
@@ -331,7 +330,7 @@ public class LuceneSearch extends SearchService {
* @see org.apache.zeppelin.search.Search#deleteIndexDocs(org.apache.zeppelin.notebook.Note)
*/
@Override
- public void deleteNoteIndex(Note note) {
+ public void deleteNoteIndex(Note note) throws IOException{
if (note == null) {
return;
}
@@ -346,24 +345,24 @@ public class LuceneSearch extends SearchService {
* #deleteIndexDoc(org.apache.zeppelin.notebook.Note, org.apache.zeppelin.notebook.Paragraph)
*/
@Override
- public void deleteParagraphIndex(String noteId, Paragraph p) {
+ public void deleteParagraphIndex(String noteId, Paragraph p) throws IOException{
deleteDoc(noteId, p);
}
/**
* Delete note index of paragraph index (when p is not null).
*
- * @param noteId
- * @param p
+ * @param noteId id of the note
+ * @param p paragraph
*/
- private void deleteDoc(String noteId, Paragraph p) {
+ private void deleteDoc(String noteId, Paragraph p) throws IOException {
String fullNoteOrJustParagraph = formatDeleteId(noteId, p);
LOGGER.debug("Deleting note {}, out of: {}", noteId, indexWriter.getDocStats().numDocs);
try {
indexWriter.deleteDocuments(new WildcardQuery(new Term(ID_FIELD, fullNoteOrJustParagraph)));
indexWriter.commit();
} catch (IOException e) {
- LOGGER.error("Failed to delete {} from index by '{}'", noteId, fullNoteOrJustParagraph, e);
+ throw new IOException("Failed to delete " + noteId + " from index by '" + fullNoteOrJustParagraph + "'", e);
}
LOGGER.debug("Done, index contains {} docs now", indexWriter.getDocStats().numDocs);
}
@@ -387,12 +386,12 @@ public class LuceneSearch extends SearchService {
/**
* Indexes a notebook name
*
- * @throws IOException
+ * @throws IOException if there is a low-level IO error
*/
- private void indexNoteName(IndexWriter w, String noteId, String noteName) throws IOException {
+ private void indexNoteName(String noteId, String noteName) throws IOException {
LOGGER.debug("Indexing Notebook {}, '{}'", noteId, noteName);
- if (null == noteName || noteName.isEmpty()) {
- LOGGER.debug("Skipping empty notebook name");
+ if (StringUtils.isBlank(noteName)) {
+ LOGGER.debug("Skipping empty or blank notebook name");
return;
}
updateDoc(noteId, noteName, null);
@@ -403,7 +402,11 @@ public class LuceneSearch extends SearchService {
Thread thread = new Thread(() -> {
LOGGER.info("Starting rebuild index");
notes.forEach(note -> {
- addNoteIndex(note);
+ try {
+ addNoteIndex(note);
+ } catch (IOException e) {
+ LOGGER.error("Unable to add Note " + note.getName() + " to Index", e);
+ }
note.unLoad();
});
LOGGER.info("Finish rebuild index");