You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2019/08/03 09:34:37 UTC
[lucene-solr] branch branch_8x updated: LUCENE-8764: Add "export
all terms" feature to Luke
This is an automated email from the ASF dual-hosted git repository.
tomoko pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new b4ef1b2 LUCENE-8764: Add "export all terms" feature to Luke
b4ef1b2 is described below
commit b4ef1b279c1f831294aab8a24e6bcb0279f9402f
Author: Leonardo Menezes <ma...@lmenezes.com>
AuthorDate: Sat Aug 3 18:20:26 2019 +0900
LUCENE-8764: Add "export all terms" feature to Luke
Co-authored-by: Tomoko Uchida <to...@apache.org>
---
lucene/CHANGES.txt | 2 +
.../app/desktop/components/MenuBarProvider.java | 19 ++
.../dialog/menubar/ExportTermsDialogFactory.java | 275 +++++++++++++++++++++
.../lucene/luke/models/tools/IndexTools.java | 9 +
.../lucene/luke/models/tools/IndexToolsImpl.java | 29 +++
.../luke/app/desktop/messages/messages.properties | 10 +
6 files changed, 344 insertions(+)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7a989ad..bd1ee2b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -14,6 +14,8 @@ New Features
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
+* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
+
Improvements
* LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
index 2a5008f..3090283 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
@@ -33,6 +33,7 @@ import org.apache.lucene.luke.app.desktop.PreferencesFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.AboutDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CheckIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CreateIndexDialogFactory;
+import org.apache.lucene.luke.app.desktop.components.dialog.menubar.ExportTermsDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OpenIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OptimizeIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.util.DialogOpener;
@@ -57,6 +58,8 @@ public final class MenuBarProvider {
private final OptimizeIndexDialogFactory optimizeIndexDialogFactory;
+ private final ExportTermsDialogFactory exportTermsDialogFactory;
+
private final CheckIndexDialogFactory checkIndexDialogFactory;
private final AboutDialogFactory aboutDialogFactory;
@@ -81,6 +84,8 @@ public final class MenuBarProvider {
private final JMenuItem optimizeIndexMItem = new JMenuItem();
+ private final JMenuItem exportTermsMItem = new JMenuItem();
+
private final JMenuItem checkIndexMItem = new JMenuItem();
private final JMenuItem aboutMItem = new JMenuItem();
@@ -95,6 +100,7 @@ public final class MenuBarProvider {
this.openIndexDialogFactory = OpenIndexDialogFactory.getInstance();
this.createIndexDialogFactory = CreateIndexDialogFactory.getInstance();
this.optimizeIndexDialogFactory = OptimizeIndexDialogFactory.getInstance();
+ this.exportTermsDialogFactory = ExportTermsDialogFactory.getInstance();
this.checkIndexDialogFactory = CheckIndexDialogFactory.getInstance();
this.aboutDialogFactory = AboutDialogFactory.getInstance();
@@ -173,6 +179,10 @@ public final class MenuBarProvider {
checkIndexMItem.setEnabled(false);
checkIndexMItem.addActionListener(listeners::showCheckIndexDialog);
toolsMenu.add(checkIndexMItem);
+ exportTermsMItem.setText(MessageUtils.getLocalizedMessage("menu.item.export.terms"));
+ exportTermsMItem.setEnabled(false);
+ exportTermsMItem.addActionListener(listeners::showExportTermsDialog);
+ toolsMenu.add(exportTermsMItem);
return toolsMenu;
}
@@ -258,6 +268,12 @@ public final class MenuBarProvider {
});
}
+ // Menu action handler: opens the export terms dialog produced by exportTermsDialogFactory,
+ // titled "Export terms" and sized 600 x 400.
+ void showExportTermsDialog(ActionEvent e) {
+ new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400,
+ factory -> {
+ // Intentionally empty: nothing is configured on the factory before the dialog is opened.
+ });
+ }
+
}
private class Observer implements IndexObserver, DirectoryObserver {
@@ -267,6 +283,7 @@ public final class MenuBarProvider {
reopenIndexMItem.setEnabled(false);
closeIndexMItem.setEnabled(false);
optimizeIndexMItem.setEnabled(false);
+ exportTermsMItem.setEnabled(false);
checkIndexMItem.setEnabled(true);
}
@@ -279,6 +296,7 @@ public final class MenuBarProvider {
public void openIndex(LukeState state) {
reopenIndexMItem.setEnabled(true);
closeIndexMItem.setEnabled(true);
+ exportTermsMItem.setEnabled(true);
if (!state.readOnly() && state.hasDirectoryReader()) {
optimizeIndexMItem.setEnabled(true);
}
@@ -297,6 +315,7 @@ public final class MenuBarProvider {
closeIndexMItem.setEnabled(false);
optimizeIndexMItem.setEnabled(false);
checkIndexMItem.setEnabled(false);
+ exportTermsMItem.setEnabled(false);
}
}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
new file mode 100644
index 0000000..07fe3cf
--- /dev/null
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.dialog.menubar;
+
+import javax.swing.BorderFactory;
+import javax.swing.BoxLayout;
+import javax.swing.JButton;
+import javax.swing.JComboBox;
+import javax.swing.JDialog;
+import javax.swing.JFileChooser;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JTextField;
+import javax.swing.SwingWorker;
+import java.awt.Color;
+import java.awt.Dialog;
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.awt.GridLayout;
+import java.awt.Insets;
+import java.awt.Window;
+import java.awt.event.ActionEvent;
+import java.io.File;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.luke.app.IndexHandler;
+import org.apache.lucene.luke.app.IndexObserver;
+import org.apache.lucene.luke.app.LukeState;
+import org.apache.lucene.luke.app.desktop.Preferences;
+import org.apache.lucene.luke.app.desktop.PreferencesFactory;
+import org.apache.lucene.luke.app.desktop.util.DialogOpener;
+import org.apache.lucene.luke.app.desktop.util.ImageUtils;
+import org.apache.lucene.luke.app.desktop.util.MessageUtils;
+import org.apache.lucene.luke.app.desktop.util.StyleConstants;
+import org.apache.lucene.luke.models.LukeException;
+import org.apache.lucene.luke.models.tools.IndexTools;
+import org.apache.lucene.luke.models.tools.IndexToolsFactory;
+import org.apache.lucene.luke.models.util.IndexUtils;
+import org.apache.lucene.luke.util.LoggerFactory;
+import org.apache.lucene.util.NamedThreadFactory;
+import org.apache.lucene.util.SuppressForbidden;
+
+/**
+ * Factory of export terms dialog.
+ *
+ * <p>A single instance is shared (see {@link #getInstance()}). The instance registers itself as an
+ * index observer so that the field combo box and the {@link IndexTools} model always reflect the
+ * currently opened index.</p>
+ */
+public final class ExportTermsDialogFactory implements DialogOpener.DialogFactory {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static ExportTermsDialogFactory instance;
+
+  private final IndexToolsFactory indexToolsFactory = new IndexToolsFactory();
+
+  private final Preferences prefs;
+
+  private final IndexHandler indexHandler;
+
+  /** Field names of the opened index; filled and cleared by {@link Observer}. */
+  private final JComboBox<String> fieldCombo = new JComboBox<>();
+
+  /** Read-only text field showing the output directory chosen by the user. */
+  private final JTextField destDir = new JTextField();
+
+  private final JLabel statusLbl = new JLabel();
+
+  /** Progress indicator icon; visible only while an export is running. */
+  private final JLabel indicatorLbl = new JLabel();
+
+  private final ListenerFunctions listeners = new ListenerFunctions();
+
+  private JDialog dialog;
+
+  /** Tools model bound to the opened index; {@code null} while no index is open. */
+  private IndexTools toolsModel;
+
+  /**
+   * Returns the shared factory instance, creating it on first use.
+   *
+   * @return the singleton factory
+   * @throws IOException propagated from the constructor
+   */
+  public static synchronized ExportTermsDialogFactory getInstance() throws IOException {
+    if (instance == null) {
+      instance = new ExportTermsDialogFactory();
+    }
+    return instance;
+  }
+
+  private ExportTermsDialogFactory() throws IOException {
+    this.prefs = PreferencesFactory.getInstance();
+    this.indexHandler = IndexHandler.getInstance();
+    indexHandler.addObserver(new Observer());
+  }
+
+  /**
+   * Creates the application-modal export dialog.
+   *
+   * @param owner - window the dialog is centered on
+   * @param title - dialog title
+   * @param width - dialog width
+   * @param height - dialog height
+   * @return the newly created dialog
+   */
+  @Override
+  public JDialog create(Window owner, String title, int width, int height) {
+    dialog = new JDialog(owner, title, Dialog.ModalityType.APPLICATION_MODAL);
+    dialog.add(content());
+    dialog.setSize(new Dimension(width, height));
+    dialog.setLocationRelativeTo(owner);
+    dialog.getContentPane().setBackground(prefs.getColorTheme().getBackgroundColor());
+    return dialog;
+  }
+
+  /** Builds the dialog body: the five sub panels stacked vertically. */
+  private JPanel content() {
+    // The layout is set exactly once; BoxLayout needs the panel instance, so it cannot be
+    // passed to the constructor.
+    JPanel panel = new JPanel();
+    panel.setOpaque(false);
+    panel.setLayout(new BoxLayout(panel, BoxLayout.PAGE_AXIS));
+    panel.setBorder(BorderFactory.createEmptyBorder(15, 15, 15, 15));
+
+    panel.add(currentOpenIndexPanel());
+    panel.add(fieldComboPanel());
+    panel.add(destinationDirPanel());
+    panel.add(statusPanel());
+    panel.add(actionButtonsPanel());
+
+    return panel;
+  }
+
+  /** Builds the row showing the path of the currently opened index. */
+  private JPanel currentOpenIndexPanel() {
+    JPanel panel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    panel.setBorder(BorderFactory.createEmptyBorder());
+    panel.setOpaque(false);
+    JLabel label = new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.index_path"));
+    JLabel value = new JLabel(indexHandler.getState().getIndexPath());
+    // The tooltip repeats the path so that it stays readable when the label is truncated.
+    value.setToolTipText(indexHandler.getState().getIndexPath());
+    panel.add(label);
+    panel.add(value);
+    return panel;
+  }
+
+  /** Builds the field selection row. */
+  private JPanel fieldComboPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+    panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.field")));
+    panel.add(fieldCombo);
+    return panel;
+  }
+
+  /** Builds the output directory row; the directory defaults to the user's home directory. */
+  private JPanel destinationDirPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+
+    panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.output_path")));
+
+    JPanel inputPanel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    inputPanel.setBorder(BorderFactory.createEmptyBorder());
+    inputPanel.setOpaque(false);
+    destDir.setText(System.getProperty("user.home"));
+    destDir.setColumns(60);
+    destDir.setPreferredSize(new Dimension(200, 30));
+    destDir.setFont(StyleConstants.FONT_MONOSPACE_LARGE);
+    // Not editable: the value can only be changed through the browse button.
+    destDir.setEditable(false);
+    destDir.setBackground(Color.white);
+    inputPanel.add(destDir);
+
+    JButton browseBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.browse"));
+    browseBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
+    browseBtn.setMargin(new Insets(3, 0, 3, 0));
+    browseBtn.addActionListener(listeners::browseDirectory);
+    inputPanel.add(browseBtn);
+
+    panel.add(inputPanel);
+    return panel;
+  }
+
+  /** Builds the button row holding the export and close buttons. */
+  private JPanel actionButtonsPanel() {
+    // Buttons
+    JPanel execButtons = new JPanel(new FlowLayout(FlowLayout.TRAILING));
+    execButtons.setOpaque(false);
+    JButton exportBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.export"));
+    exportBtn.setMargin(new Insets(3, 0, 3, 0));
+    exportBtn.addActionListener(listeners::export);
+    execButtons.add(exportBtn);
+    JButton closeBtn = new JButton(MessageUtils.getLocalizedMessage("button.close"));
+    closeBtn.setMargin(new Insets(3, 0, 3, 0));
+    closeBtn.addActionListener(e -> dialog.dispose());
+    execButtons.add(closeBtn);
+    return execButtons;
+  }
+
+  /** Builds the status row: the status text followed by the (initially hidden) indicator. */
+  private JPanel statusPanel() {
+    JPanel status = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    status.setOpaque(false);
+    indicatorLbl.setIcon(ImageUtils.createImageIcon("indicator.gif", 20, 20));
+    indicatorLbl.setVisible(false);
+    status.add(statusLbl);
+    status.add(indicatorLbl);
+    return status;
+  }
+
+  private class ListenerFunctions {
+
+    /** Lets the user pick the output directory and shows the choice in the text field. */
+    @SuppressForbidden(reason = "JFilechooser#getSelectedFile() returns java.io.File")
+    void browseDirectory(ActionEvent e) {
+      JFileChooser fileChooser = new JFileChooser();
+      fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      fileChooser.setFileHidingEnabled(false);
+      int retVal = fileChooser.showOpenDialog(dialog);
+      if (retVal == JFileChooser.APPROVE_OPTION) {
+        File f = fileChooser.getSelectedFile();
+        destDir.setText(f.getAbsolutePath());
+      }
+    }
+
+    /**
+     * Exports the terms of the selected field on a background thread.
+     *
+     * <p>Swing components are only touched on the event dispatch thread: the inputs are read and
+     * the "in progress" state is shown here, before the worker starts, and the outcome is shown
+     * in {@code done()}. The worker thread itself only runs the export and records its result.</p>
+     */
+    void export(ActionEvent e) {
+      // Action listeners run on the event dispatch thread, so the widgets are safe to use here.
+      final String field = (String) fieldCombo.getSelectedItem();
+      final String directory = destDir.getText();
+      final IndexTools tools = toolsModel;
+      statusLbl.setText("Exporting...");
+      indicatorLbl.setVisible(true);
+
+      ExecutorService executor = Executors.newSingleThreadExecutor(new NamedThreadFactory("export-terms-dialog"));
+
+      SwingWorker<Void, Void> task = new SwingWorker<Void, Void>() {
+
+        // Path of the written file; stays null when the export failed.
+        String filename;
+
+        // Message to show when the export failed; stays null on success.
+        String errorMessage;
+
+        @Override
+        protected Void doInBackground() {
+          setProgress(0);
+          try {
+            filename = tools.exportTerms(directory, field);
+          } catch (LukeException e) {
+            log.error("Error while exporting terms from field " + field, e);
+            errorMessage = MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage());
+          } catch (Exception e) {
+            log.error("Error while exporting terms from field " + field, e);
+            errorMessage = MessageUtils.getLocalizedMessage("message.error.unknown");
+            throw e;
+          } finally {
+            setProgress(100);
+          }
+          return null;
+        }
+
+        @Override
+        protected void done() {
+          // Runs on the event dispatch thread once doInBackground() has finished.
+          indicatorLbl.setVisible(false);
+          if (filename != null) {
+            statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
+          } else if (errorMessage != null) {
+            statusLbl.setText(errorMessage);
+          }
+        }
+      };
+
+      executor.submit(task);
+      // No further tasks are submitted; the already submitted export still runs to completion.
+      executor.shutdown();
+    }
+
+  }
+
+  private class Observer implements IndexObserver {
+
+    /** Binds the tools model to the opened index and lists its field names in sorted order. */
+    @Override
+    public void openIndex(LukeState state) {
+      toolsModel = indexToolsFactory.newInstance(state.getIndexReader(), state.useCompound(), state.keepAllCommits());
+      IndexUtils.getFieldNames(state.getIndexReader()).stream().sorted().forEach(fieldCombo::addItem);
+    }
+
+    /** Drops everything that referred to the index that was just closed. */
+    @Override
+    public void closeIndex() {
+      fieldCombo.removeAllItems();
+      toolsModel = null;
+    }
+
+  }
+
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
index 877646c..72d5384 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
@@ -94,4 +94,13 @@ public interface IndexTools {
* @param dataDir - the directory path which contains sample documents (20 Newsgroups).
*/
void createNewIndex(String dataDir);
+
+
+ /**
+ * Exports the terms of the given field into a new file in the destination directory.
+ *
+ * @param destDir - destination directory the new file is created in
+ * @param field - name of the field whose terms are exported
+ * @return the path of the file containing the export
+ * @throws org.apache.lucene.luke.models.LukeException - if the field has no terms or the file cannot be written (as thrown by the default implementation)
+ */
+ String exportTerms(String destDir, String field);
}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
index 166958b..f4ca89e 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
@@ -17,11 +17,15 @@
package org.apache.lucene.luke.models.tools;
+import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintStream;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
+import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
@@ -30,6 +34,9 @@ import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiTerms;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.luke.models.LukeException;
import org.apache.lucene.luke.models.LukeModel;
import org.apache.lucene.luke.models.util.IndexUtils;
@@ -37,6 +44,7 @@ import org.apache.lucene.luke.models.util.twentynewsgroups.Message;
import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
/** Default implementation of {@link IndexTools} */
public final class IndexToolsImpl extends LukeModel implements IndexTools {
@@ -184,4 +192,25 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
}
}
}
+
+  /**
+   * Writes every term of the given field, together with its document frequency, to a new file
+   * named {@code terms_<field>_<current time millis>.out} in the destination directory.
+   *
+   * <p>The file is UTF-8 encoded and holds one {@code term,docFreq} pair per line. Terms are
+   * written as-is, so a term containing a comma or a line break is not escaped. If the calling
+   * thread is interrupted, the loop stops early and the path of the partially written file is
+   * returned.</p>
+   *
+   * @param destDir - destination directory
+   * @param field - field name
+   * @return the path of the file containing the export
+   * @throws LukeException - if no field is given, the field has no terms, or writing the file fails
+   */
+  @Override
+  public String exportTerms(String destDir, String field) {
+    // An empty field selector yields null; report it clearly instead of failing further down.
+    if (field == null) {
+      throw new LukeException("No field was given to export terms from.");
+    }
+    String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
+    Path path = Paths.get(destDir, filename);
+    try {
+      Terms terms = MultiTerms.getTerms(reader, field);
+      if (terms == null) {
+        throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
+      }
+      try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
+        TermsEnum termsEnum = terms.iterator();
+        BytesRef term;
+        while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
+          // Plain writes produce the same "term,docFreq\n" line as a formatter would, without
+          // parsing a format string for every single term.
+          writer.write(term.utf8ToString());
+          writer.write(',');
+          writer.write(Integer.toString(termsEnum.docFreq()));
+          writer.write('\n');
+        }
+        return path.toString();
+      }
+    } catch (IOException e) {
+      throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
+    }
+  }
}
diff --git a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
index e6fed08..f9c8c45 100644
--- a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
+++ b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
@@ -51,6 +51,7 @@ menu.item.create_index=Create new index
menu.item.close_index=Close index
menu.item.exit=Exit
menu.item.optimize=Optimize index
+menu.item.export.terms=Export terms
menu.item.check_index=Check index
menu.item.theme_gray=Gray
menu.item.theme_classic=Classic
@@ -83,6 +84,15 @@ createindex.label.data_link=http://kdd.ics.uci.edu/databases/20newsgroups/20news
createindex.label.datadir=Data directory:
createindex.textarea.data_help1=You can index sample documents from 20 Newsgroups corpus that is available at here:
createindex.textarea.data_help2=Download and extract the tgz file, then select the extracted directory path.\nCreating an index with the full size corpus takes some time... :)
+# Export terms
+export.terms.label.index_path=Index directory path:
+export.terms.label.output_path=Output directory path:
+export.terms.field=Field to export terms from:
+export.terms.button.export=Export
+export.terms.button.browse=Browse
+export.terms.label.success=<html>Terms successfully exported to: <br>{0}<br><br>Output format is: {1}</html>
+export.terms.label.error=<html>Failed to export: <br>{0}</html>
+
# Optimize index
optimize.dialog.title=Optimize index
optimize.label.index_path=Index directory path: