You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2019/08/03 09:34:37 UTC

[lucene-solr] branch branch_8x updated: LUCENE-8764: Add "export all terms" feature to Luke

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new b4ef1b2  LUCENE-8764: Add "export all terms" feature to Luke
b4ef1b2 is described below

commit b4ef1b279c1f831294aab8a24e6bcb0279f9402f
Author: Leonardo Menezes <ma...@lmenezes.com>
AuthorDate: Sat Aug 3 18:20:26 2019 +0900

    LUCENE-8764: Add "export all terms" feature to Luke
    
    Co-authored-by: Tomoko Uchida <to...@apache.org>
---
 lucene/CHANGES.txt                                 |   2 +
 .../app/desktop/components/MenuBarProvider.java    |  19 ++
 .../dialog/menubar/ExportTermsDialogFactory.java   | 275 +++++++++++++++++++++
 .../lucene/luke/models/tools/IndexTools.java       |   9 +
 .../lucene/luke/models/tools/IndexToolsImpl.java   |  29 +++
 .../luke/app/desktop/messages/messages.properties  |  10 +
 6 files changed, 344 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7a989ad..bd1ee2b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -14,6 +14,8 @@ New Features
 
 * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
 
+* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
+
 Improvements
 
 * LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
index 2a5008f..3090283 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
@@ -33,6 +33,7 @@ import org.apache.lucene.luke.app.desktop.PreferencesFactory;
 import org.apache.lucene.luke.app.desktop.components.dialog.menubar.AboutDialogFactory;
 import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CheckIndexDialogFactory;
 import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CreateIndexDialogFactory;
+import org.apache.lucene.luke.app.desktop.components.dialog.menubar.ExportTermsDialogFactory;
 import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OpenIndexDialogFactory;
 import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OptimizeIndexDialogFactory;
 import org.apache.lucene.luke.app.desktop.util.DialogOpener;
@@ -57,6 +58,8 @@ public final class MenuBarProvider {
 
   private final OptimizeIndexDialogFactory optimizeIndexDialogFactory;
 
+  private final ExportTermsDialogFactory exportTermsDialogFactory;
+
   private final CheckIndexDialogFactory checkIndexDialogFactory;
 
   private final AboutDialogFactory aboutDialogFactory;
@@ -81,6 +84,8 @@ public final class MenuBarProvider {
 
   private final JMenuItem optimizeIndexMItem = new JMenuItem();
 
+  private final JMenuItem exportTermsMItem = new JMenuItem();
+
   private final JMenuItem checkIndexMItem = new JMenuItem();
 
   private final JMenuItem aboutMItem = new JMenuItem();
@@ -95,6 +100,7 @@ public final class MenuBarProvider {
     this.openIndexDialogFactory = OpenIndexDialogFactory.getInstance();
     this.createIndexDialogFactory = CreateIndexDialogFactory.getInstance();
     this.optimizeIndexDialogFactory = OptimizeIndexDialogFactory.getInstance();
+    this.exportTermsDialogFactory = ExportTermsDialogFactory.getInstance();
     this.checkIndexDialogFactory = CheckIndexDialogFactory.getInstance();
     this.aboutDialogFactory = AboutDialogFactory.getInstance();
 
@@ -173,6 +179,10 @@ public final class MenuBarProvider {
     checkIndexMItem.setEnabled(false);
     checkIndexMItem.addActionListener(listeners::showCheckIndexDialog);
     toolsMenu.add(checkIndexMItem);
+    exportTermsMItem.setText(MessageUtils.getLocalizedMessage("menu.item.export.terms"));
+    exportTermsMItem.setEnabled(false);
+    exportTermsMItem.addActionListener(listeners::showExportTermsDialog);
+    toolsMenu.add(exportTermsMItem);
     return toolsMenu;
   }
 
@@ -258,6 +268,12 @@ public final class MenuBarProvider {
           });
     }
 
+    /** Asks DialogOpener for a 600 x 400 "Export terms" dialog; the empty callback does no factory set-up. */
+    void showExportTermsDialog(ActionEvent e) {
+      new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400, factory -> {
+          });
+    }
+
   }
 
   private class Observer implements IndexObserver, DirectoryObserver {
@@ -267,6 +283,7 @@ public final class MenuBarProvider {
       reopenIndexMItem.setEnabled(false);
       closeIndexMItem.setEnabled(false);
       optimizeIndexMItem.setEnabled(false);
+      exportTermsMItem.setEnabled(false);
       checkIndexMItem.setEnabled(true);
     }
 
@@ -279,6 +296,7 @@ public final class MenuBarProvider {
     public void openIndex(LukeState state) {
       reopenIndexMItem.setEnabled(true);
       closeIndexMItem.setEnabled(true);
+      exportTermsMItem.setEnabled(true);
       if (!state.readOnly() && state.hasDirectoryReader()) {
         optimizeIndexMItem.setEnabled(true);
       }
@@ -297,6 +315,7 @@ public final class MenuBarProvider {
       closeIndexMItem.setEnabled(false);
       optimizeIndexMItem.setEnabled(false);
       checkIndexMItem.setEnabled(false);
+      exportTermsMItem.setEnabled(false);
     }
 
   }
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
new file mode 100644
index 0000000..07fe3cf
--- /dev/null
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.dialog.menubar;
+
+import javax.swing.BorderFactory;
+import javax.swing.BoxLayout;
+import javax.swing.JButton;
+import javax.swing.JComboBox;
+import javax.swing.JDialog;
+import javax.swing.JFileChooser;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JTextField;
+import javax.swing.SwingWorker;
+import java.awt.Color;
+import java.awt.Dialog;
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.awt.GridLayout;
+import java.awt.Insets;
+import java.awt.Window;
+import java.awt.event.ActionEvent;
+import java.io.File;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.luke.app.IndexHandler;
+import org.apache.lucene.luke.app.IndexObserver;
+import org.apache.lucene.luke.app.LukeState;
+import org.apache.lucene.luke.app.desktop.Preferences;
+import org.apache.lucene.luke.app.desktop.PreferencesFactory;
+import org.apache.lucene.luke.app.desktop.util.DialogOpener;
+import org.apache.lucene.luke.app.desktop.util.ImageUtils;
+import org.apache.lucene.luke.app.desktop.util.MessageUtils;
+import org.apache.lucene.luke.app.desktop.util.StyleConstants;
+import org.apache.lucene.luke.models.LukeException;
+import org.apache.lucene.luke.models.tools.IndexTools;
+import org.apache.lucene.luke.models.tools.IndexToolsFactory;
+import org.apache.lucene.luke.models.util.IndexUtils;
+import org.apache.lucene.luke.util.LoggerFactory;
+import org.apache.lucene.util.NamedThreadFactory;
+import org.apache.lucene.util.SuppressForbidden;
+
+/**
+ * Factory of export terms dialog.
+ *
+ * <p>The dialog lets the user choose a field of the currently opened index and a destination
+ * directory, then exports the terms of that field on a background thread.
+ */
+public final class ExportTermsDialogFactory implements DialogOpener.DialogFactory {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static ExportTermsDialogFactory instance;
+
+  private final IndexToolsFactory indexToolsFactory = new IndexToolsFactory();
+
+  private final Preferences prefs;
+
+  private final IndexHandler indexHandler;
+
+  private final JComboBox<String> fieldCombo = new JComboBox<String>();
+
+  private final JTextField destDir = new JTextField();
+
+  private final JLabel statusLbl = new JLabel();
+
+  private final JLabel indicatorLbl = new JLabel();
+
+  private final ListenerFunctions listeners = new ListenerFunctions();
+
+  private JDialog dialog;
+
+  private IndexTools toolsModel;
+
+  /** Returns the shared factory instance, creating it on first use. */
+  public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
+    if (instance == null) {
+      instance = new ExportTermsDialogFactory();
+    }
+    return instance;
+  }
+
+  private ExportTermsDialogFactory() throws IOException {
+    this.prefs = PreferencesFactory.getInstance();
+    this.indexHandler = IndexHandler.getInstance();
+    // Follow index open/close events to keep the field list and the tools model up to date.
+    indexHandler.addObserver(new Observer());
+  }
+
+  @Override
+  public JDialog create(Window owner, String title, int width, int height) {
+    dialog = new JDialog(owner, title, Dialog.ModalityType.APPLICATION_MODAL);
+    dialog.add(content());
+    dialog.setSize(new Dimension(width, height));
+    dialog.setLocationRelativeTo(owner);
+    dialog.getContentPane().setBackground(prefs.getColorTheme().getBackgroundColor());
+    return dialog;
+  }
+
+  /** Builds the dialog body: index path, field selector, destination chooser, status and buttons. */
+  private JPanel content() {
+    // BoxLayout is the only layout manager this panel uses, so none is passed to the constructor.
+    JPanel panel = new JPanel();
+    panel.setOpaque(false);
+    panel.setLayout(new BoxLayout(panel, BoxLayout.PAGE_AXIS));
+    panel.setBorder(BorderFactory.createEmptyBorder(15, 15, 15, 15));
+
+    panel.add(currentOpenIndexPanel());
+    panel.add(fieldComboPanel());
+    panel.add(destinationDirPanel());
+    panel.add(statusPanel());
+    panel.add(actionButtonsPanel());
+
+    return panel;
+  }
+
+  private JPanel currentOpenIndexPanel() {
+    JPanel panel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    panel.setBorder(BorderFactory.createEmptyBorder());
+    panel.setOpaque(false);
+    JLabel label = new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.index_path"));
+    JLabel value = new JLabel(indexHandler.getState().getIndexPath());
+    value.setToolTipText(indexHandler.getState().getIndexPath());
+    panel.add(label);
+    panel.add(value);
+    return panel;
+  }
+
+  private JPanel fieldComboPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+    panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.field")));
+    panel.add(fieldCombo);
+    return panel;
+  }
+
+  private JPanel destinationDirPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+
+    panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.output_path")));
+
+    JPanel inputPanel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    inputPanel.setBorder(BorderFactory.createEmptyBorder());
+    inputPanel.setOpaque(false);
+    destDir.setText(System.getProperty("user.home"));
+    destDir.setColumns(60);
+    destDir.setPreferredSize(new Dimension(200, 30));
+    destDir.setFont(StyleConstants.FONT_MONOSPACE_LARGE);
+    destDir.setEditable(false);
+    destDir.setBackground(Color.white);
+    inputPanel.add(destDir);
+
+    JButton browseBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.browse"));
+    browseBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
+    browseBtn.setMargin(new Insets(3, 0, 3, 0));
+    browseBtn.addActionListener(listeners::browseDirectory);
+    inputPanel.add(browseBtn);
+
+    panel.add(inputPanel);
+    return panel;
+  }
+
+  private JPanel actionButtonsPanel() {
+    // Buttons
+    JPanel execButtons = new JPanel(new FlowLayout(FlowLayout.TRAILING));
+    execButtons.setOpaque(false);
+    JButton exportBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.export"));
+    exportBtn.setMargin(new Insets(3, 0, 3, 0));
+    exportBtn.addActionListener(listeners::export);
+    execButtons.add(exportBtn);
+    JButton closeBtn = new JButton(MessageUtils.getLocalizedMessage("button.close"));
+    closeBtn.setMargin(new Insets(3, 0, 3, 0));
+    closeBtn.addActionListener(e -> dialog.dispose());
+    execButtons.add(closeBtn);
+    return execButtons;
+  }
+
+  private JPanel statusPanel() {
+    JPanel status = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    status.setOpaque(false);
+    indicatorLbl.setIcon(ImageUtils.createImageIcon("indicator.gif", 20, 20));
+    indicatorLbl.setVisible(false);
+    status.add(statusLbl);
+    status.add(indicatorLbl);
+    return status;
+  }
+
+  private class ListenerFunctions {
+
+    @SuppressForbidden(reason = "JFilechooser#getSelectedFile() returns java.io.File")
+    void browseDirectory(ActionEvent e) {
+      JFileChooser fileChooser = new JFileChooser();
+      fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      fileChooser.setFileHidingEnabled(false);
+      int retVal = fileChooser.showOpenDialog(dialog);
+      if (retVal == JFileChooser.APPROVE_OPTION) {
+        File f = fileChooser.getSelectedFile();
+        destDir.setText(f.getAbsolutePath());
+      }
+    }
+
+    /**
+     * Starts the export on a background thread.
+     *
+     * <p>Swing components are only touched here and in {@code done()}, which both run on the event
+     * dispatch thread; the worker thread only sees the values captured below.
+     */
+    void export(ActionEvent e) {
+      final IndexTools tools = toolsModel;
+      final String field = (String) fieldCombo.getSelectedItem();
+      final String directory = destDir.getText();
+      statusLbl.setText("Exporting...");
+      indicatorLbl.setVisible(true);
+
+      ExecutorService executor = Executors.newSingleThreadExecutor(new NamedThreadFactory("export-terms-dialog"));
+
+      SwingWorker<Void, Void> task = new SwingWorker<Void, Void>() {
+
+        // Written by the worker thread, read in done() once the task has completed.
+        String filename;
+
+        String errorMessage;
+
+        @Override
+        protected Void doInBackground() {
+          setProgress(0);
+          try {
+            filename = tools.exportTerms(directory, field);
+          } catch (LukeException ex) {
+            log.error("Error while exporting terms from field " + field, ex);
+            errorMessage = MessageUtils.getLocalizedMessage("export.terms.label.error", ex.getMessage());
+          } catch (Exception ex) {
+            log.error("Error while exporting terms from field " + field, ex);
+            errorMessage = MessageUtils.getLocalizedMessage("message.error.unknown");
+            throw ex;
+          } finally {
+            setProgress(100);
+          }
+          return null;
+        }
+
+        @Override
+        protected void done() {
+          indicatorLbl.setVisible(false);
+          if (filename != null) {
+            statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
+          } else if (errorMessage != null) {
+            statusLbl.setText(errorMessage);
+          }
+        }
+      };
+
+      executor.submit(task);
+      executor.shutdown();
+    }
+
+  }
+
+  private class Observer implements IndexObserver {
+
+    @Override
+    public void openIndex(LukeState state) {
+      toolsModel = indexToolsFactory.newInstance(state.getIndexReader(), state.useCompound(), state.keepAllCommits());
+      // Start from an empty list so that re-opening an index never leaves duplicate entries.
+      fieldCombo.removeAllItems();
+      IndexUtils.getFieldNames(state.getIndexReader()).stream().sorted().forEach(fieldCombo::addItem);
+    }
+
+    @Override
+    public void closeIndex() {
+      fieldCombo.removeAllItems();
+      toolsModel = null;
+    }
+
+  }
+
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
index 877646c..72d5384 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
@@ -94,4 +94,13 @@ public interface IndexTools {
    * @param dataDir - the directory path which contains sample documents (20 Newsgroups).
    */
   void createNewIndex(String dataDir);
+
+  /**
+   * Export terms from given field into a new file on the destination directory.
+   * Each line of the file holds one term and its document frequency, separated by a comma.
+   * @param destDir - destination directory
+   * @param field - field name
+   * @return The path of the file containing the export
+   */
+  String exportTerms(String destDir, String field);
 }
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
index 166958b..f4ca89e 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
@@ -17,11 +17,15 @@
 
 package org.apache.lucene.luke.models.tools;
 
+import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.List;
+import java.util.Locale;
 import java.util.Objects;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -30,6 +34,9 @@ import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiTerms;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.luke.models.LukeException;
 import org.apache.lucene.luke.models.LukeModel;
 import org.apache.lucene.luke.models.util.IndexUtils;
@@ -37,6 +44,7 @@ import org.apache.lucene.luke.models.util.twentynewsgroups.Message;
 import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 
 /** Default implementation of {@link IndexTools} */
 public final class IndexToolsImpl extends LukeModel implements IndexTools {
@@ -184,4 +192,38 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
       }
     }
   }
+
+  /**
+   * Writes every term of the given field, one per line as {@code term,docFreq}, to a new UTF-8
+   * file named {@code terms_<field>_<timestamp>.out} in the destination directory.
+   *
+   * @param destDir - destination directory
+   * @param field - field name
+   * @return path of the written file
+   * @throws LukeException - if the field has no terms or the file cannot be written
+   */
+  @Override
+  public String exportTerms(String destDir, String field) {
+    // Field names are arbitrary strings; replace characters that are not valid in a file name.
+    String safeField = String.valueOf(field).replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_");
+    String filename = "terms_" + safeField + "_" + System.currentTimeMillis() + ".out";
+    Path path = Paths.get(destDir, filename);
+    try {
+      Terms terms = MultiTerms.getTerms(reader, field);
+      if (terms == null) {
+        throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
+      }
+      try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
+        TermsEnum termsEnum = terms.iterator();
+        BytesRef term;
+        // Stops early when the calling thread is interrupted; the file then holds a partial export.
+        while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
+          writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), termsEnum.docFreq()));
+        }
+        return path.toString();
+      }
+    } catch (IOException e) {
+      throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
+    }
+  }
 }
diff --git a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
index e6fed08..f9c8c45 100644
--- a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
+++ b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
@@ -51,6 +51,7 @@ menu.item.create_index=Create new index
 menu.item.close_index=Close index
 menu.item.exit=Exit
 menu.item.optimize=Optimize index
+menu.item.export.terms=Export terms
 menu.item.check_index=Check index
 menu.item.theme_gray=Gray
 menu.item.theme_classic=Classic
@@ -83,6 +84,15 @@ createindex.label.data_link=http://kdd.ics.uci.edu/databases/20newsgroups/20news
 createindex.label.datadir=Data directory:
 createindex.textarea.data_help1=You can index sample documents from 20 Newsgroups corpus that is available at here:
 createindex.textarea.data_help2=Download and extract the tgz file, then select the extracted directory path.\nCreating an index with the full size corpus takes some time... :)
+# Export terms
+export.terms.label.index_path=Index directory path:
+export.terms.label.output_path=Output directory path:
+export.terms.field=Field to export terms from:
+export.terms.button.export=Export
+export.terms.button.browse=Browse
+export.terms.label.success=<html>Terms successfully exported to: <br>{0}<br><br>Output format is: {1}</html>
+export.terms.label.error=<html>Failed to export: <br>{0}</html>
+
 # Optimize index
 optimize.dialog.title=Optimize index
 optimize.label.index_path=Index directory path: