You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2019/06/22 07:43:08 UTC
[lucene-solr] 01/02: LUCENE-8793: Luke enhanced UI for CustomAnalyzer: show detailed analysis steps.
This is an automated email from the ASF dual-hosted git repository.
tomoko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 8e81f47ca69251cb88393626645525b57219474c
Author: Tomoko Uchida <to...@apache.org>
AuthorDate: Sat Jun 22 16:22:26 2019 +0900
LUCENE-8793: Luke enhanced UI for CustomAnalyzer: show detailed analysis steps.
Co-authored-by: Jun Ohtani
Co-authored-by: Tomoko Uchida
---
lucene/CHANGES.txt | 2 +
.../desktop/components/AnalysisPanelProvider.java | 197 ++++------
.../desktop/components/SearchPanelProvider.java | 4 +-
.../analysis/SimpleAnalyzeResultPanelOperator.java | 32 ++
.../analysis/SimpleAnalyzeResultPanelProvider.java | 196 ++++++++++
.../StepByStepAnalyzeResultPanelOperator.java | 31 ++
.../StepByStepAnalyzeResultPanelProvider.java | 415 +++++++++++++++++++++
.../lucene/luke/models/analysis/Analysis.java | 76 ++++
.../lucene/luke/models/analysis/AnalysisImpl.java | 191 +++++++++-
.../luke/app/desktop/messages/messages.properties | 2 +
.../luke/models/analysis/AnalysisImplTest.java | 40 ++
11 files changed, 1041 insertions(+), 145 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 946706e..4e94c5d 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -46,6 +46,8 @@ Improvements
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
as opposed to eliminating them during scoring construction. (Atri Sharma via Jim Ferenczi)
+* LUCENE-8793: Luke enhanced UI for CustomAnalyzer: show detailed analysis steps. (Jun Ohtani via Tomoko Uchida)
+
======================= Lucene 8.1.0 =======================
API Changes
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java
index 70c2291..864dfcd 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java
@@ -20,14 +20,13 @@ package org.apache.lucene.luke.app.desktop.components;
import javax.swing.BorderFactory;
import javax.swing.ButtonGroup;
import javax.swing.JButton;
+import javax.swing.JCheckBox;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JRadioButton;
import javax.swing.JScrollPane;
import javax.swing.JSplitPane;
-import javax.swing.JTable;
import javax.swing.JTextArea;
-import javax.swing.ListSelectionModel;
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.FlowLayout;
@@ -37,11 +36,9 @@ import java.awt.event.ActionEvent;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.io.IOException;
-import java.util.List;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
@@ -54,13 +51,16 @@ import org.apache.lucene.luke.app.desktop.components.fragments.analysis.CustomAn
import org.apache.lucene.luke.app.desktop.components.fragments.analysis.CustomAnalyzerPanelProvider;
import org.apache.lucene.luke.app.desktop.components.fragments.analysis.PresetAnalyzerPanelOperator;
import org.apache.lucene.luke.app.desktop.components.fragments.analysis.PresetAnalyzerPanelProvider;
+import org.apache.lucene.luke.app.desktop.components.fragments.analysis.SimpleAnalyzeResultPanelOperator;
+import org.apache.lucene.luke.app.desktop.components.fragments.analysis.SimpleAnalyzeResultPanelProvider;
+import org.apache.lucene.luke.app.desktop.components.fragments.analysis.StepByStepAnalyzeResultPanelOperator;
+import org.apache.lucene.luke.app.desktop.components.fragments.analysis.StepByStepAnalyzeResultPanelProvider;
import org.apache.lucene.luke.app.desktop.components.fragments.search.AnalyzerTabOperator;
import org.apache.lucene.luke.app.desktop.components.fragments.search.MLTTabOperator;
import org.apache.lucene.luke.app.desktop.util.DialogOpener;
import org.apache.lucene.luke.app.desktop.util.FontUtils;
import org.apache.lucene.luke.app.desktop.util.MessageUtils;
import org.apache.lucene.luke.app.desktop.util.StyleConstants;
-import org.apache.lucene.luke.app.desktop.util.TableUtils;
import org.apache.lucene.luke.models.analysis.Analysis;
import org.apache.lucene.luke.models.analysis.AnalysisFactory;
import org.apache.lucene.luke.models.analysis.CustomAnalyzerConfig;
@@ -97,11 +97,15 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
private final JTextArea inputArea = new JTextArea();
- private final JTable tokensTable = new JTable();
+ private final JPanel lowerPanel = new JPanel(new BorderLayout());
- private final ListenerFunctions listeners = new ListenerFunctions();
+ private final JPanel simpleResult;
+
+ private final JPanel stepByStepResult;
- private List<Analysis.Token> tokens;
+ private final JCheckBox stepByStepCB = new JCheckBox();
+
+ private final ListenerFunctions listeners = new ListenerFunctions();
private Analysis analysisModel;
@@ -117,11 +121,15 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
this.analysisModel = new AnalysisFactory().newInstance();
analysisModel.createAnalyzerFromClassName(StandardAnalyzer.class.getName());
+ this.simpleResult = new SimpleAnalyzeResultPanelProvider(tokenAttrDialogFactory).get();
+ this.stepByStepResult = new StepByStepAnalyzeResultPanelProvider(tokenAttrDialogFactory).get();
+
operatorRegistry.register(AnalysisTabOperator.class, this);
operatorRegistry.get(PresetAnalyzerPanelOperator.class).ifPresent(operator -> {
// Scanning all Analyzer types will take time...
- ExecutorService executorService = Executors.newFixedThreadPool(1, new NamedThreadFactory("load-preset-analyzer-types"));
+ ExecutorService executorService =
+ Executors.newFixedThreadPool(1, new NamedThreadFactory("load-preset-analyzer-types"));
executorService.execute(() -> {
operator.setPresetAnalyzers(analysisModel.getPresetAnalyzerTypes());
operator.setSelectedAnalyzer(analysisModel.currentAnalyzer().getClass());
@@ -209,53 +217,39 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
inputArea.setText(MessageUtils.getLocalizedMessage("analysis.textarea.prompt"));
input.add(new JScrollPane(inputArea));
- JButton executeBtn = new JButton(FontUtils.elegantIconHtml("", MessageUtils.getLocalizedMessage("analysis.button.test")));
+ JButton executeBtn = new JButton(FontUtils.elegantIconHtml("",
+ MessageUtils.getLocalizedMessage("analysis.button.test")));
executeBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
executeBtn.setMargin(new Insets(3, 3, 3, 3));
executeBtn.addActionListener(listeners::executeAnalysis);
input.add(executeBtn);
+ stepByStepCB.setText(MessageUtils.getLocalizedMessage("analysis.checkbox.step_by_step"));
+ stepByStepCB.setSelected(false);
+ stepByStepCB.setOpaque(false);
+ stepByStepCB.setVisible(false);
+ input.add(stepByStepCB);
+
JButton clearBtn = new JButton(MessageUtils.getLocalizedMessage("button.clear"));
clearBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
clearBtn.setMargin(new Insets(5, 5, 5, 5));
clearBtn.addActionListener(e -> {
inputArea.setText("");
- TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(),
- null,
- TokensTableModel.Column.TERM.getColumnWidth(),
- TokensTableModel.Column.ATTR.getColumnWidth());
+ operatorRegistry.get(SimpleAnalyzeResultPanelOperator.class).ifPresent(
+ SimpleAnalyzeResultPanelOperator::clearTable);
+ operatorRegistry.get(StepByStepAnalyzeResultPanelOperator.class).ifPresent(
+ StepByStepAnalyzeResultPanelOperator::clearTable);
});
input.add(clearBtn);
inner1.add(input, BorderLayout.CENTER);
- JPanel inner2 = new JPanel(new BorderLayout());
- inner2.setOpaque(false);
-
- JPanel hint = new JPanel(new FlowLayout(FlowLayout.LEADING));
- hint.setOpaque(false);
- hint.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.hint.show_attributes")));
- inner2.add(hint, BorderLayout.PAGE_START);
+ lowerPanel.setOpaque(false);
+ lowerPanel.setBorder(BorderFactory.createEmptyBorder(3, 3, 3, 3));
+ lowerPanel.add(inner1, BorderLayout.PAGE_START);
+ lowerPanel.add(this.simpleResult, BorderLayout.CENTER);
-
- TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(),
- new MouseAdapter() {
- @Override
- public void mouseClicked(MouseEvent e) {
- listeners.showAttributeValues(e);
- }
- },
- TokensTableModel.Column.TERM.getColumnWidth(),
- TokensTableModel.Column.ATTR.getColumnWidth());
- inner2.add(new JScrollPane(tokensTable), BorderLayout.CENTER);
-
- JPanel panel = new JPanel(new BorderLayout());
- panel.setOpaque(false);
- panel.setBorder(BorderFactory.createEmptyBorder(3, 3, 3, 3));
- panel.add(inner1, BorderLayout.PAGE_START);
- panel.add(inner2, BorderLayout.CENTER);
-
- return panel;
+ return lowerPanel;
}
// control methods
@@ -269,7 +263,8 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
operator.setPresetAnalyzers(analysisModel.getPresetAnalyzerTypes());
operator.setSelectedAnalyzer(analysisModel.currentAnalyzer().getClass());
});
-
+ stepByStepCB.setSelected(false);
+ stepByStepCB.setVisible(false);
} else if (command.equalsIgnoreCase(TYPE_CUSTOM)) {
mainPanel.remove(preset);
mainPanel.add(custom, BorderLayout.CENTER);
@@ -278,6 +273,7 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
operator.setAnalysisModel(analysisModel);
operator.resetAnalysisComponents();
});
+ stepByStepCB.setVisible(true);
}
mainPanel.setVisible(false);
mainPanel.setVisible(true);
@@ -289,11 +285,32 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
messageBroker.showStatusMessage(MessageUtils.getLocalizedMessage("analysis.message.empry_input"));
}
- tokens = analysisModel.analyze(text);
- tokensTable.setModel(new TokensTableModel(tokens));
- tokensTable.setShowGrid(true);
- tokensTable.getColumnModel().getColumn(TokensTableModel.Column.TERM.getIndex()).setPreferredWidth(TokensTableModel.Column.TERM.getColumnWidth());
- tokensTable.getColumnModel().getColumn(TokensTableModel.Column.ATTR.getIndex()).setPreferredWidth(TokensTableModel.Column.ATTR.getColumnWidth());
+ lowerPanel.remove(stepByStepResult);
+ lowerPanel.add(simpleResult, BorderLayout.CENTER);
+
+ operatorRegistry.get(SimpleAnalyzeResultPanelOperator.class).ifPresent(operator -> {
+ operator.setAnalysisModel(analysisModel);
+ operator.executeAnalysis(text);
+ });
+
+ lowerPanel.setVisible(false);
+ lowerPanel.setVisible(true);
+ }
+
+ void executeAnalysisStepByStep() {
+ String text = inputArea.getText();
+ if (Objects.isNull(text) || text.isEmpty()) {
+ messageBroker.showStatusMessage(MessageUtils.getLocalizedMessage("analysis.message.empry_input"));
+ }
+ lowerPanel.remove(simpleResult);
+ lowerPanel.add(stepByStepResult, BorderLayout.CENTER);
+ operatorRegistry.get(StepByStepAnalyzeResultPanelOperator.class).ifPresent(operator -> {
+ operator.setAnalysisModel(analysisModel);
+ operator.executeAnalysisStepByStep(text);
+ });
+
+ lowerPanel.setVisible(false);
+ lowerPanel.setVisible(true);
}
void showAnalysisChainDialog() {
@@ -306,17 +323,6 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
}
}
- void showAttributeValues(int selectedIndex) {
- String term = tokens.get(selectedIndex).getTerm();
- List<Analysis.TokenAttribute> attributes = tokens.get(selectedIndex).getAttributes();
- new DialogOpener<>(tokenAttrDialogFactory).open("Token Attributes", 650, 400,
- factory -> {
- factory.setTerm(term);
- factory.setAttributes(attributes);
- });
- }
-
-
@Override
public void setAnalyzerByType(String analyzerType) {
analysisModel.createAnalyzerFromClassName(analyzerType);
@@ -359,81 +365,14 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator {
}
void executeAnalysis(ActionEvent e) {
- AnalysisPanelProvider.this.executeAnalysis();
- }
-
- void showAttributeValues(MouseEvent e) {
- if (e.getClickCount() != 2 || e.isConsumed()) {
- return;
- }
- int selectedIndex = tokensTable.rowAtPoint(e.getPoint());
- if (selectedIndex < 0 || selectedIndex >= tokensTable.getRowCount()) {
- return;
- }
- AnalysisPanelProvider.this.showAttributeValues(selectedIndex);
- }
-
- }
-
- static final class TokensTableModel extends TableModelBase<TokensTableModel.Column> {
-
- enum Column implements TableColumnInfo {
- TERM("Term", 0, String.class, 150),
- ATTR("Attributes", 1, String.class, 1000);
-
- private final String colName;
- private final int index;
- private final Class<?> type;
- private final int width;
-
- Column(String colName, int index, Class<?> type, int width) {
- this.colName = colName;
- this.index = index;
- this.type = type;
- this.width = width;
- }
-
- @Override
- public String getColName() {
- return colName;
- }
-
- @Override
- public int getIndex() {
- return index;
- }
-
- @Override
- public Class<?> getType() {
- return type;
- }
-
- @Override
- public int getColumnWidth() {
- return width;
- }
- }
-
- TokensTableModel() {
- super();
- }
-
- TokensTableModel(List<Analysis.Token> tokens) {
- super(tokens.size());
- for (int i = 0; i < tokens.size(); i++) {
- Analysis.Token token = tokens.get(i);
- data[i][Column.TERM.getIndex()] = token.getTerm();
- List<String> attValues = token.getAttributes().stream()
- .flatMap(att -> att.getAttValues().entrySet().stream()
- .map(e -> e.getKey() + "=" + e.getValue()))
- .collect(Collectors.toList());
- data[i][Column.ATTR.getIndex()] = String.join(",", attValues);
+ if (AnalysisPanelProvider.this.stepByStepCB.isSelected()) {
+ AnalysisPanelProvider.this.executeAnalysisStepByStep();
+ } else {
+ AnalysisPanelProvider.this.executeAnalysis();
}
}
- @Override
- protected Column[] columnInfos() {
- return Column.values();
+ void executeAnalysisStepByStep(ActionEvent e) {
}
}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java
index f94517a..395d835 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java
@@ -255,7 +255,7 @@ public final class SearchPanelProvider implements SearchTabOperator {
c.insets = new Insets(2, 0, 2, 2);
panel.add(termQueryCB, c);
- queryStringTA.setRows(4);
+ queryStringTA.setRows(3);
queryStringTA.setLineWrap(true);
queryStringTA.setText("*:*");
c.gridx = 0;
@@ -273,7 +273,7 @@ public final class SearchPanelProvider implements SearchTabOperator {
c.insets = new Insets(8, 0, 2, 2);
panel.add(labelPQ, c);
- parsedQueryTA.setRows(4);
+ parsedQueryTA.setRows(3);
parsedQueryTA.setLineWrap(true);
parsedQueryTA.setEditable(false);
c.gridx = 0;
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelOperator.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelOperator.java
new file mode 100644
index 0000000..5641479
--- /dev/null
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelOperator.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.fragments.analysis;
+
+
+import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry;
+import org.apache.lucene.luke.models.analysis.Analysis;
+
+/** Operator of the simple analyze result panel */
+public interface SimpleAnalyzeResultPanelOperator extends ComponentOperatorRegistry.ComponentOperator {
+
+ void setAnalysisModel(Analysis analysisModel);
+
+ void executeAnalysis(String text);
+
+ void clearTable();
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelProvider.java
new file mode 100644
index 0000000..5e0c077
--- /dev/null
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelProvider.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.fragments.analysis;
+
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JTable;
+import javax.swing.ListSelectionModel;
+
+import java.awt.BorderLayout;
+import java.awt.FlowLayout;
+import java.awt.event.MouseAdapter;
+import java.awt.event.MouseEvent;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry;
+import org.apache.lucene.luke.app.desktop.components.TableColumnInfo;
+import org.apache.lucene.luke.app.desktop.components.TableModelBase;
+import org.apache.lucene.luke.app.desktop.components.dialog.analysis.TokenAttributeDialogFactory;
+import org.apache.lucene.luke.app.desktop.util.DialogOpener;
+import org.apache.lucene.luke.app.desktop.util.MessageUtils;
+import org.apache.lucene.luke.app.desktop.util.TableUtils;
+import org.apache.lucene.luke.models.analysis.Analysis;
+
+/** Provider of the simple analyze result panel */
+public class SimpleAnalyzeResultPanelProvider implements SimpleAnalyzeResultPanelOperator {
+
+ private final ComponentOperatorRegistry operatorRegistry;
+
+ private final TokenAttributeDialogFactory tokenAttrDialogFactory;
+
+ private final JTable tokensTable = new JTable();
+
+ private final ListenerFunctions listeners = new ListenerFunctions();
+
+ private Analysis analysisModel;
+
+ private List<Analysis.Token> tokens;
+
+ public SimpleAnalyzeResultPanelProvider(TokenAttributeDialogFactory tokenAttrDialogFactory) {
+ this.operatorRegistry = ComponentOperatorRegistry.getInstance();
+ operatorRegistry.register(SimpleAnalyzeResultPanelOperator.class, this);
+ this.tokenAttrDialogFactory = tokenAttrDialogFactory;
+ }
+
+ public JPanel get() {
+ JPanel panel = new JPanel(new BorderLayout());
+ panel.setOpaque(false);
+
+ JPanel hint = new JPanel(new FlowLayout(FlowLayout.LEADING));
+ hint.setOpaque(false);
+ hint.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.hint.show_attributes")));
+ panel.add(hint, BorderLayout.PAGE_START);
+
+ TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(),
+ new MouseAdapter() {
+ @Override
+ public void mouseClicked(MouseEvent e) {
+ listeners.showAttributeValues(e);
+ }
+ },
+ TokensTableModel.Column.TERM.getColumnWidth(),
+ TokensTableModel.Column.ATTR.getColumnWidth());
+ panel.add(new JScrollPane(tokensTable), BorderLayout.CENTER);
+
+ return panel;
+ }
+
+ @Override
+ public void setAnalysisModel(Analysis analysisModel) {
+ this.analysisModel = analysisModel;
+ }
+
+ @Override
+ public void executeAnalysis(String text) {
+ tokens = analysisModel.analyze(text);
+ tokensTable.setModel(new TokensTableModel(tokens));
+ tokensTable.setShowGrid(true);
+ tokensTable.getColumnModel().getColumn(TokensTableModel.Column.TERM.getIndex())
+ .setPreferredWidth(TokensTableModel.Column.TERM.getColumnWidth());
+ tokensTable.getColumnModel().getColumn(TokensTableModel.Column.ATTR.getIndex())
+ .setPreferredWidth(TokensTableModel.Column.ATTR.getColumnWidth());
+ }
+
+ @Override
+ public void clearTable() {
+ TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(),
+ null,
+ TokensTableModel.Column.TERM.getColumnWidth(),
+ TokensTableModel.Column.ATTR.getColumnWidth());
+ }
+
+ private void showAttributeValues(int selectedIndex) {
+ String term = tokens.get(selectedIndex).getTerm();
+ List<Analysis.TokenAttribute> attributes = tokens.get(selectedIndex).getAttributes();
+ new DialogOpener<>(tokenAttrDialogFactory).open("Token Attributes", 650, 400,
+ factory -> {
+ factory.setTerm(term);
+ factory.setAttributes(attributes);
+ });
+ }
+
+ private class ListenerFunctions {
+
+ void showAttributeValues(MouseEvent e) {
+ if (e.getClickCount() != 2 || e.isConsumed()) {
+ return;
+ }
+ int selectedIndex = tokensTable.rowAtPoint(e.getPoint());
+ if (selectedIndex < 0 || selectedIndex >= tokensTable.getRowCount()) {
+ return;
+ }
+ SimpleAnalyzeResultPanelProvider.this.showAttributeValues(selectedIndex);
+ }
+ }
+
+ /** Table model for simple result */
+ private static class TokensTableModel extends TableModelBase<TokensTableModel.Column> {
+
+ enum Column implements TableColumnInfo {
+ TERM("Term", 0, String.class, 150),
+ ATTR("Attributes", 1, String.class, 1000);
+
+ private final String colName;
+ private final int index;
+ private final Class<?> type;
+ private final int width;
+
+ Column(String colName, int index, Class<?> type, int width) {
+ this.colName = colName;
+ this.index = index;
+ this.type = type;
+ this.width = width;
+ }
+
+ @Override
+ public String getColName() {
+ return colName;
+ }
+
+ @Override
+ public int getIndex() {
+ return index;
+ }
+
+ @Override
+ public Class<?> getType() {
+ return type;
+ }
+
+ @Override
+ public int getColumnWidth() {
+ return width;
+ }
+ }
+
+ TokensTableModel() {
+ super();
+ }
+
+ TokensTableModel(List<Analysis.Token> tokens) {
+ super(tokens.size());
+ for (int i = 0; i < tokens.size(); i++) {
+ Analysis.Token token = tokens.get(i);
+ data[i][Column.TERM.getIndex()] = token.getTerm();
+ List<String> attValues = token.getAttributes().stream()
+ .flatMap(att -> att.getAttValues().entrySet().stream()
+ .map(e -> e.getKey() + "=" + e.getValue()))
+ .collect(Collectors.toList());
+ data[i][Column.ATTR.getIndex()] = String.join(",", attValues);
+ }
+ }
+
+ @Override
+ protected Column[] columnInfos() {
+ return Column.values();
+ }
+ }
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelOperator.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelOperator.java
new file mode 100644
index 0000000..2311e59
--- /dev/null
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelOperator.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.fragments.analysis;
+
+import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry;
+import org.apache.lucene.luke.models.analysis.Analysis;
+
+/** Operator of the Step by step analyze result panel */
+public interface StepByStepAnalyzeResultPanelOperator extends ComponentOperatorRegistry.ComponentOperator {
+
+ void setAnalysisModel(Analysis analysisModel);
+
+ void executeAnalysisStepByStep(String text);
+
+ void clearTable();
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelProvider.java
new file mode 100644
index 0000000..2ef696b
--- /dev/null
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelProvider.java
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.fragments.analysis;
+
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JSplitPane;
+import javax.swing.JTable;
+import javax.swing.ListSelectionModel;
+import javax.swing.table.AbstractTableModel;
+
+import java.awt.BorderLayout;
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.awt.event.MouseAdapter;
+import java.awt.event.MouseEvent;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry;
+import org.apache.lucene.luke.app.desktop.components.TableColumnInfo;
+import org.apache.lucene.luke.app.desktop.components.TableModelBase;
+import org.apache.lucene.luke.app.desktop.components.dialog.analysis.TokenAttributeDialogFactory;
+import org.apache.lucene.luke.app.desktop.util.DialogOpener;
+import org.apache.lucene.luke.app.desktop.util.MessageUtils;
+import org.apache.lucene.luke.app.desktop.util.TableUtils;
+import org.apache.lucene.luke.models.analysis.Analysis;
+
+/** Provider of the Step by step analyze result panel */
+public class StepByStepAnalyzeResultPanelProvider implements StepByStepAnalyzeResultPanelOperator {
+
+ private final ComponentOperatorRegistry operatorRegistry;
+
+ private final TokenAttributeDialogFactory tokenAttrDialogFactory;
+
+ private final JTable charfilterTextsTable = new JTable();
+
+ private final JTable charfilterTextsRowHeader = new JTable();
+
+ private final JTable namedTokensTable = new JTable();
+
+ private final JTable namedTokensRowHeader = new JTable();
+
+ private final ListenerFunctions listeners = new ListenerFunctions();
+
+ private Analysis analysisModel;
+
+ private Analysis.StepByStepResult result;
+
+ /**
+  * Creates the provider and registers it in the {@link ComponentOperatorRegistry} under
+  * {@link StepByStepAnalyzeResultPanelOperator}.
+  *
+  * @param tokenAttrDialogFactory factory of the dialog that shows the attributes of a token
+  */
+ public StepByStepAnalyzeResultPanelProvider(TokenAttributeDialogFactory tokenAttrDialogFactory) {
+   ComponentOperatorRegistry registry = ComponentOperatorRegistry.getInstance();
+   this.operatorRegistry = registry;
+   registry.register(StepByStepAnalyzeResultPanelOperator.class, this);
+   this.tokenAttrDialogFactory = tokenAttrDialogFactory;
+ }
+
+ /**
+  * Builds the step-by-step result panel: a hint label at the top and, below it, a vertical
+  * split pane with the charfilter output table (upper) and the tokenizer/filter output
+  * table (lower). Each table gets its own row-header table.
+  *
+  * @return the assembled panel
+  */
+ public JPanel get() {
+   JPanel root = new JPanel(new BorderLayout());
+   root.setOpaque(false);
+
+   JPanel hintPanel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+   hintPanel.setOpaque(false);
+   JLabel hintLabel = new JLabel(MessageUtils.getLocalizedMessage("analysis.hint.show_attributes_step_by_step"));
+   hintPanel.add(hintLabel);
+   root.add(hintPanel, BorderLayout.PAGE_START);
+
+   // upper pane: charfilter names (row header) and the text each charfilter emitted
+   TableUtils.setupTable(
+       charfilterTextsRowHeader, ListSelectionModel.SINGLE_SELECTION, new RowHeaderTableModel(), null);
+   TableUtils.setupTable(
+       charfilterTextsTable, ListSelectionModel.SINGLE_SELECTION, new CharfilterTextTableModel(), null);
+
+   // lower pane: tokenizer/filter names (row header) and the tokens each of them emitted
+   TableUtils.setupTable(
+       namedTokensRowHeader, ListSelectionModel.SINGLE_SELECTION, new RowHeaderTableModel(), null);
+   MouseAdapter cellClickListener = new MouseAdapter() {
+     @Override
+     public void mouseClicked(MouseEvent e) {
+       listeners.showAttributeValues(e);
+     }
+   };
+   TableUtils.setupTable(
+       namedTokensTable, ListSelectionModel.SINGLE_SELECTION, new NamedTokensTableModel(), cellClickListener);
+   // a single cell (= one token) must be selectable, not only a whole row
+   namedTokensTable.setColumnSelectionAllowed(true);
+
+   JScrollPane charfilterScroll = initResultScroll(charfilterTextsTable, charfilterTextsRowHeader);
+   JScrollPane tokensScroll = initResultScroll(namedTokensTable, namedTokensRowHeader);
+   JSplitPane splitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, charfilterScroll, tokensScroll);
+   splitPane.setDividerLocation(60);
+
+   root.add(splitPane, BorderLayout.CENTER);
+   return root;
+ }
+
+ /**
+  * Wraps a result table into a scroll pane that uses the given table as its row header.
+  *
+  * @param table the table shown in the viewport
+  * @param header the single-column table shown as the row header
+  * @return the configured scroll pane
+  */
+ private JScrollPane initResultScroll(JTable table, JTable header) {
+   JScrollPane scrollPane = new JScrollPane(table);
+   scrollPane.setRowHeaderView(header);
+   // put the row header's own column header into the otherwise empty upper-left corner
+   scrollPane.setCorner(JScrollPane.UPPER_LEFT_CORNER, header.getTableHeader());
+   int headerHeight = header.getPreferredSize().height;
+   scrollPane.getRowHeader().setPreferredSize(new Dimension(200, headerHeight));
+   return scrollPane;
+ }
+
+
+ /** Stores the analysis model that {@link #executeAnalysisStepByStep(String)} delegates to. */
+ @Override
+ public void setAnalysisModel(Analysis analysisModel) {
+ this.analysisModel = analysisModel;
+ }
+
+ @Override
+ public void executeAnalysisStepByStep(String text) {
+ result = analysisModel.analyzeStepByStep(text);
+ RowHeaderTableModel charfilterTextsHeaderModel = new RowHeaderTableModel(result.getCharfilteredTexts());
+ charfilterTextsRowHeader.setModel(charfilterTextsHeaderModel);
+ charfilterTextsRowHeader.setShowGrid(true);
+
+ CharfilterTextTableModel charfilterTextTableModel = new CharfilterTextTableModel(result.getCharfilteredTexts());
+ charfilterTextsTable.setModel(charfilterTextTableModel);
+ charfilterTextsTable.setShowGrid(true);
+
+ RowHeaderTableModel namedTokensHeaderModel = new RowHeaderTableModel(result.getNamedTokens());
+ namedTokensRowHeader.setModel(namedTokensHeaderModel);
+ namedTokensRowHeader.setShowGrid(true);
+
+ NamedTokensTableModel tableModel = new NamedTokensTableModel(result.getNamedTokens());
+ namedTokensTable.setModel(tableModel);
+ namedTokensTable.setShowGrid(true);
+ for (int i = 0; i < tableModel.getColumnCount(); i++) {
+ namedTokensTable.getColumnModel().getColumn(i).setPreferredWidth(tableModel.getColumnWidth(i));
+ }
+ }
+
+ /** Re-initializes all four tables with empty models; no mouse listener is passed here. */
+ @Override
+ public void clearTable() {
+   final int selectionMode = ListSelectionModel.SINGLE_SELECTION;
+
+   TableUtils.setupTable(charfilterTextsRowHeader, selectionMode, new RowHeaderTableModel(), null);
+   TableUtils.setupTable(charfilterTextsTable, selectionMode, new CharfilterTextTableModel(), null);
+
+   TableUtils.setupTable(namedTokensRowHeader, selectionMode, new RowHeaderTableModel(), null);
+   TableUtils.setupTable(namedTokensTable, selectionMode, new NamedTokensTableModel(), null);
+ }
+
+ /**
+  * Opens the token attribute dialog for the token shown in the given cell.
+  *
+  * <p>Rows can have fewer tokens than the table has columns, because the column count follows
+  * the step with the most tokens. Cells past the end of a row's token list are empty and are
+  * ignored.
+  *
+  * @param rowIndex index of the tokenizer/filter step (row of the tokens table)
+  * @param columnIndex index of the token within that step (column of the tokens table)
+  */
+ private void showAttributeValues(int rowIndex, int columnIndex) {
+   Analysis.NamedTokens namedTokens =
+       this.result.getNamedTokens().get(rowIndex);
+   List<Analysis.Token> tokens = namedTokens.getTokens();
+
+   // The token list is indexed by column, so the column (not the row) has to be range-checked.
+   if (columnIndex < 0 || columnIndex >= tokens.size()) {
+     return;
+   }
+
+   Analysis.Token token = tokens.get(columnIndex);
+   String term = "\"" + token.getTerm() + "\" BY " + namedTokens.getName();
+   List<Analysis.TokenAttribute> attributes = token.getAttributes();
+   new DialogOpener<>(tokenAttrDialogFactory).open("Token Attributes", 650, 400,
+       factory -> {
+         factory.setTerm(term);
+         factory.setAttributes(attributes);
+       });
+ }
+
+ /** Event handlers of this panel. */
+ private class ListenerFunctions {
+
+   /**
+    * Handles a mouse click on the tokens table: on an unconsumed double click inside the
+    * table bounds, shows the attributes of the token in the clicked cell.
+    */
+   void showAttributeValues(MouseEvent e) {
+     boolean isDoubleClick = e.getClickCount() == 2 && !e.isConsumed();
+     if (!isDoubleClick) {
+       return;
+     }
+
+     int row = namedTokensTable.rowAtPoint(e.getPoint());
+     int col = namedTokensTable.columnAtPoint(e.getPoint());
+     boolean rowInRange = row >= 0 && row < namedTokensTable.getRowCount();
+     if (!rowInRange) {
+       return;
+     }
+     boolean colInRange = col >= 0 && col < namedTokensTable.getColumnCount();
+     if (!colInRange) {
+       return;
+     }
+
+     StepByStepAnalyzeResultPanelProvider.this.showAttributeValues(row, col);
+   }
+ }
+
+ /** Table model for row header (display charfilter/tokenizer/filter name) */
+ private static class RowHeaderTableModel extends TableModelBase<RowHeaderTableModel.Column> {
+
+ enum Column implements TableColumnInfo {
+ NAME("Name", 0, String.class, 200);
+
+ private final String colName;
+ private final int index;
+ private final Class<?> type;
+ private final int width;
+
+ Column(String colName, int index, Class<?> type, int width) {
+ this.colName = colName;
+ this.index = index;
+ this.type = type;
+ this.width = width;
+ }
+
+ @Override
+ public String getColName() {
+ return colName;
+ }
+
+ @Override
+ public int getIndex() {
+ return index;
+ }
+
+ @Override
+ public Class<?> getType() {
+ return type;
+ }
+
+ @Override
+ public int getColumnWidth() {
+ return width;
+ }
+ }
+
+ RowHeaderTableModel() {
+ super();
+ }
+
+ RowHeaderTableModel(List<? extends Analysis.NamedObject> namedObjects) {
+ super(namedObjects.size());
+ for (int i = 0; i < namedObjects.size(); i++) {
+ data[i][0] = shortenName(namedObjects.get(i).getName());
+ }
+ }
+
+ @Override
+ protected Column[] columnInfos() {
+ return Column.values();
+ }
+ }
+
+ /** Table model for charfilter result */
+ private static class CharfilterTextTableModel extends TableModelBase<CharfilterTextTableModel.Column> {
+
+ enum Column implements TableColumnInfo {
+ TEXT("Text", 0, String.class, 1000);
+
+ private final String colName;
+ private final int index;
+ private final Class<?> type;
+ private final int width;
+
+ Column(String colName, int index, Class<?> type, int width) {
+ this.colName = colName;
+ this.index = index;
+ this.type = type;
+ this.width = width;
+ }
+
+ @Override
+ public String getColName() {
+ return colName;
+ }
+
+ @Override
+ public int getIndex() {
+ return index;
+ }
+
+ @Override
+ public Class<?> getType() {
+ return type;
+ }
+
+ @Override
+ public int getColumnWidth() {
+ return width;
+ }
+ }
+
+ CharfilterTextTableModel() {
+ super();
+ }
+
+ CharfilterTextTableModel(List<Analysis.CharfilteredText> charfilteredTexts) {
+ super(charfilteredTexts.size());
+ for (int i = 0; i < charfilteredTexts.size(); i++) {
+ data[i][Column.TEXT.getIndex()] = charfilteredTexts.get(i).getText();
+ }
+ }
+
+ @Override
+ protected Column[] columnInfos() {
+ return Column.values();
+ }
+ }
+
+ /**
+  * Table model for tokenizer/filter result.
+  *
+  * <p>One row per analysis step (tokenizer or token filter) and one column per token position.
+  * The column count equals the largest number of tokens produced by any step; rows with fewer
+  * tokens are padded with {@code null} cells. Step names are not stored here, they are shown by
+  * the separate row header table.
+  */
+ private static class NamedTokensTableModel extends AbstractTableModel {
+
+   /** Descriptor of one token-position column. */
+   static final class Column implements TableColumnInfo {
+
+     private final String colName;
+     private final int index;
+     private final Class<?> type;
+     private final int width;
+
+     Column(String colName, int index, Class<?> type, int width) {
+       this.colName = colName;
+       this.index = index;
+       this.type = type;
+       this.width = width;
+     }
+
+     @Override
+     public String getColName() {
+       return colName;
+     }
+
+     @Override
+     public int getIndex() {
+       return index;
+     }
+
+     @Override
+     public Class<?> getType() {
+       return type;
+     }
+
+     @Override
+     public int getColumnWidth() {
+       return width;
+     }
+   }
+
+   // column index -> column descriptor; holds exactly the indexes 0 .. columnCount-1
+   private final Map<Integer, Column> columnMap = new TreeMap<>();
+
+   // data[row][column] is the term of the token at that position, or null for a padded cell
+   private final Object[][] data;
+
+
+   /** Creates an empty model (no rows, no columns). */
+   NamedTokensTableModel() {
+     this.data = new Object[0][0];
+   }
+
+   // Currently this only show each tokenizer/filters result independently,
+   // so the result doesn't show deletion/separation by next filter,
+   // e.g. "library" by WordDelimiterFilter is different position between other output.
+   /**
+    * Creates a model from the per-step token lists.
+    *
+    * @param namedTokens tokens of each tokenizer/filter step, in chain order
+    */
+   NamedTokensTableModel(List<Analysis.NamedTokens> namedTokens) {
+     int maxColumnSize = 0;
+     for (Analysis.NamedTokens step : namedTokens) {
+       maxColumnSize = Math.max(maxColumnSize, step.getTokens().size());
+     }
+     this.data = new Object[namedTokens.size()][maxColumnSize];
+
+     // Columns depend only on the widest row, so they are built once up front. With no tokens
+     // at all the table simply has zero columns.
+     for (int j = 0; j < maxColumnSize; j++) {
+       columnMap.put(j, new Column(String.valueOf(j), j, String.class, 200));
+     }
+
+     // Only token terms go into the cells. Writing the step name into column 0 would fail when
+     // there are no columns and would show a name where a token is expected.
+     for (int i = 0; i < namedTokens.size(); i++) {
+       List<Analysis.Token> tokens = namedTokens.get(i).getTokens();
+       for (int j = 0; j < tokens.size(); j++) {
+         data[i][j] = tokens.get(j).getTerm();
+       }
+     }
+   }
+
+   @Override
+   public int getRowCount() {
+     return data.length;
+   }
+
+   @Override
+   public int getColumnCount() {
+     return columnMap.size();
+   }
+
+
+   /** Returns the column's name, or an empty string for an unknown column index. */
+   @Override
+   public String getColumnName(int colIndex) {
+     if (columnMap.containsKey(colIndex)) {
+       return columnMap.get(colIndex).getColName();
+     }
+     return "";
+   }
+
+   /** Returns the column's value type, or {@code Object.class} for an unknown column index. */
+   @Override
+   public Class<?> getColumnClass(int colIndex) {
+     if (columnMap.containsKey(colIndex)) {
+       return columnMap.get(colIndex).getType();
+     }
+     return Object.class;
+   }
+
+   @Override
+   public Object getValueAt(int rowIndex, int columnIndex) {
+     return data[rowIndex][columnIndex];
+   }
+
+   /** Returns the preferred width of an existing column; the index must be a valid column. */
+   public int getColumnWidth(int columnIndex) {
+     return columnMap.get(columnIndex).getColumnWidth();
+   }
+ }
+
+ /**
+  * Strips everything up to and including the last {@code '.'} from the name, e.g. turns a fully
+  * qualified class name into its simple name. A name without a dot is returned unchanged.
+  */
+ private static String shortenName(String name) {
+   int lastDot = name.lastIndexOf('.');
+   return name.substring(lastDot + 1);
+ }
+
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java
index 83edf5b..355223f 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java
@@ -87,6 +87,73 @@ public interface Analysis {
}
}
+
+ /** Base class for result objects that carry a name (see {@link NamedTokens} and {@link CharfilteredText}). */
+ abstract class NamedObject {
+ // name given at construction time; never changes afterwards
+ private final String name;
+
+ NamedObject(String name) {
+ this.name = name;
+ }
+
+ /** Returns the name passed to the constructor. */
+ public String getName() {
+ return name;
+ }
+ }
+
+ /**
+ * Holder for the name of a tokenizer/filter paired with the list of tokens it produced.
+ */
+ class NamedTokens extends NamedObject {
+ // stored as passed in; the list is not copied
+ private final List<Token> tokens;
+
+ NamedTokens(String name, List<Token> tokens) {
+ super(name);
+ this.tokens = tokens;
+ }
+
+ /** Returns the token list passed to the constructor. */
+ public List<Token> getTokens() {
+ return tokens;
+ }
+ }
+
+ /**
+ * Holder for a charfilter name and the text that was output by that charfilter.
+ */
+ class CharfilteredText extends NamedObject {
+ private final String text;
+
+ public CharfilteredText(String name, String text) {
+ super(name);
+ this.text = text;
+ }
+
+ /** Returns the text passed to the constructor. */
+ public String getText() {
+ return text;
+ }
+ }
+
+ /**
+  * Step-by-step analysis result holder: the text after each charfilter and the tokens after
+  * the tokenizer and each token filter.
+  */
+ class StepByStepResult {
+   // Assigned once in the constructor and never reassigned, hence final like the other holders.
+   private final List<CharfilteredText> charfilteredTexts;
+   private final List<NamedTokens> namedTokens;
+
+   /**
+    * @param charfilteredTexts output of each charfilter
+    * @param namedTokens output of the tokenizer and of each token filter
+    */
+   public StepByStepResult(List<CharfilteredText> charfilteredTexts, List<NamedTokens> namedTokens) {
+     this.charfilteredTexts = charfilteredTexts;
+     this.namedTokens = namedTokens;
+   }
+
+   /** Returns the charfilter outputs passed to the constructor. */
+   public List<CharfilteredText> getCharfilteredTexts() {
+     return charfilteredTexts;
+   }
+
+   /** Returns the tokenizer/filter outputs passed to the constructor. */
+   public List<NamedTokens> getNamedTokens() {
+     return namedTokens;
+   }
+ }
+
/**
* Returns built-in {@link Analyzer}s.
*/
@@ -148,4 +215,13 @@ public interface Analysis {
*/
void addExternalJars(List<String> jarFiles);
+
+ /**
+ * Analyzes given text with the current Analyzer, reporting the output of every analysis step.
+ *
+ * <p>NOTE: the {@code AnalysisImpl} implementation throws a {@code LukeException} when no
+ * analyzer is set or when the current analyzer is not a {@code CustomAnalyzer}.
+ *
+ * @param text - text string to analyze
+ * @return the list of text by charfilter and the list of pair of Tokenizer/TokenFilter name and tokens
+ */
+ StepByStepResult analyzeStepByStep(String text);
+
}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java
index b4d6063..589e90f 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java
@@ -18,6 +18,8 @@
package org.apache.lucene.luke.models.analysis;
import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.net.URLClassLoader;
@@ -38,6 +40,7 @@ import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharFilterFactory;
@@ -46,6 +49,8 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.luke.models.LukeException;
import org.apache.lucene.luke.util.reflection.ClassScanner;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.IOUtils;
/** Default implementation of {@link AnalysisImpl} */
public final class AnalysisImpl implements Analysis {
@@ -131,7 +136,6 @@ public final class AnalysisImpl implements Analysis {
try {
List<Token> result = new ArrayList<>();
-
TokenStream stream = analyzer.tokenStream("", text);
stream.reset();
@@ -139,19 +143,7 @@ public final class AnalysisImpl implements Analysis {
// iterate tokens
while (stream.incrementToken()) {
- List<TokenAttribute> attributes = new ArrayList<>();
- Iterator<AttributeImpl> itr = stream.getAttributeImplsIterator();
-
- while (itr.hasNext()) {
- AttributeImpl att = itr.next();
- Map<String, String> attValues = new LinkedHashMap<>();
- att.reflectWith((attClass, key, value) -> {
- if (value != null)
- attValues.put(key, value.toString());
- });
- attributes.add(new TokenAttribute(att.getClass().getSimpleName(), attValues));
- }
-
+ List<TokenAttribute> attributes = copyAttributes(stream, charAtt);
result.add(new Token(charAtt.toString(), attributes));
}
stream.close();
@@ -162,6 +154,21 @@ public final class AnalysisImpl implements Analysis {
}
}
+ /**
+  * Captures the current state of every attribute of the stream as name/value strings.
+  *
+  * @param tokenStream stream positioned on the token whose attributes are captured
+  * @param charAtt term attribute of the stream; not read by this method
+  * @return one {@link TokenAttribute} per attribute implementation, holding its non-null
+  *         reflected values in reflection order
+  */
+ private List<TokenAttribute> copyAttributes(TokenStream tokenStream, CharTermAttribute charAtt) {
+   List<TokenAttribute> copied = new ArrayList<>();
+   for (Iterator<AttributeImpl> it = tokenStream.getAttributeImplsIterator(); it.hasNext(); ) {
+     AttributeImpl impl = it.next();
+     Map<String, String> reflected = new LinkedHashMap<>();
+     impl.reflectWith((attClass, key, value) -> {
+       // null values are skipped
+       if (value != null) {
+         reflected.put(key, value.toString());
+       }
+     });
+     copied.add(new TokenAttribute(impl.getClass().getSimpleName(), reflected));
+   }
+   return copied;
+ }
+
@Override
public Analyzer createAnalyzerFromClassName(String analyzerType) {
Objects.requireNonNull(analyzerType);
@@ -213,4 +220,160 @@ public final class AnalysisImpl implements Analysis {
return analyzer;
}
+ /**
+  * Analyzes the text with the current {@link CustomAnalyzer} and records the output of every
+  * step: the text after each charfilter, then the tokens after the tokenizer and after each
+  * token filter.
+  *
+  * @param text text to analyze; must not be null
+  * @return the per-step result
+  * @throws LukeException if no analyzer is set, the analyzer is not a CustomAnalyzer, or any
+  *         step fails
+  */
+ @Override
+ public StepByStepResult analyzeStepByStep(String text){
+   Objects.requireNonNull(text);
+   if (analyzer == null) {
+     throw new LukeException("Analyzer is not set.");
+   }
+
+   if (!(analyzer instanceof CustomAnalyzer)) {
+     throw new LukeException("Analyzer is not CustomAnalyzer.");
+   }
+
+   List<NamedTokens> namedTokens = new ArrayList<>();
+   List<CharfilteredText> charfilteredTexts = new ArrayList<>();
+
+   try {
+     CustomAnalyzer customAnalyzer = (CustomAnalyzer)analyzer;
+     final List<CharFilterFactory> charFilterFactories = customAnalyzer.getCharFilterFactories();
+     Reader reader = new StringReader(text);
+     String charFilteredSource = text;
+     for (CharFilterFactory charFilterFactory : charFilterFactories) {
+       // Chain the filters: each one must wrap the reader produced by the previous one.
+       // Wrapping the original reader every time would make the tokenizer see only the
+       // last charfilter.
+       reader = charFilterFactory.create(reader);
+
+       // Separate pass over the text produced so far, only to record this filter's output.
+       Reader readerForWriteOut = charFilterFactory.create(new StringReader(charFilteredSource));
+       charFilteredSource = writeCharStream(readerForWriteOut);
+       charfilteredTexts.add(new CharfilteredText(readerForWriteOut.getClass().getName(), charFilteredSource));
+     }
+
+     final TokenizerFactory tokenizerFactory = customAnalyzer.getTokenizerFactory();
+     final List<TokenFilterFactory> tokenFilterFactories = customAnalyzer.getTokenFilterFactories();
+
+     TokenStream tokenStream = tokenizerFactory.create();
+     ((Tokenizer)tokenStream).setReader(reader);
+     List<Token> tokens = new ArrayList<>();
+     List<AttributeSource> attributeSources = analyzeTokenStream(tokenStream, tokens);
+     namedTokens.add(new NamedTokens(tokenStream.getClass().getName(), tokens));
+
+     // Each filter is fed a replay of the previous step's captured tokens, so that the
+     // output of every step can be recorded on its own.
+     ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, attributeSources);
+     for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
+       tokenStream = tokenFilterFactory.create(listBasedTokenStream);
+       tokens = new ArrayList<>();
+       attributeSources = analyzeTokenStream(tokenStream, tokens);
+       namedTokens.add(new NamedTokens(tokenStream.getClass().getName(), tokens));
+       try {
+         listBasedTokenStream.close();
+       } catch (IOException e) {
+         // do nothing;
+       }
+       listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, attributeSources);
+     }
+     try {
+       listBasedTokenStream.close();
+     } catch (IOException e) {
+       // do nothing.
+     } finally {
+       reader.close();
+     }
+     return new StepByStepResult(charfilteredTexts, namedTokens);
+   } catch (Exception e) {
+     throw new LukeException(e.getMessage(), e);
+   }
+ }
+
+ /**
+  * Consumes the given TokenStream completely and closes it.
+  *
+  * @param tokenStream TokenStream to analyze; it is reset, iterated, ended and closed here
+  * @param result list that receives one {@link Token} (term plus attribute values) per token
+  *
+  * @return a cloned attribute state for each token, in stream order
+  */
+ private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream, List<Token> result) {
+   final List<AttributeSource> states = new ArrayList<>();
+   try {
+     tokenStream.reset();
+     CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
+     while (tokenStream.incrementToken()) {
+       // keep a clone, because the stream reuses its attribute instances for the next token
+       states.add(tokenStream.cloneAttributes());
+       Token token = new Token(termAtt.toString(), copyAttributes(tokenStream, termAtt));
+       result.add(token);
+     }
+     tokenStream.end();
+   } catch (IOException ioe) {
+     throw new RuntimeException("Error occurred while iterating over TokenStream", ioe);
+   } finally {
+     IOUtils.closeWhileHandlingException(tokenStream);
+   }
+   return states;
+ }
+
+ /**
+  * TokenStream that replays a list of previously captured token states.
+  * see org.apache.solr.handler.AnalysisRequestHandlerBase#ListBasedTokenStream
+  */
+ protected static final class ListBasedTokenStream extends TokenStream {
+   private final List<AttributeSource> tokens;
+   // created by reset(); incrementToken() must not be called before reset()
+   private Iterator<AttributeSource> tokenIterator;
+
+   /**
+    * Creates a new ListBasedTokenStream which uses the given tokens as its token source.
+    *
+    * @param attributeSource source of the attribute factory and attribute impls
+    * @param tokens Source of tokens to be used
+    */
+   ListBasedTokenStream(AttributeSource attributeSource, List<AttributeSource> tokens) {
+     super(attributeSource.getAttributeFactory());
+     this.tokens = tokens;
+     // Make sure all the attributes of the source are here too
+     addAttributes(attributeSource);
+   }
+
+   /** Rewinds the stream to the first captured token. */
+   @Override
+   public void reset() throws IOException {
+     super.reset();
+     tokenIterator = tokens.iterator();
+   }
+
+   /** Copies the next captured state into this stream; returns false when none is left. */
+   @Override
+   public boolean incrementToken() {
+     if (!tokenIterator.hasNext()) {
+       return false;
+     }
+     clearAttributes();
+     AttributeSource state = tokenIterator.next();
+     // register the state's attribute impls on this stream before copying the values over
+     addAttributes(state);
+     state.copyTo(this);
+     return true;
+   }
+
+   /** Adds every attribute implementation of the given source to this stream. */
+   void addAttributes(AttributeSource attributeSource) {
+     for (Iterator<AttributeImpl> it = attributeSource.getAttributeImplsIterator(); it.hasNext(); ) {
+       addAttributeImpl(it.next()); // adds both impl & interfaces
+     }
+   }
+ }
+
+ /**
+  * Reads the given reader to the end and returns everything it produced.
+  *
+  * @param input reader to drain; it is not closed here
+  * @return the complete content of the reader
+  * @throws RuntimeException wrapping the IOException if reading fails
+  */
+ private static String writeCharStream(Reader input ){
+   final int BUFFER_SIZE = 1024;
+   char[] buf = new char[BUFFER_SIZE];
+   StringBuilder sb = new StringBuilder();
+   try {
+     int len;
+     // A Reader may return fewer chars than requested before the end of the stream; only -1
+     // signals the end. Stopping on the first short read could truncate the text.
+     while ((len = input.read(buf, 0, BUFFER_SIZE)) != -1) {
+       sb.append(buf, 0, len);
+     }
+   } catch (IOException e) {
+     throw new RuntimeException("Error occurred while iterating over charfiltering", e);
+   }
+   return sb.toString();
+ }
+
}
diff --git a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
index 94fe406..e6fed08 100644
--- a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
+++ b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties
@@ -172,6 +172,7 @@ analysis.radio.custom=Custom
analysis.button.browse=Browse
analysis.button.build_analyzser=Build Analyzer
analysis.button.test=Test Analyzer
+analysis.checkbox.step_by_step=Step By Step
analysis.hyperlink.load_jars=Load external jars
analysis.textarea.prompt=Apache Lucene is a high-performance, full-featured text search engine library.
analysis.dialog.title.char_filter_params=CharFilter parameters
@@ -186,6 +187,7 @@ analysis.dialog.chain.label.tokenfilters=Token Filters:
analysis.message.build_success=Custom analyzer built successfully.
analysis.message.empry_input=Please input text to analyze.
analysis.hint.show_attributes=Hint: Double click the row to show all token attributes.
+analysis.hint.show_attributes_step_by_step=Hint: Double click the cell to show all token attributes.
analysis_preset.label.preset=Preset analyzers:
analysis_custom.label.charfilters=Char Filters
analysis_custom.label.tokenizer=Tokenizer
diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java b/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java
index 2ce1084..d15a1e9 100644
--- a/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java
+++ b/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java
@@ -132,5 +132,45 @@ public class AnalysisImplTest extends LuceneTestCase {
analysis.analyze(text);
}
+ /** Step-by-step analysis must be rejected when the current analyzer is a preset analyzer. */
+ @Test(expected = LukeException.class)
+ public void testAnalyzeStepByStep_preset() {
+   final String standardAnalyzerClass = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+
+   AnalysisImpl analysis = new AnalysisImpl();
+   Analyzer created = analysis.createAnalyzerFromClassName(standardAnalyzerClass);
+   assertEquals(standardAnalyzerClass, created.getClass().getName());
+
+   // StandardAnalyzer is not a CustomAnalyzer, so this call is expected to throw
+   analysis.analyzeStepByStep("This test must fail.");
+ }
+
+ /**
+  * Step-by-step analysis with a custom chain (htmlstrip charfilter, keyword tokenizer,
+  * lowercase filter) must report the name and the output of every step.
+  */
+ @Test
+ public void testAnalyzeStepByStep_custom() {
+   AnalysisImpl analysis = new AnalysisImpl();
+   Map<String, String> tkParams = new HashMap<>();
+   tkParams.put("maxTokenLen", "128");
+   CustomAnalyzerConfig.Builder builder = new CustomAnalyzerConfig.Builder("keyword", tkParams)
+       .addTokenFilterConfig("lowercase", Collections.emptyMap())
+       .addCharFilterConfig("htmlstrip", Collections.emptyMap());
+   CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
+   assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
+   assertEquals("org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory",
+       analyzer.getCharFilterFactories().get(0).getClass().getName());
+   assertEquals("org.apache.lucene.analysis.core.KeywordTokenizerFactory",
+       analyzer.getTokenizerFactory().getClass().getName());
+   assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory",
+       analyzer.getTokenFilterFactories().get(0).getClass().getName());
+   String text = "Apache Lucene";
+   Analysis.StepByStepResult result = analysis.analyzeStepByStep(text);
+   assertNotNull(result);
+   assertNotNull(result.getCharfilteredTexts());
+   assertEquals(1,result.getCharfilteredTexts().size());
+   assertEquals("org.apache.lucene.analysis.charfilter.HTMLStripCharFilter", result.getCharfilteredTexts().get(0).getName());
+   // the input contains no markup, so the charfilter leaves it untouched
+   assertEquals("Apache Lucene", result.getCharfilteredTexts().get(0).getText());
+
+   assertNotNull(result.getNamedTokens());
+   assertEquals(2, result.getNamedTokens().size());
+
+   // the keyword tokenizer emits the whole input as a single token
+   assertEquals("org.apache.lucene.analysis.core.KeywordTokenizer", result.getNamedTokens().get(0).getName());
+   assertEquals(1, result.getNamedTokens().get(0).getTokens().size());
+   assertEquals("Apache Lucene", result.getNamedTokens().get(0).getTokens().get(0).getTerm());
+
+   // the lowercase filter keeps that single token and lowercases its term
+   assertEquals("org.apache.lucene.analysis.core.LowerCaseFilter", result.getNamedTokens().get(1).getName());
+   assertEquals(1, result.getNamedTokens().get(1).getTokens().size());
+   assertEquals("apache lucene", result.getNamedTokens().get(1).getTokens().get(0).getTerm());
+ }
}