You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2019/06/20 23:00:44 UTC

[lucene-solr] branch branch_8x updated (45b000e -> 9e017ba)

This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a change to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from 45b000e  SOLR-13419 - Better infix for TRA collection names
     new bdaf10f  LUCENE-8863: enhance Kuromoji DictionaryBuilder tool  added tests  enabled ids up to 8191  support loading custom system dictionary from filesystem or classpath
     new 9e017ba  Add missing javadocs for new BinaryDictionary.ResourceScheme

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lucene/analysis/kuromoji/build.xml                 |  4 +-
 .../lucene/analysis/ja/dict/BinaryDictionary.java  | 57 +++++++++++++--
 .../analysis/ja/dict/TokenInfoDictionary.java      | 15 +++-
 .../analysis/ja/util/BinaryDictionaryWriter.java   | 43 ++++++++---
 .../ja/util/TokenInfoDictionaryBuilder.java        |  3 +-
 .../analysis/ja/dict/TokenInfoDictionaryTest.java  | 85 ++++++++++++++++++++++
 .../analysis/ja/dict/UnknownDictionaryTest.java    | 22 +-----
 7 files changed, 188 insertions(+), 41 deletions(-)
 create mode 100644 lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java


[lucene-solr] 02/02: Add missing javadocs for new BinaryDictionary.ResourceScheme

Posted by so...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 9e017ba83cbcf1570886950fbc3c3731409fa82c
Author: Michael Sokolov <so...@amazon.com>
AuthorDate: Fri Jun 21 00:58:32 2019 +0200

    Add missing javadocs for new BinaryDictionary.ResourceScheme
---
 .../java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
index ca4eb2c..a0e314d 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
@@ -39,7 +39,10 @@ import org.apache.lucene.util.IOUtils;
  */
 public abstract class BinaryDictionary implements Dictionary {
   
-  enum ResourceScheme {
+  /**
+   * Used to specify where (dictionary) resources get loaded from.
+   */
+  public enum ResourceScheme {
     CLASSPATH, FILE
   }
 


[lucene-solr] 01/02: LUCENE-8863: enhance Kuromoji DictionaryBuilder tool added tests enabled ids up to 8191 support loading custom system dictionary from filesystem or classpath

Posted by so...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit bdaf10ffa82644c1e2ca74ce6be6e9ef5ee5ba68
Author: Michael Sokolov <so...@amazon.com>
AuthorDate: Sat Jun 15 14:59:46 2019 -0400

    LUCENE-8863: enhance Kuromoji DictionaryBuilder tool
     added tests
     enabled ids up to 8191
     support loading custom system dictionary from filesystem or classpath
---
 lucene/analysis/kuromoji/build.xml                 |  4 +-
 .../lucene/analysis/ja/dict/BinaryDictionary.java  | 54 ++++++++++++--
 .../analysis/ja/dict/TokenInfoDictionary.java      | 15 +++-
 .../analysis/ja/util/BinaryDictionaryWriter.java   | 43 ++++++++---
 .../ja/util/TokenInfoDictionaryBuilder.java        |  3 +-
 .../analysis/ja/dict/TokenInfoDictionaryTest.java  | 85 ++++++++++++++++++++++
 .../analysis/ja/dict/UnknownDictionaryTest.java    | 22 +-----
 7 files changed, 185 insertions(+), 41 deletions(-)

diff --git a/lucene/analysis/kuromoji/build.xml b/lucene/analysis/kuromoji/build.xml
index decfa7a..094e2bd 100644
--- a/lucene/analysis/kuromoji/build.xml
+++ b/lucene/analysis/kuromoji/build.xml
@@ -136,8 +136,8 @@
      </compile>
   </target>
 
-  <target name="test-tools" depends="compile-tools-tests">
-    <test-macro dataDir="src/tools/test" junit.classpath="tools.test.classpath"/>
+  <target name="test-tools" depends="install-junit4-taskdef, compile-tools-tests">
+    <test-macro testsDir="${build.dir}/classes/tools-test" workDir="src/tools/test" junit.classpath="tools.test.classpath"/>
   </target>
 
   <target name="compile-test" depends="module-build.compile-test, compile-tools-tests"/>
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
index 180c0a9..ca4eb2c 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
@@ -23,6 +23,8 @@ import java.io.IOException;
 import java.io.FileNotFoundException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.nio.channels.Channels;
 import java.nio.channels.ReadableByteChannel;
 
@@ -37,6 +39,10 @@ import org.apache.lucene.util.IOUtils;
  */
 public abstract class BinaryDictionary implements Dictionary {
   
+  enum ResourceScheme {
+    CLASSPATH, FILE
+  }
+
   public static final String DICT_FILENAME_SUFFIX = "$buffer.dat";
   public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
   public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat";
@@ -46,6 +52,8 @@ public abstract class BinaryDictionary implements Dictionary {
   public static final String POSDICT_HEADER = "kuromoji_dict_pos";
   public static final int VERSION = 1;
   
+  private final ResourceScheme resourceScheme;
+  private final String resourcePath;
   private final ByteBuffer buffer;
   private final int[] targetMapOffsets, targetMap;
   private final String[] posDict;
@@ -53,6 +61,24 @@ public abstract class BinaryDictionary implements Dictionary {
   private final String[] inflFormDict;
   
   protected BinaryDictionary() throws IOException {
+    this(ResourceScheme.CLASSPATH, null);
+  }
+
+  /**
+   * @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
+   * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH scheme only, use
+   * this class's name as the path.
+   */
+  protected BinaryDictionary(ResourceScheme resourceScheme, String resourcePath) throws IOException {
+    this.resourceScheme = resourceScheme;
+    if (resourcePath == null) {
+      if (resourceScheme != ResourceScheme.CLASSPATH) {
+        throw new IllegalArgumentException("resourcePath must be supplied with FILE resource scheme");
+      }
+      this.resourcePath = getClass().getName().replace('.', '/');
+    } else {
+      this.resourcePath = resourcePath;
+    }
     InputStream mapIS = null, dictIS = null, posIS = null;
     int[] targetMapOffsets = null, targetMap = null;
     String[] posDict = null;
@@ -78,7 +104,9 @@ public abstract class BinaryDictionary implements Dictionary {
         targetMap[ofs] = accum;
       }
       if (sourceId + 1 != targetMapOffsets.length)
-        throw new IOException("targetMap file format broken");
+        throw new IOException("targetMap file format broken; targetMap.length=" + targetMap.length
+                              + ", targetMapOffsets.length=" + targetMapOffsets.length
+                              + ", sourceId=" + sourceId);
       targetMapOffsets[sourceId] = targetMap.length;
       mapIS.close(); mapIS = null;
       
@@ -135,17 +163,33 @@ public abstract class BinaryDictionary implements Dictionary {
   }
   
   protected final InputStream getResource(String suffix) throws IOException {
-    return getClassResource(getClass(), suffix);
+    switch(resourceScheme) {
+      case CLASSPATH:
+        return getClassResource(resourcePath + suffix);
+      case FILE:
+        return Files.newInputStream(Paths.get(resourcePath + suffix));
+      default:
+        throw new IllegalStateException("unknown resource scheme " + resourceScheme);
+    }
   }
   
   // util, reused by ConnectionCosts and CharacterDefinition
   public static final InputStream getClassResource(Class<?> clazz, String suffix) throws IOException {
     final InputStream is = clazz.getResourceAsStream(clazz.getSimpleName() + suffix);
-    if (is == null)
+    if (is == null) {
       throw new FileNotFoundException("Not in classpath: " + clazz.getName().replace('.','/') + suffix);
+    }
     return is;
   }
   
+  private InputStream getClassResource(String path) throws IOException {
+    final InputStream is = BinaryDictionary.class.getClassLoader().getResourceAsStream(path);
+    if (is == null) {
+      throw new FileNotFoundException("Not in classpath: " + path);
+    }
+    return is;
+  }
+
   public void lookupWordIds(int sourceId, IntsRef ref) {
     ref.ints = targetMap;
     ref.offset = targetMapOffsets[sourceId];
@@ -155,12 +199,12 @@ public abstract class BinaryDictionary implements Dictionary {
   
   @Override
   public int getLeftId(int wordId) {
-    return buffer.getShort(wordId) >>> 3;
+    return (buffer.getShort(wordId) & 0xffff) >>> 3;
   }
   
   @Override
   public int getRightId(int wordId) {
-    return buffer.getShort(wordId) >>> 3;
+    return (buffer.getShort(wordId) & 0xffff) >>> 3;
   }
   
   @Override
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
index 6b75138..662ebb5 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
@@ -36,10 +36,15 @@ public final class TokenInfoDictionary extends BinaryDictionary {
 
   private final TokenInfoFST fst;
   
-  private TokenInfoDictionary() throws IOException {
-    super();
+  /**
+   * @param resourceScheme - scheme for loading resources (FILE or CLASSPATH).
+   * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH scheme only, use
+   * this class's name as the path.
+   */
+  TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath) throws IOException {
+    super(resourceScheme, resourcePath);
     InputStream is = null;
-    FST<Long> fst = null;
+    FST<Long> fst;
     boolean success = false;
     try {
       is = getResource(FST_FILENAME_SUFFIX);
@@ -56,6 +61,10 @@ public final class TokenInfoDictionary extends BinaryDictionary {
     // TODO: some way to configure?
     this.fst = new TokenInfoFST(fst, true);
   }
+
+  private TokenInfoDictionary() throws IOException {
+    this(ResourceScheme.CLASSPATH, null);
+  }
   
   public TokenInfoFST getFST() {
     return fst;
diff --git a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
index a6d48cc..a6ef6bb 100644
--- a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
+++ b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
@@ -35,6 +35,8 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.analysis.ja.dict.BinaryDictionary;
 
 public abstract class BinaryDictionaryWriter {
+  private final static int ID_LIMIT = 8192;
+
   protected final Class<? extends BinaryDictionary> implClazz;
   protected ByteBuffer buffer;
   private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1;
@@ -71,7 +73,9 @@ public abstract class BinaryDictionaryWriter {
     }
     
     String posData = sb.toString();
-    
+    if (posData.isEmpty()) {
+        throw new IllegalArgumentException("POS fields are empty");
+    }
     sb.setLength(0);
     sb.append(CSVUtil.quoteEscape(posData));
     sb.append(',');
@@ -100,6 +104,9 @@ public abstract class BinaryDictionaryWriter {
     }
 
     int flags = 0;
+    if (baseForm.isEmpty()) {
+        throw new IllegalArgumentException("base form is empty");
+    }
     if (!("*".equals(baseForm) || baseForm.equals(entry[0]))) {
       flags |= BinaryDictionary.HAS_BASEFORM;
     }
@@ -110,8 +117,12 @@ public abstract class BinaryDictionaryWriter {
       flags |= BinaryDictionary.HAS_PRONUNCIATION;
     }
 
-    assert leftId == rightId;
-    assert leftId < 4096; // there are still unused bits
+    if (leftId != rightId) {
+        throw new IllegalArgumentException("rightId != leftId: " + rightId + " " +leftId);
+    }
+    if (leftId >= ID_LIMIT) {
+        throw new IllegalArgumentException("leftId >= " + ID_LIMIT + ": " + leftId);
+    }
     // add pos mapping
     int toFill = 1+leftId - posDict.size();
     for (int i = 0; i < toFill; i++) {
@@ -119,14 +130,19 @@ public abstract class BinaryDictionaryWriter {
     }
     
     String existing = posDict.get(leftId);
-    assert existing == null || existing.equals(fullPOSData);
+    if (existing != null && existing.equals(fullPOSData) == false) {
+        // TODO: test me
+        throw new IllegalArgumentException("Multiple entries found for leftID=" + leftId);
+    }
     posDict.set(leftId, fullPOSData);
     
     buffer.putShort((short)(leftId << 3 | flags));
     buffer.putShort(wordCost);
 
     if ((flags & BinaryDictionary.HAS_BASEFORM) != 0) {
-      assert baseForm.length() < 16;
+      if (baseForm.length() >= 16) {
+        throw new IllegalArgumentException("Length of base form " + baseForm + " is >= 16");
+      }
       int shared = sharedPrefix(entry[0], baseForm);
       int suffix = baseForm.length() - shared;
       buffer.put((byte) (shared << 4 | suffix));
@@ -204,16 +220,17 @@ public abstract class BinaryDictionaryWriter {
   }
   
   public void addMapping(int sourceId, int wordId) {
-    assert wordId > lastWordId : "words out of order: " + wordId + " vs lastID: " + lastWordId;
+    if (wordId <= lastWordId) {
+      throw new IllegalStateException("words out of order: " + wordId + " vs lastID: " + lastWordId);
+    }
     
     if (sourceId > lastSourceId) {
-      assert sourceId > lastSourceId : "source ids out of order: lastSourceId=" + lastSourceId + " vs sourceId=" + sourceId;
       targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1);
       for (int i = lastSourceId + 1; i <= sourceId; i++) {
         targetMapOffsets[i] = targetMapEndOffset;
       }
-    } else {
-      assert sourceId == lastSourceId;
+    } else if (sourceId != lastSourceId) {
+      throw new IllegalStateException("source ids not in increasing order: lastSourceId=" + lastSourceId + " vs sourceId=" + sourceId);
     }
 
     targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1);
@@ -265,7 +282,9 @@ public abstract class BinaryDictionaryWriter {
         }
         prev += delta;
       }
-      assert sourceId == numSourceIds : "sourceId:"+sourceId+" != numSourceIds:"+numSourceIds;
+      if (sourceId != numSourceIds) {
+        throw new IllegalStateException("sourceId:" + sourceId + " != numSourceIds:" + numSourceIds);
+      }
     } finally {
       os.close();
     }
@@ -286,7 +305,9 @@ public abstract class BinaryDictionaryWriter {
           out.writeByte((byte)0);
         } else {
           String data[] = CSVUtil.parse(s);
-          assert data.length == 3 : "malformed pos/inflection: " + s;
+          if (data.length != 3) {
+            throw new IllegalArgumentException("Malformed pos/inflection: " + s + "; expected 3 characters");
+          }
           out.writeString(data[0]);
           out.writeString(data[1]);
           out.writeString(data[2]);
diff --git a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
index 7175308..465a432 100644
--- a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
@@ -96,8 +96,7 @@ public class TokenInfoDictionaryBuilder {
         String[] entry = CSVUtil.parse(line);
 
         if(entry.length < 13) {
-          System.out.println("Entry in CSV is not valid: " + line);
-          continue;
+          throw new IllegalArgumentException("Entry in CSV is not valid (13 field values expected): " + line);
         }
         
         String[] formatted = formatEntry(entry);
diff --git a/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java b/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java
new file mode 100644
index 0000000..0f7609f
--- /dev/null
+++ b/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryTest.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ja.dict;
+
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
+import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryBuilder;
+import org.apache.lucene.analysis.ja.util.TokenInfoDictionaryWriter;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.IntsRefBuilder;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static java.io.File.separatorChar;
+import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.ResourceScheme;
+
+/**
+ * Tests of TokenInfoDictionary build tools; run using ant test-tools
+ */
+public class TokenInfoDictionaryTest extends LuceneTestCase {
+
+  public void testPut() throws Exception {
+    TokenInfoDictionary dict = newDictionary("名詞,1,1,2,名詞,一般,*,*,*,*,*,*,*",
+                                               // "large" id
+                                               "一般,5000,5000,3,名詞,一般,*,*,*,*,*,*,*");
+    IntsRef wordIdRef = new IntsRefBuilder().get();
+
+    dict.lookupWordIds(0, wordIdRef);
+    int wordId = wordIdRef.ints[wordIdRef.offset];
+    assertEquals(5000, dict.getLeftId(wordId));
+    assertEquals(5000, dict.getRightId(wordId));
+    assertEquals(3, dict.getWordCost(wordId));
+
+    dict.lookupWordIds(1, wordIdRef);
+    wordId = wordIdRef.ints[wordIdRef.offset];
+    assertEquals(1, dict.getLeftId(wordId));
+    assertEquals(1, dict.getRightId(wordId));
+    assertEquals(2, dict.getWordCost(wordId));
+  }
+
+  private TokenInfoDictionary newDictionary(String... entries) throws Exception {
+    Path dir = createTempDir();
+    try (OutputStream out = Files.newOutputStream(dir.resolve("test.csv"));
+         PrintWriter printer = new PrintWriter(new OutputStreamWriter(out, "utf-8"))) {
+      for (String entry : entries) {
+        printer.println(entry);
+      }
+    }
+    TokenInfoDictionaryBuilder builder = new TokenInfoDictionaryBuilder(DictionaryFormat.IPADIC, "utf-8", true);
+    TokenInfoDictionaryWriter writer = builder.build(dir.toString());
+    writer.write(dir.toString());
+    String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', separatorChar);
+    // We must also load the other files (in BinaryDictionary) from the correct path
+    return new TokenInfoDictionary(ResourceScheme.FILE, dir.resolve(dictionaryPath).toString());
+  }
+
+  public void testPutException() throws Exception {
+    // too few columns
+    expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,1,1,1,名詞,一般,*,*,*,*,*"));
+    // left id != right id
+    expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,1285,1,1,名詞,一般,*,*,*,*,*,*,*"));
+    // left id != right id
+    expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,1285,1,1,名詞,一般,*,*,*,*,*,*,*"));
+    // id too large
+    expectThrows(IllegalArgumentException.class, () -> newDictionary("KANJI,8192,8192,1,名詞,一般,*,*,*,*,*,*,*"));
+  }
+}
diff --git a/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java b/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java
index 299e8c6..6330f41 100644
--- a/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java
+++ b/lucene/analysis/kuromoji/src/tools/test/org/apache/lucene/analysis/ja/dict/UnknownDictionaryTest.java
@@ -29,19 +29,9 @@ public class UnknownDictionaryTest extends LuceneTestCase {
   public void testPutCharacterCategory() {
     UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
     
-    try{
-      unkDic.putCharacterCategory(0, "DUMMY_NAME");
-      fail();
-    } catch(Exception e) {
-      
-    }
+    expectThrows(Exception.class, () -> unkDic.putCharacterCategory(0, "DUMMY_NAME"));
     
-    try{
-      unkDic.putCharacterCategory(-1, "KATAKANA");
-      fail();
-    } catch(Exception e) {
-      
-    }
+    expectThrows(Exception.class, () -> unkDic.putCharacterCategory(-1, "KATAKANA"));
     
     unkDic.putCharacterCategory(0, "DEFAULT");
     unkDic.putCharacterCategory(1, "GREEK");
@@ -53,12 +43,8 @@ public class UnknownDictionaryTest extends LuceneTestCase {
   @Test
   public void testPut() {
     UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
-    try{
-      unkDic.put(CSVUtil.parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*"));
-      fail();
-    } catch(Exception e){
-      
-    }
+    expectThrows(NumberFormatException.class, () ->
+                 unkDic.put(CSVUtil.parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*")));
 
     String entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
     String entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";