You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/02/25 20:22:42 UTC

[incubator-nlpcraft] branch NLPCRAFT-483 updated: RU adapters added (example)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-483
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push:
     new 3d2eb56  RU adapters added (example)
3d2eb56 is described below

commit 3d2eb567551a18bf37d694d6aa78d23fff7b5c46
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Feb 25 23:22:30 2022 +0300

    RU adapters added (example)
---
 ...witchModelRu.scala => LightSwitchRuModel.scala} | 15 +++------
 .../examples/lightswitch/NCRuPipeline.scala        | 37 ++++++++++++++++++++++
 .../semantic/NCRuSemanticEntityParser.scala}       | 21 ++++++++----
 .../enricher/NCRuStopWordsTokenEnricher.scala}     | 18 ++++++-----
 .../token/parser/NCRuTokenParser.scala}            | 10 +++---
 .../lightswitch/NCModelValidationSpec.scala        |  4 +--
 6 files changed, 71 insertions(+), 34 deletions(-)

diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
similarity index 89%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
index b36b1b2..5a65ec9 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
@@ -18,12 +18,12 @@
 package org.apache.nlpcraft.examples.lightswitch
 
 import org.apache.nlpcraft.*
-import org.apache.nlpcraft.examples.lightswitch.ru.*
+import org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic.NCRuSemanticEntityParser
 import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNLPEntityParser
 import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser
 import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer
-import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
 
 /**
   * This example provides very simple implementation for NLI-powered light switch.
@@ -38,16 +38,9 @@ import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
   * See 'README.md' file in the same folder for running and testing instructions.
   */
 
-class LightSwitchModelRu extends NCModel:
+class LightSwitchRuModel extends NCModel:
     override val getConfig: NCModelConfig = new NCModelConfig("nlpcraft.lightswitch.ru.ex", "LightSwitch Example Model RU", "1.0")
-    override val getPipeline: NCModelPipeline =
-        val tp = new NCTokenParserRu
-        new NCModelPipelineBuilder(
-            tp,
-            new NCSemanticEntityParser(new NCSemanticStemmerRu(), tp, "lightswitch_model_ru.yaml")
-        ).
-            withTokenEnricher(new NCStopWordsTokenEnricherRu()).
-            build()
+    override val getPipeline: NCModelPipeline = new NCRuPipeline(new NCRuSemanticEntityParser("lightswitch_model_ru.yaml"))
 
     /**
       * Intent and its on-match callback.
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala
new file mode 100644
index 0000000..3fe37d1
--- /dev/null
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.examples.lightswitch
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher.NCRuStopWordsTokenEnricher
+import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.token.enricher.en.*
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
+
+import java.util
+import java.util.*
+import scala.jdk.CollectionConverters.*
+
+/**
+  * Default RU pipeline built on the RU token parser (NCRuTokenParser) and the RU stopword token enricher (NCRuStopWordsTokenEnricher).
+  * The entity parser passed to the constructor is the only entity parser registered in this pipeline. */
+class NCRuPipeline(parser: NCEntityParser) extends NCModelPipeline:
+    override val getTokenParser: NCTokenParser = new NCRuTokenParser()
+    override val getEntityParsers: util.List[NCEntityParser] = Seq(parser).asJava
+    override val getTokenEnrichers: util.List[NCTokenEnricher] = Seq(new NCRuStopWordsTokenEnricher()).asJava
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala
similarity index 59%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala
index e49c72c..a0cdd9a 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala
@@ -15,12 +15,21 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.examples.lightswitch.ru
+package org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic
 
 import opennlp.tools.stemmer.snowball.SnowballStemmer
-import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer
+import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser
+import org.apache.nlpcraft.nlp.entity.parser.semantic.*
 
-class NCSemanticStemmerRu extends NCSemanticStemmer:
-    private val stemmer = new SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN)
-
-    override def stem(txt: String): String = stemmer.synchronized { stemmer.stem(txt.toLowerCase).toString }
+/**
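+  * RU semantic entity parser: combines a Snowball Russian stemmer with the RU token parser (NCRuTokenParser).
+  * @param src Model source passed through unchanged to NCSemanticEntityParser (for example "lightswitch_model_ru.yaml").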
+  */
+class NCRuSemanticEntityParser(src: String) extends NCSemanticEntityParser(
+    new NCSemanticStemmer:
+        private val stemmer = new SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN)
+        override def stem(txt: String): String = stemmer.synchronized { stemmer.stem(txt.toLowerCase).toString }
+    ,
+    new NCRuTokenParser(),
+    src
+)
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala
similarity index 74%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala
index 0e9c064..e21c3dc 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.examples.lightswitch.ru
+package org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher
 
 import org.apache.lucene.analysis.ru.RussianAnalyzer
 import org.apache.nlpcraft.*
@@ -26,18 +26,20 @@ import scala.jdk.CollectionConverters.*
 /**
   *
   */
-class NCStopWordsTokenEnricherRu extends NCTokenEnricher:
+class NCRuStopWordsTokenEnricher extends NCTokenEnricher:
     private final val stops = RussianAnalyzer.getDefaultStopSet
 
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: util.List[NCToken]): Unit =
-        toks.asScala.foreach(t =>
+        for (t <- toks.asScala)
+            val lemma = t.getLemma
+            lazy val pos = t.getPos
+
             t.put(
                 "stopword",
-                t.getLemma.length == 1 && !Character.isLetter(t.getLemma.head) ||
-                t.getPos.startsWith("PARTICLE") ||
-                t.getPos.startsWith("INTERJECTION") ||
-                t.getPos.startsWith("PREP") ||
+                lemma.length == 1 && !Character.isLetter(lemma.head) ||
+                pos.startsWith("PARTICLE") ||
+                pos.startsWith("INTERJECTION") ||
+                pos.startsWith("PREP") ||
                 stops.contains(t.getLemma) ||
                 stops.contains(t.getText.toLowerCase)
             )
-        )
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala
similarity index 93%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala
index 5bda243..d1ddf1b 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.examples.lightswitch.ru
+package org.apache.nlpcraft.examples.lightswitch.nlp.token.parser
 
 import org.apache.lucene.analysis.ru.RussianAnalyzer
 import org.apache.nlpcraft.*
@@ -28,11 +28,9 @@ import org.languagetool.tokenizers.WordTokenizer
 import java.util
 import scala.jdk.CollectionConverters.*
 
-object NCTokenParserRu:
+object NCRuTokenParser:
     private val tokenizer = new WordTokenizer
-
     private case class Span(word: String, start: Int, end: Int)
-
     private def nvl(v: String, dflt : => String): String = if v != null then v else dflt
 
     private def split(text: String): Seq[Span] =
@@ -45,9 +43,9 @@ object NCTokenParserRu:
 
         spans.toSeq
 
-import org.apache.nlpcraft.examples.lightswitch.ru.NCTokenParserRu.*
+import NCRuTokenParser.*
 
-class NCTokenParserRu extends NCTokenParser:
+class NCRuTokenParser extends NCTokenParser:
     override def tokenize(text: String): util.List[NCToken] =
         val spans = split(text)
         val tags = RussianTagger.INSTANCE.tag(spans.map(_.word).asJava).asScala
diff --git a/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala b/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
index b6d9d1b..69b5793 100644
--- a/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
@@ -26,7 +26,5 @@ import scala.util.Using
   * JUnit models validation.
   */
 class NCModelValidationSpec:
-    private val MDL = new LightSwitchModelRu
-
     @Test
-    def test(): Unit = Using.resource(new NCModelClient(MDL)) { client => client.validateSamples() }
+    def test(): Unit = Using.resource(new NCModelClient(new LightSwitchRuModel)) { client => client.validateSamples() }