You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/02/25 20:22:42 UTC
[incubator-nlpcraft] branch NLPCRAFT-483 updated: RU adapters added (example)
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-483
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push:
new 3d2eb56 RU adapters added (example)
3d2eb56 is described below
commit 3d2eb567551a18bf37d694d6aa78d23fff7b5c46
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Feb 25 23:22:30 2022 +0300
RU adapters added (example)
---
...witchModelRu.scala => LightSwitchRuModel.scala} | 15 +++------
.../examples/lightswitch/NCRuPipeline.scala | 37 ++++++++++++++++++++++
.../semantic/NCRuSemanticEntityParser.scala} | 21 ++++++++----
.../enricher/NCRuStopWordsTokenEnricher.scala} | 18 ++++++-----
.../token/parser/NCRuTokenParser.scala} | 10 +++---
.../lightswitch/NCModelValidationSpec.scala | 4 +--
6 files changed, 71 insertions(+), 34 deletions(-)
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
similarity index 89%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
index b36b1b2..5a65ec9 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala
@@ -18,12 +18,12 @@
package org.apache.nlpcraft.examples.lightswitch
import org.apache.nlpcraft.*
-import org.apache.nlpcraft.examples.lightswitch.ru.*
+import org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic.NCRuSemanticEntityParser
import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNLPEntityParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer
-import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
/**
* This example provides very simple implementation for NLI-powered light switch.
@@ -38,16 +38,9 @@ import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
* See 'README.md' file in the same folder for running and testing instructions.
*/
-class LightSwitchModelRu extends NCModel:
+class LightSwitchRuModel extends NCModel:
override val getConfig: NCModelConfig = new NCModelConfig("nlpcraft.lightswitch.ru.ex", "LightSwitch Example Model RU", "1.0")
- override val getPipeline: NCModelPipeline =
- val tp = new NCTokenParserRu
- new NCModelPipelineBuilder(
- tp,
- new NCSemanticEntityParser(new NCSemanticStemmerRu(), tp, "lightswitch_model_ru.yaml")
- ).
- withTokenEnricher(new NCStopWordsTokenEnricherRu()).
- build()
+ override val getPipeline: NCModelPipeline = new NCRuPipeline(new NCRuSemanticEntityParser("lightswitch_model_ru.yaml"))
/**
* Intent and its on-match callback.
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala
new file mode 100644
index 0000000..3fe37d1
--- /dev/null
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.examples.lightswitch
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher.NCRuStopWordsTokenEnricher
+import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.token.enricher.en.*
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
+
+import java.util
+import java.util.*
+import scala.jdk.CollectionConverters.*
+
+/**
+ * Default RU pipeline built from the RU token parser ([[NCRuTokenParser]]) and the RU stopword token enricher ([[NCRuStopWordsTokenEnricher]]).
+ * The entity parser passed to the constructor is used as the pipeline's only entity parser. */
+class NCRuPipeline(parser: NCEntityParser) extends NCModelPipeline:
+ override val getTokenParser: NCTokenParser = new NCRuTokenParser()
+ override val getEntityParsers: util.List[NCEntityParser] = Seq(parser).asJava
+ override val getTokenEnrichers: util.List[NCTokenEnricher] = Seq(new NCRuStopWordsTokenEnricher()).asJava
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala
similarity index 59%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala
index e49c72c..a0cdd9a 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala
@@ -15,12 +15,21 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.examples.lightswitch.ru
+package org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic
import opennlp.tools.stemmer.snowball.SnowballStemmer
-import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer
+import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser
+import org.apache.nlpcraft.nlp.entity.parser.semantic.*
-class NCSemanticStemmerRu extends NCSemanticStemmer:
- private val stemmer = new SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN)
-
- override def stem(txt: String): String = stemmer.synchronized { stemmer.stem(txt.toLowerCase).toString }
+/**
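+ * Semantic entity parser for Russian: combines a Snowball Russian stemmer (text is lower-cased before stemming) with [[NCRuTokenParser]].
+ * @param src Source forwarded unchanged to [[NCSemanticEntityParser]]; presumably the model resource name or path (e.g. a YAML file) - confirm against that class.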
+ */
+class NCRuSemanticEntityParser(src: String) extends NCSemanticEntityParser(
+ new NCSemanticStemmer:
+ private val stemmer = new SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN)
+ override def stem(txt: String): String = stemmer.synchronized { stemmer.stem(txt.toLowerCase).toString }
+ ,
+ new NCRuTokenParser(),
+ src
+)
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala
similarity index 74%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala
index 0e9c064..e21c3dc 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.examples.lightswitch.ru
+package org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher
import org.apache.lucene.analysis.ru.RussianAnalyzer
import org.apache.nlpcraft.*
@@ -26,18 +26,20 @@ import scala.jdk.CollectionConverters.*
/**
*
*/
-class NCStopWordsTokenEnricherRu extends NCTokenEnricher:
+class NCRuStopWordsTokenEnricher extends NCTokenEnricher:
private final val stops = RussianAnalyzer.getDefaultStopSet
override def enrich(req: NCRequest, cfg: NCModelConfig, toks: util.List[NCToken]): Unit =
- toks.asScala.foreach(t =>
+ for (t <- toks.asScala)
+ val lemma = t.getLemma
+ lazy val pos = t.getPos
+
t.put(
"stopword",
- t.getLemma.length == 1 && !Character.isLetter(t.getLemma.head) ||
- t.getPos.startsWith("PARTICLE") ||
- t.getPos.startsWith("INTERJECTION") ||
- t.getPos.startsWith("PREP") ||
+ lemma.length == 1 && !Character.isLetter(lemma.head) ||
+ pos.startsWith("PARTICLE") ||
+ pos.startsWith("INTERJECTION") ||
+ pos.startsWith("PREP") ||
stops.contains(t.getLemma) ||
stops.contains(t.getText.toLowerCase)
)
- )
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala
similarity index 93%
rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala
rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala
index 5bda243..d1ddf1b 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.examples.lightswitch.ru
+package org.apache.nlpcraft.examples.lightswitch.nlp.token.parser
import org.apache.lucene.analysis.ru.RussianAnalyzer
import org.apache.nlpcraft.*
@@ -28,11 +28,9 @@ import org.languagetool.tokenizers.WordTokenizer
import java.util
import scala.jdk.CollectionConverters.*
-object NCTokenParserRu:
+object NCRuTokenParser:
private val tokenizer = new WordTokenizer
-
private case class Span(word: String, start: Int, end: Int)
-
private def nvl(v: String, dflt : => String): String = if v != null then v else dflt
private def split(text: String): Seq[Span] =
@@ -45,9 +43,9 @@ object NCTokenParserRu:
spans.toSeq
-import org.apache.nlpcraft.examples.lightswitch.ru.NCTokenParserRu.*
+import NCRuTokenParser.*
-class NCTokenParserRu extends NCTokenParser:
+class NCRuTokenParser extends NCTokenParser:
override def tokenize(text: String): util.List[NCToken] =
val spans = split(text)
val tags = RussianTagger.INSTANCE.tag(spans.map(_.word).asJava).asScala
diff --git a/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala b/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
index b6d9d1b..69b5793 100644
--- a/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
@@ -26,7 +26,5 @@ import scala.util.Using
* JUnit models validation.
*/
class NCModelValidationSpec:
- private val MDL = new LightSwitchModelRu
-
@Test
- def test(): Unit = Using.resource(new NCModelClient(MDL)) { client => client.validateSamples() }
+ def test(): Unit = Using.resource(new NCModelClient(new LightSwitchRuModel)) { client => client.validateSamples() }