You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/01 19:57:00 UTC

[incubator-nlpcraft] branch master updated: WIP

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new 11b123bc WIP
11b123bc is described below

commit 11b123bc8255eeb44b72464c4f438915f686799d
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Dec 1 11:56:54 2022 -0800

    WIP
---
 .../scala/org/apache/nlpcraft/NCPipeline.scala     | 50 ++++++++--------------
 .../org/apache/nlpcraft/NCPipelineBuilder.scala    | 13 +++---
 .../internal/impl/NCModelPipelineManager.scala     | 14 +++---
 .../apache/nlpcraft/nlp/NCVariantFilterSpec.scala  |  2 +-
 .../apache/nlpcraft/nlp/util/NCTestPipeline.scala  |  4 +-
 5 files changed, 36 insertions(+), 47 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala
index c1c24428..bb3d0958 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala
@@ -20,47 +20,35 @@ package org.apache.nlpcraft
 /**
   * NLP processing pipeline for the input request. Pipeline is associated with the model.
   *
-  * An NLP pipeline is a container for various processing components that take the input text at the beginning of the
-  * pipeline and produce the list of {@link NCEntity entities} at the end of the pipeline.
+  * An NLP pipeline is a container for the sequence of processing components that take the input text at the beginning
+  * of the pipeline and produce the list of [[NCVariant variants]] at the end of the pipeline.
   * Schematically the pipeline looks like this:
   * <pre>
-  * +----------+        +-----------+
-  * *=========*    +---------+    +---+-------+  |    +---+-------+   |
-  * :  Text   : -> |  Token  | -> | Token     |  | -> | Token      |  | ----.
-  * :  Input  :    |  Parser |    | Enrichers |--+    | Validators |--+      \
-  * *=========*    +---------+    +-----------+       +------------+          \
-  * }
-  * +-----------+        +----------+        +--------+    /
-  * *=========*    +---+--------+  |    +---+-------+  |    +---+-----+  |   /
-  * :  Entity : <- | Entity     |  | <- | Entity    |  | <- | Entity  |  | <-
-  * :  List   :    | Validators |--+    | Enrichers |--+    | Parsers |--+
-  * *=========*    +------------+       +-----------+       +---------+
+  *                                      +----------+        +-----------+         +--------+
+  *   *=========*     +---------+    +---+-------+  |    +---+-------+   |     +---+-----+  |
+  *   :  Text   : ->  |  Token  | -> | Token     |  | -> | Token      |  | ->  | Entity  |  | ----.
+  *   :  Input  :     |  Parser |    | Enrichers |--+    | Validators |--+     | Parsers |--+      \
+  *   *=========*     +---------+    +-----------+       +------------+        +---------+          \
+  *                                                                                                  }
+  *                       +--------+        +--------+        +-----------+        +----------+     /
+  * *============*    +---+-----+  |    +---+-----+  |    +---+--------+  |    +---+-------+  |    /
+  * :  Variants  : <- | Variant |  | <- | Entity  |  | <- | Entity     |  | <- | Entity    |  | <-'
+  * :  List      :    | Filters |--+    | Mappers |--+    | Validators |--+    | Enrichers |--+
+  * *============*    +---------+       +---------+       +------------+       +-----------+
   * </pre>
   *
   * Pipeline has the following components:
-  * <ul>
-  * <li>
-  * {@link NCTokenParser} is responsible for taking the input text and tokenize it into a list of
-  * {@link NCToken
-  * }. This process is called tokenization, i.e. the process of demarcating and
-  * classifying sections of a string of input characters. There's only one token parser for the pipeline.
-  * </li>
-  * <li>
-  * After the initial list of token is
-  * </li>
-  * </ul>
   *
   */
 trait NCPipeline:
     /**
-      *
-      * @return */
+      * Gets the token parser. Exactly one token parser is required for the pipeline.
+      */
     def getTokenParser: NCTokenParser
 
     /**
       * Gets the list of entity parser. At least one entity parser is required.
-      *
-      * @return */
+      */
     def getEntityParsers: List[NCEntityParser]
 
     /**
@@ -86,11 +74,9 @@ trait NCPipeline:
     /**
       *
       */
-    def getVariantFilter: Option[NCVariantFilter] = None
+    def getVariantFilters: List[NCVariantFilter] = List.empty
 
     /**
-      * Gets optional list of entity mappers.
-      *
-      * @return Optional list of entity mappers. Can be empty but never `null`.
+      * Gets optional list of entity mappers. Can return an empty list but never `null`.
       */
     def getEntityMappers: List[NCEntityMapper] = List.empty
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
index f49049a3..d15ba80f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
@@ -34,7 +34,7 @@ class NCPipelineBuilder:
     private val tokVals: Buf[NCTokenValidator] = Buf.empty
     private val entVals: Buf[NCEntityValidator] = Buf.empty
     private val entMappers: Buf[NCEntityMapper] = Buf.empty
-    private var varFilter: Option[NCVariantFilter] = None
+    private var varFilters: Buf[NCVariantFilter] = Buf.empty
 
     /**
       *
@@ -134,10 +134,10 @@ class NCPipelineBuilder:
         this
 
     /**
-      * @param varFilter
+      * @param varFilters List of variant filters to append to this builder.
       * @return This instance for call chaining. */
-    def withVariantFilter(varFilter: NCVariantFilter): NCPipelineBuilder =
-        this.varFilter = Some(varFilter)
+    def withVariantFilters(varFilters: List[NCVariantFilter]): NCPipelineBuilder =
+        this.varFilters ++= varFilters
         this
 
     /**
@@ -152,7 +152,8 @@ class NCPipelineBuilder:
     /**
       *
       * @param entMappers
-      * @return This instance for call chaining. */
+      * @return This instance for call chaining.
+      */
     def withEntityMappers(entMappers: List[NCEntityMapper]): NCPipelineBuilder =
         require(entMappers != null, "List of entity mappers cannot be null.")
         entMappers.foreach((p: NCEntityMapper) => require(p != null, "Entity mapper cannot be null."))
@@ -234,5 +235,5 @@ class NCPipelineBuilder:
             override def getEntityParsers: List[NCEntityParser] = entParsers.toList
             override def getTokenValidators: List[NCTokenValidator] = tokVals.toList
             override def getEntityValidators: List[NCEntityValidator] = entVals.toList
-            override def getVariantFilter: Option[NCVariantFilter] = varFilter
+            override def getVariantFilters: List[NCVariantFilter] = varFilters.toList
             override def getEntityMappers: List[NCEntityMapper] = entMappers.toList
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
index e97657c1..c2f56c74 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
@@ -31,6 +31,8 @@ import java.util.concurrent.*
 import java.util.concurrent.atomic.*
 import java.util.function.Predicate
 import scala.concurrent.ExecutionContext
+import scala.jdk.CollectionConverters.*
+
 
 /**
   *
@@ -54,10 +56,10 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
     private val tokVals = nvl(pipeline.getTokenValidators)
     private val entVals = nvl(pipeline.getEntityValidators)
     private val entMappers = nvl(pipeline.getEntityMappers)
-    private val varFilterOpt = pipeline.getVariantFilter
+    private val varFilters = nvl(pipeline.getVariantFilters)
 
     private val allComps: Seq[NCLifecycle] =
-        tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilterOpt.toSeq
+        tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilters
 
     /**
       * Processes pipeline components.
@@ -153,7 +155,6 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
             map { case (_, ents) => if ents.sizeIs > 1 then ents.toSet else Set.empty }.filter(_.nonEmpty)
 
         var variants: List[NCVariant] =
-            import scala.jdk.CollectionConverters.*
             if overlapEnts.nonEmpty then
                 NCModelPipelineHelper.
                     findCombinations(overlapEnts.map(_.asJava).asJava, pool).asScala.
@@ -164,15 +165,15 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
             else
                 List(newVariant(entities))
 
-        if varFilterOpt.isDefined then variants = varFilterOpt.get.filter(req, cfg, variants)
+        variants = varFilters.foldRight(variants)((filter, vars) => filter.filter(req, cfg, vars))
 
         // Skips empty variants.
         val vrns = variants.filter(_.getEntities.nonEmpty)
 
-        for ((v, i) <- vrns.zipWithIndex)
+        for (v, i) <- vrns.zipWithIndex do
             val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position", "Properties")
 
-            for (e <- v.getEntities)
+            for e <- v.getEntities do
                 val toks = e.getTokens
                 tbl += (
                     e.getId,
@@ -185,6 +186,7 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
         NCPipelineData(req, vrns, toks)
 
     def start(): Unit = processComponents(_.onStart(cfg), "started")
+
     /**
       *
       */
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
index 10254a13..983089ef 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
@@ -52,7 +52,7 @@ class NCVariantFilterSpec extends AnyFunSuite:
         )
 
         test0(
-            mkPipeline(_.withVariantFilter((_: NCRequest, _: NCModelConfig, _: List[NCVariant]) => List.empty)),
+            mkPipeline(_.withVariantFilters(List((_: NCRequest, _: NCModelConfig, _: List[NCVariant]) => List.empty))),
             false
         )
     }
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
index 6ca853c6..f596d580 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
@@ -36,7 +36,7 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter
     val tokVals: Buf[NCTokenValidator] = Buf.empty
     val entVals: Buf[NCEntityValidator] = Buf.empty
     val entMappers: Buf[NCEntityMapper] = Buf.empty
-    var varFilter: Option[NCVariantFilter] = None
+    var varFilters: Buf[NCVariantFilter] = Buf.empty
 
     override def getTokenParser: NCTokenParser = tokParser
     override def getTokenEnrichers: List[NCTokenEnricher] = tokEnrichers.toList
@@ -45,4 +45,4 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter
     override def getTokenValidators: List[NCTokenValidator] = tokVals.toList
     override def getEntityValidators: List[NCEntityValidator] = entVals.toList
     override def getEntityMappers: List[NCEntityMapper] = entMappers.toList
-    override def getVariantFilter: Option[NCVariantFilter] = varFilter
\ No newline at end of file
+    override def getVariantFilters: List[NCVariantFilter] = varFilters.toList
\ No newline at end of file