You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2023/03/02 05:51:15 UTC

[incubator-nlpcraft-website] branch web-site updated: 1.0.0. fixes.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch web-site
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft-website.git


The following commit(s) were added to refs/heads/web-site by this push:
     new c2829fc  1.0.0. fixes.
c2829fc is described below

commit c2829fc5413524d8ca3a4252034cf1cb2e2030a2
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Mar 2 09:51:09 2023 +0400

    1.0.0. fixes.
---
 data-model.html | 426 --------------------------------------------------------
 docs.html       |   2 +-
 feed.xml        |   2 +-
 3 files changed, 2 insertions(+), 428 deletions(-)

diff --git a/data-model.html b/data-model.html
deleted file mode 100644
index 2f46923..0000000
--- a/data-model.html
+++ /dev/null
@@ -1,426 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!doctype html><html lang="en"> <script async src="https://www.googletagmanager.com/gtag/js?id=UA-180663034-1"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-180663034-1'); </script><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="An open sourc [...]
-package org.apache.nlpcraft.examples.lightswitch
-
-import org.apache.nlpcraft.model.{NCIntentTerm, _}
-
-class LightSwitchModel extends NCModelFileAdapter("lightswitch_model.yaml") {
-    @NCIntentRef("ls")
-    @NCIntentSample(Array(
-        "Turn the lights off in the entire house.",
-        "Switch on the illumination in the master bedroom closet.",
-        "Get the lights on.",
-        "Lights up in the kitchen.",
-        "Please, put the light out in the upstairs bedroom.",
-        "Set the lights on in the entire house.",
-        "Turn the lights off in the guest bedroom.",
-        "Could you please switch off all the lights?",
-        "Dial off illumination on the 2nd floor.",
-        "Please, no lights!",
-        "Kill off all the lights now!",
-        "No lights in the bedroom, please.",
-        "Light up the garage, please!"
-    ))
-    def onMatch(
-        @NCIntentTerm("act") actTok: NCToken,
-        @NCIntentTerm("loc") locToks: List[NCToken]
-    ): NCResult = {
-        val status = if (actTok.getId == "ls:on") "on" else "off"
-        val locations =
-            if (locToks.isEmpty)
-                "entire house"
-            else
-                locToks.map(_.meta[String]("nlpcraft:nlp:origtext")).mkString(", ")
-
-        // Add HomeKit, Arduino or other integration here.
-
-        // By default - return a descriptive action string.
-        NCResult.text(s"Lights are [$status] in [${locations.toLowerCase}].")
-    }
-}
-                </pre></div><div class="tab-pane fade show" id="lightswitch_yaml_model" role="tabpanel"><pre class="brush: js">
-id: "nlpcraft.lightswitch.ex"
-name: "Light Switch Example Model"
-version: "1.0"
-description: "NLI-powered light switch example model."
-macros:
-  - name: "&lt;ACTION&gt;"
-    macro: "{turn|switch|dial|let|set|get|put}"
-  - name: "&lt;KILL&gt;"
-    macro: "{shut|kill|stop|eliminate}"
-  - name: "&lt;ENTIRE_OPT&gt;"
-    macro: "{entire|full|whole|total|_}"
-  - name: "&lt;FLOOR_OPT&gt;"
-    macro: "{upstairs|downstairs|{1st|first|2nd|second|3rd|third|4th|fourth|5th|fifth|top|ground} floor|_}"
-  - name: "&lt;TYPE&gt;"
-    macro: "{room|closet|attic|loft|{store|storage} {room|_}}"
-  - name: "&lt;LIGHT&gt;"
-    macro: "{all|_} {it|them|light|illumination|lamp|lamplight}"
-enabledBuiltInTokens: [] # This example doesn't use any built-in tokens.
-
-#
-# Allows for multi-word synonyms in this entire model
-# to be sparse and permutate them for better detection.
-# These two properties generally enable a free-form
-# natural language comprehension.
-#
-permutateSynonyms: true
-sparse: true
-
-elements:
-  - id: "ls:loc"
-    description: "Location of lights."
-    synonyms:
-      - "&lt;ENTIRE_OPT&gt; &lt;FLOOR_OPT&gt; {kitchen|library|closet|garage|office|playroom|{dinning|laundry|play} &lt;TYPE&gt;}"
-      - "&lt;ENTIRE_OPT&gt; &lt;FLOOR_OPT&gt; {master|kid|children|child|guest|_} {bedroom|bathroom|washroom|storage} {&lt;TYPE&gt;|_}"
-      - "&lt;ENTIRE_OPT&gt; {house|home|building|{1st|first} floor|{2nd|second} floor}"
-
-  - id: "ls:on"
-    groups:
-      - "act"
-    description: "Light switch ON action."
-    synonyms:
-      - "&lt;ACTION&gt; {on|up|_} &lt;LIGHT&gt; {on|up|_}"
-      - "&lt;LIGHT&gt; {on|up}"
-
-  - id: "ls:off"
-    groups:
-      - "act"
-    description: "Light switch OFF action."
-    synonyms:
-      - "&lt;ACTION&gt; &lt;LIGHT&gt; {off|out|down}"
-      - "{&lt;ACTION&gt;|&lt;KILL&gt;} {off|out|down} &lt;LIGHT&gt;"
-      - "&lt;KILL&gt; &lt;LIGHT&gt;"
-      - "&lt;LIGHT&gt; &lt;KILL&gt;"
-      - "{out|no|off|down} &lt;LIGHT&gt;"
-      - "&lt;LIGHT&gt; {out|off|down}"
-
-intents:
-  - "intent=ls term(act)={has(tok_groups, 'act')} term(loc)={# == 'ls:loc'}*"                            
-                        </pre></div></div></div><div class="tab-pane fade show" id="alarm" role="tabpanel"><nav><div class="nav nav-tabs" role="tablist"> <a class="nav-item nav-link active" data-toggle="tab" href="#alarm_java_model" role="tab"><code>AlarmModel.java</code></a> <a class="nav-item nav-link" data-toggle="tab" href="#alarm_intents_idl" role="tab"><code>intents.idl</code></a> <a class="nav-item nav-link" data-toggle="tab" href="#alarm_json_model" role="tab"><code>alarm_model.j [...]
-package org.apache.nlpcraft.examples.alarm;
-
-import org.apache.nlpcraft.model.*;
-
-import java.time.*;
-import java.util.*;
-
-import static java.time.temporal.ChronoUnit.MILLIS;
-
-public class AlarmModel extends NCModelFileAdapter {
-    private static final DateTimeFormatter FMT =
-        DateTimeFormatter.ofPattern("HH'h' mm'm' ss's'").withZone(ZoneId.systemDefault());
-
-    private final Timer timer = new Timer();
-
-    public AlarmModel() {
-        // Loading the model from the file.
-        super("alarm_model.json");
-    }
-
-    @NCIntentRef("alarm") // Intent is defined in JSON model file (alarm_model.json and intents.idl).
-    @NCIntentSampleRef("alarm_samples.txt") // Samples supplied in an external file.
-    NCResult onMatch(
-        NCIntentMatch ctx,
-        @NCIntentTerm("nums") List&lt;NCToken&gt; numToks
-    ) {
-        long ms = calculateTime(numToks);
-
-        assert ms >= 0;
-
-        timer.schedule(
-            new TimerTask() {
-                @Override
-                public void run() {
-                    System.out.println(
-                        "BEEP BEEP BEEP for: " + ctx.getContext().getRequest().getNormalizedText() + ""
-                    );
-                }
-            },
-            ms
-        );
-
-        return NCResult.text("Timer set for: " + FMT.format(LocalDateTime.now().plus(ms, MILLIS)));
-    }
-
-    @Override
-    public void onDiscard() {
-        // Clean up when model gets discarded (e.g. during testing).
-        timer.cancel();
-    }
-
-    public static long calculateTime(List&lt;NCToken&gt; numToks) {
-        LocalDateTime now = LocalDateTime.now();
-        LocalDateTime dt = now;
-
-        for (NCToken num : numToks) {
-            String unit = num.meta("nlpcraft:num:unit");
-
-            // Skip possible fractional to simplify.
-            long v = ((Double)num.meta("nlpcraft:num:from")).longValue();
-
-            if (v <= 0)
-                throw new NCRejection("Value must be positive: " + unit);
-
-            switch (unit) {
-                case "second": { dt = dt.plusSeconds(v); break; }
-                case "minute": { dt = dt.plusMinutes(v); break; }
-                case "hour": { dt = dt.plusHours(v); break; }
-                case "day": { dt = dt.plusDays(v); break; }
-                case "week": { dt = dt.plusWeeks(v); break; }
-                case "month": { dt = dt.plusMonths(v); break; }
-                case "year": { dt = dt.plusYears(v); break; }
-
-                default:
-                    // It shouldn't be an assertion, because 'datetime' unit can be extended outside.
-                    throw new NCRejection("Unsupported time unit: " + unit);
-            }
-        }
-
-        return now.until(dt, MILLIS);
-    }
-}
-                        </pre></div><div class="tab-pane fade show" id="alarm_intents_idl" role="tabpanel"><pre class="brush: idl">
-// Fragments (mostly for demo purposes here).
-fragment=buzz term~{# == 'x:alarm'}
-fragment=when
-    term(nums)~{
-        // Demonstrating term variables.
-        @type = meta_tok('nlpcraft:num:unittype')
-        @iseq = meta_tok('nlpcraft:num:isequalcondition') // Excludes conditional statements.
-
-        # == 'nlpcraft:num' && @type == 'datetime' && @iseq == true
-    }[1,7]
-
-// Intents (using fragments).
-intent=alarm
-    fragment(buzz)
-    fragment(when)
-                        </pre></div><div class="tab-pane fade show" id="alarm_json_model" role="tabpanel"><pre class="brush: js">
-{
-    "id": "nlpcraft.alarm.ex",
-    "name": "Alarm Example Model",
-    "version": "1.0",
-    "description": "Alarm example model.",
-    "enabledBuiltInTokens": [
-        "nlpcraft:num"
-    ],
-    "elements": [
-        {
-            "id": "x:alarm",
-            "description": "Alarm token indicator.",
-            "synonyms": [
-                "{ping|buzz|wake|call|hit} {me|up|me up|_}",
-                "{set|_} {my|_} {wake|wake up|_} {alarm|timer|clock|buzzer|call} {clock|_} {up|_}"
-            ]
-        }
-    ],
-    "intents": [
-        "import('intents.idl')" // Import intents from external file.
-    ]
-}
-                        </pre></div></div></div></div><p> Further sub-sections will provide details on model's static configuration and dynamic programmable logic implementation.</p></section><section id="dataflow"><h2 class="section-title">Model Dataflow <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2><figure> <img alt="data model dataflow" class="img-fluid" src="/images/homepage-fig1.1.png"><figcaption><b>Fig 1.</b> NLPCraft Architecture</figcaption></figure>< [...]
-{
-     "id": "user.defined.id",
-     "name": "User Defined Name",
-     "version": "1.0",
-     "description": "Short model description.",
-     "enabledBuiltInTokens": ["google:person", "google:location"],
-     "macros": [],
-     "metadata": {},
-     "elements": [
-         {
-             "id": "x:id",
-             "description": "",
-             "groups": [],
-             "parentId": "",
-             "synonyms": [],
-             "metadata": {},
-             "values": []
-         }
-     ],
-     ...
-     "intents": []
-}
-                </pre></div><div class="tab-pane fade show" id="model-yaml" role="tabpanel"><pre class="brush: js">
-id: "user.defined.id"
-name: "User Defined Name"
-version: "1.0"
-description: "Short model description."
-macros:
-enabledBuiltInTokens:
-elements:
-  - id: "x:id"
-    description: ""
-    synonyms:
-    groups:
-    values:
-    parentId:
-    metadata:
-...
-intents:
-                </pre></div></div><div class="bq success"><div class="bq-idea-container"><div><div>💡</div></div><div> Note that using JSON/YAML-based configuration is a <b>canonical way</b> for creating data models in NLPCraft as it allows to cleanly separate static configuration from model's programmable logic.</div></div></div></section><section id="ne"><h2 class="section-title">Named Entities <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2><p> Named entity, a [...]
-            ...
-            "elements": [
-                {
-                    "id": "transport.vehicle",
-                    "description": "Transportation vehicle",
-                    "synonyms": [
-                        "car",
-                        "truck",
-                        "light duty truck",
-                        "heavy duty truck",
-                        "sedan",
-                        "coupe"
-                    ]
-                }
-            ]
-            ...
-        </pre><p> While adding multi-word synonyms looks somewhat trivial - in real models, the naive approach can lead to thousands and even tens of thousands of possible synonyms due to words, grammar, and linguistic permutations - which quickly becomes untenable if performed manually.</p><p> NLPCraft provides an effective tool for a compact synonyms representation. Instead of listing all possible multi-word synonyms one by one you can use combination of following techniques:</p><ul><l [...]
-            ...
-            "macros": [
-                {
-                    "name": "&lt;TRUCK_TYPE&gt;",
-                    "macro": "{light duty|heavy duty|half ton|1/2 ton|3/4 ton|one ton|super duty}"
-                }
-             ],
-            "elements": [
-                {
-                    "id": "transport.vehicle",
-                    "description": "Transportation vehicle",
-                    "synonyms": [
-                        "car",
-                        "{&lt;TRUCK_TYPE&gt;|_} {pickup|_} truck",
-                        "sedan",
-                        "coupe"
-                    ],
-                    "values": [
-                        {
-                            "value": "mercedes",
-                            "synonyms": ["mercedes-ben{z|s}", "mb", "ben{z|s}"]
-                        },
-                        {
-                            "value": "bmw",
-                            "synonyms": ["{bimmer|bimer|beemer}", "bayerische motoren werke"]
-                        },
-                        {
-                            "value": "chevrolet",
-                            "synonyms": ["chevy"]
-                        }
-                    ]
-                }
-            ]
-            ...
-        </pre><p> With that setup <code>transport.vehicle</code> element will be recognized by any of the following input string:</p><ul><li><code>car</code><li><code>benz</code> (with value <code>mercedes</code>)<li><code>3/4 ton pickup truck</code><li><code>light duty truck</code><li><code>chevy</code> (with value <code>chevrolet</code>)<li><code>bimmer</code> (with value <code>bmw</code>)<li><code>transport.vehicle</code></ul><span id="groups" class="section-sub-title">Element Groups  [...]
-+-- vehicle
-|     +--truck
-|     |    |-- light.duty.truck
-|     |    |-- heavy.duty.truck
-|     |    +-- medium.duty.truck
-|     +--car
-|     |   |-- coupe
-|     |   |-- sedan
-|     |   |-- hatchback
-|     |   +-- wagon
-        </pre><p> Then in our intent, for example, we could look for any token with root parent ID <code>vehicle</code> or immediate parent ID <code>truck</code> or <code>car</code> without a need to match on all current and future individual sub-IDs. For example:</p><pre class="brush: idl">
-            intent=vehicle.intent term~{has(tok_ancestors, 'vehicle')}
-            intent=truck.intent term~{tok_parent == 'truck'}
-            intent=car.intent term~{tok_parent == 'car'}
-        </pre></section><section id="syns-tools"> <span id="macros" class="section-sub-title">Macros <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></span><p> Listing all possible multi-word synonyms for a given element can be a time-consuming task. Macros together with option groups allow for significant simplification of this task. Macros allow you to give a name to an often used set of words or option groups and reuse it without repeating those words or option gr [...]
-            "macros": [
-                {
-                    "name": "&lt;A&gt;",
-                    "macro": "aaa"
-                },
-                {
-                    "name": "&lt;B&gt;",
-                    "macro": "&lt;A&gt; bbb"
-                },
-                {
-                    "name": "&lt;C&gt;",
-                    "macro": "&lt;A&gt; bbb {z|w}"
-                }
-             ]
-        </pre><span id="option-groups" class="section-sub-title">Option Groups <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></span><p> Option groups are similar to wildcard patterns that operates on a single word base. One line of option group expands into one or more individual synonyms. Option groups is the key mechanism for shortened synonyms notation. The following examples demonstrate how to use option groups.</p><p> Consider the following macros defined belo [...]
-            ...
-            "macros": [
-                {
-                    "name": "&lt;TRUCK_TYPE&gt;",
-                    "macro": "{ {light|super|heavy|medium} duty|half ton|1/2 ton|3/4 ton|one ton}"
-                }
-             ],
-            "elements": [
-                {
-                    "id": "transport.vehicle",
-                    "description": "Transportation vehicle",
-                    "synonyms": [
-                        "car",
-                        "{&lt;TRUCK_TYPE&gt;|_} {pickup|_} truck",
-                        "sedan",
-                        "coupe"
-                    ]
-                }
-            ]
-            ...
-        </pre><span id="regex" class="section-sub-title">Regular Expressions <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></span><p> Any individual synonym word that starts and ends with <code>//</code> (two forward slashes) is considered to be Java regular expression as defined in <code>java.util.regex.Pattern</code>. Note that regular expression can only span a single word, i.e. only individual words from the user input will be matched against given regular expr [...]
-        "synonyms": [
-            "{foo|//[bar].+//}"
-        ]
-        </pre><p> will match word <code>foo</code> or any other strings that start with <code>bar</code> as long as this string doesn't contain whitespaces.</p><div class="bq info"> <b>Regular Expressions Performance</b><p> It's important to note that regular expressions can significantly affect the performance of the NLPCraft processing if used uncontrolled. Use it with caution and test the performance of your model to ensure it meets your requirements.</p></div><h2 id="dsl" class="sect [...]
-            ...
-            "elements": [
-                {
-                    "id": "transport.vehicle",
-                    "description": "Transportation vehicle",
-                    "synonyms": [
-                        "car",
-                        "truck",
-                        "{light|heavy|super|medium} duty {pickup|_} truck",
-                        "sedan",
-                        "coupe"
-                    ]
-                },
-                {
-                    "id": "race.vehicle",
-                    "description": "Race vehicle",
-                    "synonyms": [
-                        "{race|speed|track} ^^{# == 'transport.vehicle'}^^"
-                    ]
-                }
-
-            ]
-            ...
-        </pre><div class="bq warn"><p> <b>Greedy NERs <span class="amp">&</span> Synonyms Conflicts</b></p><p> Note that in the above example you need to ensure that words <code>race</code>, <code>speed</code> or <code>track</code> are not part of the <code>transport.vehicle</code> token. It is particular important for the 3rd party NERs where specific rules about what words can or cannot be part of the token are unclear or undefined. In such cases the only remedy is to extensively test  [...]
-            ...
-            "elements": [
-                {
-                    "id": "google.loc.wrap",
-                    "description": "Wrapper for google location",
-                    "groups": ["my_group"],
-                    "synonyms": [
-                        "^^{# == 'google:location'}^^"
-                    ]
-                }
-            ]
-            ...
-        </pre><b>IDL Expression Syntax</b><p> IDL expressions are a subset of overall <a href="/intent-matching.html#idl">IDL syntax</a>. You can review formal <a target="github" href="https://github.com/apache/incubator-nlpcraft/blob/master/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/antlr4/NCIdl.g4">ANTLR4 grammar</a> but basically an IDL expression for synonym is a term expression with the optional alias at the beginning. Here's an example of IDL expression defin [...]
-            "synonyms": [
-                "population {of|for} ^^[city]{# == 'nlpcraft:city' && lowercase(meta_tok('city:country')) == 'france'}^^"
-            ]
-        </pre><b>NOTES:</b><ul><li>Optional alias <code>city</code> can be used to access a constituent part token (with ID <code>nlpcraft:city</code>).<li> The expression between <code>{</code> and <code>}</code> brackets is a standard IDL term expression.</ul><h2 id="custom_ners" class="section-sub-title">Custom NERs <a href="#"><i class="top-link fas fa-fw fa-angle-double-up"></i></a></h2><p> By default, the data model detects its elements by their synonyms, regexp or IDL expressions. [...]
diff --git a/docs.html b/docs.html
index 100ea75..cb0d108 100644
--- a/docs.html
+++ b/docs.html
@@ -15,4 +15,4 @@
  limitations under the License.
 -->
 
-<!doctype html><html lang="en"> <script async src="https://www.googletagmanager.com/gtag/js?id=UA-180663034-1"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-180663034-1'); </script><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="An open sourc [...]
+<!doctype html><html lang="en"> <script async src="https://www.googletagmanager.com/gtag/js?id=UA-180663034-1"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-180663034-1'); </script><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="An open sourc [...]
diff --git a/feed.xml b/feed.xml
index c8dbccc..bf3b113 100644
--- a/feed.xml
+++ b/feed.xml
@@ -1 +1 @@
-<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="4.2.2">Jekyll</generator><link href="http://localhost:4000/feed.xml" rel="self" type="application/atom+xml" /><link href="http://localhost:4000/" rel="alternate" type="text/html" /><updated>2023-03-01T10:11:31+04:00</updated><id>http://localhost:4000/feed.xml</id><title type="html">Apache NLPCraft</title></feed>
\ No newline at end of file
+<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="4.2.2">Jekyll</generator><link href="http://localhost:4000/feed.xml" rel="self" type="application/atom+xml" /><link href="http://localhost:4000/" rel="alternate" type="text/html" /><updated>2023-03-02T09:48:25+04:00</updated><id>http://localhost:4000/feed.xml</id><title type="html">Apache NLPCraft</title></feed>
\ No newline at end of file