You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/08/02 12:06:57 UTC
[incubator-nlpcraft] branch NLPCRAFT-98 updated: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-98
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-98 by this push:
new bd3890d WIP.
bd3890d is described below
commit bd3890de9f50a27f0848bad4dd49b69a61f1134f
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Aug 2 15:06:50 2020 +0300
WIP.
---
nlpcraft/src/main/resources/geo/continents.yaml | 43 -----
nlpcraft/src/main/resources/geo/countries/GB.yaml | 43 -----
nlpcraft/src/main/resources/geo/countries/RU.yaml | 49 ------
nlpcraft/src/main/resources/geo/countries/US.yaml | 57 -------
.../src/main/resources/geo/exceptions/dict.yaml | 34 ----
nlpcraft/src/main/resources/geo/metro.yaml | 32 ----
.../geo/synonyms/case_sensitive/list.yaml | 32 ----
nlpcraft/src/main/resources/geo/synonyms/list.yaml | 51 ------
.../src/main/resources/geo/synonyms/states.yaml | 37 -----
nlpcraft/src/main/resources/geo/us_top.yaml | 38 -----
nlpcraft/src/main/resources/geo/world_top.yaml | 32 ----
nlpcraft/src/main/resources/nlpcraft.conf | 3 +-
nlpcraft/src/main/resources/spell/dictionary.yaml | 34 ----
.../common/resources/NCExtResourceManager.scala | 181 ++++++++-------------
.../apache/nlpcraft/server/geo/NCGeoManager.scala | 2 +-
.../server/nlp/spell/NCSpellCheckManager.scala | 2 +-
16 files changed, 69 insertions(+), 601 deletions(-)
diff --git a/nlpcraft/src/main/resources/geo/continents.yaml b/nlpcraft/src/main/resources/geo/continents.yaml
deleted file mode 100644
index 0d52d8c..0000000
--- a/nlpcraft/src/main/resources/geo/continents.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-Europe:
- Eastern Europe:
- - name: "Russia"
- iso3: "RUS"
- iso: "RU"
- Northern Europe:
- - name: "United Kingdom"
- iso3: "GBR"
- iso: "GB"
-Americas:
- Northern America:
- - name: "United States"
- iso3: "USA"
- iso: "US"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/countries/GB.yaml b/nlpcraft/src/main/resources/geo/countries/GB.yaml
deleted file mode 100644
index 1279052..0000000
--- a/nlpcraft/src/main/resources/geo/countries/GB.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-name: "United Kingdom"
-iso: "GB"
-iso3: "GBR"
-code: "826"
-continent: "EU"
-regions:
-- name: England
- cities:
- - name: "London"
- latitude: 51.50853
- longitude: -0.12574
- population: 7556900
- dem: 25
- timezone: "Europe/London"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/countries/RU.yaml b/nlpcraft/src/main/resources/geo/countries/RU.yaml
deleted file mode 100644
index 2d87536..0000000
--- a/nlpcraft/src/main/resources/geo/countries/RU.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-name: "Russia"
-iso: "RU"
-iso3: "RUS"
-code: "643"
-continent: "EU"
-regions:
-- name: "Moskva"
- cities:
- - name: "Moscow"
- latitude: 55.75222
- longitude: 37.61556
- population: 10381222
- dem: 144
-- name: "Sankt-Peterburg"
- cities:
- - name: "Saint Petersburg"
- latitude: 59.93863
- longitude: 30.31413
- population: 5028000
- dem: 11
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/countries/US.yaml b/nlpcraft/src/main/resources/geo/countries/US.yaml
deleted file mode 100644
index 33c6df4..0000000
--- a/nlpcraft/src/main/resources/geo/countries/US.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-name: "United States"
-iso: "US"
-iso3: "USA"
-code: "840"
-continent: "NA"
-regions:
-- name: "California"
- cities:
- - name: "San Francisco"
- latitude: 37.77493
- longitude: -122.41942
- population: 864816
- dem: 28
- - name: "Los Angeles"
- latitude: 34.05223
- longitude: -118.24368
- population: 3971883
- dem: 96
- timezone: "America/Los_Angeles"
-- name: "New York"
- cities:
- - name: "New York City"
- latitude: 40.71427
- longitude: -74.00597
- population: 8175133
- elevation: 10
- dem: 57
- timezone: "America/New_York"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/exceptions/dict.yaml b/nlpcraft/src/main/resources/geo/exceptions/dict.yaml
deleted file mode 100644
index 894d9b2..0000000
--- a/nlpcraft/src/main/resources/geo/exceptions/dict.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-CITY:
-- "as"
-- "mission"
-REGION:
-- "forest"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/metro.yaml b/nlpcraft/src/main/resources/geo/metro.yaml
deleted file mode 100644
index 0f8ec41..0000000
--- a/nlpcraft/src/main/resources/geo/metro.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-- name: "San Francisco-Oakland-San Jose CA"
-- name: "New York NY"
-- name: "Los Angeles CA"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/synonyms/case_sensitive/list.yaml b/nlpcraft/src/main/resources/geo/synonyms/case_sensitive/list.yaml
deleted file mode 100644
index ab56d5f..0000000
--- a/nlpcraft/src/main/resources/geo/synonyms/case_sensitive/list.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-- country: "United States"
- synonyms:
- - "US"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/synonyms/list.yaml b/nlpcraft/src/main/resources/geo/synonyms/list.yaml
deleted file mode 100644
index 5846395..0000000
--- a/nlpcraft/src/main/resources/geo/synonyms/list.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-- country: "United States"
- synonyms:
- - "United States of America"
- - "America"
- - "USA"
- - "North American States"
- - "U.S.A."
-- country: "United States"
- region: "New York"
- city: "New York City"
- synonyms:
- - "NY city"
-- country: "United States"
- region: "California"
- city: "Los Angeles"
- synonyms:
- - "LA"
-- country: "United States"
- region: "California"
- city: "San Francisco"
- synonyms:
- - "SF"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/synonyms/states.yaml b/nlpcraft/src/main/resources/geo/synonyms/states.yaml
deleted file mode 100644
index 7570ae5..0000000
--- a/nlpcraft/src/main/resources/geo/synonyms/states.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-- region: "California"
- country: "United States"
- synonyms:
- - "CA"
- - "state of California"
- - "California state"
- - "CA state"
- - "state of CA"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/us_top.yaml b/nlpcraft/src/main/resources/geo/us_top.yaml
deleted file mode 100644
index 17ac158..0000000
--- a/nlpcraft/src/main/resources/geo/us_top.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-- name: "los angeles"
- country: "united states"
- region: "california"
-- name: "san francisco"
- country: "united states"
- region: "california"
-- name: "new york city"
- country: "united states"
- region: "new york"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/geo/world_top.yaml b/nlpcraft/src/main/resources/geo/world_top.yaml
deleted file mode 100644
index d544219..0000000
--- a/nlpcraft/src/main/resources/geo/world_top.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-- name: "london"
- country: "united kingdom"
- region: "england"
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/nlpcraft.conf b/nlpcraft/src/main/resources/nlpcraft.conf
index feb0bd2..b6aa99d 100644
--- a/nlpcraft/src/main/resources/nlpcraft.conf
+++ b/nlpcraft/src/main/resources/nlpcraft.conf
@@ -290,7 +290,8 @@ nlpcraft {
# External configuration resources.
extConfig {
# Mandatory.
- extUrl = "http://localhost:8080"
+ # TODO: change this URL to point to the master branch.
+ extUrl = "https://github.com/apache/incubator-nlpcraft/raw/NLPCRAFT-98/external"
# Optional.
# Default value is $USER_HOME/.nlpcraft/extcfg
diff --git a/nlpcraft/src/main/resources/spell/dictionary.yaml b/nlpcraft/src/main/resources/spell/dictionary.yaml
deleted file mode 100644
index b7a0465..0000000
--- a/nlpcraft/src/main/resources/spell/dictionary.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This is the default configuration. Feel free to add to and extend it.
-#
-# You can also download pre-packaged configuration that is based on the content
-# licensed under Creative Commons Attribution 4.0 International (CC BY 4.0)
-# from https://nlpcraft.apache.org/download.html#3rd_party_config
-#
-# NOTE: these external pre-packaged configurations are not part of the official
-# Apache source release due to CC BY 4.0 licensing.
-#
-
----
-accoring: "according"
-benifit: "benefit"
-wokr: "work"
-yuonger: "younger"
-zeebra: "zebra"
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/resources/NCExtResourceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/resources/NCExtResourceManager.scala
index 08eac23..cfb52a8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/resources/NCExtResourceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/resources/NCExtResourceManager.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.common.resources
import java.io._
import java.net.URL
import java.nio.file.Files
+import java.util.concurrent.ConcurrentHashMap
import io.opencensus.trace.Span
import org.apache.commons.codec.digest.DigestUtils
@@ -28,16 +29,15 @@ import org.apache.nlpcraft.common.config.NCConfigurable
import org.apache.nlpcraft.common.resources.NCResourceType._
import org.apache.nlpcraft.common.{NCE, NCService, U}
import resource.managed
-
+import scala.collection.JavaConverters._
import scala.io.Source
/**
* External resources manager.
*/
object NCExtResourceManager extends NCService {
- private final val DFLT_EXT_ROOT = ".nlpcraft/extcfg"
- private final val REMOTE_MD5_FILE = "md5.txt"
- private final val EXT_USER_TYPES = Seq(GEO, SPELL).map(_.toString)
+ private final val DFLT_DIR = ".nlpcraft/extcfg"
+ private final val MD5_FILE = "md5.txt"
private final val FILES =
Map(
@@ -62,32 +62,18 @@ object NCExtResourceManager extends NCService {
)
private object Config extends NCConfigurable {
- private val userExtsTypes: Seq[String] = getStringListOpt("nlpcraft.extres.useExts").getOrElse(Seq.empty)
-
- val url: String = getString("nlpcraft.extres.url")
- val checkVersion: Boolean = getBool("nlpcraft.extres.checkVersion")
- val extRootDir: File =
- new File(getStringOpt("nlpcraft.extres.dir").
- getOrElse(s"${U.homeFileName(DFLT_EXT_ROOT)}"))
-
- lazy val allExtsTypes: Seq[NCResourceType] = userExtsTypes.map(string2Type) ++ Seq(OPENNLP)
+ val url: String = getString("nlpcraft.extConfig.extUrl")
+ val checkMd5: Boolean = getBool("nlpcraft.extConfig.checkMd5")
+ val dir: File = new File(getStringOpt("nlpcraft.extConfig.locDir").getOrElse(s"${U.homeFileName(DFLT_DIR)}"))
@throws[NCE]
- def check(): Unit = {
- checkAndPrepareDir(Config.extRootDir)
-
- userExtsTypes.foreach(p ⇒
- // TODO: print out what is allowed...
- if (!EXT_USER_TYPES.contains(p.toUpperCase))
- throw new NCE(s"Unexpected type: '$p'")
- )
- }
+ def check(): Unit = checkAndPrepareDir(Config.dir)
}
Config.check()
private case class Download(fileName: String, typ: NCResourceType) {
- val destDir: File = new File(Config.extRootDir, type2String(typ))
+ val destDir: File = new File(Config.dir, type2String(typ))
val file: File = new File(destDir, fileName)
val isZip: Boolean = {
val lc = file.getName.toLowerCase
@@ -96,11 +82,19 @@ object NCExtResourceManager extends NCService {
}
}
+ case class FileHolder(name: String, typ: NCResourceType) {
+ val dir = new File(Config.dir, type2String(typ))
+
+ checkAndPrepareDir(dir)
+
+ val file: File = new File(dir, name)
+ }
+
private object Md5 {
case class Key(typ: NCResourceType, resource: String)
private lazy val m: Map[Key, String] = {
- val url = s"${Config.url}/$REMOTE_MD5_FILE"
+ val url = s"${Config.url}/$MD5_FILE"
try
managed(Source.fromURL(url)) acquireAndGet { src ⇒
@@ -153,33 +147,13 @@ object NCExtResourceManager extends NCService {
override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ ⇒
require(NCResourceType.values.forall(FILES.contains))
- val downTypes = collection.mutable.HashMap.empty[NCResourceType, File]
-
- for (typ ← Config.allExtsTypes) {
- val typDir = new File(Config.extRootDir, type2String(typ))
+ val m = new ConcurrentHashMap[NCResourceType, File]
- checkAndPrepareDir(typDir)
-
- for (name ← FILES(typ)) {
- val file = new File(typDir, name)
-
- if (file.exists()) {
- if (file.isDirectory)
- throw new NCE(s"Unexpected folder: '${file.getAbsolutePath}'")
+ U.executeParallel(
+ NCResourceType.values.flatMap(t ⇒ FILES(t).map(FileHolder(_, t))).toSeq.map(f ⇒ () ⇒ processFile(f, m)): _*
+ )
- if (file.length() == 0 || Config.checkVersion && !Md5.isValid(file, typ)) {
- logger.warn(
- s"File: '${file.getAbsolutePath}' corrupted. " +
- s"All files of: '$typ' will be deleted and downloaded again"
- )
-
- downTypes += typ → typDir
- }
- }
- else
- downTypes += typ → typDir
- }
- }
+ val downTypes = m.asScala
if (downTypes.nonEmpty) {
U.executeParallel(downTypes.values.toSeq.map(d ⇒ () ⇒ clearDir(d)): _*)
@@ -200,12 +174,7 @@ object NCExtResourceManager extends NCService {
@throws[NCE]
def getContent(typ: NCResourceType, res: String, parent: Span = null): String =
startScopedSpan("getContent", parent, "res" → res) { _ ⇒
- mkString(
- if (Config.allExtsTypes.contains(typ))
- U.readFile(mkExtFile(typ, res), "UTF-8")
- else
- U.readStream(U.getStream(getResourcePath(typ, res)), "UTF-8")
- )
+ mkString(U.readFile(mkExtFile(typ, res), "UTF-8"))
}
/**
@@ -217,10 +186,7 @@ object NCExtResourceManager extends NCService {
@throws[NCE]
def getStream(typ: NCResourceType, res: String, parent: Span = null): InputStream =
startScopedSpan("getStream", parent, "res" → res) { _ ⇒
- if (Config.allExtsTypes.contains(typ))
- new BufferedInputStream(new FileInputStream(mkExtFile(typ, res)))
- else
- U.getStream(getResourcePath(typ, res))
+ new BufferedInputStream(new FileInputStream(mkExtFile(typ, res)))
}
/**
@@ -237,42 +203,42 @@ object NCExtResourceManager extends NCService {
startScopedSpan("getDirContent", parent, "resDir" → resDir) { _ ⇒
val resDirPath = getResourcePath(typ, resDir)
- val extData =
- if (Config.allExtsTypes.contains(typ)) {
- val d = new File(Config.extRootDir, resDirPath)
+ val d = new File(Config.dir, resDirPath)
- if (d.exists && d.isDirectory) {
- val arr =
- d.listFiles(new FileFilter {
- override def accept(f: File): Boolean = f.isFile && resFilter(f.getName)
- })
+ if (!d.exists || !d.isDirectory)
+ throw new NCE(s"'${d.getAbsolutePath}' is not valid folder")
- (if (arr != null) arr.toSeq else Seq.empty).map(f ⇒ f.getName → f).toMap
- }
- else
- Map.empty
- }
- else
- Map.empty
-
- extData.foreach { case (_, f) ⇒ logExtFile(f) }
-
- val resData: Map[String, String] =
- if (U.hasResource(resDirPath))
- U.getFilesResources(resDirPath).filter(resFilter).
- map(p ⇒ new File(p).getName → p).toMap -- extData.keySet
- else
- Map.empty
-
- extData.values.toStream.map(
- f ⇒ NCResourceTxtContent(typ, f.getName, mkString(U.readFile(f, "UTF-8")))
- ) ++
- resData.toStream.map {
- case (path, fullPath) ⇒
- NCResourceTxtContent(typ, path, mkString(U.readStream(U.getStream(fullPath), "UTF-8")))
- }
+ val files =
+ d.listFiles(new FileFilter { override def accept(f: File): Boolean = f.isFile && resFilter(f.getName) })
+
+ if (files != null)
+ files.toStream.map(f ⇒ NCResourceTxtContent(typ, f.getName, mkString(U.readFile(f, "UTF-8"))))
+ else
+ Stream.empty
}
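+ /**
+ * Checks one resource file; registers its type for re-download if the file is missing, empty or fails the MD5 check.
+ * @param h Holder of the resource file to check (file, its directory and resource type).
+ * @param m Shared map collecting resource types, with their directories, that must be downloaded again.
+ */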
+ @throws[NCE]
+ private def processFile(h: FileHolder, m: ConcurrentHashMap[NCResourceType, File]): Unit =
+ if (h.file.exists()) {
+ if (h.file.isDirectory)
+ throw new NCE(s"Unexpected folder: '${h.file.getAbsolutePath}'")
+
+ if (h.file.length() == 0 || Config.checkMd5 && !Md5.isValid(h.file, h.typ)) {
+ logger.warn(
+ s"File: '${h.file.getAbsolutePath}' corrupted. " +
+ s"All files of: '${h.typ}' will be deleted and downloaded again"
+ )
+
+ m.put(h.typ, h.dir)
+ }
+ }
+ else
+ m.put(h.typ, h.dir)
/**
*
@@ -295,8 +261,12 @@ object NCExtResourceManager extends NCService {
case e: IOException ⇒ throw new NCE(s"Error downloading file [url='$url', file='$filePath']", e)
}
- if (Config.checkVersion && !Md5.isValid(d.file, d.typ)) {
- safeDelete(d.file)
+ def safeDelete(): Unit =
+ if (!d.file.delete())
+ logger.warn(s"Couldn't delete file: '$filePath'")
+
+ if (Config.checkMd5 && !Md5.isValid(d.file, d.typ)) {
+ safeDelete()
throw new NCE(s"Unexpected md5 sum for downloaded file: '$filePath'")
}
@@ -311,7 +281,7 @@ object NCExtResourceManager extends NCService {
}
catch {
case e: NCE ⇒
- safeDelete(d.file)
+ safeDelete()
throw e
}
@@ -369,28 +339,7 @@ object NCExtResourceManager extends NCService {
* @param typ
* @param res
*/
- private def mkExtFile(typ: NCResourceType, res: String): File = {
- val f = new File(Config.extRootDir, getResourcePath(typ, res))
-
- logExtFile(f)
-
- f
- }
-
- /**
- *
- * @param f
- */
- // User should be aware when external configuration file read.
- private def logExtFile(f: File): Unit = logger.debug(s"External file read: '${f.getAbsolutePath}'")
-
- /**
- *
- * @param f
- */
- private def safeDelete(f: File): Unit =
- if (!f.delete())
- logger.warn(s"Couldn't delete file: '${f.getAbsolutePath}'")
+ private def mkExtFile(typ: NCResourceType, res: String): File = new File(Config.dir, getResourcePath(typ, res))
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala
index a7e3d11..2b42b87 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala
@@ -442,7 +442,7 @@ object NCGeoManager extends NCService {
val topWorld = mkTopCities("world_top.yaml")
val topUsa = mkTopCities("us_top.yaml")
- logger.info(s"GEO data loaded [" +
+ logger.debug(s"GEO data loaded [" +
s"continents=${conts.size}, " +
s"subcontinents=${subs.size}, " +
s"countries=${cntrs.size}, " +
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
index e5e66bb..8f192a6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
@@ -52,7 +52,7 @@ object NCSpellCheckManager extends NCService {
new TypeReference[Map[String, String]] {}
)
- logger.info(s"Spell checker dictionary loaded: ${dict.size} entries")
+ logger.debug(s"Spell checker dictionary loaded: ${dict.size} entries")
super.start()
}