You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hugegraph.apache.org by mi...@apache.org on 2024/02/28 08:56:47 UTC

(incubator-hugegraph-ai) 01/02: Introduce Gradio for creating an interactive and visual demo

This is an automated email from the ASF dual-hosted git repository.

ming pushed a commit to branch web
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git

commit 56195e2fe93323ffab8dee528bc233fc66d2c514
Author: zhangshiming <zh...@cvte.com>
AuthorDate: Wed Feb 28 16:56:14 2024 +0800

    Introduce Gradio for creating an interactive and visual demo
---
 hugegraph-llm/examples/build_kg_test.py            | 10 +--
 hugegraph-llm/examples/graph_rag_test.py           | 81 ++--------------------
 hugegraph-llm/requirements.txt                     |  1 +
 hugegraph-llm/src/config/config.ini                |  2 +-
 hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py  |  4 +-
 .../src/hugegraph_llm/operators/graph_rag_task.py  | 10 +--
 .../operators/hugegraph_op/commit_to_hugegraph.py  |  5 +-
 .../operators/hugegraph_op/graph_rag_query.py      | 18 +++--
 .../operators/kg_construction_task.py              |  9 ++-
 .../operators/llm_op/answer_synthesize.py          |  2 +-
 .../operators/llm_op/keyword_extract.py            | 15 ++--
 hugegraph-llm/src/hugegraph_llm/utils/config.py    | 10 ++-
 12 files changed, 57 insertions(+), 110 deletions(-)

diff --git a/hugegraph-llm/examples/build_kg_test.py b/hugegraph-llm/examples/build_kg_test.py
index b0b8c51..d076d4f 100644
--- a/hugegraph-llm/examples/build_kg_test.py
+++ b/hugegraph-llm/examples/build_kg_test.py
@@ -49,13 +49,15 @@ if __name__ == "__main__":
     }
 
     (
-        builder.import_schema(from_hugegraph="xxx")
+        builder
+        # .import_schema(from_hugegraph="xxx")
         .print_result()
         # .import_schema(from_extraction="xxx").print_result()
-        # .import_schema(from_user_defined=xxx).print_result()
+        .import_schema(from_user_defined=schema)
+        .print_result()
         .extract_triples(TEXT)
         .print_result()
-        .disambiguate_word_sense()
-        .commit_to_hugegraph()
+        # .disambiguate_word_sense()
+        # .commit_to_hugegraph()
         .run()
     )
diff --git a/hugegraph-llm/examples/graph_rag_test.py b/hugegraph-llm/examples/graph_rag_test.py
index bbd6862..510f85d 100644
--- a/hugegraph-llm/examples/graph_rag_test.py
+++ b/hugegraph-llm/examples/graph_rag_test.py
@@ -19,95 +19,26 @@
 import os
 
 from hugegraph_llm.operators.graph_rag_task import GraphRAG
-from pyhugegraph.client import PyHugeClient
-
-
-def prepare_data():
-    client = PyHugeClient("127.0.0.1", 8080, "hugegraph", "admin", "admin")
-    schema = client.schema()
-    schema.propertyKey("name").asText().ifNotExist().create()
-    schema.propertyKey("birthDate").asText().ifNotExist().create()
-    schema.vertexLabel("Person").properties(
-        "name", "birthDate"
-    ).useCustomizeStringId().ifNotExist().create()
-    schema.vertexLabel("Movie").properties("name").useCustomizeStringId().ifNotExist().create()
-    schema.indexLabel("PersonByName").onV("Person").by("name").secondary().ifNotExist().create()
-    schema.indexLabel("MovieByName").onV("Movie").by("name").secondary().ifNotExist().create()
-    schema.edgeLabel("ActedIn").sourceLabel("Person").targetLabel("Movie").ifNotExist().create()
-
-    graph = client.graph()
-    graph.addVertex("Person", {"name": "Al Pacino", "birthDate": "1940-04-25"}, id="Al Pacino")
-    graph.addVertex(
-        "Person",
-        {"name": "Robert De Niro", "birthDate": "1943-08-17"},
-        id="Robert De Niro",
-    )
-    graph.addVertex("Movie", {"name": "The Godfather"}, id="The Godfather")
-    graph.addVertex("Movie", {"name": "The Godfather Part II"}, id="The Godfather Part II")
-    graph.addVertex(
-        "Movie",
-        {"name": "The Godfather Coda The Death of Michael Corleone"},
-        id="The Godfather Coda The Death of Michael Corleone",
-    )
-
-    graph.addEdge("ActedIn", "Al Pacino", "The Godfather", {})
-    graph.addEdge("ActedIn", "Al Pacino", "The Godfather Part II", {})
-    graph.addEdge("ActedIn", "Al Pacino", "The Godfather Coda The Death of Michael Corleone", {})
-    graph.addEdge("ActedIn", "Robert De Niro", "The Godfather Part II", {})
-
-    graph.close()
-
+from hugegraph_llm.utils.gradio_demo import init_hg_test_data
 
 if __name__ == "__main__":
+    init_hg_test_data()
     os.environ["http_proxy"] = ""
     os.environ["https_proxy"] = ""
     os.environ["OPENAI_API_KEY"] = ""
 
-    # prepare_data()
-
     graph_rag = GraphRAG()
-
-    # configure operator with context dict
-    context = {
-        # hugegraph client
-        "ip": "localhost",  # default to "localhost" if not set
-        "port": 18080,  # default to 8080 if not set
-        "user": "admin",  # default to "admin" if not set
-        "pwd": "admin",  # default to "admin" if not set
-        "graph": "hugegraph",  # default to "hugegraph" if not set
-        # query question
-        "query": "Tell me about Al Pacino.",  # must be set
-        # keywords extraction
-        "max_keywords": 5,  # default to 5 if not set
-        "language": "english",  # default to "english" if not set
-        # graph rag query
-        "prop_to_match": "name",  # default to None if not set
-        "max_deep": 2,  # default to 2 if not set
-        "max_items": 30,  # default to 30 if not set
-        # print intermediate processes result
-        "verbose": True,  # default to False if not set
-    }
-    result = graph_rag.extract_keyword().query_graph_for_rag().synthesize_answer().run(**context)
-    print(f"Query:\n- {context['query']}")
-    print(f"Answer:\n- {result['answer']}")
-
-    print("--------------------------------------------------------")
-
-    # configure operator with parameters
-    graph_client = PyHugeClient("127.0.0.1", 18080, "hugegraph", "admin", "admin")
     result = (
-        graph_rag.extract_keyword(
-            text="Tell me about Al Pacino.",
-            max_keywords=5,  # default to 5 if not set
-            language="english",  # default to "english" if not set
-        )
+        graph_rag.extract_keyword(text="Tell me about Al Pacino.")
+        .print_result()
         .query_graph_for_rag(
-            graph_client=graph_client,
             max_deep=2,  # default to 2 if not set
             max_items=30,  # default to 30 if not set
             prop_to_match=None,  # default to None if not set
         )
+        .print_result()
         .synthesize_answer()
+        .print_result()
         .run(verbose=True)
     )
     print("Query:\n- Tell me about Al Pacino.")
diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt
index 03bba7f..7a09ea3 100644
--- a/hugegraph-llm/requirements.txt
+++ b/hugegraph-llm/requirements.txt
@@ -2,3 +2,4 @@ openai==0.28.1
 retry==0.9.2
 tiktoken==0.5.1
 nltk==3.8.1
+gradio==4.19.1
diff --git a/hugegraph-llm/src/config/config.ini b/hugegraph-llm/src/config/config.ini
index 6f9219f..7ff45c4 100644
--- a/hugegraph-llm/src/config/config.ini
+++ b/hugegraph-llm/src/config/config.ini
@@ -27,6 +27,6 @@ graph = hugegraph
 type = openai
 api_key = xxx
 secret_key = xxx
-ernie_url = https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token=
+llm_url = https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token=
 model_name = gpt-3.5-turbo-16k
 max_token = 4000
diff --git a/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py b/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py
index 085f7a0..00fa6fa 100644
--- a/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py
+++ b/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py
@@ -31,7 +31,7 @@ class ErnieBotClient(BaseLLM):
         self.c = Config(section=Constants.LLM_CONFIG)
         self.api_key = self.c.get_llm_api_key()
         self.secret_key = self.c.get_llm_secret_key()
-        self.base_url = self.c.get_llm_ernie_url()
+        self.base_url = self.c.get_llm_url()
         self.get_access_token()
 
     def get_access_token(self):
@@ -61,7 +61,7 @@ class ErnieBotClient(BaseLLM):
             raise Exception(
                 f"Request failed with code {response.status_code}, message: {response.text}"
             )
-        return response.text
+        return json.loads(response.text)["result"]
 
     def generate_streaming(
         self,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
index 0088119..a62f4b1 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
@@ -20,10 +20,10 @@ from typing import Dict, Any, Optional, List
 
 from hugegraph_llm.llms.base import BaseLLM
 from hugegraph_llm.llms.init_llm import LLMs
+from hugegraph_llm.operators.common_op.print_result import PrintResult
 from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery
 from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize
 from hugegraph_llm.operators.llm_op.keyword_extract import KeywordExtract
-from pyhugegraph.client import PyHugeClient
 
 
 class GraphRAG:
@@ -52,14 +52,12 @@ class GraphRAG:
 
     def query_graph_for_rag(
         self,
-        graph_client: Optional[PyHugeClient] = None,
         max_deep: int = 2,
         max_items: int = 30,
         prop_to_match: Optional[str] = None,
     ):
         self._operators.append(
             GraphRAGQuery(
-                client=graph_client,
                 max_deep=max_deep,
                 max_items=max_items,
                 prop_to_match=prop_to_match,
@@ -78,6 +76,10 @@ class GraphRAG:
         )
         return self
 
+    def print_result(self):
+        self._operators.append(PrintResult())
+        return self
+
     def run(self, **kwargs) -> Dict[str, Any]:
         if len(self._operators) == 0:
             self.extract_keyword().query_graph_for_rag().synthesize_answer()
@@ -85,5 +87,5 @@ class GraphRAG:
         context = kwargs
         context["llm"] = self._llm
         for op in self._operators:
-            context = op.run(context=context)
+            context = op.run(context)
         return context
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py
index 558a8ba..695c5b7 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py
@@ -14,7 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+from typing import Dict, Any
 
 from hugegraph_llm.utils.config import Config
 from hugegraph_llm.utils.constants import Constants
@@ -34,7 +34,7 @@ class CommitToKg:
         )
         self.schema = self.client.schema()
 
-    def run(self, data: dict):
+    def run(self, data: dict) -> Dict[str, Any]:
         if "schema" not in data:
             self.schema_free_mode(data["triples"])
         else:
@@ -43,6 +43,7 @@ class CommitToKg:
             edges = data["edges"]
             self.init_schema(schema)
             self.init_graph(vertices, edges)
+        return data
 
     def init_graph(self, vertices, edges):
         vids = {}
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index a59acc1..6d5ede6 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -19,6 +19,8 @@
 import re
 from typing import Any, Dict, Optional, List, Set, Tuple
 
+from hugegraph_llm.utils.config import Config
+from hugegraph_llm.utils.constants import Constants
 from pyhugegraph.client import PyHugeClient
 
 
@@ -65,12 +67,18 @@ class GraphRAGQuery:
 
     def __init__(
         self,
-        client: Optional[PyHugeClient] = None,
         max_deep: int = 2,
         max_items: int = 30,
         prop_to_match: Optional[str] = None,
     ):
-        self._client = client
+        config = Config(section=Constants.HUGEGRAPH_CONFIG)
+        self._client = PyHugeClient(
+            config.get_graph_ip(),
+            config.get_graph_port(),
+            config.get_graph_name(),
+            config.get_graph_user(),
+            config.get_graph_pwd(),
+        )
         self._max_deep = max_deep
         self._max_items = max_items
         self._prop_to_match = prop_to_match
@@ -231,9 +239,9 @@ class GraphRAGQuery:
             return self._schema
 
         schema = self._client.schema()
-        vertex_schema = schema.get_vertex_labels()
-        edge_schema = schema.get_edge_labels()
-        relationships = schema.get_relations()
+        vertex_schema = schema.getVertexLabels()
+        edge_schema = schema.getEdgeLabels()
+        relationships = schema.getRelations()
 
         self._schema = (
             f"Node properties: {vertex_schema}\n"
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py b/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py
index 082058d..283a5a1 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py
@@ -16,6 +16,8 @@
 # under the License.
 
 
+from typing import Dict, Any
+
 from hugegraph_llm.llms.base import BaseLLM
 from hugegraph_llm.operators.common_op.check_schema import CheckSchema
 from hugegraph_llm.operators.common_op.print_result import PrintResult
@@ -58,7 +60,8 @@ class KgBuilder:
         self.operators.append(PrintResult())
         return self
 
-    def run(self):
-        result = ""
+    def run(self) -> Dict[str, Any]:
+        context = ""
         for operator in self.operators:
-            result = operator.run(result)
+            context = operator.run(context)
+        return context
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
index b08adb3..6216494 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
@@ -91,4 +91,4 @@ class AnswerSynthesize:
         if verbose:
             print(f"\033[91mANSWER: {response}\033[0m")
 
-        return context
+        return context["answer"]
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py
index 9a94a11..54d698c 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py
@@ -23,17 +23,10 @@ from hugegraph_llm.llms.base import BaseLLM
 from hugegraph_llm.llms.init_llm import LLMs
 from hugegraph_llm.operators.common_op.nltk_helper import NLTKHelper
 
-DEFAULT_KEYWORDS_EXTRACT_TEMPLATE_TMPL = (
-    "A question is provided below. Given the question, "
-    "extract up to {max_keywords} keywords from the text. "
-    "Focus on extracting the keywords that we can use "
-    "to best lookup answers to the question. "
-    "Avoid stopwords.\n"
-    "---------------------\n"
-    "{question}\n"
-    "---------------------\n"
-    "Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'"
-)
+DEFAULT_KEYWORDS_EXTRACT_TEMPLATE_TMPL = """extract {max_keywords} keywords from the text:
+    {question}
+    Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
+    """
 
 DEFAULT_KEYWORDS_EXPAND_TEMPLATE_TMPL = (
     "Generate synonyms or possible form of keywords up to {max_keywords} in total,\n"
diff --git a/hugegraph-llm/src/hugegraph_llm/utils/config.py b/hugegraph-llm/src/hugegraph_llm/utils/config.py
index d7ec13f..b11585a 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/config.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/config.py
@@ -31,6 +31,12 @@ class Config:
         self.config.read(self.config_file)
         self.section = section
 
+    def update_config(self, updates):
+        for key, value in updates.items():
+            self.config.set(self.section, key, value)
+        with open(self.config_file, "w", encoding="utf-8") as configfile:
+            self.config.write(configfile)
+
     def get_config(self):
         return self.config
 
@@ -55,8 +61,8 @@ class Config:
     def get_llm_secret_key(self):
         return self.config.get(self.section, "secret_key")
 
-    def get_llm_ernie_url(self):
-        return self.config.get(self.section, "ernie_url")
+    def get_llm_url(self):
+        return self.config.get(self.section, "llm_url")
 
     def get_llm_type(self):
         return self.config.get(self.section, "type")