You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by di...@apache.org on 2023/04/04 10:36:27 UTC

[superset] branch feat/sqllab-natural-language created (now 184c6277f6)

This is an automated email from the ASF dual-hosted git repository.

diegopucci pushed a change to branch feat/sqllab-natural-language
in repository https://gitbox.apache.org/repos/asf/superset.git


      at 184c6277f6 Use cosine similarity

This branch includes the following new commits:

     new bf4db70b83 WIP
     new ac7bb02b52 WIP
     new 184c6277f6 Use cosine similarity

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[superset] 01/03: WIP

Posted by di...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

diegopucci pushed a commit to branch feat/sqllab-natural-language
in repository https://gitbox.apache.org/repos/asf/superset.git

commit bf4db70b83a9a2a622ab5083ebcf78375793aa3c
Author: geido <di...@gmail.com>
AuthorDate: Thu Feb 23 18:53:32 2023 +0100

    WIP
---
 .../SqlLab/components/AceEditorWrapper/index.tsx   |   4 +-
 .../src/SqlLab/components/SqlEditor/index.jsx      | 141 +++++++++++++++++++--
 superset-frontend/src/components/index.ts          |   1 +
 superset/sqllab/api.py                             |  61 +++++++++
 superset/sqllab/schemas.py                         |   3 +-
 5 files changed, 195 insertions(+), 15 deletions(-)

diff --git a/superset-frontend/src/SqlLab/components/AceEditorWrapper/index.tsx b/superset-frontend/src/SqlLab/components/AceEditorWrapper/index.tsx
index 0dd3385ea5..412fe48333 100644
--- a/superset-frontend/src/SqlLab/components/AceEditorWrapper/index.tsx
+++ b/superset-frontend/src/SqlLab/components/AceEditorWrapper/index.tsx
@@ -57,6 +57,7 @@ type AceEditorWrapperProps = {
   extendedTables?: Array<{ name: string; columns: any[] }>;
   height: string;
   hotkeys: HotKey[];
+  initialSql?: string;
 };
 
 const StyledAceEditor = styled(AceEditor)`
@@ -90,6 +91,7 @@ const AceEditorWrapper = ({
   extendedTables = [],
   height,
   hotkeys,
+  initialSql,
 }: AceEditorWrapperProps) => {
   const dispatch = useDispatch();
 
@@ -103,7 +105,7 @@ const AceEditorWrapper = ({
     'validationResult',
     'schema',
   ]);
-  const currentSql = queryEditor.sql ?? '';
+  const currentSql = initialSql || queryEditor.sql || '';
   const functionNames = queryEditor.functionNames ?? [];
   const schemas = queryEditor.schemaOptions ?? [];
   const tables = queryEditor.tableOptions ?? [];
diff --git a/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx b/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
index d7626c8cbf..d517b4e939 100644
--- a/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
+++ b/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
@@ -31,8 +31,8 @@ import Mousetrap from 'mousetrap';
 import Button from 'src/components/Button';
 import Timer from 'src/components/Timer';
 import ResizableSidebar from 'src/components/ResizableSidebar';
-import { AntdDropdown, AntdSwitch } from 'src/components';
-import { Input } from 'src/components/Input';
+import { AntdDropdown, AntdSwitch, Result } from 'src/components';
+import { Input, TextArea } from 'src/components/Input';
 import { Menu } from 'src/components/Menu';
 import Icons from 'src/components/Icons';
 import { detectOS } from 'src/utils/common';
@@ -86,6 +86,7 @@ import SqlEditorLeftBar from '../SqlEditorLeftBar';
 import AceEditorWrapper from '../AceEditorWrapper';
 import RunQueryActionButton from '../RunQueryActionButton';
 import QueryLimitSelect from '../QueryLimitSelect';
+import { SupersetClient } from '@superset-ui/core';
 
 const bootstrapData = getBootstrapData();
 const validatorMap =
@@ -222,10 +223,13 @@ const SqlEditor = ({
         database: databases[dbId],
         latestQuery: queries[latestQueryId],
         hideLeftBar,
+        tables,
       };
     },
   );
 
+  console.log('tables', tables);
+
   const [height, setHeight] = useState(0);
   const [autorun, setAutorun] = useState(queryEditor.autorun);
   const [ctas, setCtas] = useState('');
@@ -640,6 +644,112 @@ const SqlEditor = ({
     );
   };
 
+  // NLP implementation
+  const [editorType, setEditorType] = useState('sql');
+  const [NLPQuery, setNLPQuery] = useState('');
+  const [NLPResult, setNLPResult] = useState('');
+  const [NLPLoading, setNLPLoading] = useState(false);
+  const handleNLPGeneration = async () => {
+    setNLPLoading(true);
+    let tablesContext = "# Given the following table/s definition\n\n";
+    for(let t = 0; t < tables.length; t += 1) {
+      const table = tables[t];
+      if (table?.columns?.length) {
+        tablesContext += `# Table ${table.name}, columns = [`;
+        for(let c = 0; c < table.columns.length; c += 1) {
+          const col = table.columns[c];
+          tablesContext += col.name;
+          tablesContext += table.columns.at(-1)?.name === col.name ? "]" : ", "
+        }
+        tablesContext += `\n\n`;
+      }
+    }
+    tablesContext += `# Create one valid SQL SELECT statement with the following constraints\n`;
+    tablesContext += `# For example: SELECT column FROM table;\n`;
+    tablesContext += `# Do NOT generate more tha one SELECT statement\n`;
+    tablesContext += `# Do NOT generate any text other than one valid SELECT statement\n`;
+    tablesContext += `# Do ONLY use SELECT\n`;
+    tablesContext += `# Respond with a SQL statement to select ${NLPQuery} from the given tables ->`;
+    const postPayload = {
+      prompt: tablesContext,
+    }
+    SupersetClient.post({
+      endpoint: "api/v1/sqllab/nlp",
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(postPayload),
+      parseMethod: 'json-bigint',
+    })
+      .then(({ json }) => {
+        setNLPResult(json.result);
+        setEditorType('sql')
+        setNLPLoading(false);
+      })
+      .catch(() => {
+        setNLPLoading(false);
+      });
+
+    console.log(tablesContext);
+  }
+  const renderNLPMenu = (
+    <Menu
+      mode="horizontal"
+      defaultSelectedKeys={[editorType]}
+      css={css`
+        margin-bottom: 20px;
+      `}
+    >
+      <Menu.Item
+        disabled={NLPLoading}
+        key="sql"
+        icon={<Icons.ConsoleSqlOutlined />}
+        onClick={({ key }) => setEditorType(key)}
+      >
+        SQL
+      </Menu.Item>
+      <Menu.Item
+        disabled={NLPLoading}
+        key="nlp"
+        icon={<Icons.ExperimentOutlined />}
+        onClick={({ key }) => setEditorType(key)}
+      >
+        Natural language
+      </Menu.Item>
+    </Menu>
+  );
+
+  const renderNLPBottomBar = (
+    tables.length > 0 ? <Button
+      type="primary"
+      size="large"
+      onClick={handleNLPGeneration}
+      css={css`
+        margin-top: 20px;
+        margin-bottom: 20px;
+        padding: 20px;
+        width: 250px;
+      `}
+    >
+      {!NLPLoading ? <Icons.ExperimentOutlined /> : <Icons.LoadingOutlined />}{' '}
+      Generate query
+    </Button> : null
+  );
+
+  const renderNLPForm =
+    tables.length > 0 ? (
+      <TextArea
+        disabled={NLPLoading}
+        rows={6}
+        onChange={e => setNLPQuery(e.target.value)}
+        placeholder="Get all fruits from the tree..."
+      />
+    ) : (
+      <Result
+        status="warning"
+        title={<small>Please select one or more tables</small>}
+        subTitle="Select one or more tables on the left pane for the AI algorithm to gain context about your query"
+      />
+    );
+
   const queryPane = () => {
     const hotkeys = getHotkeyConfig();
     const { aceEditorHeight, southPaneHeight } =
@@ -657,17 +767,22 @@ const SqlEditor = ({
         onDragEnd={onResizeEnd}
       >
         <div ref={northPaneRef} className="north-pane">
-          <AceEditorWrapper
-            autocomplete={autocompleteEnabled}
-            onBlur={setQueryEditorAndSaveSql}
-            onChange={onSqlChanged}
-            queryEditorId={queryEditor.id}
-            database={database}
-            extendedTables={tables}
-            height={`${aceEditorHeight}px`}
-            hotkeys={hotkeys}
-          />
-          {renderEditorBottomBar(hotkeys)}
+          {renderNLPMenu}
+          {editorType === 'sql' && (
+            <AceEditorWrapper
+              autocomplete={autocompleteEnabled}
+              onBlur={setQueryEditorAndSaveSql}
+              onChange={onSqlChanged}
+              queryEditorId={queryEditor.id}
+              database={database}
+              extendedTables={tables}
+              height={`${aceEditorHeight}px`}
+              hotkeys={hotkeys}
+              initialSql={NLPResult}
+            />
+          )}
+          {editorType === 'nlp' && renderNLPForm}
+          {editorType === 'sql' ? renderEditorBottomBar(hotkeys) : renderNLPBottomBar}
         </div>
         <SouthPane
           queryEditorId={queryEditor.id}
diff --git a/superset-frontend/src/components/index.ts b/superset-frontend/src/components/index.ts
index bfa341a9dd..84b10cc979 100644
--- a/superset-frontend/src/components/index.ts
+++ b/superset-frontend/src/components/index.ts
@@ -46,6 +46,7 @@ export {
   Tree,
   Typography,
   Upload,
+  Result,
 } from 'antd';
 
 /*
diff --git a/superset/sqllab/api.py b/superset/sqllab/api.py
index 283c3ab638..dd659d753c 100644
--- a/superset/sqllab/api.py
+++ b/superset/sqllab/api.py
@@ -41,6 +41,7 @@ from superset.sqllab.execution_context_convertor import ExecutionContextConverto
 from superset.sqllab.query_render import SqlQueryRenderImpl
 from superset.sqllab.schemas import (
     ExecutePayloadSchema,
+    NLPPayloadSchema,
     QueryExecutionResponseSchema,
     sql_lab_get_results_schema,
 )
@@ -55,6 +56,7 @@ from superset.superset_typing import FlaskResponse
 from superset.utils import core as utils
 from superset.views.base import json_success
 from superset.views.base_api import BaseSupersetApi, requires_json, statsd_metrics
+import openai
 
 config = app.config
 logger = logging.getLogger(__name__)
@@ -75,10 +77,69 @@ class SqlLabRestApi(BaseSupersetApi):
     }
     openapi_spec_tag = "SQL Lab"
     openapi_spec_component_schemas = (
+        NLPPayloadSchema,
         ExecutePayloadSchema,
         QueryExecutionResponseSchema,
     )
 
+    @expose("/nlp/", methods=["POST"])
+    # @protect()
+    # @statsd_metrics
+    # @requires_json
+    def execute_completion(self) -> FlaskResponse:
+        """Executes a SQL query
+        ---
+        post:
+          description: >-
+            Starts the execution of a SQL query
+          requestBody:
+            description: SQL query and params
+            required: true
+            content:
+              application/json:
+                schema:
+                  $ref: '#/components/schemas/NLPPayloadSchema'
+          responses:
+            200:
+              description: Query execution result
+              content:
+                application/json:
+                  schema:
+                    $ref: '#/components/schemas/NLPPayloadSchema'
+            202:
+              description: Query execution result, query still running
+              content:
+                application/json:
+                  schema:
+                    $ref: '#/components/schemas/NLPPayloadSchema'
+            400:
+              $ref: '#/components/responses/400'
+            401:
+              $ref: '#/components/responses/401'
+            403:
+              $ref: '#/components/responses/403'
+            404:
+              $ref: '#/components/responses/404'
+            500:
+              $ref: '#/components/responses/500'
+        """
+        try:
+            openai.api_key = "sk-REDACTED"
+            requestPrompt = request.json
+            completion = openai.Completion.create(
+                engine="code-davinci-002",
+                prompt=requestPrompt['prompt'],
+                max_tokens=100,
+                temperature=0
+            )
+            choice = {**completion.choices[0]}
+            payload = {
+                'result': choice['text'],
+            }
+            return self.response(200, **payload)
+        except Exception as e:
+            print(e);
+
     @expose("/results/")
     @protect()
     @statsd_metrics
diff --git a/superset/sqllab/schemas.py b/superset/sqllab/schemas.py
index f238fda5c9..0ca760b757 100644
--- a/superset/sqllab/schemas.py
+++ b/superset/sqllab/schemas.py
@@ -23,7 +23,8 @@ sql_lab_get_results_schema = {
     },
     "required": ["key"],
 }
-
+class NLPPayloadSchema(Schema):
+    prompt = fields.String(required=True)
 
 class ExecutePayloadSchema(Schema):
     database_id = fields.Integer(required=True)


[superset] 02/03: WIP

Posted by di...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

diegopucci pushed a commit to branch feat/sqllab-natural-language
in repository https://gitbox.apache.org/repos/asf/superset.git

commit ac7bb02b52f382c8f71b6f3b544c30f3e36eca0f
Author: geido <di...@gmail.com>
AuthorDate: Fri Feb 24 16:09:13 2023 +0100

    WIP
---
 .../src/SqlLab/components/SqlEditor/index.jsx         | 19 +++++++------------
 .../components/nativeFilters/FilterBar/Vertical.tsx   |  8 ++++----
 superset/sqllab/api.py                                |  5 +++--
 3 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx b/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
index d517b4e939..b1f8c0532c 100644
--- a/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
+++ b/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
@@ -651,7 +651,7 @@ const SqlEditor = ({
   const [NLPLoading, setNLPLoading] = useState(false);
   const handleNLPGeneration = async () => {
     setNLPLoading(true);
-    let tablesContext = "# Given the following table/s definition\n\n";
+    let tablesContext = "";
     for(let t = 0; t < tables.length; t += 1) {
       const table = tables[t];
       if (table?.columns?.length) {
@@ -664,12 +664,7 @@ const SqlEditor = ({
         tablesContext += `\n\n`;
       }
     }
-    tablesContext += `# Create one valid SQL SELECT statement with the following constraints\n`;
-    tablesContext += `# For example: SELECT column FROM table;\n`;
-    tablesContext += `# Do NOT generate more tha one SELECT statement\n`;
-    tablesContext += `# Do NOT generate any text other than one valid SELECT statement\n`;
-    tablesContext += `# Do ONLY use SELECT\n`;
-    tablesContext += `# Respond with a SQL statement to select ${NLPQuery} from the given tables ->`;
+    tablesContext += `# Create a SQLite query to: ${NLPQuery}`;
     const postPayload = {
       prompt: tablesContext,
     }
@@ -680,8 +675,8 @@ const SqlEditor = ({
       parseMethod: 'json-bigint',
     })
       .then(({ json }) => {
-        setNLPResult(json.result);
-        setEditorType('sql')
+        setEditorType('sql');
+        setNLPResult(json.result.trim());
         setNLPLoading(false);
       })
       .catch(() => {
@@ -690,7 +685,7 @@ const SqlEditor = ({
 
     console.log(tablesContext);
   }
-  const renderNLPMenu = (
+  const renderNLPMenu = useMemo(() => (
     <Menu
       mode="horizontal"
       defaultSelectedKeys={[editorType]}
@@ -715,7 +710,7 @@ const SqlEditor = ({
         Natural language
       </Menu.Item>
     </Menu>
-  );
+  ), [NLPLoading, editorType]);
 
   const renderNLPBottomBar = (
     tables.length > 0 ? <Button
@@ -740,7 +735,7 @@ const SqlEditor = ({
         disabled={NLPLoading}
         rows={6}
         onChange={e => setNLPQuery(e.target.value)}
-        placeholder="Get all fruits from the tree..."
+        placeholder="Select all names from table"
       />
     ) : (
       <Result
diff --git a/superset-frontend/src/dashboard/components/nativeFilters/FilterBar/Vertical.tsx b/superset-frontend/src/dashboard/components/nativeFilters/FilterBar/Vertical.tsx
index 2a6b717836..3b209dfcff 100644
--- a/superset-frontend/src/dashboard/components/nativeFilters/FilterBar/Vertical.tsx
+++ b/superset-frontend/src/dashboard/components/nativeFilters/FilterBar/Vertical.tsx
@@ -244,12 +244,12 @@ const VerticalFilterBar: React.FC<VerticalBarProps> = ({
                 {filterValues.length === 0 ? (
                   <FilterBarEmptyStateContainer>
                     <EmptyStateSmall
-                      title={t('No filters are currently added')}
+                      title={t('No global filters are currently added')}
                       image="filter.svg"
                       description={
                         canEdit &&
                         t(
-                          'Click the button above to add a filter to the dashboard',
+                          'Click on "+Add/Edit Filters" button to create new dashboard filters',
                         )
                       }
                     />
@@ -286,12 +286,12 @@ const VerticalFilterBar: React.FC<VerticalBarProps> = ({
               {filterValues.length === 0 ? (
                 <FilterBarEmptyStateContainer>
                   <EmptyStateSmall
-                    title={t('No filters are currently added')}
+                    title={t('No global filters are currently added')}
                     image="filter.svg"
                     description={
                       canEdit &&
                       t(
-                        'Click the button above to add a filter to the dashboard',
+                        'Click on "+Add/Edit Filters" button to create new dashboard filters',
                       )
                     }
                   />
diff --git a/superset/sqllab/api.py b/superset/sqllab/api.py
index dd659d753c..2179cec159 100644
--- a/superset/sqllab/api.py
+++ b/superset/sqllab/api.py
@@ -127,10 +127,11 @@ class SqlLabRestApi(BaseSupersetApi):
             openai.api_key = "sk-REDACTED"
             requestPrompt = request.json
             completion = openai.Completion.create(
-                engine="code-davinci-002",
+                engine="text-davinci-003",
                 prompt=requestPrompt['prompt'],
                 max_tokens=100,
-                temperature=0
+                temperature=0.1,
+                stop="END"
             )
             choice = {**completion.choices[0]}
             payload = {


[superset] 03/03: Use cosine similarity

Posted by di...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

diegopucci pushed a commit to branch feat/sqllab-natural-language
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 184c6277f62f6c0b81c34c79615fa02ad9d4e3e2
Author: geido <di...@gmail.com>
AuthorDate: Tue Apr 4 12:35:59 2023 +0200

    Use cosine similarity
---
 .../src/SqlLab/components/SqlEditor/index.jsx      | 51 ++++--------
 superset/config.py                                 |  6 ++
 superset/sqllab/api.py                             | 94 ++++++++++++++++------
 superset/sqllab/schemas.py                         |  6 +-
 superset/views/datasource/views.py                 | 71 +++++++++++++++-
 5 files changed, 163 insertions(+), 65 deletions(-)

diff --git a/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx b/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
index b1f8c0532c..823d9c1184 100644
--- a/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
+++ b/superset-frontend/src/SqlLab/components/SqlEditor/index.jsx
@@ -228,8 +228,6 @@ const SqlEditor = ({
     },
   );
 
-  console.log('tables', tables);
-
   const [height, setHeight] = useState(0);
   const [autorun, setAutorun] = useState(queryEditor.autorun);
   const [ctas, setCtas] = useState('');
@@ -651,44 +649,32 @@ const SqlEditor = ({
   const [NLPLoading, setNLPLoading] = useState(false);
   const handleNLPGeneration = async () => {
     setNLPLoading(true);
-    let tablesContext = "";
-    for(let t = 0; t < tables.length; t += 1) {
-      const table = tables[t];
-      if (table?.columns?.length) {
-        tablesContext += `# Table ${table.name}, columns = [`;
-        for(let c = 0; c < table.columns.length; c += 1) {
-          const col = table.columns[c];
-          tablesContext += col.name;
-          tablesContext += table.columns.at(-1)?.name === col.name ? "]" : ", "
-        }
-        tablesContext += `\n\n`;
-      }
-    }
-    tablesContext += `# Create a SQLite query to: ${NLPQuery}`;
     const postPayload = {
-      prompt: tablesContext,
+      to_sql: NLPQuery,
+      database_id: database.id,
+      database_backend: database.backend,
     }
     SupersetClient.post({
-      endpoint: "api/v1/sqllab/nlp",
+      endpoint: "api/v1/sqllab/nlp/tosql",
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify(postPayload),
       parseMethod: 'json-bigint',
+      body: JSON.stringify(postPayload),
     })
       .then(({ json }) => {
+        const sql = json.result.trim();
+        queryEditor.sql = sql;
         setEditorType('sql');
-        setNLPResult(json.result.trim());
+        setNLPResult(sql);
         setNLPLoading(false);
       })
       .catch(() => {
         setNLPLoading(false);
       });
-
-    console.log(tablesContext);
   }
   const renderNLPMenu = useMemo(() => (
     <Menu
       mode="horizontal"
-      defaultSelectedKeys={[editorType]}
+      selectedKeys={[editorType]}
       css={css`
         margin-bottom: 20px;
       `}
@@ -713,7 +699,7 @@ const SqlEditor = ({
   ), [NLPLoading, editorType]);
 
   const renderNLPBottomBar = (
-    tables.length > 0 ? <Button
+    <Button
       type="primary"
       size="large"
       onClick={handleNLPGeneration}
@@ -726,24 +712,17 @@ const SqlEditor = ({
     >
       {!NLPLoading ? <Icons.ExperimentOutlined /> : <Icons.LoadingOutlined />}{' '}
       Generate query
-    </Button> : null
+    </Button>
   );
 
-  const renderNLPForm =
-    tables.length > 0 ? (
-      <TextArea
+  const renderNLPForm = (
+    <TextArea
         disabled={NLPLoading}
         rows={6}
         onChange={e => setNLPQuery(e.target.value)}
         placeholder="Select all names from table"
-      />
-    ) : (
-      <Result
-        status="warning"
-        title={<small>Please select one or more tables</small>}
-        subTitle="Select one or more tables on the left pane for the AI algorithm to gain context about your query"
-      />
-    );
+     />
+  );
 
   const queryPane = () => {
     const hotkeys = getHotkeyConfig();
diff --git a/superset/config.py b/superset/config.py
index 5a64c99f77..4bfb47d835 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -1583,3 +1583,9 @@ elif importlib.util.find_spec("superset_config") and not is_test():
     except Exception:
         logger.exception("Found but failed to import local superset_config")
         raise
+
+# NLP
+
+OPENAI_API_KEY="INSERT_OPENAI_KEY_HERE"
+PINECONE_API_KEY="INSERT_PINECONE_KEY_HERE"
+PINECONE_INDEX_NAME="preset"
diff --git a/superset/sqllab/api.py b/superset/sqllab/api.py
index 2179cec159..3dceaa74be 100644
--- a/superset/sqllab/api.py
+++ b/superset/sqllab/api.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 import logging
+import openai
+import pinecone
 from typing import Any, cast, Dict, Optional
 
 import simplejson as json
@@ -41,7 +43,7 @@ from superset.sqllab.execution_context_convertor import ExecutionContextConverto
 from superset.sqllab.query_render import SqlQueryRenderImpl
 from superset.sqllab.schemas import (
     ExecutePayloadSchema,
-    NLPPayloadSchema,
+    NLPtoSQLPayloadSchema,
     QueryExecutionResponseSchema,
     sql_lab_get_results_schema,
 )
@@ -56,7 +58,6 @@ from superset.superset_typing import FlaskResponse
 from superset.utils import core as utils
 from superset.views.base import json_success
 from superset.views.base_api import BaseSupersetApi, requires_json, statsd_metrics
-import openai
 
 config = app.config
 logger = logging.getLogger(__name__)
@@ -71,47 +72,42 @@ class SqlLabRestApi(BaseSupersetApi):
     class_permission_name = "Query"
 
     execute_model_schema = ExecutePayloadSchema()
+    execute_nlp_to_sql_schema = NLPtoSQLPayloadSchema()
 
     apispec_parameter_schemas = {
         "sql_lab_get_results_schema": sql_lab_get_results_schema,
     }
     openapi_spec_tag = "SQL Lab"
     openapi_spec_component_schemas = (
-        NLPPayloadSchema,
+        NLPtoSQLPayloadSchema,
         ExecutePayloadSchema,
         QueryExecutionResponseSchema,
     )
 
-    @expose("/nlp/", methods=["POST"])
+    @expose("/nlp/tosql", methods=["POST"])
     # @protect()
     # @statsd_metrics
-    # @requires_json
+    @requires_json
     def execute_completion(self) -> FlaskResponse:
-        """Executes a SQL query
+        """Translates natural language to SQL
         ---
         post:
           description: >-
-            Starts the execution of a SQL query
+            Executes the translation to SQL
           requestBody:
             description: SQL query and params
             required: true
             content:
               application/json:
                 schema:
-                  $ref: '#/components/schemas/NLPPayloadSchema'
+                  $ref: '#/components/schemas/NLPtoSQLPayloadSchema'
           responses:
             200:
-              description: Query execution result
-              content:
-                application/json:
-                  schema:
-                    $ref: '#/components/schemas/NLPPayloadSchema'
-            202:
-              description: Query execution result, query still running
+              description: Natural language to SQL result
               content:
                 application/json:
                   schema:
-                    $ref: '#/components/schemas/NLPPayloadSchema'
+                    $ref: '#/components/schemas/NLPtoSQLPayloadSchema'
             400:
               $ref: '#/components/responses/400'
             401:
@@ -124,18 +120,64 @@ class SqlLabRestApi(BaseSupersetApi):
               $ref: '#/components/responses/500'
         """
         try:
-            openai.api_key = "sk-REDACTED"
-            requestPrompt = request.json
-            completion = openai.Completion.create(
-                engine="text-davinci-003",
-                prompt=requestPrompt['prompt'],
-                max_tokens=100,
-                temperature=0.1,
-                stop="END"
+            self.execute_nlp_to_sql_schema.load(request.json)
+        except ValidationError as error:
+            return self.response_400(message=error.messages)
+        try:
+            openai.api_key = app.config["OPENAI_API_KEY"]
+            req_body = request.json
+            to_sql = req_body.get("to_sql")
+            database_id = req_body.get("database_id")
+            database_backend = req_body.get("database_backend")
+
+            pinecone.init(
+                api_key=app.config["PINECONE_API_KEY"],
+                environment="us-east1-gcp"
+            )
+
+            # attempts to get required datasources from vectors
+            pinecone_index_name = app.config["PINECONE_INDEX_NAME"]
+            pinecone_index = pinecone.Index(pinecone_index_name)
+            prompt_to_vectors_res = openai.Embedding.create(
+                input=[to_sql], engine="text-embedding-ada-002"
+            )
+            prompt_to_vectors = prompt_to_vectors_res['data'][0]['embedding']
+            pinecone_query = pinecone_index.query(
+                prompt_to_vectors,
+                top_k=2,
+                filter={
+                    "database_id": database_id,
+                },
+                namespace="datasource",
+                include_metadata=True
+            )
+            pinecone_matches = pinecone_query.get('matches', [])
+            all_sources = ""
+            for obj in pinecone_matches:
+                all_sources += f"{obj['metadata']['original']}\n"
+
+            prompt = ""
+            prompt += f"\n{all_sources}\n"
+            prompt += f"\nCOMMAND:\n{to_sql}\n"
+            prompt += "\nINSTRUCTIONS:\n"
+            prompt += "The 'COMMAND' given above is a natural language command that you must transform into one SQL query.\n"
+            prompt += "In order to generate the SQL query properly, follow the instructions below:\n"
+            prompt += "1. Only SELECT statements are allowed\n"
+            prompt += f"2. Use the SQL dialect {database_backend}\n"
+            prompt += "3. Use all table definitions above\n"
+            prompt += "4. Respond solely with the SQL query\n"
+            prompt += "\n{{ SQL_QUERY }}\n"
+
+            chat_completion = openai.ChatCompletion.create(
+                model="gpt-3.5-turbo",
+                temperature=0,
+                messages=[
+                    {"role": "user", "content": prompt},
+                ]
             )
-            choice = {**completion.choices[0]}
+            completion = {**chat_completion.choices[0]}
             payload = {
-                'result': choice['text'],
+                'result': completion['message']['content'],
             }
             return self.response(200, **payload)
         except Exception as e:
diff --git a/superset/sqllab/schemas.py b/superset/sqllab/schemas.py
index 0ca760b757..d40bf50e38 100644
--- a/superset/sqllab/schemas.py
+++ b/superset/sqllab/schemas.py
@@ -23,8 +23,10 @@ sql_lab_get_results_schema = {
     },
     "required": ["key"],
 }
-class NLPPayloadSchema(Schema):
-    prompt = fields.String(required=True)
+class NLPtoSQLPayloadSchema(Schema):
+    to_sql = fields.String(required=True)
+    database_id = fields.Integer(required=True)
+    database_backend = fields.String(required=True)
 
 class ExecutePayloadSchema(Schema):
     database_id = fields.Integer(required=True)
diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py
index 4f158e8369..40b6f33e1e 100644
--- a/superset/views/datasource/views.py
+++ b/superset/views/datasource/views.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 import json
+import pinecone
+import openai
 from collections import Counter
 from typing import Any
 
@@ -27,7 +29,7 @@ from marshmallow import ValidationError
 from sqlalchemy.exc import NoSuchTableError
 from sqlalchemy.orm.exc import NoResultFound
 
-from superset import db, event_logger, security_manager
+from superset import app, db, event_logger, security_manager
 from superset.commands.utils import populate_owners
 from superset.connectors.sqla.models import SqlaTable
 from superset.connectors.sqla.utils import get_physical_table_metadata
@@ -125,6 +127,73 @@ class Datasource(BaseSupersetView):
             )
         orm_datasource.update_from_object(datasource_dict)
         data = orm_datasource.data
+
+        # transform the datasource info to vectors
+        datasource_table = datasource_dict.get('table_name')
+        datasource_schema = datasource_dict.get('schema')
+        datasource_columns = datasource_dict.get('columns')
+        datasource_desc = datasource_dict.get('description')
+        datasource_sel_star= datasource_dict.get('select_star')
+        database_backend = datasource_dict['database'].get('backend')
+        database_name = datasource_dict['database'].get('name')
+        stringified_columns = ""
+        for obj in datasource_columns:
+            stringified_columns += f"col_name: {obj['column_name']},"
+            stringified_columns += f"col_label: {obj['verbose_name']},"
+            stringified_columns += f"col_type: {obj['type']},"
+            stringified_columns += f"col_desc: {obj['description']}\n"
+        to_vectors = ""
+        to_vectors += f"# TABLE:\n"
+        to_vectors += f"table_name: {datasource_table}\n"
+        to_vectors += f"table_schema: {datasource_schema}\n"
+        to_vectors += f"table_desc: {datasource_desc}\n"
+
+        to_vectors += f"COLUMNS:\n"
+        to_vectors += stringified_columns
+
+        to_vectors += f"EXAMPLE:\n"
+        to_vectors += f"{datasource_sel_star}"
+
+        openai.api_key = app.config["OPENAI_API_KEY"]
+        pinecone.init(
+            api_key=app.config["PINECONE_API_KEY"],
+            environment="us-east1-gcp"
+        )
+        pinecone_index_name = app.config["PINECONE_INDEX_NAME"]
+        if pinecone_index_name not in pinecone.list_indexes():
+            # if does not exist, create index
+            pinecone.create_index(
+                name=pinecone_index_name,
+                # dimension of OpenAI embeddings
+                dimension=1536,
+                # use cosine similarity
+                metric='cosine'
+            )
+        # connect to Pinecone index
+        pinecone_index = pinecone.Index(pinecone_index_name)
+        # create embeddings with OpenAI
+        to_vectors_res = openai.Embedding.create(
+            input=[to_vectors], engine="text-embedding-ada-002"
+        )
+        embeddings = to_vectors_res['data'][0]['embedding']
+        vector_id = f"datasource-{datasource_id}"
+        # upsert vectors for this datasource
+        pinecone_index.upsert(
+            vectors=[(
+                vector_id,
+                embeddings,
+                {
+                    "original": to_vectors,
+                    "datasource_id": datasource_id,
+                    "datasource_schema": datasource_schema,
+                    "database_id": database_id,
+                    "database_name": database_name,
+                    "database_backend": database_backend
+                }
+            )],
+            namespace="datasource"
+        )
+
         db.session.commit()
 
         return self.json_response(sanitize_datasource_data(data))