Posted to commits@camel.apache.org by ac...@apache.org on 2024/04/03 08:45:50 UTC

(camel-kamelets-examples) 01/01: Added an AWS Bedrock Example for Knowledge base

This is an automated email from the ASF dual-hosted git repository.

acosentino pushed a commit to branch aws-bedrock-example
in repository https://gitbox.apache.org/repos/asf/camel-kamelets-examples.git

commit 1f447020cbe708d08065dd8e31dda7e2b6f5d991
Author: Andrea Cosentino <an...@gmail.com>
AuthorDate: Wed Apr 3 10:45:18 2024 +0200

    Added an AWS Bedrock Example for Knowledge base
    
    Signed-off-by: Andrea Cosentino <an...@gmail.com>
---
 jbang/aws-bedrock-ingestion/BedrockPrompt.java     |  49 +++++
 jbang/aws-bedrock-ingestion/Counter.java           |  14 ++
 jbang/aws-bedrock-ingestion/README.adoc            | 199 +++++++++++++++++++++
 jbang/aws-bedrock-ingestion/application.properties |   3 +
 jbang/aws-bedrock-ingestion/terraform/main.tf      | 127 +++++++++++++
 5 files changed, 392 insertions(+)

diff --git a/jbang/aws-bedrock-ingestion/BedrockPrompt.java b/jbang/aws-bedrock-ingestion/BedrockPrompt.java
new file mode 100644
index 0000000..41d4e20
--- /dev/null
+++ b/jbang/aws-bedrock-ingestion/BedrockPrompt.java
@@ -0,0 +1,49 @@
+//DEPS org.apache.camel:camel-bom:4.6.0-SNAPSHOT@pom
+//DEPS org.apache.camel:camel-aws-bedrock
+//DEPS org.apache.camel:camel-endpointdsl
+//DEPS org.apache.camel:camel-rest
+
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.model.rest.RestParamType;
+import org.apache.camel.component.aws2.bedrock.BedrockModels;
+import org.apache.camel.component.aws2.bedrock.agent.BedrockAgentConstants;
+
+public class BedrockPrompt extends RouteBuilder {
+
+    @Override
+    public void configure() throws Exception {
+
+        // This local counter tallies PutObject events; a separate instance is bound to the
+        // registry as "counter" (see Counter.java) and drives the threshold check below
+        Counter counter = new Counter();
+
+        // REST endpoints: one to submit the prompt, one to check an ingestion job status
+        rest("/bedrock")
+            .get("/prompt").to("direct:invoke-bedrock")
+            .get("/ingestion_status/{id}")
+                .param().name("id").type(RestParamType.path).description("The ingestion job id").dataType("string").endParam()
+                .to("direct:invoke-ingestion-status");
+
+        // Submit the configured prompt against the knowledge base via retrieveAndGenerate
+        from("direct:invoke-bedrock")
+            .setBody(constant("{{prompt}}"))
+            .to("aws-bedrock-agent-runtime:label?useDefaultCredentialsProvider=true&region=us-east-1&operation=retrieveAndGenerate&knowledgeBaseId={{knowledgeBaseId}}&modelId="
+                    + BedrockModels.ANTROPHIC_CLAUDE_INSTANT_V1.model);
+
+        // Look up the status of an ingestion job by its id
+        from("direct:invoke-ingestion-status")
+            .toD("aws-bedrock-agent:label?useDefaultCredentialsProvider=true&region=us-east-1&operation=getIngestionJob&knowledgeBaseId={{knowledgeBaseId}}&dataSourceId={{dataSourceId}}&ingestionJobId=${header.id}")
+            .setBody(simple("${body.status.name}"));
+
+        // Consume the S3 events that EventBridge forwards to the SQS queue
+        from("aws2-sqs:arn:aws:sqs:us-east-1:780410022472:sqs-bedrock-test-123?deleteAfterRead=true&useDefaultCredentialsProvider=true")
+           .unmarshal().json()
+           .setProperty("s3-event-name", jsonpath("$.detail.reason"))
+           .choice()
+             .when(simple("${exchangeProperty.s3-event-name} == 'PutObject'"))
+             .bean(counter, "getCount()")
+           .end()
+           .choice()
+             // the registry "counter" bean increments on every message; the threshold
+             // of 11 matches the events produced by uploading the sample dataset
+             .when(simple("${bean:counter?method=getCount} == 11"))
+                  .log("Detected file upload in AWS S3. Starting ingestion process to AWS Bedrock Knowledge Base.")
+                  .setHeader(BedrockAgentConstants.KNOWLEDGE_BASE_ID, constant("{{knowledgeBaseId}}"))
+                  .setHeader(BedrockAgentConstants.DATASOURCE_ID, constant("{{dataSourceId}}"))
+                  .to("aws-bedrock-agent:label?useDefaultCredentialsProvider=true&region=us-east-1&operation=startIngestionJob")
+                  .log("The Ingestion Job Id is ${body}")
+             .end();
+    }
+}
diff --git a/jbang/aws-bedrock-ingestion/Counter.java b/jbang/aws-bedrock-ingestion/Counter.java
new file mode 100644
index 0000000..153c330
--- /dev/null
+++ b/jbang/aws-bedrock-ingestion/Counter.java
@@ -0,0 +1,14 @@
+import org.apache.camel.BindToRegistry;
+
+// Simple stateful counter bound to the Camel registry under the name "counter".
+// Note that getCount() increments the count before returning it.
+@BindToRegistry("counter")
+public class Counter {
+
+    private int count;
+
+    public int getCount() {
+        return ++count;
+    }
+
+    public void setCount(int count) {
+        this.count = count;
+    }
+}
diff --git a/jbang/aws-bedrock-ingestion/README.adoc b/jbang/aws-bedrock-ingestion/README.adoc
new file mode 100644
index 0000000..7ef329a
--- /dev/null
+++ b/jbang/aws-bedrock-ingestion/README.adoc
@@ -0,0 +1,199 @@
+== AWS Bedrock Knowledge Base example
+
+In this sample you'll use the AWS Bedrock component.
+
+Through the use of the EventBridge and SQS services, you'll be able to consume events from a specific bucket and start an ingestion job on your knowledge base.
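+
+The overall flow implemented in `BedrockPrompt.java` is:
+
+----
+S3 upload (PutObject) -> EventBridge rule -> SQS queue -> Camel route
+    -> file counter -> Bedrock Agent startIngestionJob
+----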
+
+=== Install JBang
+
+First install JBang according to https://www.jbang.dev
+
+Once JBang is installed, you should be able to run the following from a shell:
+
+[source,sh]
+----
+$ jbang --version
+----
+
+This will output the version of JBang.
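+
+For example (the exact version will differ on your machine):
+
+[source,sh]
+----
+$ jbang --version
+0.115.0
+----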
+
+To run this example, install the Camel JBang application via:
+
+[source,sh]
+----
+$ jbang app install camel@apache/camel
+----
+
+This allows you to run Camel JBang with the `camel` command, as shown below.
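+
+You can verify the installation with the `version` subcommand (the output varies with the installed version):
+
+[source,sh]
+----
+$ camel version
+----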
+
+=== Setup the AWS S3 bucket, SQS Queue and EventBridge Rule through Terraform
+
+If you are in a hurry, you can also try this example by running the Terraform configuration provided in the `terraform` folder.
+
+[source,sh]
+----
+cd terraform/
+----
+
+and then run
+
+[source,sh]
+----
+terraform init
+----
+
+At this point you should be able to apply the configuration:
+
+[source,sh]
+----
+terraform apply -var="s3_bucket_name=s3-bedrock-test-123" -var="sqs_queue_name=sqs-bedrock-test-123"
+----
+
+You can specify whatever bucket name and SQS queue name you want.
+
+Once the apply completes, the AWS resources will be provisioned in your account and you can go ahead with the example.
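+
+Terraform then prints the outputs defined in `main.tf`. They will look roughly like this (the rule name is generated, and the ARN depends on your account id):
+
+[source,sh]
+----
+Outputs:
+
+EventBridge-Rule-Name = "<generated_rule_name>"
+S3-Bucket = "s3-bedrock-test-123"
+SQS-Queue-Name = "sqs-bedrock-test-123"
+SQS-Queue-ARN = "arn:aws:sqs:us-east-1:<account_id>:sqs-bedrock-test-123"
+----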
+
+Now you should be able to create a knowledge base in your AWS Bedrock console, pointing it at the newly created S3 bucket.
+
+Don't forget to set the correct SQS queue ARN in `BedrockPrompt.java`, and add the correct knowledge base ID and data source ID for AWS Bedrock in `application.properties`.
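+
+For reference, the committed `application.properties` ships with placeholders that you need to replace with your own values:
+
+[source,properties]
+----
+prompt=What is the average price of natural gas between 2010 and 2011?
+knowledgeBaseId=<knowledge_base_id>
+dataSourceId=<data_source_id>
+----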
+
+=== How to run
+
+Then you can run this example using:
+
+[source,sh]
+----
+$ jbang -Dcamel.jbang.version=4.6.0-SNAPSHOT camel@apache/camel run --properties=application.properties Counter.java BedrockPrompt.java
+----
+
+Or, even shorter:
+
+[source,sh]
+----
+$ jbang -Dcamel.jbang.version=4.6.0-SNAPSHOT camel@apache/camel run --properties=application.properties *
+----
+
+Once started, you should see the following log:
+
+[source,sh]
+----
+2024-04-03 10:01:19.794  INFO 23749 --- [           main] el.impl.engine.AbstractCamelContext : Apache Camel 4.6.0-SNAPSHOT (Counter) is starting
+2024-04-03 10:01:19.984  INFO 23749 --- [           main] .core.spi.resolver.ResolverProvider : Using the default address resolver as the dns resolver could not be loaded
+2024-04-03 10:01:20.110  INFO 23749 --- [ntloop-thread-0] .http.vertx.VertxPlatformHttpServer : Vert.x HttpServer started on 0.0.0.0:8080
+2024-04-03 10:01:20.231  INFO 23749 --- [           main] g.apache.camel.main.BaseMainSupport : Property-placeholders summary
+2024-04-03 10:01:20.231  INFO 23749 --- [           main] g.apache.camel.main.BaseMainSupport :     [application.properties]       knowledgeBaseId=xxxx
+2024-04-03 10:01:20.231  INFO 23749 --- [           main] g.apache.camel.main.BaseMainSupport :     [application.properties]       dataSourceId=xxxx
+2024-04-03 10:01:20.232  INFO 23749 --- [           main] g.apache.camel.main.BaseMainSupport :     [application.properties]       prompt=What is the average price of natural gas between 2010 and 2011?
+2024-04-03 10:01:20.262  INFO 23749 --- [           main] el.impl.engine.AbstractCamelContext : Routes startup (total:3 rest-dsl:2)
+2024-04-03 10:01:20.263  INFO 23749 --- [           main] el.impl.engine.AbstractCamelContext :     Started route1 (aws2-sqs://arn:aws:sqs:us-east-1:780410022472:sqs-bedrock-test-123)
+2024-04-03 10:01:20.263  INFO 23749 --- [           main] el.impl.engine.AbstractCamelContext :     Started route2 (rest://get:/bedrock:/prompt)
+2024-04-03 10:01:20.263  INFO 23749 --- [           main] el.impl.engine.AbstractCamelContext :     Started route3 (rest://get:/bedrock:/ingestion_status/%7Bid%7D)
+2024-04-03 10:01:20.263  INFO 23749 --- [           main] el.impl.engine.AbstractCamelContext : Apache Camel 4.6.0-SNAPSHOT (Counter) started in 469ms (build:0ms init:0ms start:469ms)
+2024-04-03 10:01:20.266  INFO 23749 --- [           main] t.platform.http.main.MainHttpServer : HTTP endpoints summary
+2024-04-03 10:01:20.268  INFO 23749 --- [           main] t.platform.http.main.MainHttpServer :     http://0.0.0.0:8080/bedrock/ingestion_status/{id}    (GET)    
+2024-04-03 10:01:20.268  INFO 23749 --- [           main] t.platform.http.main.MainHttpServer :     http://0.0.0.0:8080/bedrock/prompt                   (GET) 
+----
+
+=== Developer Web Console
+
+You can enable the developer console via the `--console` flag, as shown:
+
+[source,sh]
+----
+$ jbang -Dcamel.jbang.version=4.6.0-SNAPSHOT camel@apache/camel run --properties=application.properties Counter.java BedrockPrompt.java --console
+----
+
+Then you can browse http://localhost:8080/q/dev to introspect the running Camel application.
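+
+You can also query it from the command line (the console exposes several sub-endpoints; this shows the top-level view):
+
+[source,sh]
+----
+$ curl -s http://localhost:8080/q/dev
+----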
+
+=== Invoke the model
+
+You can now submit the prompt through the prompt endpoint:
+
+[source,sh]
+----
+$ curl http://0.0.0.0:8080/bedrock/prompt/
+----
+
+Since no data has been ingested into the knowledge base yet, the answer will be:
+
+[source,sh]
+----
+Sorry, I am unable to assist you with this request.
+----
+
+=== Get the training data set and use it
+
+Download the required data set from the following link: https://www.kaggle.com/datasets/unibahmad/natural-gas-prices/data
+
+You'll download a file named `archive.zip`.
+
+Unzip it somewhere in your file system and then run the following command:
+
+[source,sh]
+----
+$ aws s3 cp <path_to_training_data>/ s3://<s3-bucket-name> --recursive
+----
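+
+You can verify that the files landed in the bucket with:
+
+[source,sh]
+----
+$ aws s3 ls s3://<s3-bucket-name>
+----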
+
+=== Watch the ingestion start
+
+Once the PutObject events have been received and all the files uploaded, you should see the following in the integration log:
+
+[source,sh]
+----
+2024-04-03 10:10:11.851  INFO 24832 --- [edrock-test-123] BedrockPrompt.java:42               : Detected file upload in AWS S3. Starting ingestion process to AWS Bedrock Knowledge Base.
+2024-04-03 10:10:13.338  INFO 24832 --- [edrock-test-123] BedrockPrompt.java:46               : The Ingestion Job Id is QQ9OGSZOPD
+----
+
+Now check the ingestion job status:
+
+[source,sh]
+----
+curl http://0.0.0.0:8080/bedrock/ingestion_status/QQ9OGSZOPD
+IN_PROGRESS
+.
+.
+.
+.
+curl http://0.0.0.0:8080/bedrock/ingestion_status/QQ9OGSZOPD
+COMPLETE
+----
+
+Once the job is complete, you can submit the prompt again, and this time you'll get the answer:
+
+[source,sh]
+----
+curl http://0.0.0.0:8080/bedrock/prompt/
+The average price of natural gas between 2010 and 2011 was $3.93 per unit.
+----
+
+=== Cleanup AWS S3 bucket, SQS Queue and EventBridge Rule through Terraform
+
+You'll need to clean up everything from the AWS console or CLI.
+
+If you used Terraform, it's enough to run `terraform destroy`:
+
+[source,sh]
+----
+cd terraform/
+----
+
+At this point you should be able to run the destroy:
+
+[source,sh]
+----
+terraform destroy -var="s3_bucket_name=s3-bedrock-test-123" -var="sqs_queue_name=sqs-bedrock-test-123"
+----
+
+You'll need to specify the same variable values you used for `terraform apply`.
+
+In your AWS Bedrock console, delete the knowledge base, and in OpenSearch Serverless delete the vector search collection.
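+
+If you prefer the CLI, recent AWS CLI v2 releases provide commands for this as well (the ids below are placeholders for your own):
+
+[source,sh]
+----
+$ aws bedrock-agent delete-knowledge-base --knowledge-base-id <knowledge_base_id>
+$ aws opensearchserverless delete-collection --id <collection_id>
+----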
+
+=== Help and contributions
+
+If you hit any problem using Camel or have some feedback, then please
+https://camel.apache.org/community/support/[let us know].
+
+We also love contributors, so
+https://camel.apache.org/community/contributing/[get involved] :-)
+
+The Camel riders!
diff --git a/jbang/aws-bedrock-ingestion/application.properties b/jbang/aws-bedrock-ingestion/application.properties
new file mode 100644
index 0000000..c5f810d
--- /dev/null
+++ b/jbang/aws-bedrock-ingestion/application.properties
@@ -0,0 +1,3 @@
+prompt=What is the average price of natural gas between 2010 and 2011?
+knowledgeBaseId=<knowledge_base_id>
+dataSourceId=<data_source_id>
diff --git a/jbang/aws-bedrock-ingestion/terraform/main.tf b/jbang/aws-bedrock-ingestion/terraform/main.tf
new file mode 100644
index 0000000..4a5aaa8
--- /dev/null
+++ b/jbang/aws-bedrock-ingestion/terraform/main.tf
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 3.27"
+    }
+  }
+
+  required_version = ">= 0.14.9"
+}
+
+provider "aws" {
+  profile = "default"
+  region  = "us-east-1"
+}
+
+variable "s3_bucket_name" {
+  type = string
+}
+
+variable "sqs_queue_name" {
+  type = string
+}
+
+
+data "aws_caller_identity" "current" {}
+
+# Create a new S3 bucket
+resource "aws_s3_bucket" "MyS3Bucket" {
+  bucket = var.s3_bucket_name
+  force_destroy = true
+}
+
+# Send notifications to EventBridge for all events in the bucket
+resource "aws_s3_bucket_notification" "MyS3BucketNotification" {
+  bucket      = aws_s3_bucket.MyS3Bucket.id
+  eventbridge = true
+}
+
+# Create an EventBridge rule
+resource "aws_cloudwatch_event_rule" "MyEventRule" {
+  description   = "Object create events on bucket s3://${aws_s3_bucket.MyS3Bucket.id}"
+  event_pattern = <<EOF
+{
+  "source": [
+    "aws.s3"
+  ],
+  "detail": {
+    "bucket": {
+      "name": ["${aws_s3_bucket.MyS3Bucket.id}"]
+    }
+  }
+}
+EOF
+}
+
+# Set the SQS queue as a target of the EventBridge rule
+resource "aws_cloudwatch_event_target" "MyEventRuleTarget" {
+  rule      = aws_cloudwatch_event_rule.MyEventRule.name
+  arn       = aws_sqs_queue.sqs-queue.arn
+}
+
+# Create a new SQS queue
+resource "aws_sqs_queue" "sqs-queue" {
+  name = var.sqs_queue_name
+}
+
+# Allow EventBridge to publish to the SQS queue
+resource "aws_sqs_queue_policy" "MySQSQueuePolicy" {
+  queue_url = aws_sqs_queue.sqs-queue.id
+  policy = <<POLICY
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "AWSEventsPermission",
+      "Effect": "Allow",
+      "Principal": {
+        "Service": "events.amazonaws.com"
+      },
+      "Action": "sqs:SendMessage",
+      "Resource": "${aws_sqs_queue.sqs-queue.arn}",
+      "Condition": {
+        "ArnEquals": {
+          "aws:SourceArn": "${aws_cloudwatch_event_rule.MyEventRule.arn}"
+        }
+      }
+    }
+  ]
+}
+POLICY
+}
+
+# Display the EventBridge rule, S3 bucket and SQS queue
+output "EventBridge-Rule-Name" {
+  value       = aws_cloudwatch_event_rule.MyEventRule.name
+  description = "The EventBridge Rule Name"
+}
+output "S3-Bucket" {
+  value       = aws_s3_bucket.MyS3Bucket.id
+  description = "The S3 Bucket"
+}
+output "SQS-Queue-Name" {
+  value       = aws_sqs_queue.sqs-queue.name
+  description = "The SQS Queue Name"
+}
+output "SQS-Queue-ARN" {
+  value       = aws_sqs_queue.sqs-queue.arn
+  description = "The SQS Queue Arn"
+}