You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jspwiki.apache.org by ju...@apache.org on 2021/03/09 21:15:02 UTC
[jspwiki] 01/07: Add AWS Kendra as a Search Provider.
This is an automated email from the ASF dual-hosted git repository.
juanpablo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/jspwiki.git
commit 612befbfc1f3b81f867e86a64314a14a7f47a9ba
Author: rostkadat <ro...@gmail.com>
AuthorDate: Tue Feb 16 16:38:41 2021 +0100
Add AWS Kendra as a Search Provider.
---
jspwiki-kendra-searchprovider/.gitignore | 2 +
jspwiki-kendra-searchprovider/Dockerfile | 88 ++++
jspwiki-kendra-searchprovider/README.md | 40 ++
.../cloudformation/index-and-datasource.yaml | 123 +++++
.../docker-files/jspwiki-custom.properties | 3 +
.../docs/images/JSPWiki_Search.png | Bin 0 -> 61411 bytes
jspwiki-kendra-searchprovider/pom.xml | 104 ++++
.../wiki/search/kendra/KendraSearchProvider.java | 559 +++++++++++++++++++++
.../apache/wiki/search/kendra/content_types.json | 18 +
.../search/kendra/KendraSearchProviderTest.java | 179 +++++++
.../src/test/resources/aaa-diagram.pdf | Bin 0 -> 37465 bytes
.../src/test/resources/favicon.png | Bin 0 -> 631 bytes
.../src/test/resources/jspwiki-custom.properties | 93 ++++
.../src/test/resources/log4j.properties | 9 +
jspwiki-war/pom.xml | 6 +
pom.xml | 1 +
16 files changed, 1225 insertions(+)
diff --git a/jspwiki-kendra-searchprovider/.gitignore b/jspwiki-kendra-searchprovider/.gitignore
new file mode 100644
index 0000000..51e923e
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/.gitignore
@@ -0,0 +1,2 @@
+/.aws-sam/
+/samconfig.toml
diff --git a/jspwiki-kendra-searchprovider/Dockerfile b/jspwiki-kendra-searchprovider/Dockerfile
new file mode 100644
index 0000000..4627e2a
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/Dockerfile
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#FROM maven:3.6-jdk-8 as package
+#WORKDIR /tmp
+#COPY . .
+#RUN set -x \
+## fastest, minimum build
+# && mvn clean package -pl jspwiki-war,jspwiki-kendra-searchprovider,jspwiki-wikipages/en -am -DskipTests
+
+FROM tomcat:9.0
+
+#COPY --from=package /tmp/jspwiki-war/target/JSPWiki.war /tmp
+#COPY --from=package /tmp/jspwiki-wikipages/en/target/jspwiki-wikipages-en-*.zip /tmp
+#COPY --from=package /tmp/jspwiki-kendra-searchprovider/target/jspwiki-kendra-searchprovider-*.jar /tmp
+COPY jspwiki-war/target/JSPWiki.war /tmp
+COPY jspwiki-wikipages/en/target/jspwiki-wikipages-en-*.zip /tmp
+#COPY jspwiki-kendra-searchprovider/target/jspwiki-kendra-searchprovider-*.jar /tmp
+
+COPY docker-files/log4j.properties /tmp
+COPY docker-files/tomcat-users.xml $CATALINA_HOME/conf/tomcat-users.xml
+COPY jspwiki-kendra-searchprovider/docker-files/jspwiki-custom.properties /tmp
+#
+# set default environment entries to configure jspwiki
+# Runtime defaults, declared in key=value form (the space-separated
+# "ENV key value" form is deprecated). Values are unchanged.
+ENV CATALINA_OPTS="-Djava.security.egd=file:/dev/./urandom"
+ENV LANG=en_US.UTF-8
+ENV jspwiki_basicAttachmentProvider_storageDir=/var/jspwiki/pages
+ENV jspwiki_fileSystemProvider_pageDir=/var/jspwiki/pages
+ENV jspwiki_jspwiki_frontPage=Main
+ENV jspwiki_pageProvider=VersioningFileProvider
+ENV jspwiki_use_external_logconfig=true
+ENV jspwiki_workDir=/var/jspwiki/work
+ENV jspwiki_xmlUserDatabaseFile=/var/jspwiki/etc/userdatabase.xml
+ENV jspwiki_xmlGroupDatabaseFile=/var/jspwiki/etc/groupdatabase.xml
+
+# unzip is needed below to unpack the war and the wiki pages.
+# The package index is refreshed in the same layer as the install (so a stale or
+# empty index can not break the build) and removed afterwards to keep the layer small.
+RUN set -x \
+ && export DEBIAN_FRONTEND=noninteractive \
+ && apt-get update \
+ && apt-get install --no-install-recommends --quiet --yes unzip \
+ && rm -rf /var/lib/apt/lists/*
+
+#
+# install jspwiki
+RUN set -x \
+ && mkdir /var/jspwiki \
+# remove default tomcat applications, we dont need them to run jspwiki
+ && cd $CATALINA_HOME/webapps \
+ && rm -rf examples host-manager manager docs ROOT \
+# remove other stuff we don't need
+ && rm -rf /usr/local/tomcat/bin/*.bat \
+# create subdirectories where all jspwiki stuff will live
+ && cd /var/jspwiki \
+ && mkdir pages logs etc work \
+# deploy jspwiki
+ && mkdir $CATALINA_HOME/webapps/ROOT \
+ && unzip -q -d $CATALINA_HOME/webapps/ROOT /tmp/JSPWiki.war \
+ && rm /tmp/JSPWiki.war \
+# deploy wiki pages
+ && cd /tmp/ \
+ && unzip -q jspwiki-wikipages-en-*.zip \
+ && mv jspwiki-wikipages-en-*/* /var/jspwiki/pages/ \
+ && rm -rf jspwiki-wikipages-en-* \
+# move the userdatabase.xml and groupdatabase.xml to /var/jspwiki/etc
+ && cd $CATALINA_HOME/webapps/ROOT/WEB-INF \
+ && mv userdatabase.xml groupdatabase.xml /var/jspwiki/etc \
+# arrange proper logging (jspwiki.use.external.logconfig = true needs to be set)
+ && mv /tmp/log4j.properties $CATALINA_HOME/lib/log4j.properties \
+# Copy Kendra Search Provider configuration
+ && cp /tmp/jspwiki-custom.properties $CATALINA_HOME/webapps/ROOT/WEB-INF/classes
+
+# make port visible in metadata
+EXPOSE 8080
+
+#
+# by default we start the Tomcat container when the docker container is started.
+CMD ["/usr/local/tomcat/bin/catalina.sh", "run", ">/usr/local/tomcat/logs/catalina.out"]
diff --git a/jspwiki-kendra-searchprovider/README.md b/jspwiki-kendra-searchprovider/README.md
new file mode 100644
index 0000000..f483cbe
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/README.md
@@ -0,0 +1,40 @@
+# JSPWiki Kendra Search provider
+
+## What is AWS Kendra
+
+ Amazon Kendra is an intelligent search service powered by machine learning.
+
+## How to use Kendra with JSPWiki
+
+1. AWS Account
+
+You will need an AWS account. If you do not have one already, see [create-account](https://aws.amazon.com/resources/create-account/)
+
+2. Create the Kendra Index and DataSource
+
+You can use the [index-and-datasource](cloudformation/index-and-datasource.yaml) CloudFormation stack to create the Kendra Index and DataSource.
+This requires that you have either the [AWS Cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) or the [SAM Cli](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-sam-cli-install.html) installed.
+Once installed you can simply run the following command to create your Kendra Index and DataSource:
+
+```shell
+sam build --template cloudformation/index-and-datasource.yaml
+sam deploy --guided
+```
+
+*Note*: it is important that the name for your Index and DataSource match the names setup in your JSPWiki Installation.
+Namely make sure the properties `jspwiki.kendra.indexName` and `jspwiki.kendra.dataSourceName` are properly configured.
+
+3. Testing in Docker
+
+You can test your search index by running docker (*Note* you'll be using your AWS credentials)
+
+```shell
+mvn package -pl jspwiki-war,jspwiki-wikipages/en -am -DskipTests
+docker build -t jspwiki-kendra-searchprovider:latest -f jspwiki-kendra-searchprovider/Dockerfile .
+docker run -p 8080:8080 -v ~/.aws:/root/.aws jspwiki-kendra-searchprovider:latest
+```
+
+Then you can create a Page, upload some PDF, and search for some content in the PDF document
+
+![JSPWiki Search Results](docs/images/JSPWiki_Search.png)
+
diff --git a/jspwiki-kendra-searchprovider/cloudformation/index-and-datasource.yaml b/jspwiki-kendra-searchprovider/cloudformation/index-and-datasource.yaml
new file mode 100644
index 0000000..9bc0f02
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/cloudformation/index-and-datasource.yaml
@@ -0,0 +1,123 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Transform: AWS::Serverless-2016-10-31
+Description: >
+ index-and-datasource. Create the Kendra Index and Datasource.
+Metadata:
+
+ AWS::CloudFormation::Interface:
+ ParameterGroups:
+ - Label:
+ default: Parameters related to the Kendra Index and DataSource
+ Parameters:
+ - IndexName
+ - DataSourceName
+ - KendraEdition
+ ParameterLabels:
+ IndexName:
+ default: "The Kendra Index's Name"
+ DataSourceName:
+ default: "The Kendra DataSource's Name"
+ KendraEdition:
+ default: "The Kendra Edition"
+
+Parameters:
+
+ IndexName:
+ Description: "The name of the Kendra Index to create"
+ Type: String
+ Default: "JSPWikiIndex"
+
+ DataSourceName:
+ Description: "The name of the Kendra DataSource to create"
+ Type: String
+ Default: "JSPWikiDataSource"
+
+ KendraEdition:
+ Description: "The edition of the Kendra Index to create"
+ Type: String
+ AllowedValues: [ "DEVELOPER_EDITION", "ENTERPRISE_EDITION" ]
+ Default: "DEVELOPER_EDITION"
+
+Resources:
+
+ KendraServiceRole:
+ Type: AWS::IAM::Role
+ Properties:
+ AssumeRolePolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Principal:
+ Service:
+ - kendra.amazonaws.com
+ Action:
+ - sts:AssumeRole
+ Path: "/"
+ Policies:
+ - PolicyName: AllowKendra
+ PolicyDocument:
+ Version: 2012-10-17
+ Statement:
+ - Effect: Allow
+ Action:
+ - cloudwatch:PutMetricData
+ Resource: "*"
+ Condition:
+ StringEquals:
+ cloudwatch:namespace: AWS/Kendra
+ - Effect: Allow
+ Action:
+ - logs:DescribeLogGroups
+ Resource: "*"
+ - Effect: Allow
+ Action:
+ - logs:CreateLogGroup
+ Resource:
+ - Fn::Sub: arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/kendra/*
+ - Effect: Allow
+ Action:
+ - logs:DescribeLogStreams
+ - logs:CreateLogStream
+ - logs:PutLogEvents
+ Resource:
+ - Fn::Sub: arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/kendra/*
+
+ Index:
+ Type: AWS::Kendra::Index
+ Properties:
+ Description: "Index for JSPWiki KendraSearchProvider"
+ Edition:
+ Ref: KendraEdition
+ Name:
+ Ref: IndexName
+ RoleArn:
+ Fn::GetAtt: KendraServiceRole.Arn
+ Tags:
+ - Key: Origin
+ Value: JSPWIKI
+
+ DataSource:
+ Type: AWS::Kendra::DataSource
+ Properties:
+ Description: "DataSource for JSPWiki KendraSearchProvider"
+ IndexId:
+ Ref: Index
+ Name:
+ Ref: DataSourceName
+ Tags:
+ - Key: Origin
+ Value: JSPWIKI
+ Type: CUSTOM
+
+Outputs:
+
+ Index:
+ Description: The Index
+ Value:
+ Ref: Index
+
+ DataSource:
+ Description: The DataSource
+ Value:
+ Ref: DataSource
+
diff --git a/jspwiki-kendra-searchprovider/docker-files/jspwiki-custom.properties b/jspwiki-kendra-searchprovider/docker-files/jspwiki-custom.properties
new file mode 100644
index 0000000..b862a79
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/docker-files/jspwiki-custom.properties
@@ -0,0 +1,3 @@
+jspwiki.searchProvider = org.apache.wiki.search.kendra.KendraSearchProvider
+jspwiki.kendra.indexName = JSPWikiIndex
+jspwiki.kendra.dataSourceName = JSPWikiDataSource
diff --git a/jspwiki-kendra-searchprovider/docs/images/JSPWiki_Search.png b/jspwiki-kendra-searchprovider/docs/images/JSPWiki_Search.png
new file mode 100644
index 0000000..cfa5525
Binary files /dev/null and b/jspwiki-kendra-searchprovider/docs/images/JSPWiki_Search.png differ
diff --git a/jspwiki-kendra-searchprovider/pom.xml b/jspwiki-kendra-searchprovider/pom.xml
new file mode 100644
index 0000000..f6d295a
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/pom.xml
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <parent>
+ <groupId>org.apache.jspwiki</groupId>
+ <artifactId>jspwiki-builder</artifactId>
+ <version>2.11.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>jspwiki-kendra-searchprovider</artifactId>
+ <modelVersion>4.0.0</modelVersion>
+ <name>Apache JSPWiki AWS Kendra Search provider</name>
+ <description>Apache JSPWiki Kendra Search provider</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>jspwiki-main</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>jspwiki-main</artifactId>
+ <type>test-jar</type>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-params</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-junit-jupiter</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.awaitility</groupId>
+ <artifactId>awaitility</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>javax.servlet-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>net.sourceforge.stripes</groupId>
+ <artifactId>stripes</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-kendra</artifactId>
+ <version>1.11.954</version>
+ </dependency>
+
+ </dependencies>
+</project>
\ No newline at end of file
diff --git a/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java b/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java
new file mode 100644
index 0000000..e032e32
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java
@@ -0,0 +1,559 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+ */
+package org.apache.wiki.search.kendra;
+
+import static java.lang.String.format;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.lang.reflect.Type;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.wiki.InternalWikiException;
+import org.apache.wiki.WatchDog;
+import org.apache.wiki.WikiBackgroundThread;
+import org.apache.wiki.api.core.Attachment;
+import org.apache.wiki.api.core.Context;
+import org.apache.wiki.api.core.Engine;
+import org.apache.wiki.api.core.Page;
+import org.apache.wiki.api.exceptions.NoRequiredPropertyException;
+import org.apache.wiki.api.exceptions.ProviderException;
+import org.apache.wiki.api.providers.PageProvider;
+import org.apache.wiki.api.search.SearchResult;
+import org.apache.wiki.api.spi.Wiki;
+import org.apache.wiki.attachment.AttachmentManager;
+import org.apache.wiki.auth.AuthorizationManager;
+import org.apache.wiki.auth.permissions.PagePermission;
+import org.apache.wiki.pages.PageManager;
+import org.apache.wiki.search.SearchProvider;
+import org.apache.wiki.util.TextUtil;
+
+import com.amazonaws.services.kendra.AWSkendra;
+import com.amazonaws.services.kendra.AWSkendraClientBuilder;
+import com.amazonaws.services.kendra.model.BatchDeleteDocumentRequest;
+import com.amazonaws.services.kendra.model.BatchDeleteDocumentResult;
+import com.amazonaws.services.kendra.model.BatchPutDocumentRequest;
+import com.amazonaws.services.kendra.model.BatchPutDocumentResponseFailedDocument;
+import com.amazonaws.services.kendra.model.BatchPutDocumentResult;
+import com.amazonaws.services.kendra.model.ContentType;
+import com.amazonaws.services.kendra.model.DataSourceSummary;
+import com.amazonaws.services.kendra.model.Document;
+import com.amazonaws.services.kendra.model.DocumentAttribute;
+import com.amazonaws.services.kendra.model.DocumentAttributeValue;
+import com.amazonaws.services.kendra.model.IndexConfigurationSummary;
+import com.amazonaws.services.kendra.model.ListDataSourcesRequest;
+import com.amazonaws.services.kendra.model.ListDataSourcesResult;
+import com.amazonaws.services.kendra.model.ListIndicesRequest;
+import com.amazonaws.services.kendra.model.ListIndicesResult;
+import com.amazonaws.services.kendra.model.QueryRequest;
+import com.amazonaws.services.kendra.model.QueryResultItem;
+import com.amazonaws.services.kendra.model.QueryResultType;
+import com.amazonaws.services.kendra.model.ScoreConfidence;
+import com.amazonaws.services.kendra.model.StartDataSourceSyncJobRequest;
+import com.amazonaws.services.kendra.model.StartDataSourceSyncJobResult;
+import com.amazonaws.services.kendra.model.StopDataSourceSyncJobRequest;
+import com.amazonaws.services.kendra.model.ThrottlingException;
+import com.amazonaws.util.IOUtils;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.reflect.TypeToken;
+
+/**
+ * Search provider that implements {@link SearchProvider} using AWS Kendra for
+ * indexing. Note that we are using a Custom DataSource which limits the
+ * attributes that can be uploaded / searched for each page (as per
+ * https://docs.aws.amazon.com/kendra/latest/dg/custom-attributes.html). This
+ * could be overcome by using an S3 bucket where any custom attributes can be
+ * added.
+ *
+ * @since 2.11.0
+ */
+public class KendraSearchProvider implements SearchProvider {
+
+ private static final Logger LOG = Logger.getLogger(KendraSearchProvider.class);
+ private Engine engine;
+ private Properties properties;
+ private Map<String, Object> contentTypes;
+ private AWSkendra kendra;
+ private String indexName;
+ private String indexId;
+ private String dataSourceName;
+ private String dataSourceId;
+
+ private List<Page> updates = Collections.synchronizedList(new ArrayList<>());
+
+ private static final String PROP_KENDRA_INDEX_NAME = "jspwiki.kendra.indexName";
+ private static final String PROP_KENDRA_DATA_SOURCE_NAME = "jspwiki.kendra.dataSourceName";
+ private static final String PROP_KENDRA_INDEXDELAY = "jspwiki.kendra.indexdelay";
+ private static final String PROP_KENDRA_INITIALDELAY = "jspwiki.kendra.initialdelay";
+
+ public KendraSearchProvider() {
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize(Engine engine, Properties properties) throws NoRequiredPropertyException, IOException {
+ this.engine = engine;
+ this.properties = properties;
+ this.contentTypes = getContentTypes();
+
+ setKendra(buildClient());
+
+ this.indexName = TextUtil.getRequiredProperty(this.properties, PROP_KENDRA_INDEX_NAME);
+ this.dataSourceName = TextUtil.getRequiredProperty(this.properties, PROP_KENDRA_DATA_SOURCE_NAME);
+ int initialDelay = TextUtil.getIntegerProperty(this.properties, PROP_KENDRA_INITIALDELAY,
+ KendraUpdater.INITIAL_DELAY);
+ int indexDelay = TextUtil.getIntegerProperty(this.properties, PROP_KENDRA_INDEXDELAY, KendraUpdater.INDEX_DELAY);
+
+ // Start the Kendra update thread, which waits first for a little while
+ // before starting to go through the "pages that need updating".
+ KendraUpdater updater = new KendraUpdater(engine, this, initialDelay, indexDelay);
+ updater.start();
+ }
+
+ private Map<String, Object> getContentTypes() {
+ Gson gson = new GsonBuilder().create();
+ try (InputStream in = KendraSearchProvider.class.getResourceAsStream("content_types.json")) {
+ if (in != null) {
+ Type collectionType = new TypeToken<HashMap<String, Object>>(){}.getType();
+ return gson.fromJson(new InputStreamReader(in), collectionType);
+ }
+ } catch (IOException e) {
+ LOG.error(format("Unable to load default propertyfile 'content_types.json': %s", e.getMessage()), e);
+ }
+ return null;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String getProviderInfo() {
+ return "KendraSearchProvider";
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void pageRemoved(Page page) {
+ String pageName = page.getName();
+ BatchDeleteDocumentRequest request = new BatchDeleteDocumentRequest().withIndexId(indexId)
+ .withDocumentIdList(pageName);
+ BatchDeleteDocumentResult result = getKendra().batchDeleteDocument(request);
+ if (result.getFailedDocuments().size() == 0) {
+ LOG.debug(format("Page '%s' was removed from index", pageName));
+ } else {
+ LOG.error(format("Failed to remove Page '%s' from index", pageName));
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void reindexPage(Page page) {
+ if (page != null) {
+ updates.add(page);
+ LOG.debug(format("Scheduling page '%s' for indexing ...", page.getName()));
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public Collection<SearchResult> findPages(String query, Context wikiContext) throws ProviderException, IOException {
+ QueryRequest request = new QueryRequest().withIndexId(indexId).withQueryText(query);
+ List<QueryResultItem> items = null;
+ try {
+ items = getKendra().query(request).getResultItems();
+ } catch (ThrottlingException e) {
+ LOG.error(format("ThrottlingException. Skipping..."));
+ return new ArrayList<>();
+ }
+ List<SearchResult> searchResults = new ArrayList<>(items.size());
+ AuthorizationManager mgr = engine.getManager(AuthorizationManager.class);
+
+ for (QueryResultItem item : items) {
+ switch (QueryResultType.fromValue(item.getType())) {
+ case DOCUMENT:
+ String documentId = item.getDocumentId();
+ String documentExcerpt = item.getDocumentExcerpt().getText();
+ String scoreConfidence = item.getScoreAttributes().getScoreConfidence();
+ Page page = this.engine.getManager(PageManager.class).getPage(documentId, PageProvider.LATEST_VERSION);
+ if (page != null) {
+ PagePermission pp = new PagePermission(page, PagePermission.VIEW_ACTION);
+ if (mgr.checkPermission(wikiContext.getWikiSession(), pp)) {
+ SearchResult searchResult = new SearchResultImpl(page, confidence2score(scoreConfidence),
+ new String[] { documentExcerpt });
+ searchResults.add(searchResult);
+ } else {
+ LOG.error(format("Page '%s' is not accessible", documentId));
+ }
+ } else {
+ LOG.error(
+ format("Kendra found a result page '%s' that could not be loaded, removing from index", documentId));
+ pageRemoved(Wiki.contents().page(this.engine, documentId));
+ }
+ break;
+ default:
+ LOG.error(format("Unknown query result type: %s", item.getType()));
+ }
+ }
+ return searchResults;
+ }
+
+ /**
+ * This method initialize the AWS Kendra Index and Datasources to be used.
+ *
+ * @throws InterruptedException
+ */
+ public void initializeIndexAndDataSource() throws InterruptedException {
+ this.indexId = getIndexId(indexName);
+ if (this.indexId == null) {
+ String message = format("Index '%s' does not exists", indexName);
+ LOG.error(message);
+ throw new IllegalArgumentException(message);
+ }
+ this.dataSourceId = getDatasourceId(this.indexId, dataSourceName);
+ if (this.dataSourceId == null) {
+ String message = format("Datasource '%s' does not exists in index %s", dataSourceName, indexName);
+ LOG.error(message);
+ throw new IllegalArgumentException(message);
+ }
+ }
+
+ /**
+  * Given a Kendra Index name, returns the corresponding Index Id, or
+  * {@code null} if it does not exist.
+  * <p>
+  * The list of indices is paginated: pages are requested one at a time until
+  * one of them contains an index with the requested name, or until no further
+  * page token is returned.
+  *
+  * @param indexName the name of the index to look up
+  * @return the index id or {@code null}
+  */
+ private String getIndexId(String indexName) {
+   ListIndicesRequest request = new ListIndicesRequest();
+   String nextToken;
+   do {
+     ListIndicesResult result = getKendra().listIndices(request);
+     for (IndexConfigurationSummary item : result.getIndexConfigurationSummaryItems()) {
+       if (StringUtils.equals(item.getName(), indexName)) {
+         return item.getId();
+       }
+     }
+     nextToken = result.getNextToken();
+     // only used if the loop continues, i.e. if there is another page to fetch
+     request = new ListIndicesRequest().withNextToken(nextToken);
+   } while (nextToken != null);
+   return null;
+ }
+
+ /**
+  * Given a Kendra Datasource name, returns the corresponding Datasource Id,
+  * or {@code null} if it does not exist in the given index.
+  * <p>
+  * The list of datasources is paginated: pages are requested one at a time
+  * until one of them contains a datasource with the requested name, or until
+  * no further page token is returned.
+  *
+  * @param indexId        the id of the index the datasource belongs to
+  * @param dataSourceName the name of the datasource to look up
+  * @return the datasource id or {@code null}
+  */
+ private String getDatasourceId(String indexId, String dataSourceName) {
+   ListDataSourcesRequest request = new ListDataSourcesRequest().withIndexId(indexId);
+   String nextToken;
+   do {
+     ListDataSourcesResult result = getKendra().listDataSources(request);
+     for (DataSourceSummary item : result.getSummaryItems()) {
+       if (StringUtils.equals(item.getName(), dataSourceName)) {
+         return item.getId();
+       }
+     }
+     nextToken = result.getNextToken();
+     // every page request has to carry the index id, not only the first one
+     request = new ListDataSourcesRequest().withIndexId(indexId).withNextToken(nextToken);
+   } while (nextToken != null);
+   return null;
+ }
+
+ /*
+ * Converts a SCORE Confidence from Kendra to an "equivalent" integer score
+ */
+ private int confidence2score(String scoreConfidence) {
+ switch (ScoreConfidence.fromValue(scoreConfidence)) {
+ case VERY_HIGH:
+ return 100;
+ case HIGH:
+ return 75;
+ case MEDIUM:
+ return 50;
+ case LOW:
+ return 25;
+ default:
+ return 0;
+ }
+ }
+
+ /**
+  * This method re-indexes all the pages found in the Wiki. It is mainly used
+  * at startup.
+  * <p>
+  * A data source sync job is started only when there is at least one page to
+  * index, and it is stopped again once all pages have been submitted, even if
+  * indexing fails half way through.
+  *
+  * @throws IOException in case the list of pages can not be read
+  */
+ private void doFullReindex() throws IOException {
+   Collection<Page> pages;
+   try {
+     pages = engine.getManager(PageManager.class).getAllPages();
+   } catch (ProviderException e) {
+     LOG.error(e.getMessage());
+     throw new IOException(e);
+   }
+   if (pages.isEmpty()) {
+     // nothing to index: do not start (and therefore do not stop) a sync job
+     return;
+   }
+   LOG.debug(format("Indexing all %d pages. Please wait ...", pages.size()));
+   String executionId = startExecution();
+   try {
+     for (Page page : pages) {
+       // Since I do not want to handle the size limit
+       // (https://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/BatchPutDocument)
+       // uploading documents one at a time
+       indexOnePage(page, executionId);
+     }
+   } finally {
+     // paired with the successful startExecution() above
+     stopExecution();
+   }
+ }
+
+ /**
+ * This method re-index all pages marked as updated. It is used to periodically
+ * index pages that have been modified
+ */
+ private void doPartialReindex() {
+ if (updates.isEmpty()) {
+ return;
+ }
+ LOG.debug(format("Indexing updated pages. Please wait ..."));
+ String executionId = startExecution();
+ synchronized (updates) {
+ try {
+ while (updates.size() > 0) {
+ indexOnePage(updates.remove(0), executionId);
+ }
+ } finally {
+ stopExecution();
+ }
+ }
+ }
+
+ /**
+ * Returns an Execution Id that is required to keep track of the modified
+ * documents
+ * @return The execution id
+ */
+ private String startExecution() {
+ StartDataSourceSyncJobRequest request = new StartDataSourceSyncJobRequest().withIndexId(indexId)
+ .withId(dataSourceId);
+ StartDataSourceSyncJobResult result = getKendra().startDataSourceSyncJob(request);
+ return result.getExecutionId();
+ }
+
+ /**
+ * Stop the execution for the given index Id and DataSource Id.
+ */
+ private void stopExecution() {
+ StopDataSourceSyncJobRequest request = new StopDataSourceSyncJobRequest().withIndexId(indexId).withId(dataSourceId);
+ getKendra().stopDataSourceSyncJob(request);
+ }
+
+ /**
+ * Index on single {@link Page} into the Kendra Index
+ * @param page the {@link Page} to index
+ * @param executionId The Execution Id
+ */
+ private void indexOnePage(Page page, String executionId) {
+ String pageName = page.getName();
+ try {
+ Document document = newDocument(page, executionId);
+ BatchPutDocumentRequest request = new BatchPutDocumentRequest().withIndexId(indexId)
+ .withDocuments(document);
+ BatchPutDocumentResult result = getKendra().batchPutDocument(request);
+ if (result.getFailedDocuments().size() == 0) {
+ LOG.info(format("Successfully indexed Page '%s' as %s", page.getName(), document.getContentType()));
+ } else {
+ for (BatchPutDocumentResponseFailedDocument failedDocument : result.getFailedDocuments()) {
+ LOG.error(format("Failed to index Page '%s': %s", failedDocument.getId(), failedDocument.getErrorMessage()));
+ }
+ }
+ } catch (IOException e) {
+ LOG.error(format("Failed to index Page '%s': %s", pageName, e.getMessage()));
+ }
+ }
+
+
+ /**
+ * Given a {@link Page}, returns the corresponding Kendra {@link Document}.
+ *
+ * @param page the {@link Page} to be indexed
+ * @param executionId an execution id to identify when the {@link Page} was
+ * indexed for the last time.
+ * @return a {@link Document} containing the searchable attributes.
+ * @throws IOException if the {@link Page}'s {@link Attachment} can not be read.
+ */
+ private Document newDocument(Page page, String executionId) throws IOException {
+ String pageName = page.getName();
+ List<DocumentAttribute> attrs = new ArrayList<>();
+ // These 2 are required as per
+ // https://docs.aws.amazon.com/kendra/latest/dg/data-source-custom.html#custom-required-attributes
+ attrs.add(newAttribute("_data_source_id", dataSourceId));
+ attrs.add(newAttribute("_data_source_sync_job_execution_id", executionId));
+
+ String title = TextUtil.beautifyString(pageName);
+ ByteBuffer blob = null;
+ ContentType contentType = ContentType.PLAIN_TEXT;
+ if (page instanceof Attachment) {
+ Attachment attachment = (Attachment) page;
+ InputStream is = null;
+ try {
+ String filename = attachment.getFileName();
+ contentType = getContentType(filename);
+ is = engine.getManager(AttachmentManager.class).getAttachmentStream(attachment);
+ blob = ByteBuffer.wrap(IOUtils.toByteArray(is));
+ } catch (ProviderException e) {
+ throw new IOException(e);
+ } finally {
+ IOUtils.closeQuietly(is, null);
+ }
+ // contentType should be set to its real value
+ } else {
+ String text = engine.getManager(PageManager.class).getPureText(page);
+ blob = ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8));
+ }
+ return new Document().withId(pageName).withTitle(title).withAttributes(attrs).withBlob(blob)
+ .withContentType(contentType);
+ }
+
+ private DocumentAttribute newAttribute(String key, String value) {
+ return new DocumentAttribute().withKey(key).withValue(new DocumentAttributeValue().withStringValue(value));
+ }
+
+ /**
+  * Maps a file name to the Kendra {@link ContentType} to use when indexing it,
+  * based on the file extension and the "ContentTypes" table loaded from
+  * {@code content_types.json}.
+  *
+  * @param filename the name of the file to index
+  * @return the configured {@link ContentType} for the file extension, or
+  *         {@link ContentType#PLAIN_TEXT} if the extension is not configured or
+  *         the table is not available
+  */
+ @SuppressWarnings("unchecked")
+ private ContentType getContentType(String filename) {
+   // the table keys are lower case, so that e.g. "Report.PDF" is matched as well
+   String extension = StringUtils.lowerCase(FilenameUtils.getExtension(filename), java.util.Locale.ROOT);
+   Object mapping = this.contentTypes == null ? null : this.contentTypes.get("ContentTypes");
+   if (!(mapping instanceof Map)) {
+     // content_types.json could not be loaded or has an unexpected layout
+     return ContentType.PLAIN_TEXT;
+   }
+   Map<String, String> ct = (Map<String, String>) mapping;
+   return ContentType.fromValue(ct.getOrDefault(extension, ContentType.PLAIN_TEXT.toString()));
+ }
+
+ /**
+ * Updater thread that updates Kendra indexes.
+ */
+ private static final class KendraUpdater extends WikiBackgroundThread {
+ protected static final int INDEX_DELAY = 5;
+ protected static final int INITIAL_DELAY = 10;
+ private KendraSearchProvider provider;
+
+ private int initialDelay;
+
+ private WatchDog watchdog;
+
+ private KendraUpdater(Engine engine, KendraSearchProvider provider, int initialDelay, int indexDelay) {
+ super(engine, indexDelay);
+ this.provider = provider;
+ this.initialDelay = initialDelay;
+ setName("JSPWiki Kendra Indexer");
+ }
+
+ @Override
+ public void startupTask() throws Exception {
+ watchdog = WatchDog.getCurrentWatchDog(getEngine());
+ try {
+ Thread.sleep(initialDelay * 1000L);
+ } catch (InterruptedException e) {
+ throw new InternalWikiException("Interrupted while waiting to start.", e);
+ }
+ watchdog.enterState("Full reindex");
+ provider.initializeIndexAndDataSource();
+ provider.doFullReindex();
+ watchdog.exitState();
+ }
+
+ @Override
+ public void backgroundTask() {
+ watchdog.enterState("Reindexing ...", 60);
+ provider.doPartialReindex();
+ watchdog.exitState();
+ }
+ }
+
+ private static class SearchResultImpl implements SearchResult {
+
+ private Page page;
+ private int score;
+ private String[] contexts;
+
+ public SearchResultImpl(Page page, int score, String[] contexts) {
+ this.page = page;
+ this.score = score;
+ this.contexts = contexts != null ? contexts.clone() : null;
+ }
+
+ @Override
+ public Page getPage() {
+ return this.page;
+ }
+
+ @Override
+ public int getScore() {
+ return this.score;
+ }
+
+ @Override
+ public String[] getContexts() {
+ return this.contexts;
+ }
+ }
+
+ public AWSkendra getKendra() {
+ return kendra;
+ }
+
+ public void setKendra(AWSkendra kendra) {
+ this.kendra = kendra;
+ }
+
+ protected AWSkendra buildClient() {
+ return AWSkendraClientBuilder.defaultClient();
+ }
+
+ public String getIndexName() {
+ return indexName;
+ }
+
+ public String getDataSourceName() {
+ return dataSourceName;
+ }
+
+}
\ No newline at end of file
diff --git a/jspwiki-kendra-searchprovider/src/main/resources/org/apache/wiki/search/kendra/content_types.json b/jspwiki-kendra-searchprovider/src/main/resources/org/apache/wiki/search/kendra/content_types.json
new file mode 100644
index 0000000..87b9d07
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/src/main/resources/org/apache/wiki/search/kendra/content_types.json
@@ -0,0 +1,18 @@
+{
+ "ContentTypes": {
+ "pdf": "PDF",
+ "html": "HTML",
+ "htm": "HTML",
+ "xhtml": "HTML",
+ "doc": "MS_WORD",
+ "docx": "MS_WORD",
+ "txt": "PLAIN_TEXT",
+ "xml": "PLAIN_TEXT",
+ "properties": "PLAIN_TEXT",
+ "java": "PLAIN_TEXT",
+ "js": "PLAIN_TEXT",
+ "py": "PLAIN_TEXT",
+ "md": "PLAIN_TEXT",
+ "ppt": "PPT"
+ }
+}
\ No newline at end of file
diff --git a/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java b/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java
new file mode 100644
index 0000000..bc0e6fc
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.wiki.search.kendra;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.when;
+import static org.mockito.Mockito.lenient;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Properties;
+import java.util.concurrent.Callable;
+
+import org.apache.log4j.Logger;
+import org.apache.wiki.TestEngine;
+import org.apache.wiki.api.core.Context;
+import org.apache.wiki.api.core.ContextEnum;
+import org.apache.wiki.api.core.Engine;
+import org.apache.wiki.api.search.SearchResult;
+import org.apache.wiki.api.spi.Wiki;
+import org.apache.wiki.search.SearchManager;
+import org.awaitility.Awaitility;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mockito.stubbing.Answer;
+
+import com.amazonaws.services.kendra.AWSkendra;
+import com.amazonaws.services.kendra.model.BatchPutDocumentRequest;
+import com.amazonaws.services.kendra.model.BatchPutDocumentResult;
+import com.amazonaws.services.kendra.model.DataSourceSummary;
+import com.amazonaws.services.kendra.model.IndexConfigurationSummary;
+import com.amazonaws.services.kendra.model.ListDataSourcesRequest;
+import com.amazonaws.services.kendra.model.ListDataSourcesResult;
+import com.amazonaws.services.kendra.model.ListIndicesRequest;
+import com.amazonaws.services.kendra.model.ListIndicesResult;
+import com.amazonaws.services.kendra.model.QueryRequest;
+import com.amazonaws.services.kendra.model.QueryResult;
+import com.amazonaws.services.kendra.model.QueryResultItem;
+import com.amazonaws.services.kendra.model.QueryResultType;
+import com.amazonaws.services.kendra.model.ScoreAttributes;
+import com.amazonaws.services.kendra.model.ScoreConfidence;
+import com.amazonaws.services.kendra.model.StartDataSourceSyncJobRequest;
+import com.amazonaws.services.kendra.model.StartDataSourceSyncJobResult;
+import com.amazonaws.services.kendra.model.TextWithHighlights;
+
+import net.sf.ehcache.CacheManager;
+import net.sourceforge.stripes.mock.MockHttpServletRequest;
+
+/**
+ * Exercises {@link KendraSearchProvider} against a Mockito mock of the AWS Kendra client,
+ * so no call is sent to the real service.
+ */
+@ExtendWith(MockitoExtension.class)
+public class KendraSearchProviderTest {
+
+  private static final Logger LOG = Logger.getLogger(KendraSearchProviderTest.class);
+
+  TestEngine engine;
+  Properties props;
+  KendraSearchProvider ksp;
+
+  @Mock
+  AWSkendra kendraMock;
+
+  /**
+   * Creates a fresh test engine and wires the mocked Kendra client into its search provider.
+   */
+  @BeforeEach
+  void setUp() throws Exception {
+    props = TestEngine.getTestProperties();
+    TestEngine.emptyWorkDir(props);
+    CacheManager.getInstance().removeAllCaches();
+    engine = new TestEngine(props);
+    try {
+      setupAWSKendra(engine);
+    } catch (Exception e) {
+      // Pass the exception itself as well: logging only toString() dropped the stack trace
+      // and made setup failures very hard to locate.
+      LOG.error(e.toString(), e);
+    }
+  }
+
+  /**
+   * Stubs the Kendra operations on the mock, injects the mock into the provider and
+   * initializes the index and data source against it.
+   *
+   * @param engine the engine whose search provider receives the mock
+   */
+  private void setupAWSKendra(Engine engine) throws InterruptedException {
+    ksp = (KendraSearchProvider) engine.getManager(SearchManager.class).getSearchEngine();
+
+    // Answers (rather than fixed return values) build a new result on every call and
+    // read the provider's names at invocation time.
+    final Answer<ListIndicesResult> indices = (InvocationOnMock invocation) ->
+        new ListIndicesResult().withIndexConfigurationSummaryItems(
+            new IndexConfigurationSummary().withId("IndexId").withName(ksp.getIndexName()));
+    final Answer<ListDataSourcesResult> dataSources = (InvocationOnMock invocation) ->
+        new ListDataSourcesResult()
+            .withSummaryItems(new DataSourceSummary().withId("DataSourceId").withName(ksp.getDataSourceName()));
+    final Answer<StartDataSourceSyncJobResult> syncJob = (InvocationOnMock invocation) ->
+        new StartDataSourceSyncJobResult().withExecutionId("executionId");
+    final Answer<BatchPutDocumentResult> batchPut = (InvocationOnMock invocation) ->
+        new BatchPutDocumentResult().withFailedDocuments(new ArrayList<>());
+    final Answer<QueryResult> emptyQuery = (InvocationOnMock invocation) ->
+        new QueryResult().withResultItems(new ArrayList<>());
+
+    when(kendraMock.listIndices(any(ListIndicesRequest.class))).then(indices);
+    lenient().when(kendraMock.listDataSources(any(ListDataSourcesRequest.class))).then(dataSources);
+    lenient().when(kendraMock.startDataSourceSyncJob(any(StartDataSourceSyncJobRequest.class))).then(syncJob);
+    lenient().when(kendraMock.batchPutDocument(any(BatchPutDocumentRequest.class))).then(batchPut);
+    lenient().when(kendraMock.query(any(QueryRequest.class))).then(emptyQuery);
+
+    ksp.setKendra(kendraMock);
+    ksp.initializeIndexAndDataSource();
+  }
+
+  /**
+   * Prints every result's page and context snippets to standard output.
+   *
+   * @param res the results to print
+   */
+  void debugSearchResults(final Collection<SearchResult> res) {
+    res.forEach(next -> {
+      System.out.println("page: " + next.getPage());
+      final String[] contexts = next.getContexts();
+      if (contexts == null) {
+        // A result may carry no contexts at all; there is nothing more to print for it.
+        return;
+      }
+      for (final String s : contexts) {
+        System.out.println("snippet: " + s);
+      }
+    });
+  }
+
+  /**
+   * Builds a condition that searches for {@code text} and succeeds once at least one result is found.
+   *
+   * @param res  collection receiving the results of the successful search
+   * @param text the query text
+   * @return a callable returning {@code true} when the search produced results
+   */
+  Callable<Boolean> findsResultsFor(final Collection<SearchResult> res, final String text) {
+    return () -> {
+      final MockHttpServletRequest request = engine.newHttpRequest();
+      final Context ctx = Wiki.context().create(engine, request, ContextEnum.PAGE_EDIT.getRequestContext());
+      final Collection<SearchResult> searchResults = ksp.findPages(text, ctx);
+      if (searchResults != null && !searchResults.isEmpty()) {
+        debugSearchResults(searchResults);
+        res.addAll(searchResults);
+        return true;
+      }
+      return false;
+    };
+  }
+
+  @Test
+  public void testSimpleSearch() throws Exception {
+    final String txt = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.";
+    engine.saveText("TestPage", txt);
+    try {
+      addTestresult("TestPage", "mankind", ScoreConfidence.VERY_HIGH);
+      final Collection<SearchResult> res = new ArrayList<>();
+      Awaitility.await("testSimpleSearch").until(findsResultsFor(res, "mankind"));
+      Assertions.assertEquals(1, res.size(), "no pages");
+      Assertions.assertEquals("TestPage", res.iterator().next().getPage().getName(), "page");
+    } finally {
+      // Remove the page even when the await or an assertion fails, so it cannot leak into other tests.
+      engine.deleteTestPage("TestPage");
+    }
+  }
+
+  /**
+   * Makes the mocked client answer every query with a single document result.
+   *
+   * @param pageName        used as the id and the document title of the result item
+   * @param pageContent     used as the document excerpt of the result item
+   * @param scoreConfidence the confidence reported in the item's score attributes
+   */
+  private void addTestresult(String pageName, String pageContent, ScoreConfidence scoreConfidence ) {
+    final Answer<QueryResult> singleResult = (InvocationOnMock invocation) -> {
+      final QueryResultItem item = new QueryResultItem().withId(pageName).withType(QueryResultType.DOCUMENT);
+      item.withDocumentTitle(new TextWithHighlights().withText(pageName));
+      item.withDocumentExcerpt(new TextWithHighlights().withText(pageContent));
+      item.withScoreAttributes(new ScoreAttributes().withScoreConfidence(scoreConfidence));
+      return new QueryResult().withResultItems(item);
+    };
+    when(kendraMock.query(any(QueryRequest.class))).then(singleResult);
+  }
+}
\ No newline at end of file
diff --git a/jspwiki-kendra-searchprovider/src/test/resources/aaa-diagram.pdf b/jspwiki-kendra-searchprovider/src/test/resources/aaa-diagram.pdf
new file mode 100644
index 0000000..9bb37c7
Binary files /dev/null and b/jspwiki-kendra-searchprovider/src/test/resources/aaa-diagram.pdf differ
diff --git a/jspwiki-kendra-searchprovider/src/test/resources/favicon.png b/jspwiki-kendra-searchprovider/src/test/resources/favicon.png
new file mode 100644
index 0000000..bb6f654
Binary files /dev/null and b/jspwiki-kendra-searchprovider/src/test/resources/favicon.png differ
diff --git a/jspwiki-kendra-searchprovider/src/test/resources/jspwiki-custom.properties b/jspwiki-kendra-searchprovider/src/test/resources/jspwiki-custom.properties
new file mode 100644
index 0000000..d4838fe
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/src/test/resources/jspwiki-custom.properties
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Custom configuration file used by most JUnit tests overriding
+# certain default values in src/main/resources/ini/jspwiki.properties
+#
+jspwiki.fileSystemProvider.pageDir = target/test-classes/testrepository
+jspwiki.workDir = target/test-classes/testworkdir
+jspwiki.searchProvider = org.apache.wiki.search.kendra.KendraSearchProvider
+jspwiki.kendra.indexName = JSPWikiPageIndex
+jspwiki.kendra.indexRoleArn = arn:aws:iam::123456789012:role/IndexRoleArn
+jspwiki.kendra.dataSourceName = JSPWikiPageDataSource
+jspwiki.kendra.dataSourceRoleArn = arn:aws:iam::123456789012:role/DataSourceRoleArn
+jspwiki.kendra.initialdelay = 1
+jspwiki.kendra.indexdelay = 1
+
+jspwiki.translatorReader.camelCaseLinks = true
+jspwiki.breakTitleWithSpaces = true
+jspwiki.translatorReader.useOutlinkImage = false
+jspwiki.basicAttachmentProvider.storageDir = target/test-classes/testrepository
+jspwiki.encoding = ISO-8859-1
+jspwiki.filterConfig = /filters.xml
+jspwiki.referenceStyle = relative
+jspwiki.authorizer=org.apache.wiki.auth.TestAuthorizer
+
+# log file under ./target
+log4j.appender.FileLog.File=./target/logs/jspwiki.log
+
+# RSS under ./target
+jspwiki.rss.fileName=./target/rss.rdf
+
+#
+# Security: use standard providers for user/group auth, user management
+# and ACLs. Use a test userdatabase for storing users.
+#
+jspwiki.xmlGroupDatabaseFile = target/test-classes/groupdatabase.xml
+jspwiki.xmlUserDatabaseFile = target/test-classes/userdatabase.xml
+
+log4j.logger.org.apache.wiki.search=DEBUG,ConsoleAppender
+
+log4j.logger.SecurityLog=INFO, SecurityAppender
+log4j.appender.SecurityAppender = org.apache.log4j.RollingFileAppender
+log4j.appender.SecurityAppender.File = ./target/logs/security.log
+log4j.appender.SecurityAppender.layout = org.apache.log4j.PatternLayout
+log4j.appender.SecurityAppender.layout.ConversionPattern=%d %p - %m%n
+
+# Used by CommandResolverTest
+jspwiki.specialPage.RecentChanges = RecentChanges.jsp
+jspwiki.specialPage.Search = Search.jsp
+
+# Used by JSPWikiMarkupParserTest
+jspwiki.translatorReader.inlinePattern.1 = *.jpg
+jspwiki.translatorReader.inlinePattern.2 = *.png
+jspwiki.translatorReader.inlinePattern.3 = http://images.com/*
+
+# Used by WorkflowManagerTest
+jspwiki.approver.workflow.saveWikiPage=
+jspwiki.approver.workflow.foo=janne
+jspwiki.approver.workflow.bar=Admin
+
+# Fields needed in order to run MailUtilTest
+#mail.smtp.host = 127.0.0.1
+#mail.smtp.port = 25
+#mail.from = JSPWiki <JSPWiki@localhost>
+#mail.smtp.account =
+#mail.smtp.password =
+
+# for JDBC tests
+server.port=9321
+server.database.0=file:target/jspwiki.hsqldb
+server.dbname.0=jspwiki
+
+jdbc.admin.id=SA
+jdbc.admin.password=
+jdbc.driver.class=org.hsqldb.jdbc.JDBCDriver
+jdbc.driver.id=hsql
+jdbc.driver.url=jdbc\:hsqldb\:hsql\://localhost:9321/jspwiki
+jdbc.user.id=jspwiki
+jdbc.user.password=password
diff --git a/jspwiki-kendra-searchprovider/src/test/resources/log4j.properties b/jspwiki-kendra-searchprovider/src/test/resources/log4j.properties
new file mode 100644
index 0000000..5e79818
--- /dev/null
+++ b/jspwiki-kendra-searchprovider/src/test/resources/log4j.properties
@@ -0,0 +1,9 @@
+# Root logger option
+log4j.rootLogger=INFO, stdout
+
+# Direct log messages to stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
+
\ No newline at end of file
diff --git a/jspwiki-war/pom.xml b/jspwiki-war/pom.xml
index 3a4d4aa..a823e6b 100644
--- a/jspwiki-war/pom.xml
+++ b/jspwiki-war/pom.xml
@@ -54,6 +54,12 @@
<artifactId>jspwiki-210-adapters</artifactId>
<version>${project.version}</version>
</dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>jspwiki-kendra-searchprovider</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<dependency>
<groupId>javax.servlet</groupId>
diff --git a/pom.xml b/pom.xml
index 27277e5..cbe1326 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,6 +129,7 @@
<module>jspwiki-main</module>
<module>jspwiki-markdown</module>
<module>jspwiki-tika-searchprovider</module>
+ <module>jspwiki-kendra-searchprovider</module>
<module>jspwiki-210-test-adaptees</module>
<module>jspwiki-210-adapters</module>
<module>jspwiki-war</module>