You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by rh...@apache.org on 2015/03/12 18:41:49 UTC
svn commit: r1666263 [1/2] - in
/manifoldcf/branches/CONNECTORS-1168/connectors: ./ searchblox/
searchblox/connector/ searchblox/connector/src/
searchblox/connector/src/main/ searchblox/connector/src/main/java/
searchblox/connector/src/main/java/org/ s...
Author: rharo
Date: Thu Mar 12 17:41:48 2015
New Revision: 1666263
URL: http://svn.apache.org/r1666263
Log:
CONNECTORS_1168 Initial Commit of SearchBlox Output Connector
Added:
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/build.xml
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/Messages.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxClient.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConfig.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConnector.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/searchblox/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/searchblox/common_en_US.properties
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration.js
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration_Parameters.html
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editSpecification.js
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editSpecification_Configuration.html
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/viewConfiguration.html
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/viewSpecification.html
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/manifoldcf/agents/output/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/manifoldcf/agents/output/searchblox/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/manifoldcf/agents/output/searchblox/tests/
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/test/java/org/apache/manifoldcf/agents/output/searchblox/tests/SearchBloxDocumentTest.java
manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/pom.xml
Modified:
manifoldcf/branches/CONNECTORS-1168/connectors/pom.xml
Modified: manifoldcf/branches/CONNECTORS-1168/connectors/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/pom.xml?rev=1666263&r1=1666262&r2=1666263&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/pom.xml Thu Mar 12 17:41:48 2015
@@ -64,6 +64,7 @@
<module>forcedmetadata</module>
<module>tika</module>
<module>documentfilter</module>
+ <module>searchblox</module>
</modules>
</project>
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/build.xml?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/build.xml (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/build.xml Thu Mar 12 17:41:48 2015
@@ -0,0 +1,95 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Ant build for the SearchBlox output connector. -->
+<project name="searchblox" default="all">
+ <!-- Locate the ManifoldCF dist directory: the MCFDISTPATH environment
+ variable wins when it is set, otherwise ../../dist is used. -->
+ <property environment="env"/>
+ <condition property="mcf-dist" value="${env.MCFDISTPATH}">
+ <isset property="env.MCFDISTPATH"/>
+ </condition>
+ <property name="abs-dist" location="../../dist"/>
+ <condition property="mcf-dist" value="${abs-dist}">
+ <not>
+ <isset property="env.MCFDISTPATH"/>
+ </not>
+ </condition>
+
+ <!-- Versions of the third-party jars fetched by the download targets below. -->
+ <property name="resteasy.client.version" value="3.0.8.Final"/>
+ <property name="jsoup.version" value="1.8.1"/>
+
+ <!-- Shared connector build logic; the mcf-connector-build.* references used
+ below come from this import. -->
+ <import file="${mcf-dist}/connector-build.xml"/>
+
+ <!-- Compile classpath: the shared connector classpath, the JAX-RS, jsoup and
+ RESTEasy jars from lib-proprietary, and every jar in ../../lib. -->
+ <path id="connector-classpath">
+ <path refid="mcf-connector-build.connector-classpath"/>
+ <fileset dir="../../lib-proprietary">
+ <include name="jaxrs-*.jar" />
+ <include name="jsoup*.jar" />
+ <include name="resteasy-*.jar" />
+ </fileset>
+ <fileset dir="../../lib">
+ <include name="*.jar" />
+ </fileset>
+ </path>
+
+ <!-- Test classpath: the shared test classpath plus ../../lib. The fileset has
+ no include pattern, so it selects every file below that directory.
+ NOTE(review): unlike connector-classpath, nothing from lib-proprietary is
+ added here; confirm the shared test classpath already provides it. -->
+ <path id="connector-test-classpath">
+ <path refid="mcf-connector-build.connector-test-classpath"/>
+ <fileset dir="../../lib">
+ </fileset>
+ </path>
+
+ <!-- Copies the third-party jars the connector needs into dist/lib. -->
+ <target name="lib">
+ <mkdir dir="dist/lib"/>
+ <copy todir="dist/lib">
+ <fileset dir="../../lib-proprietary">
+ <include name="jaxrs-*.jar" />
+ <include name="jsoup*.jar" />
+ <include name="resteasy-*.jar" />
+ </fileset>
+ </copy>
+ </target>
+
+ <!-- Deletes everything in test-materials-proprietary except README*.txt. -->
+ <target name="download-cleanup">
+ <delete>
+ <fileset dir="test-materials-proprietary" excludes="README*.txt"/>
+ </delete>
+ </target>
+
+ <!-- The download targets fetch one jar each from Maven Central into
+ ../../lib-proprietary. -->
+ <target name="download-resteasy-jaxrs">
+ <get src="http://central.maven.org/maven2/org/jboss/resteasy/resteasy-jaxrs/${resteasy.client.version}/resteasy-jaxrs-${resteasy.client.version}.jar" dest="../../lib-proprietary"/>
+ </target>
+
+ <target name="download-resteasy-client">
+ <get src="http://central.maven.org/maven2/org/jboss/resteasy/resteasy-client/${resteasy.client.version}/resteasy-client-${resteasy.client.version}.jar" dest="../../lib-proprietary"/>
+ </target>
+
+ <target name="download-jaxrs-api">
+ <get src="http://central.maven.org/maven2/org/jboss/resteasy/jaxrs-api/${resteasy.client.version}/jaxrs-api-${resteasy.client.version}.jar" dest="../../lib-proprietary"/>
+ </target>
+
+ <target name="download-jsoup">
+ <get src="http://central.maven.org/maven2/org/jsoup/jsoup/${jsoup.version}/jsoup-${jsoup.version}.jar" dest="../../lib-proprietary/"/>
+ </target>
+
+ <!-- Aggregate target: runs all four download targets. -->
+ <target name="download-dependencies" depends="download-jaxrs-api, download-resteasy-client, download-resteasy-jaxrs, download-jsoup"/>
+
+ <!-- Runs the shared deliver-connector target first, then calls
+ general-add-output-connector with this connector's label and class.
+ NOTE(review): general-add-output-connector is not defined in this file;
+ presumably it comes from the imported connector-build.xml. -->
+ <target name="deliver-connector" depends="mcf-connector-build.deliver-connector">
+ <antcall target="general-add-output-connector">
+ <param name="connector-label" value="Searchblox"/>
+ <param name="connector-class" value="org.apache.manifoldcf.agents.output.searchblox.SearchBloxConnector"/>
+ </antcall>
+ </target>
+
+</project>
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/Messages.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/Messages.java?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/Messages.java (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/Messages.java Thu Mar 12 17:41:48 2015
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.output.searchblox;
+
+import java.util.Locale;
+import java.util.Map;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+
+/**
+ * Localized message and UI resource lookup for the SearchBlox output connector.
+ * <p>
+ * Every method here is a static convenience wrapper: it forwards to the
+ * inherited method of the same name, filling in this class together with
+ * {@link #DEFAULT_BUNDLE_NAME} and/or {@link #DEFAULT_PATH_NAME}.
+ *
+ * @author Rafa Haro <rh...@apache.org>
+ */
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+  /** Base name of the connector's message bundle ({@code common_*.properties}). */
+  public static final String DEFAULT_BUNDLE_NAME = "org.apache.manifoldcf.agents.output.searchblox.common";
+
+  /**
+   * Package under which the connector's UI resources are shipped. The
+   * templates and scripts (editConfiguration.js, viewConfiguration.html, ...)
+   * live in {@code org/apache/manifoldcf/agents/output/searchblox}, so the
+   * path has to name that package for the resources to be found.
+   */
+  public static final String DEFAULT_PATH_NAME = "org.apache.manifoldcf.agents.output.searchblox";
+
+  /** Constructor - do not instantiate
+  */
+  protected Messages()
+  {
+  }
+
+  // Lookups against the default bundle, without message arguments.
+
+  public static String getString(Locale locale, String messageKey)
+  {
+    return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getAttributeString(Locale locale, String messageKey)
+  {
+    return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getBodyString(Locale locale, String messageKey)
+  {
+    return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getAttributeJavascriptString(Locale locale, String messageKey)
+  {
+    return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getBodyJavascriptString(Locale locale, String messageKey)
+  {
+    return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  // Lookups against the default bundle, with message arguments.
+
+  public static String getString(Locale locale, String messageKey, Object[] args)
+  {
+    return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getBodyString(Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  // More general methods which allow bundlenames and class loaders to be specified.
+
+  public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  // Resource output
+
+  /**
+   * Writes the resource named by {@code resourceKey}, looked up under
+   * {@link #DEFAULT_PATH_NAME}, to {@code output}.
+   */
+  public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,String> substitutionParameters, boolean mapToUpperCase)
+    throws ManifoldCFException
+  {
+    outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+      substitutionParameters,mapToUpperCase);
+  }
+
+  /**
+   * Velocity variant of {@link #outputResource}, taking string substitution
+   * parameters; uses the default bundle and resource path.
+   */
+  public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,String> substitutionParameters, boolean mapToUpperCase)
+    throws ManifoldCFException
+  {
+    outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+      substitutionParameters,mapToUpperCase);
+  }
+
+  /**
+   * Velocity variant taking arbitrary context objects; uses the default
+   * bundle and resource path.
+   */
+  public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,Object> contextObjects)
+    throws ManifoldCFException
+  {
+    outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+      contextObjects);
+  }
+
+}
+
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxClient.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxClient.java?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxClient.java (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxClient.java Thu Mar 12 17:41:48 2015
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.output.searchblox;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.UUID;
+
+import javax.ws.rs.client.Client;
+import javax.ws.rs.client.ClientBuilder;
+import javax.ws.rs.client.Entity;
+import javax.ws.rs.client.Invocation.Builder;
+import javax.ws.rs.client.WebTarget;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.UriBuilder;
+
+import org.apache.manifoldcf.crawler.system.Logging;
+import org.apache.xerces.parsers.DOMParser;
+import org.jboss.resteasy.plugins.providers.StringTextStar;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * SearchBlox REST client.
+ * <p>
+ * Each operation serializes a {@link SearchBloxDocument}, POSTs it to
+ * {@code <endpoint>/<action path>} and maps the {@code statuscode} element of
+ * the XML reply to a {@link ResponseCode}.
+ *
+ * @author Rafa Haro <rh...@apache.org>
+ */
+public class SearchBloxClient {
+
+  // TODO All this might need to be included in a configuration file
+  public static final String DEFAULT_ENDPOINT = "http://localhost:8080/searchblox/api/rest";
+
+  private static final String ADD_PATH = "add";
+
+  private static final String DELETE_PATH = "delete";
+
+  private static final String STATUS_PATH = "status";
+
+  private static final String CREATE_PATH = "coladd";
+
+  private static final String CLEAR_PATH = "clear";
+
+  /** Name of the XML element in the reply that carries the numeric status. */
+  private static final String STATUS_NODE = "statuscode";
+
+  /** Status values this client knows how to map from the numeric reply code. */
+  public static enum ResponseCode {
+    DOCUMENT_INDEXED(100),
+    DOCUMENT_REJECTED(101),
+    DOCUMENT_DELETED(200),
+    DOCUMENT_NOT_EXIST(201),
+    DOCUMENT_NOT_FOUND(301),
+    COLLECTION_CLEARED(400),
+    ERROR_CLEARING_COLLECTION(401),
+    COLLECTION_CREATED(900),
+    INVALID_COLLECTION_NAME(500),
+    INVALID_REQUEST(501),
+    INVALID_DOCUMENT_LOCATION(502),
+    NOT_CUSTOM_COLLECTION(503),
+    LIMIT_EXCEEDED(504),
+    INVALID_LICENSE_ID(601),
+    SERVER_UNREACHABLE(700);
+
+    private final int code;
+
+    ResponseCode(int code) {
+      this.code = code;
+    }
+
+    /**
+     * @param value numeric status code
+     * @return the matching constant, or {@code null} when no constant has that code
+     */
+    static ResponseCode getCodeFromValue(int value){
+      for (ResponseCode e : ResponseCode.values()) {
+        if (value == e.code) {
+          return e;
+        }
+      }
+      return null;
+    }
+
+    int getCode(){
+      return code;
+    }
+  }
+
+
+  private final String apikey;
+  private final Client client;
+  private final UriBuilder uriBuilder;
+
+  /**
+   * @param apikey   API key copied into every document before it is sent
+   * @param builder  JAX-RS client builder; {@link StringTextStar} is registered on it
+   *                 before the client is built
+   * @param endpoint REST endpoint; {@link #DEFAULT_ENDPOINT} is used when null or empty
+   */
+  public SearchBloxClient(String apikey, ClientBuilder builder, String endpoint) {
+    this.apikey = apikey;
+    builder.register(StringTextStar.class);
+    this.client = builder.build();
+    if (endpoint != null && !endpoint.isEmpty()) {
+      uriBuilder = UriBuilder.fromUri(endpoint);
+    } else {
+      uriBuilder = UriBuilder.fromUri(DEFAULT_ENDPOINT);
+    }
+  }
+
+
+  /** Posts {@code document} to the add path using the given serialization format. */
+  public ResponseCode addUpdateDocument(SearchBloxDocument document, String format)
+      throws SearchBloxException {
+    return post(document, format, SearchBloxDocument.DocumentAction.ADD_UPDATE);
+  }
+
+  /** Posts {@code document} to the delete path using the given serialization format. */
+  public ResponseCode deleteDocument(SearchBloxDocument document, String format)
+      throws SearchBloxException {
+    return post(document, format, SearchBloxDocument.DocumentAction.DELETE);
+  }
+
+  /** Posts a collection-create request for {@code colname}. */
+  public ResponseCode createCollection(String colname)
+      throws SearchBloxException {
+    SearchBloxDocument document = new SearchBloxDocument(apikey);
+    document.colName = colname;
+    return post(document, SearchBloxDocument.IndexingFormat.XML.name(), SearchBloxDocument.DocumentAction.CREATE);
+  }
+
+  /** Posts a collection-clear request for {@code colname}. */
+  public ResponseCode clearCollection(String colname)
+      throws SearchBloxException {
+    SearchBloxDocument document = new SearchBloxDocument(apikey);
+    document.colName = colname;
+    return post(document, SearchBloxDocument.IndexingFormat.XML.name(), SearchBloxDocument.DocumentAction.CLEAR);
+  }
+
+  /**
+   * Connectivity probe: posts a status request for a random collection name
+   * and uid, and reports success only when the reply is
+   * {@link ResponseCode#INVALID_COLLECTION_NAME}.
+   */
+  public boolean ping()
+      throws SearchBloxException {
+    SearchBloxDocument document = new SearchBloxDocument(apikey);
+    document.colName = UUID.randomUUID().toString();
+    document.uid = UUID.randomUUID().toString();
+    ResponseCode result = post(document, SearchBloxDocument.IndexingFormat.XML.name(), SearchBloxDocument.DocumentAction.STATUS);
+    return result == ResponseCode.INVALID_COLLECTION_NAME;
+  }
+
+  /**
+   * Resolves a format name (case-insensitive) to an indexing format.
+   *
+   * @throws SearchBloxException when {@code format} is null or names no known format
+   */
+  private static SearchBloxDocument.IndexingFormat parseFormat(String format)
+      throws SearchBloxException {
+    if (format != null) {
+      try {
+        // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
+        return SearchBloxDocument.IndexingFormat.valueOf(format.toUpperCase(java.util.Locale.ROOT));
+      } catch (IllegalArgumentException ignored) {
+        // valueOf signals an unknown name by throwing; reported below.
+      }
+    }
+    Logging.connectors.error("[Post request] Format not recognized " +format);
+    throw new SearchBloxException("Unknown Serialization Format " + format);
+  }
+
+  /** Returns the path segment for {@code action}, or null when the action has none. */
+  private static String pathFor(SearchBloxDocument.DocumentAction action) {
+    if (action == SearchBloxDocument.DocumentAction.ADD_UPDATE) {
+      return ADD_PATH;
+    } else if (action == SearchBloxDocument.DocumentAction.DELETE) {
+      return DELETE_PATH;
+    } else if (action == SearchBloxDocument.DocumentAction.STATUS) {
+      return STATUS_PATH;
+    } else if (action == SearchBloxDocument.DocumentAction.CREATE) {
+      return CREATE_PATH;
+    } else if (action == SearchBloxDocument.DocumentAction.CLEAR) {
+      return CLEAR_PATH;
+    }
+    return null;
+  }
+
+  /**
+   * Serializes {@code document}, posts it to the path for {@code action} and
+   * decodes the reply.
+   *
+   * @return the decoded status; {@link ResponseCode#SERVER_UNREACHABLE} when the
+   *         request itself fails; {@code null} when the server returns a code
+   *         that has no {@link ResponseCode} constant
+   * @throws SearchBloxException when the format is unknown or the reply cannot
+   *         be parsed into a numeric status code
+   */
+  private ResponseCode post(SearchBloxDocument document, String format, SearchBloxDocument.DocumentAction action)
+      throws SearchBloxException {
+
+    SearchBloxDocument.IndexingFormat iFormat = parseFormat(format);
+
+    // Clone so the shared base URI is never modified.
+    UriBuilder uri = uriBuilder.clone();
+    String path = pathFor(action);
+    if (path != null) {
+      uri = uri.path(path);
+    }
+
+    WebTarget target = client.target(uri.build());
+    Builder httpRequest = target.request();
+    httpRequest.accept(MediaType.TEXT_XML_TYPE);
+
+    document.apiKey = this.apikey;
+
+    String body = document.toString(iFormat, action);
+    Logging.connectors.debug("XML Document for document: " + document.uid +":" + body);
+    // The body is sent as text/plain whatever the indexing format is.
+    Entity<String> entity = Entity.entity(body, MediaType.TEXT_PLAIN_TYPE);
+
+    Response response;
+    try {
+      response = httpRequest.post(entity);
+    }
+    catch(Exception e) {
+      Logging.connectors.error("[No Connection] Error trying to connect ",e);
+      return ResponseCode.SERVER_UNREACHABLE;
+    }
+
+    String xmlResponse;
+    try {
+      xmlResponse = response.readEntity(String.class);
+    } finally {
+      // Release the connection even when reading the entity fails.
+      response.close();
+    }
+    if (xmlResponse == null) {
+      String message = "[Response Parsing] Empty response";
+      Logging.connectors.error(message);
+      throw new SearchBloxException(message);
+    }
+
+    // NOTE(review): the parser runs with its default configuration; consider
+    // disabling DOCTYPE processing if the endpoint is not fully trusted.
+    DOMParser parser = new DOMParser();
+    try {
+      parser.parse(new InputSource(new StringReader(xmlResponse)));
+    } catch (SAXException | IOException e) {
+      Logging.connectors.error("[Response parsing] Dom parsing error", e);
+      throw new SearchBloxException(e);
+    }
+    Document doc = parser.getDocument();
+    NodeList nodeList = doc.getElementsByTagName(STATUS_NODE);
+    if (nodeList == null || nodeList.getLength() == 0) {
+      String message = "[Response Parsing] Status code not found";
+      Logging.connectors.error(message);
+      throw new SearchBloxException(message);
+    }
+
+    String codeStr = nodeList.item(0).getTextContent();
+    int statusCode;
+    try {
+      // Tolerate whitespace around the number inside the element.
+      statusCode = Integer.parseInt(codeStr == null ? "" : codeStr.trim());
+    } catch (NumberFormatException e) {
+      String message = "[Response Parsing] Status code is not a number: " + codeStr;
+      Logging.connectors.error(message, e);
+      throw new SearchBloxException(message);
+    }
+
+    ResponseCode result = ResponseCode.getCodeFromValue(statusCode);
+    if (result == null) {
+      Logging.connectors.warn("[Response Parsing] Unknown status code " + statusCode);
+    }
+    return result;
+  }
+}
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConfig.java?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConfig.java (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConfig.java Thu Mar 12 17:41:48 2015
@@ -0,0 +1,21 @@
+package org.apache.manifoldcf.agents.output.searchblox;
+
+/**
+ * <p>SearchBloxConfig class</p>
+ * <p>Holds the string names of the configuration node and its attributes as
+ * package-private constants. NOTE(review): presumably read by
+ * SearchBloxConnector when loading and saving its configuration - the usages
+ * are not visible from this file.</p>
+ * @author Antonio David Perez Morales <ad...@apache.org>
+ *
+ */
+class SearchBloxConfig {
+
+ // Name of the configuration node.
+ static final String NODE_CONFIGURATION = "configuration";
+ // Attribute names for the per-field boost settings.
+ static final String ATTRIBUTE_TITLEBOOST = "title_boost";
+ static final String ATTRIBUTE_CONTENTBOOST = "content_boost";
+ static final String ATTRIBUTE_KEYWORDSBOOST = "keywords_boost";
+ static final String ATTRIBUTE_DESCRIPTIONBOOST = "description_boost";
+ // Attribute names for the pool size and the two timeout settings.
+ static final String ATTRIBUTE_POOLSIZE = "poolsize";
+ static final String ATTRIBUTE_TIMEOUT_CONNECTION = "timeoutconnection";
+ static final String ATTRIBUTE_TIMEOUT_SOCKET = "timeoutsocket";
+ // Attribute names for the index format and the target collection.
+ static final String ATTRIBUTE_INDEX_FORMAT = "indexformat";
+ static final String ATTRIBUTE_COLLECTION_NAME = "collection";
+
+}
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConnector.java?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConnector.java (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxConnector.java Thu Mar 12 17:41:48 2015
@@ -0,0 +1,879 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.output.searchblox;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.agents.output.BaseOutputConnector;
+import org.apache.manifoldcf.agents.output.searchblox.SearchBloxClient.ResponseCode;
+import org.apache.manifoldcf.agents.output.searchblox.SearchBloxDocument.IndexingFormat;
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.crawler.system.Logging;
+import org.jboss.resteasy.client.jaxrs.ResteasyClientBuilder;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * SearchBlox Output Connector. SearchBlox (http://www.searchblox.com/) is a Cloud
+ * Based Search Engine. This connector indexes ManifoldCF crawled content and
+ * metadata using the SearchBlox REST API
+ * (http://www.searchblox.com/developers-2/api-2)
+ *
+ * @author Rafa Haro <rh...@apache.org>
+ * @author Antonio David Perez Morales <ad...@apache.org>
+ */
+public class SearchBloxConnector extends BaseOutputConnector {
+
+ /** Message key of the tab title added on the connection configuration page. */
+ private final static String SEARCHBLOX_TAB_PARAMETERS = "SearchBloxConnector.Parameters";
+
+ /** Velocity resources rendered for the job specification header, edit body and view page. */
+ private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+ private static final String EDIT_SPECIFICATION_CONFIGURATION_HTML = "editSpecification_Configuration.html";
+ private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+ /** Forward to the javascript to check the configuration parameters */
+ private static final String EDIT_CONFIG_HEADER_FORWARD = "editConfiguration.js";
+
+ /** Forward to the HTML template to edit the configuration parameters */
+ private static final String EDIT_CONFIG_FORWARD_PARAMETERS = "editConfiguration_Parameters.html";
+
+ /** Forward to the HTML template to view the configuration parameters */
+ private static final String VIEW_CONFIG_FORWARD = "viewConfiguration.html";
+
+ /**
+ * Default collection name, used when a configuration node carries no
+ * collection attribute
+ */
+ private static final String DEFAULT_COLLECTION = "Custom";
+
+ /**
+ * Default apiKey, shown when the connection has no (or an empty) API key
+ */
+ private static final String DEFAULT_APIKEY = "apiKey";
+
+ /**
+ * Ingestion activity
+ */
+ public final static String INGEST_ACTIVITY = "document ingest";
+
+ /**
+ * Document removal activity
+ */
+ public final static String REMOVE_ACTIVITY = "document deletion";
+
+ /**
+ * Collection Creation activity. It is advertised by getActivitiesList(),
+ * but no method of this class records it.
+ */
+ public final static String CREATION_ACTIVITY = "collection created";
+
+
+ /** Name of the connection parameter holding the SearchBlox endpoint. */
+ private static final String SEARCHBLOX_ENDPOINT = "endpoint";
+ /** Key of the indexing format inside the unpacked job arguments. */
+ private static final String SEARCHBLOX_INDEXING_FORMAT = "indexformat";
+
+ /** Default HTTP client connection pool size. */
+ private static final int BUILDER_DEFAULT_POOL_SIZE = 5;
+ /** Default socket timeout; getSession() applies it in seconds. */
+ private static final long BUILDER_DEFAULT_SOCKET_TIMEOUT = 60;
+ /** Default connection timeout; getSession() applies it in seconds. */
+ private static final long BUILDER_DEFAULT_CONNECTION_TIMEOUT = 60;
+
+ /** Builder handed to the client; created together with the client in getSession(). */
+ private ResteasyClientBuilder restBuilder = null;
+ /** SearchBlox client; non-null means the connector is considered connected. */
+ private SearchBloxClient client = null;
+ /** API key read from the connection parameters when the session is created. */
+ private String apiKey = null;
+ /** Version string for which the job-level arguments were last applied in getSession(). */
+ private String lastVersion = null;
+
+ /** Creates an unconnected connector; the session is established by getSession(). */
+ public SearchBloxConnector() {
+
+ }
+
+ /**
+  * Tell the framework whether this connector instance should be counted as
+  * connected, i.e. whether a SearchBlox client currently exists.
+  *
+  * @return true if the connector instance is actually connected.
+  */
+ @Override
+ public boolean isConnected() {
+   final boolean hasClient = (client != null);
+   return hasClient;
+ }
+
+ /**
+  * Close the connection. Call this before discarding the connection.
+  * <p/>
+  * All session state is dropped, including the remembered version string, so
+  * that the next getSession() call applies the job-level settings to the
+  * newly created builder instead of treating them as already applied.
+  *
+  * @throws ManifoldCFException if the superclass fails to disconnect.
+  */
+ @Override
+ public void disconnect() throws ManifoldCFException {
+   // apiKey and restBuilder are only ever set together with client, so
+   // clearing them unconditionally matches the previous guarded reset.
+   apiKey = null;
+   restBuilder = null;
+   client = null;
+   // Forget which version the settings were applied for; otherwise a
+   // reconnect with the same version would keep the builder defaults.
+   lastVersion = null;
+   super.disconnect();
+ }
+
+ /**
+  * List the activities this connector may write into the log.
+  *
+  * @return the ingest, removal and collection-creation activity names.
+  */
+ @Override
+ public String[] getActivitiesList() {
+   final String[] supported = new String[3];
+   supported[0] = INGEST_ACTIVITY;
+   supported[1] = REMOVE_ACTIVITY;
+   supported[2] = CREATION_ACTIVITY;
+   return supported;
+ }
+
+ /**
+  * Establish the session: lazily create the SearchBlox client and, when
+  * job-level arguments are supplied for a version not seen before, apply
+  * the pool size and timeouts to the retained client builder.
+  * <p/>
+  * NOTE(review): whether an already created SearchBloxClient picks up later
+  * changes made to the builder depends on SearchBloxClient, which is not
+  * visible here - confirm.
+  *
+  * @param args
+  *            unpacked job arguments keyed by attribute name; may be null
+  *            when only a bare session is needed (e.g. from check()).
+  * @param version
+  *            version string identifying the arguments; when it equals the
+  *            version applied last, the arguments are not applied again.
+  */
+ protected synchronized void getSession(Map<String, List<String>> args, String version) {
+   if (client == null) {
+     String endpoint = params.getParameter(SEARCHBLOX_ENDPOINT);
+     this.apiKey = params.getParameter(SearchBloxDocument.API_KEY);
+     ResteasyClientBuilder builder = new ResteasyClientBuilder();
+     builder.connectionPoolSize(BUILDER_DEFAULT_POOL_SIZE);
+     builder.socketTimeout(BUILDER_DEFAULT_SOCKET_TIMEOUT,
+         TimeUnit.SECONDS);
+     builder.establishConnectionTimeout(
+         BUILDER_DEFAULT_CONNECTION_TIMEOUT, TimeUnit.SECONDS);
+     this.restBuilder = builder;
+     client = new SearchBloxClient(apiKey, this.restBuilder, endpoint);
+   }
+
+   // Nothing to apply without arguments, or when this version was already applied.
+   if (args == null || (version != null && version.equals(lastVersion))) {
+     return;
+   }
+   lastVersion = version;
+
+   Integer poolSize = parseIntArg(args,
+       SearchBloxConfig.ATTRIBUTE_POOLSIZE, "Client Pool Size");
+   if (poolSize != null) {
+     this.restBuilder.connectionPoolSize(poolSize);
+   }
+
+   // The timeouts previously ended up in connectionPoolSize(); route each
+   // value to its own setter, using seconds like the defaults above.
+   Integer connectionTimeout = parseIntArg(args,
+       SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION, "Client Connection Timeout");
+   if (connectionTimeout != null) {
+     this.restBuilder.establishConnectionTimeout(connectionTimeout, TimeUnit.SECONDS);
+   }
+
+   Integer socketTimeout = parseIntArg(args,
+       SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET, "Client Socket Timeout");
+   if (socketTimeout != null) {
+     this.restBuilder.socketTimeout(socketTimeout, TimeUnit.SECONDS);
+   }
+ }
+
+ /**
+  * Read the first value of an argument as an integer.
+  *
+  * @param args unpacked job arguments.
+  * @param key argument name to look up.
+  * @param label human readable name used in the error log.
+  * @return the parsed value, or null when the argument is absent or not a
+  *         number (the latter is logged), so the caller keeps its default.
+  */
+ private static Integer parseIntArg(Map<String, List<String>> args, String key, String label) {
+   List<String> values = args.get(key);
+   if (values == null || values.isEmpty()) {
+     return null;
+   }
+   try {
+     return Integer.valueOf(Integer.parseInt(values.get(0)));
+   } catch (NumberFormatException e) {
+     Logging.connectors.error("Incorrect Argument Value for " + label, e);
+     return null;
+   }
+ }
+
+ @Override
+ public String check() throws ManifoldCFException {
+   // A bare session is enough for a ping; no job arguments are involved.
+   getSession(null, null);
+   try {
+     if (!client.ping()) {
+       return "Connection Not Working!. Check SearchBlox Server is up and the configuration is correct.";
+     }
+     return super.check();
+   } catch (SearchBloxException e) {
+     Logging.connectors.error("Connection Not Working", e);
+     final String failure = "Connection Not Working!" + e.getMessage();
+     return failure;
+   }
+ }
+
+ /**
+ * Get an output version string, given an output specification. The output
+ * version string is used to uniquely describe the pertinent details of the
+ * output specification and the configuration, to allow the Connector
+ * Framework to determine whether a document will need to be output again.
+ * Note that the contents of the document cannot be considered by this
+ * method, and that a different version string (defined in
+ * IRepositoryConnector) is used to describe the version of the actual
+ * document.
+ * <p/>
+ * This method presumes that the connector object has been configured, and
+ * it is thus able to communicate with the output data store should that be
+ * necessary.
+ *
+ * @param spec
+ * is the current output specification for the job that is doing
+ * the crawling.
+ * @return a string, of unlimited length, which uniquely describes output
+ * configuration and specification in such a way that if two such
+ * strings are equal, the document will not need to be sent again to
+ * the output data store.
+ */
+ @Override
+ public VersionContext getPipelineDescription(Specification spec)
+     throws ManifoldCFException, ServiceInterruption {
+   // The packed form of the job arguments serves as the version string.
+   final String packedSpec = new SpecPacker(spec).toPackedString();
+   return new VersionContext(packedSpec, params, spec);
+ }
+
+ /**
+ * Detect if a mime type is indexable or not. This method is used by
+ * participating repository connectors to pre-filter the number of unusable
+ * documents that will be passed to this output connector.
+ *
+ * @param outputDescription
+ * is the document's output version.
+ * @param mimeType
+ * is the mime type of the document.
+ * @return true if the mime type is indexable by this connector.
+ */
+ @Override
+ public boolean checkMimeTypeIndexable(
+     VersionContext outputDescription,
+     String mimeType,
+     IOutputCheckActivity activities)
+     throws ManifoldCFException, ServiceInterruption {
+   // Documents are pushed through the SearchBlox API, so every incoming
+   // stream is assumed to be convertible to a String: accept any mime type.
+   final boolean indexable = true;
+   return indexable;
+ }
+
+ /**
+ * Pre-determine whether a document's length is indexable by this connector.
+ * This method is used by participating repository connectors to help filter
+ * out documents that are too long to be indexable.
+ *
+ * @param outputDescription
+ * is the document's output version.
+ * @param length
+ * is the length of the document.
+ * @return true if the file is indexable.
+ */
+ @Override
+ public boolean checkLengthIndexable(
+     VersionContext outputDescription,
+     long length,
+     IOutputCheckActivity activities)
+     throws ManifoldCFException, ServiceInterruption {
+   // SearchBlox imposes no size limit, so any length is accepted.
+   final boolean indexable = true;
+   return indexable;
+ }
+
+ /**
+ * Add (or replace) a document in the output data store using the connector.
+ * This method presumes that the connector object has been configured, and
+ * it is thus able to communicate with the output data store should that be
+ * necessary.
+ *
+ * @param documentURI
+ * is the URI of the document. The URI is presumed to be the
+ * unique identifier which the output data store will use to
+ * process and serve the document. This URI is constructed by the
+ * repository connector which fetches the document, and is thus
+ * universal across all output connectors.
+ * @param pipelineDescription
+ * includes the description string that was constructed for this
+ * document by the getOutputDescription() method.
+ * @param document
+ * is the document data to be processed (handed to the output
+ * data store).
+ * @param authorityNameString
+ * is the name of the authority responsible for authorizing any
+ * access tokens passed in with the repository document. May be
+ * null.
+ * @param activities
+ * is the handle to an object that the implementer of a pipeline
+ * connector may use to perform operations, such as logging
+ * processing activity, or sending a modified document to the
+ * next stage in the pipeline.
+ * @return the document status (accepted or permanently rejected).
+ * @throws IOException
+ * only if there's a stream error reading the document data.
+ */
+ @Override
+ public int addOrReplaceDocumentWithException(String documentURI,
+ VersionContext pipelineDescription, RepositoryDocument document,
+ String authorityNameString, IOutputAddActivity activities)
+ throws ManifoldCFException, ServiceInterruption, IOException {
+
+ Logging.connectors.info("Indexing Document " + documentURI);
+ long indexingTime = System.currentTimeMillis();
+ SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
+ Map<String, List<String>> args = sp.getArgs();
+ // Establish a session
+ getSession(args, pipelineDescription.getVersionString());
+
+ SearchBloxDocument sbDoc = new SearchBloxDocument(this.apiKey,
+ documentURI, document, sp.getArgs());
+ String format = args.get(SEARCHBLOX_INDEXING_FORMAT).get(0);
+ long startTime = System.currentTimeMillis();
+ try {
+ ResponseCode code = client.addUpdateDocument(sbDoc, format);
+ if (code == ResponseCode.DOCUMENT_INDEXED) {
+ Logging.connectors.info("Document Indexed" + documentURI);
+ activities.recordActivity(startTime, INGEST_ACTIVITY, null,
+ documentURI, "OK", "Document Indexed");}
+ else
+ activities.recordActivity(startTime, INGEST_ACTIVITY, null,
+ documentURI, "" + code.getCode(), code.name());
+ } catch (SearchBloxException e) {
+ Logging.connectors
+ .error("[Indexing - Add] Exception indexing document :"
+ + document, e);
+ String activityCode = e.getClass().getSimpleName()
+ .toUpperCase(Locale.ROOT);
+ String activityDetails = e.getMessage()
+ + ((e.getCause() != null) ? ": "
+ + e.getCause().getMessage() : "");
+ activities.recordActivity(startTime, INGEST_ACTIVITY, null,
+ documentURI, activityCode, activityDetails);
+ return DOCUMENTSTATUS_REJECTED;
+ }
+ indexingTime = System.currentTimeMillis() - indexingTime;
+ Logging.connectors.info("Indexing Time for document " + documentURI + ": " + indexingTime);
+ return DOCUMENTSTATUS_ACCEPTED;
+ }
+
+ /**
+ * Remove a document using the connector. Note that the last
+ * outputDescription is included, since it may be necessary for the
+ * connector to use such information to know how to properly remove the
+ * document.
+ *
+ * @param documentURI
+ * is the URI of the document. The URI is presumed to be the
+ * unique identifier which the output data store will use to
+ * process and serve the document. This URI is constructed by the
+ * repository connector which fetches the document, and is thus
+ * universal across all output connectors.
+ * @param outputDescription
+ * is the last description string that was constructed for this
+ * document by the getOutputDescription() method above.
+ * @param activities
+ * is the handle to an object that the implementer of an output
+ * connector may use to perform operations, such as logging
+ * processing activity.
+ */
+ @Override
+ public void removeDocument(String documentURI, String outputDescription,
+ IOutputRemoveActivity activities) throws ManifoldCFException,
+ ServiceInterruption {
+ Logging.ingest.debug("Deleting SearchBlox Document: '" + documentURI
+ + "'");
+
+ SpecPacker packer = new SpecPacker(outputDescription);
+ Map<String, List<String>> args = packer.getArgs();
+ // Establish a session
+ getSession(args, packer.toPackedString());
+
+ SearchBloxDocument document = new SearchBloxDocument(this.apiKey);
+ document.uid = documentURI;
+ // document.apiKey = args.get(API_KEY).get(0);
+ document.colName = args.get(SearchBloxDocument.SEARCHBLOX_COLLECTION).get(0);
+ String format = args.get(SEARCHBLOX_INDEXING_FORMAT).get(0);
+ long startTime = System.currentTimeMillis();
+ try {
+ ResponseCode code = client.deleteDocument(document, format);
+ if (code == ResponseCode.DOCUMENT_DELETED)
+ activities.recordActivity(startTime, REMOVE_ACTIVITY, null,
+ documentURI, "OK", "Document Deleted");
+ else
+ activities.recordActivity(startTime, REMOVE_ACTIVITY, null,
+ documentURI, "" + code.getCode(), code.name());
+ } catch (SearchBloxException e) {
+ Logging.connectors.error(
+ "[Indexing - Remove] Exception indexing document :"
+ + document, e);
+ String activityCode = e.getClass().getSimpleName()
+ .toUpperCase(Locale.ROOT);
+ String activityDetails = e.getMessage()
+ + ((e.getCause() != null) ? ": "
+ + e.getCause().getMessage() : "");
+ activities.recordActivity(startTime, REMOVE_ACTIVITY, null,
+ documentURI, activityCode, activityDetails);
+ }
+ }
+
+ /**
+ * Read the content of a resource, replace the variable ${PARAMNAME} with
+ * the value and copy it to the out.
+ *
+ * @param resName
+ * @param out
+ * @throws ManifoldCFException
+ */
+ private static void outputResource(String resName, IHTTPOutput out,
+ Locale locale, Map<String, String> params, String tabName,
+ Integer sequenceNumber, Integer currentSequenceNumber)
+ throws ManifoldCFException {
+ Map<String, String> paramMap = null;
+ if (params != null) {
+ paramMap = params;
+ if (tabName != null) {
+ paramMap.put("TabName", tabName);
+ }
+ if (currentSequenceNumber != null)
+ paramMap.put("SelectedNum", currentSequenceNumber.toString());
+ } else {
+ paramMap = new HashMap<String, String>();
+ }
+ if (sequenceNumber != null)
+ paramMap.put("SeqNum", sequenceNumber.toString());
+
+ Messages.outputResourceWithVelocity(out, locale, resName, paramMap,
+ true);
+ }
+
+ @Override
+ public void outputConfigurationHeader(IThreadContext threadContext,
+     IHTTPOutput out, Locale locale, ConfigParams parameters,
+     List<String> tabsArray) throws ManifoldCFException, IOException {
+   super.outputConfigurationHeader(threadContext, out, locale, parameters, tabsArray);
+
+   // Register the connector's tab, then emit the validation javascript.
+   final String parametersTab = Messages.getString(locale, SEARCHBLOX_TAB_PARAMETERS);
+   tabsArray.add(parametersTab);
+   outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, null, null, null, null);
+ }
+
+ @Override
+ public void outputConfigurationBody(IThreadContext threadContext,
+     IHTTPOutput out, Locale locale, ConfigParams parameters,
+     String tabName) throws ManifoldCFException, IOException {
+   super.outputConfigurationBody(threadContext, out, locale, parameters, tabName);
+
+   // Render the edit template with the current (or default) connection values.
+   final Map<String, String> templateValues = this.getConfigParameters(parameters);
+   outputResource(EDIT_CONFIG_FORWARD_PARAMETERS, out, locale,
+       templateValues, tabName, null, null);
+ }
+
+ /**
+  * Collect the SearchBlox connection parameters (API key and endpoint) into
+  * a map, substituting the defaults for missing or empty values. If
+  * configParams is null, getConfiguration() is used.
+  *
+  * @param configParams connection parameters to read, may be null
+  * @return a new map holding the API key and the endpoint
+  */
+ final private Map<String, String> getConfigParameters(
+     ConfigParams configParams) {
+   final ConfigParams source =
+       (configParams != null) ? configParams : getConfiguration();
+
+   String configuredKey = source.getParameter(SearchBloxDocument.API_KEY);
+   if (configuredKey == null || configuredKey.isEmpty()) {
+     configuredKey = DEFAULT_APIKEY;
+   }
+
+   String configuredEndpoint = source.getParameter(SEARCHBLOX_ENDPOINT);
+   if (configuredEndpoint == null || configuredEndpoint.isEmpty()) {
+     configuredEndpoint = SearchBloxClient.DEFAULT_ENDPOINT;
+   }
+
+   final Map<String, String> values = new HashMap<String, String>();
+   values.put(SearchBloxDocument.API_KEY, configuredKey);
+   values.put(SEARCHBLOX_ENDPOINT, configuredEndpoint);
+   return values;
+ }
+
+ @Override
+ public void viewConfiguration(IThreadContext threadContext,
+     IHTTPOutput out, Locale locale, ConfigParams parameters)
+     throws ManifoldCFException, IOException {
+   // Read-only rendering of the connection parameters.
+   final Map<String, String> templateValues = getConfigParameters(parameters);
+   outputResource(VIEW_CONFIG_FORWARD, out, locale, templateValues, null, null, null);
+ }
+
+ /**
+ * Process a configuration post. This method is called at the start of the
+ * connector's configuration page, whenever there is a possibility that form
+ * data for a connection has been posted. Its purpose is to gather form
+ * information and modify the configuration parameters accordingly. The name
+ * of the posted form is "editconnection".
+ *
+ * @param threadContext
+ * is the local thread context.
+ * @param variableContext
+ * is the set of variables available from the post, including
+ * binary file post information.
+ * @param parameters
+ * are the configuration parameters, as they currently exist, for
+ * this connection being configured.
+ * @return null if all is well, or a string error message if there is an
+ * error that should prevent saving of the connection (and cause a
+ * redirection to an error page).
+ */
+ @Override
+ public String processConfigurationPost(IThreadContext threadContext,
+     IPostParameters variableContext, Locale locale,
+     ConfigParams parameters) throws ManifoldCFException {
+   // Only values actually present in the post overwrite the stored ones.
+   final String postedKey = variableContext.getParameter(SearchBloxDocument.API_KEY);
+   if (postedKey != null) {
+     parameters.setParameter(SearchBloxDocument.API_KEY, postedKey);
+   }
+
+   final String postedEndpoint = variableContext.getParameter(SEARCHBLOX_ENDPOINT);
+   if (postedEndpoint != null) {
+     parameters.setParameter(SEARCHBLOX_ENDPOINT, postedEndpoint);
+   }
+
+   return null;
+ }
+
+ /**
+ * Output the specification header section. This method is called in the
+ * head section of a job page which has selected a pipeline connection of
+ * the current type. Its purpose is to add the required tabs to the list,
+ * and to output any javascript methods that might be needed by the job
+ * editing HTML.
+ *
+ * @param out
+ * is the output to which any HTML should be sent.
+ * @param locale
+ * is the preferred local of the output.
+ * @param os
+ * is the current pipeline specification for this connection.
+ * @param connectionSequenceNumber
+ * is the unique number of this connection within the job.
+ * @param tabsArray
+ * is an array of tab names. Add to this array any tab names that
+ * are specific to the connector.
+ */
+ @Override
+ public void outputSpecificationHeader(IHTTPOutput out, Locale locale,
+     Specification os, int connectionSequenceNumber,
+     List<String> tabsArray) throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   velocityContext.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+
+   // Register the connector's job tab.
+   final String configurationTab =
+       Messages.getString(locale, "SearchBloxConnector.Configuration");
+   tabsArray.add(configurationTab);
+
+   // The header template needs the values of every tab.
+   fillInConfigurationSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS,
+       velocityContext);
+ }
+
+ /**
+  * Fill the template parameter map with the job settings. The values come
+  * from the first configuration node of the specification; missing or empty
+  * attributes are replaced by defaults. When the specification has no
+  * configuration node at all, every entry is set to its default.
+  * <p/>
+  * Map keys are the attribute names upper-cased with Locale.ROOT, so they do
+  * not change with the JVM default locale.
+  *
+  * @param paramMap map to populate
+  * @param os specification to read the configuration node from
+  */
+ private void fillInConfigurationSpecificationMap(
+     Map<String, Object> paramMap, Specification os) {
+
+   for (int i = 0, len = os.getChildCount(); i < len; i++) {
+     SpecificationNode sn = os.getChild(i);
+     if (!sn.getType().equals(SearchBloxConfig.NODE_CONFIGURATION)) {
+       continue;
+     }
+
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_TITLEBOOST),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_TITLEBOOST, "0"));
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_CONTENTBOOST),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_CONTENTBOOST, "0"));
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST, "0"));
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST, "0"));
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_POOLSIZE),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_POOLSIZE,
+             String.valueOf(BUILDER_DEFAULT_POOL_SIZE)));
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION,
+             String.valueOf(BUILDER_DEFAULT_CONNECTION_TIMEOUT)));
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET),
+         attributeOrDefault(sn, SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET,
+             String.valueOf(BUILDER_DEFAULT_SOCKET_TIMEOUT)));
+
+     // The collection only falls back when the attribute is absent; an
+     // empty name is passed through unchanged.
+     String collection = sn
+         .getAttributeValue(SearchBloxConfig.ATTRIBUTE_COLLECTION_NAME);
+     if (collection == null) {
+       collection = DEFAULT_COLLECTION;
+     }
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_COLLECTION_NAME),
+         collection);
+
+     paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT),
+         resolveIndexFormat(sn
+             .getAttributeValue(SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT)));
+
+     // Only the first configuration node is considered.
+     return;
+   }
+
+   // No configuration node: expose the defaults.
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_TITLEBOOST), 0);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_CONTENTBOOST), 0);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST), 0);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST), 0);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_POOLSIZE),
+       BUILDER_DEFAULT_POOL_SIZE);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION),
+       BUILDER_DEFAULT_CONNECTION_TIMEOUT);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET),
+       BUILDER_DEFAULT_SOCKET_TIMEOUT);
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT),
+       IndexingFormat.XML.name());
+   paramMap.put(templateKey(SearchBloxConfig.ATTRIBUTE_COLLECTION_NAME),
+       "");
+
+ }
+
+ /** Upper-case an attribute name into its template key, independent of the default locale. */
+ private static String templateKey(String attribute) {
+   return attribute.toUpperCase(Locale.ROOT);
+ }
+
+ /** Return the node's attribute value, or the fallback when it is null or empty. */
+ private static String attributeOrDefault(SpecificationNode node,
+     String attribute, String fallback) {
+   String value = node.getAttributeValue(attribute);
+   return (value == null || value.isEmpty()) ? fallback : value;
+ }
+
+ /**
+  * Map a stored indexing format onto an IndexingFormat name, falling back to
+  * XML when the value is missing or does not name a known format.
+  * Enum.valueOf never returns null; it throws for unknown names, so the
+  * fallback has to be implemented by catching that exception.
+  */
+ private static String resolveIndexFormat(String raw) {
+   if (raw == null || raw.isEmpty()) {
+     return IndexingFormat.XML.name();
+   }
+   try {
+     return IndexingFormat.valueOf(raw.toUpperCase(Locale.ROOT)).name();
+   } catch (IllegalArgumentException e) {
+     Logging.connectors.warn("Unknown SearchBlox indexing format '" + raw
+         + "', falling back to " + IndexingFormat.XML.name());
+     return IndexingFormat.XML.name();
+   }
+ }
+
+ /**
+ * View specification. This method is called in the body section of a job's
+ * view page. Its purpose is to present the pipeline specification
+ * information to the user. The coder can presume that the HTML that is
+ * output from this configuration will be within appropriate <html> and
+ * <body> tags.
+ *
+ * @param out
+ * is the output to which any HTML should be sent.
+ * @param locale
+ * is the preferred local of the output.
+ * @param connectionSequenceNumber
+ * is the unique number of this connection within the job.
+ * @param os
+ * is the current pipeline specification for this job.
+ */
+ @Override
+ public void viewSpecification(IHTTPOutput out, Locale locale,
+     Specification os, int connectionSequenceNumber)
+     throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   final String sequence = Integer.toString(connectionSequenceNumber);
+   velocityContext.put("SEQNUM", sequence);
+
+   // Add the job settings (or their defaults) for display.
+   fillInConfigurationSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML,
+       velocityContext);
+ }
+
+ /**
+ * Output the specification body section. This method is called in the body
+ * section of a job page which has selected a pipeline connection of the
+ * current type. Its purpose is to present the required form elements for
+ * editing. The coder can presume that the HTML that is output from this
+ * configuration will be within appropriate <html>, <body>, and <form> tags.
+ * The name of the form is "editjob".
+ *
+ * @param out
+ * is the output to which any HTML should be sent.
+ * @param locale
+ * is the preferred local of the output.
+ * @param os
+ * is the current pipeline specification for this job.
+ * @param connectionSequenceNumber
+ * is the unique number of this connection within the job.
+ * @param actualSequenceNumber
+ * is the connection within the job that has currently been
+ * selected.
+ * @param tabName
+ * is the current tab name.
+ */
+ @Override
+ public void outputSpecificationBody(IHTTPOutput out, Locale locale,
+     Specification os, int connectionSequenceNumber,
+     int actualSequenceNumber, String tabName)
+     throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+
+   // Tab and sequence information the edit template relies on.
+   final String sequence = Integer.toString(connectionSequenceNumber);
+   final String selected = Integer.toString(actualSequenceNumber);
+   velocityContext.put("TABNAME", tabName);
+   velocityContext.put("SEQNUM", sequence);
+   velocityContext.put("SELECTEDNUM", selected);
+
+   // Job settings (or their defaults) shown in the form.
+   fillInConfigurationSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale,
+       EDIT_SPECIFICATION_CONFIGURATION_HTML, velocityContext);
+ }
+
+ /**
+ * Process a specification post. This method is called at the start of job's
+ * edit or view page, whenever there is a possibility that form data for a
+ * connection has been posted. Its purpose is to gather form information and
+ * modify the transformation specification accordingly. The name of the
+ * posted form is "editjob".
+ *
+ * @param variableContext
+ * contains the post data, including binary file-upload
+ * information.
+ * @param locale
+ * is the preferred local of the output.
+ * @param os
+ * is the current pipeline specification for this job.
+ * @param connectionSequenceNumber
+ * is the unique number of this connection within the job.
+ * @return null if all is well, or a string error message if there is an
+ * error that should prevent saving of the job (and cause a
+ * redirection to an error page).
+ */
+ @Override
+ public String processSpecificationPost(IPostParameters variableContext,
+     Locale locale, Specification os, int connectionSequenceNumber)
+     throws ManifoldCFException {
+   // Form fields are named "s<sequence>_<attribute>".
+   String seqPrefix = "s" + connectionSequenceNumber + "_";
+   String titleBoost = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_TITLEBOOST);
+   String contentBoost = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_CONTENTBOOST);
+   String keywordsBoost = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST);
+   String descriptionBoost = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST);
+   String poolSize = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_POOLSIZE);
+   String timeoutConnection = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION);
+   String timeoutSocket = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET);
+   String collection = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_COLLECTION_NAME);
+   String indexFormat = variableContext.getParameter(seqPrefix
+       + SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT);
+
+   // This method is also called when no form data for this connection was
+   // posted. In that case keep the stored configuration instead of
+   // replacing it with an empty node.
+   if (titleBoost == null && contentBoost == null && keywordsBoost == null
+       && descriptionBoost == null && poolSize == null
+       && timeoutConnection == null && timeoutSocket == null
+       && collection == null && indexFormat == null) {
+     return null;
+   }
+
+   // About to gather the configuration values, so get rid of the old one.
+   // The child count shrinks with every removal, so it is re-read on each
+   // pass; a cached count would index past the end of the list.
+   int i = 0;
+   while (i < os.getChildCount()) {
+     SpecificationNode existing = os.getChild(i);
+     if (existing.getType().equals(SearchBloxConfig.NODE_CONFIGURATION)) {
+       os.removeChild(i);
+     } else {
+       i++;
+     }
+   }
+
+   SpecificationNode node = new SpecificationNode(
+       SearchBloxConfig.NODE_CONFIGURATION);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_TITLEBOOST, titleBoost);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_CONTENTBOOST, contentBoost);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST, keywordsBoost);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST, descriptionBoost);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_POOLSIZE, poolSize);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION,
+       timeoutConnection);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET,
+       timeoutSocket);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_COLLECTION_NAME,
+       collection);
+   node.setAttribute(SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT, indexFormat);
+   os.addChild(os.getChildCount(), node);
+
+   return null;
+
+ }
+
+
+ /**
+  * Converts the job settings between their three representations: the
+  * configuration node of a Specification, the packed "key=value,key=value"
+  * string used as output description, and the argument map handed to the
+  * client code.
+  */
+ protected class SpecPacker {
+   /** Arguments, from configuration */
+   private final Multimap<String, String> args = HashMultimap.create();
+
+   /**
+    * Unpack a string produced by toPackedString(). Parts that do not split
+    * into exactly one key and one value around "=" are ignored.
+    *
+    * @param outputDescription the packed arguments.
+    */
+   public SpecPacker(String outputDescription) {
+     String[] parts = outputDescription.split(",");
+     for(String part : parts) {
+       String[] keyValue = part.split("=");
+       if(keyValue.length != 2) {
+         continue;
+       }
+
+       args.put(keyValue[0], keyValue[1]);
+     }
+   }
+
+   /**
+    * Read the arguments from every configuration node of a specification.
+    *
+    * @param spec the job specification.
+    */
+   public SpecPacker(Specification spec) {
+     // Process arguments
+     for (int i = 0; i < spec.getChildCount(); i++)
+     {
+       SpecificationNode node = spec.getChild(i);
+       if (node.getType().equals(SearchBloxConfig.NODE_CONFIGURATION))
+       {
+         String titleBoost = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_TITLEBOOST);
+         String contentBoost = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_CONTENTBOOST);
+         String keywordsBoost = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST);
+         String descriptionBoost = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST);
+         String collection = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_COLLECTION_NAME);
+         String poolSize = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_POOLSIZE);
+         String connectTimeout = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION);
+         String socketTimeout = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET);
+         String indexingFormat = node.getAttributeValue(SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT);
+         args.put(SearchBloxConfig.ATTRIBUTE_TITLEBOOST, titleBoost);
+         args.put(SearchBloxConfig.ATTRIBUTE_CONTENTBOOST, contentBoost);
+         args.put(SearchBloxConfig.ATTRIBUTE_KEYWORDSBOOST, keywordsBoost);
+         // The description boost belongs under its own key; it used to be
+         // filed under the collection attribute name.
+         args.put(SearchBloxConfig.ATTRIBUTE_DESCRIPTIONBOOST, descriptionBoost);
+         // The collection is keyed by the name the document code uses.
+         args.put(SearchBloxDocument.SEARCHBLOX_COLLECTION, collection);
+         args.put(SearchBloxConfig.ATTRIBUTE_POOLSIZE, poolSize);
+         args.put(SearchBloxConfig.ATTRIBUTE_TIMEOUT_CONNECTION, connectTimeout);
+         args.put(SearchBloxConfig.ATTRIBUTE_TIMEOUT_SOCKET, socketTimeout);
+         args.put(SearchBloxConfig.ATTRIBUTE_INDEX_FORMAT, indexingFormat);
+
+       }
+     }
+
+   }
+
+   /**
+    * Pack the arguments as comma separated "key=value" pairs, using the
+    * first value of each key.
+    *
+    * @return the packed string, or "" when there are no arguments.
+    */
+   public String toPackedString() {
+     StringBuilder sb = new StringBuilder();
+     for (Map.Entry<String, List<String>> entry : getArgs().entrySet()) {
+       if (sb.length() > 0) {
+         sb.append(",");
+       }
+       sb.append(entry.getKey()).append("=").append(entry.getValue().get(0));
+     }
+     return sb.toString();
+   }
+
+   /**
+    * Expose the arguments as a map from key to the list of its values.
+    *
+    * @return a new map; every key maps to a new, non-empty list.
+    */
+   public Map<String,List<String>> getArgs() {
+     Map<String,List<String>> result = Maps.newHashMap();
+     for(String s : args.keySet()) {
+       // A HashMultimap hands out its values as a Set, so always copy
+       // them into a list.
+       List<String> l = Lists.newArrayList(args.get(s));
+       result.put(s, l);
+     }
+     return result;
+   }
+ }
+}
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java Thu Mar 12 17:41:48 2015
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.output.searchblox;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.crawler.system.Logging;
+import org.jsoup.Jsoup;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+/**
+ * "Package" class modeling a SearchBox document as a POJO
+ *
+ * @author Rafa Haro <rh...@apache.org>
+ * @author Antonio David Perez Morales <ad...@apache.org>
+ */
+public class SearchBloxDocument {
+
+ static final String API_KEY = "apikey";
+ static final String SEARCHBLOX_COLLECTION = "collection";
+ static final String DATE_FORMAT = "dd MMMM yyyy HH:mm:ss z";
+
+ public enum IndexingFormat {
+ JSON, XML
+ }
+
+ public enum DocumentAction {
+ ADD_UPDATE, DELETE, STATUS, CREATE, CLEAR
+ }
+ static final List<String> xmlElements= Lists.newArrayList("searchblox","document","url","title","keywords","content","description","lastmodified","size",
+ "alpha","contenttype","category","meta","acl","uid");
+
+ static final String COLNAME_ATTRIBUTE = "colname";
+ static final String APIKEY_ATTRIBUTE = "apikey";
+ static final String NAME_ATTRIBUTE = "name";
+ static final String LOCATION_ATTRIBUTE = "location";
+ static final String BOOST_ATTRIBUTE = "boost";
+ static final String ACL_TYPE_ATTRIBUTE = "boost";
+
+ private Multimap<String, Object> data_fields = HashMultimap.create();
+
+ /**
+ * API key accessible in the SearchBlox Admin Console.
+ */
+ String apiKey;
+
+ /**
+ * Name of the Custom collection
+ */
+ String colName;
+
+ /**
+ * unique identifer for a document (default when unassigned is url location)
+ */
+ String uid;
+
+ public SearchBloxDocument(String apikey) {
+ this.apiKey = apikey;
+ }
+
+ public SearchBloxDocument(String apikey, String documentURI,
+ RepositoryDocument rd, Map<String, List<String>> args) {
+ this(apikey);
+ SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+
+ this.uid = documentURI;
+ this.colName = args.get(SEARCHBLOX_COLLECTION).get(0);
+
+ data_fields.put(xmlElements.get(8), "" + rd.getBinaryLength());
+ Date date = rd.getModifiedDate();
+ if(date!=null){
+ data_fields.put(xmlElements.get(7),
+ dateFormat.format(rd.getModifiedDate()));
+ }
+
+ // content
+ String content = "";
+ try {
+ if (rd.getField(xmlElements.get(5)) != null)
+ content = (String) rd.getField(xmlElements.get(5))[0];
+ else
+ content = this.buildString(rd.getBinaryStream());
+ } catch (IOException e) {
+ Logging.connectors
+ .error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer",
+ e);
+ }
+ data_fields.put(xmlElements.get(5), this.clean(content));
+
+ // Content Type
+ data_fields.put(xmlElements.get(10), rd.getMimeType());
+
+ // Boosting
+ for(String boostId:args.keySet()){
+ if(boostId.endsWith("_boost")){
+ List<String> argBoost = args.get(boostId);
+ if(argBoost!=null && !argBoost.isEmpty())
+ data_fields.put(boostId,argBoost.get(0));
+ }
+ }
+
+ // Metadata
+ Multimap<String, String> metadata = HashMultimap.create();
+ Iterator<String> it = rd.getFields();
+ while (it.hasNext()) {
+ String name = it.next();
+ try {
+ String[] values = rd.getFieldAsStrings(name);
+ for (String value : values) {
+ String key = name.toLowerCase();
+ if (xmlElements.contains(key)) {
+ data_fields.put(key, value);
+ } else
+ metadata.put(name, value);
+ }
+ } catch (IOException e) {
+ Logging.connectors.error(
+ "[Getting Field Values]Impossible to read value for metadata "
+ + name, e);
+ }
+ }
+ data_fields.put(xmlElements.get(12), metadata);
+
+ // ACLS
+ Multimap<String, String> acls = HashMultimap.create();
+ Iterator<String> aclTypes = rd.securityTypesIterator();
+ while (aclTypes.hasNext()) {
+ String aclType = aclTypes.next();
+ String[] tokens = rd.getSecurityACL(aclType);
+ for (String token : tokens)
+ acls.put(aclType, token);
+ tokens = rd.getSecurityDenyACL(aclType);
+ for (String token : tokens)
+ acls.put(aclType, token);
+ }
+ data_fields.put(xmlElements.get(13), acls);
+ }
+
+ /**
+ * Clean a String from html tags or break lines
+ * @param content
+ * @return
+ */
+ private String clean(String content) {
+ content = content.replaceAll("(\r\n|\n)", " ");
+ String cleanContent= Jsoup.parseBodyFragment(content).text();
+ return cleanContent;
+ }
+
+ private String buildString(InputStream binaryStream) throws IOException {
+ StringWriter writer = new StringWriter();
+ IOUtils.copy(binaryStream, writer, "UTF-8");
+ return writer.toString();
+ }
+
+ public String toString(IndexingFormat format, DocumentAction action)
+ throws SearchBloxException {
+ Document doc = null;
+ try {
+ doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+ .newDocument();
+
+ } catch (ParserConfigurationException e) {
+ throw new SearchBloxException(e);
+ }
+
+ // Document Base Data
+ Element root = doc.createElement(xmlElements.get(0));
+ if (apiKey == null)
+ throw new SearchBloxException(
+ "The API Key for accessing SearchBlox Server CAN'T be NULL");
+ root.setAttribute(APIKEY_ATTRIBUTE, apiKey);
+ doc.appendChild(root);
+ Element document = doc.createElement(xmlElements.get(1));
+ if (colName == null)
+ throw new SearchBloxException(
+ "The Collection Name of the SearchBlox Server CAN'T be NULL");
+ document.setAttribute(COLNAME_ATTRIBUTE, colName);
+ root.appendChild(document);
+
+ // Uid
+ if (uid != null && !uid.isEmpty()) {
+ Element uidElement = doc.createElement(xmlElements.get(14));
+ uidElement.setTextContent(uid);
+ document.appendChild(uidElement);
+ }
+
+ if (action == DocumentAction.ADD_UPDATE) {
+ // Location
+ Collection<Object> location = data_fields.get(LOCATION_ATTRIBUTE);
+ if (location != null && !location.isEmpty()) {
+ document.setAttribute(LOCATION_ATTRIBUTE, (String) location.iterator().next());
+ }else{
+ document.setAttribute(LOCATION_ATTRIBUTE, "");
+ }
+
+ for(String element:xmlElements){
+ if (!element.equals(xmlElements.get(12)) && !element.equals(xmlElements.get(13)) ) {
+ Collection<Object> values = data_fields.get(element);
+ if (values!=null && values.size()>0) {
+ Object next = values.iterator()
+ .next();
+ String value =(String) next;
+ if (value != null && !value.isEmpty()) {
+ Element eValue = doc.createElement(element);
+ if(element.equals("keywords"))
+ eValue.setTextContent(StringUtils.join(values, ','));
+ else
+ eValue.setTextContent(value);
+ Collection<Object> boostElement = data_fields
+ .get(element + "_boost");
+ if(boostElement!=null && boostElement.size()>0){
+ String value_boost = (String) boostElement.iterator()
+ .next();
+ eValue.setAttribute(BOOST_ATTRIBUTE, "" + value_boost);
+ }
+ document.appendChild(eValue);
+ }
+ }
+ }
+ }
+
+
+ // Metadata
+ Collection<Object> metadataSet = data_fields
+ .get(xmlElements.get(12));
+ if(metadataSet!=null && metadataSet.size()>0){
+ @SuppressWarnings("unchecked")
+ Multimap<String, String> metadata = (Multimap<String, String>) metadataSet.iterator().next();
+ if (metadata != null && !metadata.isEmpty()) {
+ for (String name : metadata.keySet())
+ for (String value : metadata.get(name)) {
+ Element metaElement = doc.createElement(xmlElements.get(12));
+ metaElement.setAttribute(NAME_ATTRIBUTE, name);
+ metaElement.setTextContent(value);
+ document.appendChild(metaElement);
+ }
+ } }
+
+ // ACL
+ Collection<Object> aclSet = data_fields
+ .get(xmlElements.get(13));
+ if(aclSet!=null &&aclSet.size()>0){
+ @SuppressWarnings("unchecked")
+ Multimap<String, String> acls = (Multimap<String, String>) aclSet.iterator().next();
+ if (acls != null && !acls.isEmpty()) {
+ for (String type : acls.keySet())
+ for (String value : acls.get(type)) {
+ Element aclElement = doc.createElement(xmlElements.get(13));
+ aclElement.setAttribute(ACL_TYPE_ATTRIBUTE, type);
+ aclElement.setTextContent(value);
+ document.appendChild(aclElement);
+ }
+ } }
+ }
+
+ return getStringFromDocument(doc);
+
+ }
+
+ /**
+ * <p>Transform a {@code Document} to its XML string representation</p>
+ * @param doc the document to transform
+ * @return the document in the XML-String format
+ */
+ private String getStringFromDocument(Document doc) {
+ try {
+ DOMSource domSource = new DOMSource(doc);
+ StringWriter writer = new StringWriter();
+ StreamResult result = new StreamResult(writer);
+ TransformerFactory tf = TransformerFactory.newInstance();
+ Transformer transformer = tf.newTransformer();
+// transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
+ transformer.transform(domSource, result);
+ return writer.toString();
+ } catch (TransformerException ex) {
+ ex.printStackTrace();
+ return null;
+ }
+
+ }
+}
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java Thu Mar 12 17:41:48 2015
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.output.searchblox;
+
+
+/**
+ * @author Rafa Haro <rh...@apache.org>
+ */
+public class SearchBloxException
+ extends Exception {
+
+ private static final long serialVersionUID = -6792055510634993398L;
+
+ public SearchBloxException(String reason, Throwable cause) {
+ super(reason, cause);
+ }
+
+ public SearchBloxException(String reason) {
+ super(reason);
+ }
+
+ public SearchBloxException(Throwable cause) {
+ super(cause);
+ }
+}
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/searchblox/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/searchblox/common_en_US.properties?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/searchblox/common_en_US.properties (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/searchblox/common_en_US.properties Thu Mar 12 17:41:48 2015
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SearchBloxConnector.Parameters=SearchBlox Settings
+SearchBloxConnector.Configuration=SearchBlox
+
+SearchBloxConnector.ApiKey=Api Key
+SearchBloxConnector.Endpoint=Endpoint
+
+
+SearchBloxConnector.PleaseSupplyValidEndpoint=Endpoint can't be empty. Please supply a valid endpoint
+SearchBloxConnector.PleaseSupplyValidApiKey=Api Key can't be empty. Please supply a valid api key
+
+SearchBloxConnector.IndexFormat=Indexing format
+SearchBloxConnector.JSON=JSON
+SearchBloxConnector.XML=XML
+
+SearchBloxConnector.TitleBoost=Title boost
+SearchBloxConnector.ContentBoost=Content Boost
+SearchBloxConnector.KeywordsBoost=Keyword Boost
+SearchBloxConnector.DescriptionBoost=Description Boost
+
+SearchBloxConnector.PoolSize=Pool Size
+SearchBloxConnector.TimeoutConnection=Timeout Connection
+SearchBloxConnector.TimeoutSocket=Timeout Socket
+SearchBloxConnector.Collection=Collection Name
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration.js
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration.js?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration.js (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration.js Thu Mar 12 17:41:48 2015
@@ -0,0 +1,61 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+function checkConfig() {
+ if (editconnection.endpoint) {
+ if (editconnection.endpoint.value == "") {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('SearchBloxConnector.PleaseSupplyValidEndpoint'))");
+ editconnection.endpoint.focus();
+ return false;
+ }
+ }
+
+ if (editconnection.apikey) {
+ if (editconnection.apikey.value == "") {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('SearchBloxConnector.PleaseSupplyValidApiKey'))");
+ editconnection.apikey.focus();
+ return false;
+ }
+ }
+
+ return true;
+}
+
+function checkConfigForSave() {
+ if (editconnection.endpoint) {
+ if (editconnection.endpoint.value == "") {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('SearchBloxConnector.PleaseSupplyValidEndpoint'))");
+ SelectTab("$Encoder.javascriptBodyEscape($ResourceBundle.getString('SearchBloxConnector.Parameters'))");
+ editconnection.serverlocation.focus();
+ return false;
+ }
+ }
+ if (editconnection.apikey) {
+ if (editconnection.apikey.value == "") {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('SearchBloxConnector.PleaseSupplyValidApiKey'))");
+ SelectTab("$Encoder.javascriptBodyEscape($ResourceBundle.getString('SearchBloxConnector.Parameters'))");
+ editconnection.apikey.focus();
+ return false;
+ }
+ }
+
+ return true;
+}
+//-->
+</script>
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration_Parameters.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration_Parameters.html?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration_Parameters.html (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editConfiguration_Parameters.html Thu Mar 12 17:41:48 2015
@@ -0,0 +1,42 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('SearchBloxConnector.Parameters'))
+
+<table class="displaytable">
+ <tr>
+ <td class="description">
+ $Encoder.bodyEscape($ResourceBundle.getString('SearchBloxConnector.ApiKey'))
+ </td>
+ <td class="value"><input name="apikey" type="text"
+ value="$Encoder.attributeEscape($APIKEY)" size="48" />
+ </td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('SearchBloxConnector.Endpoint'))</nobr></td>
+ <td class="value"><input name="endpoint" type="text" value="$Encoder.attributeEscape($ENDPOINT)"
+ size="24" /></td>
+ </tr>
+</table>
+
+#else
+
+<input type="hidden" name="apikey" value="$Encoder.attributeEscape($APIKEY)" />
+<input type="hidden" name="endpoint" value="$Encoder.attributeEscape($ENDPOINT)" />
+
+
+#end
Added: manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editSpecification.js
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editSpecification.js?rev=1666263&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editSpecification.js (added)
+++ manifoldcf/branches/CONNECTORS-1168/connectors/searchblox/connector/src/main/resources/org/apache/manifoldcf/agents/output/searchblox/editSpecification.js Thu Mar 12 17:41:48 2015
@@ -0,0 +1,23 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+
+
+//-->
+</script>