You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/05/16 13:34:15 UTC

svn commit: r1679730 [2/2] - in /manifoldcf/trunk: ./ connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/

Modified: manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java?rev=1679730&r1=1679729&r2=1679730&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java (original)
+++ manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxDocument.java Sat May 16 11:34:14 2015
@@ -51,316 +51,316 @@ import java.util.*;
  */
 public class SearchBloxDocument {
 
-	static final String API_KEY = "apikey";
-	static final String SEARCHBLOX_COLLECTION = "collection";
-	static final String DATE_FORMAT = "dd MMMM yyyy HH:mm:ss z";
-
-	public enum IndexingFormat {
-		JSON, XML
-	}
-
-	public enum DocumentAction {
-		ADD_UPDATE, DELETE, STATUS, CREATE, CLEAR
-	}
-	static final List<String> xmlElements= Lists.newArrayList("searchblox","document","url","title","keywords","content","description","lastmodified","size",
-			"alpha","contenttype","category","meta","uid");
-
-	static final String COLNAME_ATTRIBUTE = "colname";
-	static final String APIKEY_ATTRIBUTE = "apikey";
-	static final String NAME_ATTRIBUTE = "name";
-	static final String UID_ATTRIBUTE = "uid";
-	static final String BOOST_ATTRIBUTE = "boost";
-
-	private Multimap<String, Object> data_fields = HashMultimap.create();
-
-	/**
-	 * API key accessible in the SearchBlox Admin Console.
-	 */
-	String apiKey;
-
-	/**
-	 * Name of the Custom collection
-	 */
-	String colName;
-
-	/**
-	 * unique identifer for a document (default when unassigned is url location)
-	 */
-	String uid;
-
-	public SearchBloxDocument(String apikey) {
-		this.apiKey = apikey;
-	}
-
-	public SearchBloxDocument(String apikey, String documentURI,
-			RepositoryDocument rd, Map<String, List<String>> args) {
-		this(apikey);
-		SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
-
-		this.uid = documentURI;
-		this.colName = args.get(SEARCHBLOX_COLLECTION).get(0);
-
-		Date date = rd.getModifiedDate();
-		if(date!=null){
-			data_fields.put(xmlElements.get(7),
-					dateFormat.format(rd.getModifiedDate()));
-		}	
-
-		// content
-		String content = "";
-		try {
-			if (rd.getField(xmlElements.get(5)) != null)
-				content = (String) rd.getField(xmlElements.get(5))[0];
-			else
-				content = this.buildString(rd.getBinaryStream());
-		} catch (IOException e) {
-			Logging.connectors
-			.error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer",
-					e);
-		}
-		data_fields.put(xmlElements.get(5), this.clean(content));
-
-		// Content Type
-		data_fields.put(xmlElements.get(10), rd.getMimeType());
-
-		// Size
-		data_fields.put(xmlElements.get(8), "" + rd.getBinaryLength());
-
-		// Boosting
-		for(String boostId:args.keySet()){
-			if(boostId.endsWith("_boost")){
-				List<String> argBoost = args.get(boostId);
-				if(argBoost!=null && !argBoost.isEmpty())
-					data_fields.put(boostId,argBoost.get(0));
-			}
-		}
-
-		// Metadata
-		Multimap<String, String> metadata = HashMultimap.create();
-		Iterator<String> it = rd.getFields();
-		while (it.hasNext()) {
-			String name = it.next();
-			try {
-				String[] values = rd.getFieldAsStrings(name);
-				for (String value : values) {
-					String key = name.toLowerCase();
-					int indexOf = xmlElements.indexOf(key);
-					if(indexOf != 5)
-                    if (indexOf != -1 &&
-							indexOf != 0 &&
-							indexOf != 7 &&
-							indexOf != 8) {
-						data_fields.put(key, value);
-					} else
-						metadata.put(name, value);
-				}
-			} catch (IOException e) {
-				Logging.connectors.error(
-						"[Getting Field Values]Impossible to read value for metadata "
-								+ name, e);
-			}
-		}
-
-		// ACLS must be stored as metadata, as Searchblox use that construct to index custom named fields
-		//the approach has been implemented and tested live
-		Iterator<String> aclTypes = rd.securityTypesIterator();
-		while (aclTypes.hasNext()) {
-			String aclType = aclTypes.next();
-			String[] allow_tokens = rd.getSecurityACL(aclType);
-			for (String token : allow_tokens)
-				metadata.put(aclType+"_allow", token);
-			String[] deny_tokens = rd.getSecurityDenyACL(aclType);
-			for (String token : deny_tokens)
-				metadata.put(aclType+"_deny", token);
-		}
-		data_fields.put(xmlElements.get(12), metadata);
-	}
-
-	/**
-	 * Clean a String from html tags or  break lines
-	 * @param content
-	 * @return
-	 */
-	private String clean(String content) {
-		content = content.replaceAll("(\r\n|\n)", " ");
-		String cleanContent= Jsoup.parseBodyFragment(content).text();
-		return cleanContent;
-	}
-
-	private String buildString(InputStream binaryStream) throws IOException {
-		StringWriter writer = new StringWriter();
-		IOUtils.copy(binaryStream, writer, "UTF-8");
-		return writer.toString();
-	}
-
-	public String toString(IndexingFormat format, DocumentAction action)
-			throws SearchBloxException {
-		if(format == IndexingFormat.XML)
-			return toStringXML(action);
-		else
-			return toStringJSON(action);
-	}
-
-	private String toStringJSON(DocumentAction action) throws SearchBloxException {
-		JSONObject result = new JSONObject();
-		if (apiKey == null)
-			throw new SearchBloxException(
-					"The API Key for accessing SearchBlox Server CAN'T be NULL");
-		try {
-			result.put(APIKEY_ATTRIBUTE, apiKey);
-
-			JSONObject document = new JSONObject();
-			if (colName == null)
-				throw new SearchBloxException(
-						"The Collection Name of the SearchBlox Server CAN'T be NULL");
-			document.put(COLNAME_ATTRIBUTE, colName);
-			document.put(UID_ATTRIBUTE, uid);
-
-			if(action == DocumentAction.ADD_UPDATE){
-				for(String element:xmlElements){
-					if (!element.equals(xmlElements.get(12))) {
-						Collection<Object> values = data_fields.get(element);
-						if (values!=null && values.size()>0) {
-							Object next = values.iterator()
-									.next();
-							String value =(String) next;
-							if (value != null && !value.isEmpty()) {
-								if(element.equals("keywords"))
-									document.put(element, StringUtils.join(values, ','));
-								else
-									document.put(element, value);
-//								}
-								
-							}
-						}
-					}
-				}
-
-				// Metadata
-				Collection<Object> metadataSet = data_fields
-						.get(xmlElements.get(12));
-				JSONObject metaObject = new JSONObject();
-				if(metadataSet!=null && metadataSet.size()>0){
-					Multimap<String, String> metadata = (Multimap<String, String>) metadataSet.iterator().next();
-					if (metadata != null && !metadata.isEmpty()) {
-						for (String name : metadata.keySet()){
-							JSONArray nextMetadata = new JSONArray();
-							for (String value : metadata.get(name)) {
-								nextMetadata.put(value);
-							}
-							metaObject.put(name, nextMetadata);
-						}
-					}  
-				}
-				document.put(xmlElements.get(12), metaObject);
-			}
-
-			result.put(xmlElements.get(1), document);
-
-		} catch (JSONException e) {
-			throw new SearchBloxException("Error while building Document JSON object", e);
-		}
-		return result.toString();
-	}
-
-	private String toStringXML(DocumentAction action) throws SearchBloxException{
-		Document doc = null;
-		try {
-			doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-					.newDocument();
-
-		} catch (ParserConfigurationException e) {
-			throw new SearchBloxException(e);
-		}
-
-		// Document Base Data
-		Element root = doc.createElement(xmlElements.get(0));
-		if (apiKey == null)
-			throw new SearchBloxException(
-					"The API Key for accessing SearchBlox Server CAN'T be NULL");
-		root.setAttribute(APIKEY_ATTRIBUTE, apiKey);
-		doc.appendChild(root);
-		Element document = doc.createElement(xmlElements.get(1));
-		if (colName == null)
-			throw new SearchBloxException(
-					"The Collection Name of the SearchBlox Server CAN'T be NULL");
-		document.setAttribute(COLNAME_ATTRIBUTE, colName);
-		if(action == DocumentAction.DELETE)
-			document.setAttribute(UID_ATTRIBUTE,uid);
-		root.appendChild(document);
-
-		if (action == DocumentAction.ADD_UPDATE) {
-			// Uid
-			if (uid != null && !uid.isEmpty()) {
-				Element uidElement = doc.createElement(xmlElements.get(13));
-				uidElement.setTextContent(uid);
-				document.appendChild(uidElement);
-			}
-
-			for(String element:xmlElements){
-				if (!element.equals(xmlElements.get(12))) {
-					Collection<Object> values = data_fields.get(element);
-					if (values!=null && values.size()>0) {
-						Object next = values.iterator()
-								.next();
-						String value =(String) next;
-						if (value != null && !value.isEmpty()) {
-							Element eValue = doc.createElement(element);
-							if(element.equals("keywords"))
-								eValue.setTextContent(StringUtils.join(values, ','));
-							else
-								eValue.setTextContent(value);
-							Collection<Object> boostElement = data_fields
-									.get(element + "_boost");
-							if(boostElement!=null && boostElement.size()>0){
-								String value_boost = (String) boostElement.iterator()
-										.next();
-								eValue.setAttribute(BOOST_ATTRIBUTE, "" + value_boost);
-							}
-							document.appendChild(eValue);
-						}
-					}
-				}
-			}
-
-			// Metadata
-			Collection<Object> metadataSet = data_fields
-					.get(xmlElements.get(12));
-			if(metadataSet!=null && metadataSet.size()>0){
-				Multimap<String, String> metadata = (Multimap<String, String>) metadataSet.iterator().next();
-				if (metadata != null && !metadata.isEmpty()) {
-					for (String name : metadata.keySet())
-						for (String value : metadata.get(name)) {
-							Element metaElement = doc.createElement(xmlElements.get(12));
-							metaElement.setAttribute(NAME_ATTRIBUTE, name);
-							metaElement.setTextContent(value);
-							document.appendChild(metaElement);
-						}
-				}  }
-		}
-
-		return getStringFromDocument(doc);
-	}
-
-	/**
-	 * <p>Transform a {@code Document} to its XML string representation</p>
-	 * @param doc the document to transform
-	 * @return the document in the XML-String format
-	 */
-	private String getStringFromDocument(Document doc) {
-		try {
-			DOMSource domSource = new DOMSource(doc);
-			StringWriter writer = new StringWriter();
-			StreamResult result = new StreamResult(writer);
-			TransformerFactory tf = TransformerFactory.newInstance();
-			Transformer transformer = tf.newTransformer();
-			//			transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
-			transformer.transform(domSource, result);
-			return writer.toString();
-		} catch (TransformerException ex) {
-			ex.printStackTrace();
-			return null;
-		}
+  static final String API_KEY = "apikey";
+  static final String SEARCHBLOX_COLLECTION = "collection";
+  static final String DATE_FORMAT = "dd MMMM yyyy HH:mm:ss z";
+
+  public enum IndexingFormat {
+    JSON, XML
+  }
+
+  public enum DocumentAction {
+    ADD_UPDATE, DELETE, STATUS, CREATE, CLEAR
+  }
+  static final List<String> xmlElements= Lists.newArrayList("searchblox","document","url","title","keywords","content","description","lastmodified","size",
+      "alpha","contenttype","category","meta","uid");
+
+  static final String COLNAME_ATTRIBUTE = "colname";
+  static final String APIKEY_ATTRIBUTE = "apikey";
+  static final String NAME_ATTRIBUTE = "name";
+  static final String UID_ATTRIBUTE = "uid";
+  static final String BOOST_ATTRIBUTE = "boost";
+
+  private Multimap<String, Object> data_fields = HashMultimap.create();
+
+  /**
+   * API key accessible in the SearchBlox Admin Console.
+   */
+  String apiKey;
+
+  /**
+   * Name of the Custom collection
+   */
+  String colName;
+
+  /**
+   * unique identifier for a document (default when unassigned is url location)
+   */
+  String uid;
+
+  public SearchBloxDocument(String apikey) {
+    this.apiKey = apikey;
+  }
+
+  public SearchBloxDocument(String apikey, String documentURI,
+      RepositoryDocument rd, Map<String, List<String>> args) {
+    this(apikey);
+    SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
+
+    this.uid = documentURI;
+    this.colName = args.get(SEARCHBLOX_COLLECTION).get(0);
+
+    Date date = rd.getModifiedDate();
+    if(date!=null){
+      data_fields.put(xmlElements.get(7),
+          dateFormat.format(rd.getModifiedDate()));
+    }  
+
+    // content
+    String content = "";
+    try {
+      if (rd.getField(xmlElements.get(5)) != null)
+        content = (String) rd.getField(xmlElements.get(5))[0];
+      else
+        content = this.buildString(rd.getBinaryStream());
+    } catch (IOException e) {
+      Logging.connectors
+      .error("[Parsing Content]Content is not text plain, verify you are properly using Apache Tika Transformer",
+          e);
+    }
+    data_fields.put(xmlElements.get(5), this.clean(content));
+
+    // Content Type
+    data_fields.put(xmlElements.get(10), rd.getMimeType());
+
+    // Size
+    data_fields.put(xmlElements.get(8), "" + rd.getBinaryLength());
+
+    // Boosting
+    for(String boostId:args.keySet()){
+      if(boostId.endsWith("_boost")){
+        List<String> argBoost = args.get(boostId);
+        if(argBoost!=null && !argBoost.isEmpty())
+          data_fields.put(boostId,argBoost.get(0));
+      }
+    }
+
+    // Metadata
+    Multimap<String, String> metadata = HashMultimap.create();
+    Iterator<String> it = rd.getFields();
+    while (it.hasNext()) {
+      String name = it.next();
+      try {
+        String[] values = rd.getFieldAsStrings(name);
+        for (String value : values) {
+          String key = name.toLowerCase();
+          int indexOf = xmlElements.indexOf(key);
+          if(indexOf != 5)
+            if (indexOf != -1 &&
+              indexOf != 0 &&
+              indexOf != 7 &&
+              indexOf != 8) {
+              data_fields.put(key, value);
+            } else
+              metadata.put(name, value);
+        }
+      } catch (IOException e) {
+        Logging.connectors.error(
+            "[Getting Field Values]Impossible to read value for metadata "
+                + name, e);
+      }
+    }
+
+    // ACLs must be stored as metadata, as SearchBlox uses that construct to index custom named fields.
+    // The approach has been implemented and tested live.
+    Iterator<String> aclTypes = rd.securityTypesIterator();
+    while (aclTypes.hasNext()) {
+      String aclType = aclTypes.next();
+      String[] allow_tokens = rd.getSecurityACL(aclType);
+      for (String token : allow_tokens)
+        metadata.put(aclType+"_allow", token);
+      String[] deny_tokens = rd.getSecurityDenyACL(aclType);
+      for (String token : deny_tokens)
+        metadata.put(aclType+"_deny", token);
+    }
+    data_fields.put(xmlElements.get(12), metadata);
+  }
+
+  /**
+   * Clean a String of HTML tags and line breaks
+   * @param content
+   * @return
+   */
+  private String clean(String content) {
+    content = content.replaceAll("(\r\n|\n)", " ");
+    String cleanContent= Jsoup.parseBodyFragment(content).text();
+    return cleanContent;
+  }
+
+  private String buildString(InputStream binaryStream) throws IOException {
+    StringWriter writer = new StringWriter();
+    IOUtils.copy(binaryStream, writer, "UTF-8");
+    return writer.toString();
+  }
+
+  public String toString(IndexingFormat format, DocumentAction action)
+      throws SearchBloxException {
+    if(format == IndexingFormat.XML)
+      return toStringXML(action);
+    else
+      return toStringJSON(action);
+  }
+
+  private String toStringJSON(DocumentAction action) throws SearchBloxException {
+    JSONObject result = new JSONObject();
+    if (apiKey == null)
+      throw new SearchBloxException(
+          "The API Key for accessing SearchBlox Server CAN'T be NULL");
+    try {
+      result.put(APIKEY_ATTRIBUTE, apiKey);
+
+      JSONObject document = new JSONObject();
+      if (colName == null)
+        throw new SearchBloxException(
+            "The Collection Name of the SearchBlox Server CAN'T be NULL");
+      document.put(COLNAME_ATTRIBUTE, colName);
+      document.put(UID_ATTRIBUTE, uid);
+
+      if(action == DocumentAction.ADD_UPDATE){
+        for(String element:xmlElements){
+          if (!element.equals(xmlElements.get(12))) {
+            Collection<Object> values = data_fields.get(element);
+            if (values!=null && values.size()>0) {
+              Object next = values.iterator()
+                  .next();
+              String value =(String) next;
+              if (value != null && !value.isEmpty()) {
+                if(element.equals("keywords"))
+                  document.put(element, StringUtils.join(values, ','));
+                else
+                  document.put(element, value);
+                
+              }
+            }
+          }
+        }
+
+        // Metadata
+        Collection<Object> metadataSet = data_fields
+            .get(xmlElements.get(12));
+        JSONObject metaObject = new JSONObject();
+        if(metadataSet!=null && metadataSet.size()>0){
+          Multimap<String, String> metadata = (Multimap<String, String>) metadataSet.iterator().next();
+          if (metadata != null && !metadata.isEmpty()) {
+            for (String name : metadata.keySet()){
+              JSONArray nextMetadata = new JSONArray();
+              for (String value : metadata.get(name)) {
+                nextMetadata.put(value);
+              }
+              metaObject.put(name, nextMetadata);
+            }
+          }  
+        }
+        document.put(xmlElements.get(12), metaObject);
+      }
+
+      result.put(xmlElements.get(1), document);
+
+    } catch (JSONException e) {
+      throw new SearchBloxException("Error while building Document JSON object", e);
+    }
+    return result.toString();
+  }
+
+  private String toStringXML(DocumentAction action) throws SearchBloxException{
+    Document doc = null;
+    try {
+      doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+          .newDocument();
+
+    } catch (ParserConfigurationException e) {
+      throw new SearchBloxException(e);
+    }
+
+    // Document Base Data
+    Element root = doc.createElement(xmlElements.get(0));
+    if (apiKey == null)
+      throw new SearchBloxException(
+          "The API Key for accessing SearchBlox Server CAN'T be NULL");
+    root.setAttribute(APIKEY_ATTRIBUTE, apiKey);
+    doc.appendChild(root);
+    Element document = doc.createElement(xmlElements.get(1));
+    if (colName == null)
+      throw new SearchBloxException(
+          "The Collection Name of the SearchBlox Server CAN'T be NULL");
+    document.setAttribute(COLNAME_ATTRIBUTE, colName);
+    if(action == DocumentAction.DELETE)
+      document.setAttribute(UID_ATTRIBUTE,uid);
+    root.appendChild(document);
+
+    if (action == DocumentAction.ADD_UPDATE) {
+      // Uid
+      if (uid != null && !uid.isEmpty()) {
+        Element uidElement = doc.createElement(xmlElements.get(13));
+        uidElement.setTextContent(uid);
+        document.appendChild(uidElement);
+      }
+
+      for(String element:xmlElements){
+        if (!element.equals(xmlElements.get(12))) {
+          Collection<Object> values = data_fields.get(element);
+          if (values!=null && values.size()>0) {
+            Object next = values.iterator()
+                .next();
+            String value =(String) next;
+            if (value != null && !value.isEmpty()) {
+              Element eValue = doc.createElement(element);
+              if(element.equals("keywords"))
+                eValue.setTextContent(StringUtils.join(values, ','));
+              else
+                eValue.setTextContent(value);
+              Collection<Object> boostElement = data_fields
+                  .get(element + "_boost");
+              if(boostElement!=null && boostElement.size()>0){
+                String value_boost = (String) boostElement.iterator()
+                    .next();
+                eValue.setAttribute(BOOST_ATTRIBUTE, "" + value_boost);
+              }
+              document.appendChild(eValue);
+            }
+          }
+        }
+      }
+
+      // Metadata
+      Collection<Object> metadataSet = data_fields
+          .get(xmlElements.get(12));
+      if(metadataSet!=null && metadataSet.size()>0){
+        Multimap<String, String> metadata = (Multimap<String, String>) metadataSet.iterator().next();
+        if (metadata != null && !metadata.isEmpty()) {
+          for (String name : metadata.keySet())
+            for (String value : metadata.get(name)) {
+              Element metaElement = doc.createElement(xmlElements.get(12));
+              metaElement.setAttribute(NAME_ATTRIBUTE, name);
+              metaElement.setTextContent(value);
+              document.appendChild(metaElement);
+            }
+        }
+      }
+    }
+
+    return getStringFromDocument(doc);
+  }
+
+  /**
+   * <p>Transform a {@code Document} to its XML string representation</p>
+   * @param doc the document to transform
+   * @return the document in the XML-String format
+   */
+  private String getStringFromDocument(Document doc) {
+    try {
+      DOMSource domSource = new DOMSource(doc);
+      StringWriter writer = new StringWriter();
+      StreamResult result = new StreamResult(writer);
+      TransformerFactory tf = TransformerFactory.newInstance();
+      Transformer transformer = tf.newTransformer();
+      //      transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
+      transformer.transform(domSource, result);
+      return writer.toString();
+    } catch (TransformerException ex) {
+      ex.printStackTrace();
+      return null;
+    }
 
-	}
+  }
 }

Modified: manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java?rev=1679730&r1=1679729&r2=1679730&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java (original)
+++ manifoldcf/trunk/connectors/searchblox/connector/src/main/java/org/apache/manifoldcf/agents/output/searchblox/SearchBloxException.java Sat May 16 11:34:14 2015
@@ -21,19 +21,19 @@ package org.apache.manifoldcf.agents.out
  * @author Rafa Haro <rh...@apache.org>
  */
 public class SearchBloxException
-        extends Exception {
+    extends Exception {
 
-    private static final long serialVersionUID = -6792055510634993398L;
+  private static final long serialVersionUID = -6792055510634993398L;
 
-    public SearchBloxException(String reason, Throwable cause) {
-        super(reason, cause);
-    }
+  public SearchBloxException(String reason, Throwable cause) {
+    super(reason, cause);
+  }
 
-    public SearchBloxException(String reason) {
-        super(reason);
-    }
+  public SearchBloxException(String reason) {
+    super(reason);
+  }
 
-    public SearchBloxException(Throwable cause) {
-        super(cause);
-    }
+  public SearchBloxException(Throwable cause) {
+    super(cause);
+  }
 }