You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/03/30 17:24:54 UTC
[25/73] [abbrv] lucene-solr:jira/solr-6203: SOLR-9221: Remove Solr
contribs: map-reduce, morphlines-core and morphlines-cell
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
deleted file mode 100644
index 6931f87..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.Locale;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
-import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.service.CompositeService;
-import org.apache.hadoop.util.Shell;
-import org.apache.hadoop.util.Shell.ShellCommandExecutor;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.api.ResourceTracker;
-import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
-import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
-import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
-import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
-import org.apache.hadoop.yarn.server.nodemanager.Context;
-import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
-import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
-import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
-import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
-import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-
-public class MiniYARNCluster extends CompositeService {
-
- private static final Log LOG = LogFactory.getLog(MiniYARNCluster.class);
-
- // temp fix until metrics system can auto-detect itself running in unit test:
- static {
- DefaultMetricsSystem.setMiniClusterMode(true);
- }
-
- private NodeManager[] nodeManagers;
- private ResourceManager resourceManager;
-
- private ResourceManagerWrapper resourceManagerWrapper;
-
- private File testWorkDir;
-
- // Number of nm-local-dirs per nodemanager
- private int numLocalDirs;
- // Number of nm-log-dirs per nodemanager
- private int numLogDirs;
-
- /**
- * @param testName name of the test
- * @param noOfNodeManagers the number of node managers in the cluster
- * @param numLocalDirs the number of nm-local-dirs per nodemanager
- * @param numLogDirs the number of nm-log-dirs per nodemanager
- */
- public MiniYARNCluster(String testName, int noOfNodeManagers,
- int numLocalDirs, int numLogDirs, File testWorkDir) {
- super(testName.replace("$", ""));
- this.numLocalDirs = numLocalDirs;
- this.numLogDirs = numLogDirs;
- String testSubDir = testName.replace("$", "");
- File targetWorkDir = new File(testWorkDir, testSubDir);
- try {
- FileContext.getLocalFSFileContext().delete(
- new Path(targetWorkDir.getAbsolutePath()), true);
- } catch (Exception e) {
- LOG.warn("COULD NOT CLEANUP", e);
- throw new YarnRuntimeException("could not cleanup test dir: "+ e, e);
- }
-
- if (Shell.WINDOWS) {
- // The test working directory can exceed the maximum path length supported
- // by some Windows APIs and cmd.exe (260 characters). To work around this,
- // create a symlink in temporary storage with a much shorter path,
- // targeting the full path to the test working directory. Then, use the
- // symlink as the test working directory.
- String targetPath = targetWorkDir.getAbsolutePath();
- File link = new File(System.getProperty("java.io.tmpdir"),
- String.valueOf(System.nanoTime()));
- String linkPath = link.getAbsolutePath();
-
- try {
- FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
- } catch (IOException e) {
- throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
- }
-
- // Guarantee target exists before creating symlink.
- targetWorkDir.mkdirs();
-
- ShellCommandExecutor shexec = new ShellCommandExecutor(
- Shell.getSymlinkCommand(targetPath, linkPath));
- try {
- shexec.execute();
- } catch (IOException e) {
- throw new YarnRuntimeException(String.format(Locale.ENGLISH,
- "failed to create symlink from %s to %s, shell output: %s", linkPath,
- targetPath, shexec.getOutput()), e);
- }
-
- this.testWorkDir = link;
- } else {
- this.testWorkDir = targetWorkDir;
- }
-
- resourceManagerWrapper = new ResourceManagerWrapper();
- addService(resourceManagerWrapper);
- nodeManagers = new CustomNodeManager[noOfNodeManagers];
- for(int index = 0; index < noOfNodeManagers; index++) {
- addService(new NodeManagerWrapper(index));
- nodeManagers[index] = new CustomNodeManager();
- }
- }
-
- @Override
- public void serviceInit(Configuration conf) throws Exception {
- super.serviceInit(conf instanceof YarnConfiguration ? conf
- : new YarnConfiguration(
- conf));
- }
-
- public File getTestWorkDir() {
- return testWorkDir;
- }
-
- public ResourceManager getResourceManager() {
- return this.resourceManager;
- }
-
- public NodeManager getNodeManager(int i) {
- return this.nodeManagers[i];
- }
-
- public static String getHostname() {
- try {
- return InetAddress.getLocalHost().getHostName();
- }
- catch (UnknownHostException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- private class ResourceManagerWrapper extends AbstractService {
- public ResourceManagerWrapper() {
- super(ResourceManagerWrapper.class.getName());
- }
-
- @Override
- public synchronized void serviceStart() throws Exception {
- try {
- getConfig().setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true);
- if (!getConfig().getBoolean(
- YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS,
- YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS)) {
- // pick free random ports.
- String hostname = MiniYARNCluster.getHostname();
- getConfig().set(YarnConfiguration.RM_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_ADMIN_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_SCHEDULER_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS,
- hostname + ":0");
- }
- resourceManager = new ResourceManager() {
- @Override
- protected void doSecureLogin() throws IOException {
- // Don't try to login using keytab in the testcase.
- };
- };
- resourceManager.init(getConfig());
- new Thread() {
- public void run() {
- resourceManager.start();
- };
- }.start();
- int waitCount = 0;
- while (resourceManager.getServiceState() == STATE.INITED
- && waitCount++ < 60) {
- LOG.info("Waiting for RM to start...");
- Thread.sleep(1500);
- }
- if (resourceManager.getServiceState() != STATE.STARTED) {
- // RM could have failed.
- throw new IOException(
- "ResourceManager failed to start. Final state is "
- + resourceManager.getServiceState());
- }
- super.serviceStart();
- } catch (Throwable t) {
- throw new YarnRuntimeException(t);
- }
- LOG.info("MiniYARN ResourceManager address: " +
- getConfig().get(YarnConfiguration.RM_ADDRESS));
- LOG.info("MiniYARN ResourceManager web address: " +
- getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS));
- }
-
- @Override
- public synchronized void serviceStop() throws Exception {
- if (resourceManager != null) {
- resourceManager.stop();
- }
- super.serviceStop();
-
- if (Shell.WINDOWS) {
- // On Windows, clean up the short temporary symlink that was created to
- // work around path length limitation.
- String testWorkDirPath = testWorkDir.getAbsolutePath();
- try {
- FileContext.getLocalFSFileContext().delete(new Path(testWorkDirPath),
- true);
- } catch (IOException e) {
- LOG.warn("could not cleanup symlink: " +
- testWorkDir.getAbsolutePath());
- }
- }
- }
- }
-
- private class NodeManagerWrapper extends AbstractService {
- int index = 0;
-
- public NodeManagerWrapper(int i) {
- super(NodeManagerWrapper.class.getName() + "_" + i);
- index = i;
- }
-
- public synchronized void serviceInit(Configuration conf) throws Exception {
- Configuration config = new YarnConfiguration(conf);
- super.serviceInit(config);
- }
-
- /**
- * Create local/log directories
- * @param dirType type of directories i.e. local dirs or log dirs
- * @param numDirs number of directories
- * @return the created directories as a comma delimited String
- */
- private String prepareDirs(String dirType, int numDirs) {
- File []dirs = new File[numDirs];
- String dirsString = "";
- for (int i = 0; i < numDirs; i++) {
- dirs[i]= new File(testWorkDir, MiniYARNCluster.this.getName()
- + "-" + dirType + "Dir-nm-" + index + "_" + i);
- dirs[i].mkdirs();
- LOG.info("Created " + dirType + "Dir in " + dirs[i].getAbsolutePath());
- String delimiter = (i > 0) ? "," : "";
- dirsString = dirsString.concat(delimiter + dirs[i].getAbsolutePath());
- }
- return dirsString;
- }
-
- public synchronized void serviceStart() throws Exception {
- try {
- // create nm-local-dirs and configure them for the nodemanager
- String localDirsString = prepareDirs("local", numLocalDirs);
- getConfig().set(YarnConfiguration.NM_LOCAL_DIRS, localDirsString);
- // create nm-log-dirs and configure them for the nodemanager
- String logDirsString = prepareDirs("log", numLogDirs);
- getConfig().set(YarnConfiguration.NM_LOG_DIRS, logDirsString);
-
- File remoteLogDir =
- new File(testWorkDir, MiniYARNCluster.this.getName()
- + "-remoteLogDir-nm-" + index);
- remoteLogDir.mkdir();
- getConfig().set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
- remoteLogDir.getAbsolutePath());
- // By default AM + 2 containers
- getConfig().setInt(YarnConfiguration.NM_PMEM_MB, 4*1024);
- getConfig().set(YarnConfiguration.NM_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
- getConfig().set(YarnConfiguration.NM_LOCALIZER_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
- getConfig().set(YarnConfiguration.NM_WEBAPP_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
-
- // Disable resource checks by default
- if (!getConfig().getBoolean(
- YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
- YarnConfiguration.
- DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
- getConfig().setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
- getConfig().setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
- }
-
- LOG.info("Starting NM: " + index);
- nodeManagers[index].init(getConfig());
- new Thread() {
- public void run() {
- nodeManagers[index].start();
- };
- }.start();
- int waitCount = 0;
- while (nodeManagers[index].getServiceState() == STATE.INITED
- && waitCount++ < 60) {
- LOG.info("Waiting for NM " + index + " to start...");
- Thread.sleep(1000);
- }
- if (nodeManagers[index].getServiceState() != STATE.STARTED) {
- // RM could have failed.
- throw new IOException("NodeManager " + index + " failed to start");
- }
- super.serviceStart();
- } catch (Throwable t) {
- throw new YarnRuntimeException(t);
- }
- }
-
- @Override
- public synchronized void serviceStop() throws Exception {
- if (nodeManagers[index] != null) {
- nodeManagers[index].stop();
- }
- super.serviceStop();
- }
- }
-
- private class CustomNodeManager extends NodeManager {
- @Override
- protected void doSecureLogin() throws IOException {
- // Don't try to login using keytab in the testcase.
- };
-
- @Override
- protected NodeStatusUpdater createNodeStatusUpdater(Context context,
- Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
- return new NodeStatusUpdaterImpl(context, dispatcher,
- healthChecker, metrics) {
- @Override
- protected ResourceTracker getRMClient() {
- final ResourceTrackerService rt = resourceManager
- .getResourceTrackerService();
- final RecordFactory recordFactory =
- RecordFactoryProvider.getRecordFactory(null);
-
- // For in-process communication without RPC
- return new ResourceTracker() {
-
- @Override
- public NodeHeartbeatResponse nodeHeartbeat(
- NodeHeartbeatRequest request) throws YarnException,
- IOException {
- NodeHeartbeatResponse response = recordFactory.newRecordInstance(
- NodeHeartbeatResponse.class);
- try {
- response = rt.nodeHeartbeat(request);
- } catch (YarnException e) {
- LOG.info("Exception in heartbeat from node " +
- request.getNodeStatus().getNodeId(), e);
- throw e;
- }
- return response;
- }
-
- @Override
- public RegisterNodeManagerResponse registerNodeManager(
- RegisterNodeManagerRequest request)
- throws YarnException, IOException {
- RegisterNodeManagerResponse response = recordFactory.
- newRecordInstance(RegisterNodeManagerResponse.class);
- try {
- response = rt.registerNodeManager(request);
- } catch (YarnException e) {
- LOG.info("Exception in node registration from "
- + request.getNodeId().toString(), e);
- throw e;
- }
- return response;
- }
- };
- };
-
- @Override
- protected void stopRMProxy() {
- return;
- }
- };
- };
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/README.txt b/solr/contrib/morphlines-cell/README.txt
deleted file mode 100644
index a3a1ba9..0000000
--- a/solr/contrib/morphlines-cell/README.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Solr Morphlines-Cell
-
-*Experimental* - This contrib is currently subject to change in ways that may
-break back compatibility.
-
-This contrib provides a variety of Kite Morphlines features for Solr Cell type functionality.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/build.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/build.xml b/solr/contrib/morphlines-cell/build.xml
deleted file mode 100644
index 397472d..0000000
--- a/solr/contrib/morphlines-cell/build.xml
+++ /dev/null
@@ -1,144 +0,0 @@
-<?xml version="1.0"?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<project name="solr-morphlines-cell" default="default">
-
- <description>
- Solr Cell Morphline commands.
- </description>
-
- <import file="../contrib-build.xml"/>
-
- <solr-contrib-uptodate name="extraction"
- property="solr-extraction.uptodate"
- classpath.property="solr-cell.jar"/>
-
- <target name="compile-solr-extraction" unless="solr-extraction.uptodate">
- <ant dir="${common-solr.dir}/contrib/extraction" target="compile-core" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <solr-contrib-uptodate name="morphlines-core"
- property="solr-morphlines-core.uptodate"/>
-
- <target name="compile-morphlines-core" unless="solr-morphlines-core.uptodate">
- <ant dir="${common-solr.dir}/contrib/morphlines-core" target="compile-core" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- <ant dir="${common-solr.dir}/contrib/morphlines-core" target="compile-test" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <solr-contrib-uptodate name="map-reduce"
- property="solr-map-reduce.uptodate"
- classpath.property="MapReduceIndexerTool.jar"/>
- <target name="compile-map-reduce" unless="solr-map-reduce.uptodate">
-
- <ant dir="${common-solr.dir}/contrib/map-reduce" target="compile-core" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="resolve-extraction-libs">
- <ant dir="${common-solr.dir}/contrib/extraction" target="resolve" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="resolve-morphlines-core-libs">
- <ant dir="${common-solr.dir}/contrib/morphlines-core" target="resolve" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="resolve-map-reduce-libs">
- <ant dir="${common-solr.dir}/contrib/map-reduce" target="resolve" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <path id="classpath.additions">
- <pathelement location="${common-solr.dir}/build/contrib/solr-cell/classes/java"/>
- <fileset dir="${common-solr.dir}/contrib/extraction/lib" excludes="${common.classpath.excludes}"/>
- <pathelement location="${common-solr.dir}/build/contrib/solr-morphlines-core/classes/java"/>
- <fileset dir="${common-solr.dir}/contrib/morphlines-core/lib" excludes="${common.classpath.excludes}"/>
- <!-- <pathelement location="${common-solr.dir}/build/contrib/solr-map-reduce/classes/java"/> -->
- <!-- <fileset dir="${common-solr.dir}/contrib/map-reduce/lib" excludes="${common.classpath.excludes}"/> -->
- </path>
-
- <path id="classpath">
- <path refid="solr.base.classpath"/>
- <path refid="classpath.additions"/>
- </path>
-
- <path id="test.classpath">
- <path refid="solr.test.base.classpath"/>
- <path refid="classpath.additions"/>
- <pathelement location="${common-solr.dir}/build/contrib/solr-morphlines-core/classes/test"/>
- <pathelement location="${common-solr.dir}/contrib/morphlines-core/src/test-files"/>
- <fileset dir="${common-solr.dir}/contrib/morphlines-core/test-lib" excludes="${common.classpath.excludes}"/>
- </path>
-
- <path id="javadoc.classpath">
- <path refid="junit-path"/>
- <path refid="classpath"/>
- <pathelement location="${ant.home}/lib/ant.jar"/>
- <fileset dir=".">
- <exclude name="build/**/*.jar"/>
- <include name="**/lib/*.jar"/>
- </fileset>
- </path>
-
- <!-- TODO: make this nicer like lucene? -->
- <target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,javadocs-extraction,javadocs-morphlines-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
- <sequential>
- <mkdir dir="${javadoc.dir}/${name}"/>
- <solr-invoke-javadoc>
- <solrsources>
- <packageset dir="${src.dir}"/>
- </solrsources>
- <links>
- <link href="../solr-solrj"/>
- <link href="../solr-core"/>
- <link href="../solr-cell"/>
- <link href="../solr-morphlines-core"/>
- </links>
- </solr-invoke-javadoc>
- <solr-jarify basedir="${javadoc.dir}/${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
- </sequential>
- </target>
-
- <target name="javadocs-extraction">
- <ant dir="${common-solr.dir}/contrib/extraction" target="javadocs" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="javadocs-morphlines-core">
- <ant dir="${common-solr.dir}/contrib/morphlines-core" target="javadocs" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="compile-core" depends="resolve-extraction-libs, resolve-morphlines-core-libs, resolve-map-reduce-libs, compile-solr-extraction, compile-morphlines-core, solr-contrib-build.compile-core"/>
- <target name="dist" depends="common-solr.dist"/>
-
-</project>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/ivy.xml b/solr/contrib/morphlines-cell/ivy.xml
deleted file mode 100644
index c090f21..0000000
--- a/solr/contrib/morphlines-cell/ivy.xml
+++ /dev/null
@@ -1,35 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-<ivy-module version="2.0">
- <info organisation="org.apache.solr" module="morphlines-cell" />
- <configurations defaultconfmapping="compile->master;test->master">
- <conf name="compile" transitive="false" />
- <conf name="test" transitive="false" />
- </configurations>
-
- <dependencies>
- <dependency org="org.kitesdk" name="kite-morphlines-tika-core" rev="${/org.kitesdk/kite-morphlines-tika-core}" conf="compile" />
- <dependency org="org.kitesdk" name="kite-morphlines-tika-decompress" rev="${/org.kitesdk/kite-morphlines-tika-decompress}" conf="compile" />
- <dependency org="org.kitesdk" name="kite-morphlines-json" rev="${/org.kitesdk/kite-morphlines-json}" conf="compile" />
- <dependency org="org.kitesdk" name="kite-morphlines-twitter" rev="${/org.kitesdk/kite-morphlines-twitter}" conf="compile" />
-
- <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}" />
-
- </dependencies>
-</ivy-module>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
deleted file mode 100644
index d4483a5..0000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.IllformedLocaleException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
-import java.util.stream.Collectors;
-
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
-import com.google.common.io.Closeables;
-import com.typesafe.config.Config;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.common.params.MultiMapSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.SuppressForbidden;
-import org.apache.solr.handler.extraction.ExtractingParams;
-import org.apache.solr.handler.extraction.ExtractionDateUtil;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
-import org.apache.solr.morphlines.solr.SolrLocator;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.tika.sax.xpath.Matcher;
-import org.apache.tika.sax.xpath.MatchingContentHandler;
-import org.apache.tika.sax.xpath.XPathParser;
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.CommandBuilder;
-import org.kitesdk.morphline.api.MorphlineCompilationException;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.MorphlineRuntimeException;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.Configs;
-import org.kitesdk.morphline.base.Fields;
-import org.kitesdk.morphline.stdio.AbstractParser;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Command that pipes the first attachment of a record into one of the given Tika parsers, then maps
- * the Tika output back to a record using SolrCell.
- * <p>
- * The Tika parser is chosen from the configurable list of parsers, depending on the MIME type
- * specified in the input record. Typically, this requires an upstream DetectMimeTypeBuilder
- * in a prior command.
- */
-public final class SolrCellBuilder implements CommandBuilder {
-
- @Override
- public Collection<String> getNames() {
- return Collections.singletonList("solrCell");
- }
-
- @Override
- public Command build(Config config, Command parent, Command child, MorphlineContext context) {
- return new SolrCell(this, config, parent, child, context);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class SolrCell extends AbstractParser {
-
- private final IndexSchema schema;
- private final List<String> dateFormats;
- private final String xpathExpr;
- private final List<Parser> parsers = new ArrayList<>();
- private final SolrContentHandlerFactory solrContentHandlerFactory;
- private final Locale locale;
-
- private final SolrParams solrParams;
- private final Map<MediaType, Parser> mediaTypeToParserMap;
-
- private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
-
- public static final String ADDITIONAL_SUPPORTED_MIME_TYPES = "additionalSupportedMimeTypes";
-
- public SolrCell(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
- super(builder, config, parent, child, context);
-
- Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
- SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
- LOG.debug("solrLocator: {}", locator);
- this.schema = Objects.requireNonNull(locator.getIndexSchema());
- if (LOG.isTraceEnabled()) {
- LOG.trace("Solr schema: \n" + schema.getFields().entrySet().stream()
- .sorted(Map.Entry.comparingByKey()).map(Map.Entry::getValue).map(Object::toString)
- .collect(Collectors.joining("\n")));
- }
-
- ListMultimap<String, String> cellParams = ArrayListMultimap.create();
- String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null);
- if (uprefix != null) {
- cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix);
- }
- for (String capture : getConfigs().getStringList(config, ExtractingParams.CAPTURE_ELEMENTS, Collections.<String>emptyList())) {
- cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture);
- }
- Config fmapConfig = getConfigs().getConfig(config, "fmap", null);
- if (fmapConfig != null) {
- for (Map.Entry<String, Object> entry : new Configs().getEntrySet(fmapConfig)) {
- cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString());
- }
- }
- String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null);
- if (captureAttributes != null) {
- cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes);
- }
- String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null);
- if (lowerNames != null) {
- cellParams.put(ExtractingParams.LOWERNAMES, lowerNames);
- }
- String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null);
- if (defaultField != null) {
- cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField);
- }
- xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null);
- if (xpathExpr != null) {
- cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
- }
-
- this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(ExtractionDateUtil.DEFAULT_DATE_FORMATS));
-
- String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
- Class<? extends SolrContentHandlerFactory> factoryClass;
- try {
- factoryClass = Class.forName(handlerStr).asSubclass(SolrContentHandlerFactory.class);
- } catch (ClassNotFoundException cnfe) {
- throw new MorphlineCompilationException("Could not find class "
- + handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe);
- }
- this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config);
-
- this.locale = getLocale(getConfigs().getString(config, "locale", null));
-
- this.mediaTypeToParserMap = new HashMap<>();
- //MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize()
-
- List<? extends Config> parserConfigs = getConfigs().getConfigList(config, "parsers");
- for (Config parserConfig : parserConfigs) {
- String parserClassName = getConfigs().getString(parserConfig, "parser");
-
- Object obj;
- try {
- obj = Class.forName(parserClassName).newInstance();
- } catch (Throwable e) {
- throw new MorphlineCompilationException("Cannot instantiate Tika parser: " + parserClassName, config, e);
- }
- if (!(obj instanceof Parser)) {
- throw new MorphlineCompilationException("Tika parser " + obj.getClass().getName()
- + " must be an instance of class " + Parser.class.getName(), config);
- }
- Parser parser = (Parser) obj;
- this.parsers.add(parser);
-
- List<String> mediaTypes = getConfigs().getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.<String>emptyList());
- for (String mediaTypeStr : mediaTypes) {
- MediaType mediaType = parseMediaType(mediaTypeStr);
- addSupportedMimeType(mediaTypeStr);
- this.mediaTypeToParserMap.put(mediaType, parser);
- }
-
- if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) {
- for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) {
- mediaType = mediaType.getBaseType();
- addSupportedMimeType(mediaType.toString());
- this.mediaTypeToParserMap.put(mediaType, parser);
- }
- List<String> extras = getConfigs().getStringList(parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.<String>emptyList());
- for (String mediaTypeStr : extras) {
- MediaType mediaType = parseMediaType(mediaTypeStr);
- addSupportedMimeType(mediaTypeStr);
- this.mediaTypeToParserMap.put(mediaType, parser);
- }
- }
- }
- //LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap);
-
- Map<String, String[]> tmp = new HashMap<>();
- for (Map.Entry<String,Collection<String>> entry : cellParams.asMap().entrySet()) {
- tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()]));
- }
- this.solrParams = new MultiMapSolrParams(tmp);
- validateArguments();
- }
-
- /**
- * Parses the attachment stream of one record with the Tika parser chosen by
- * {@code detectParser} and hands the resulting Solr document, converted back
- * into a record, to the child command.
- *
- * @return false if no parser is found for the record, otherwise the result of the child command
- */
- @Override
- protected boolean doProcess(Record record, InputStream inputStream) {
- Parser tikaParser = detectParser(record);
- if (tikaParser == null) {
- return false;
- }
-
- ParseContext context = new ParseContext();
- context.set(Locale.class, locale);
-
- // Every record field is handed to Tika as a metadata entry.
- Metadata tikaMetadata = new Metadata();
- for (Entry<String, Object> field : record.getFields().entries()) {
- String fieldName = field.getKey();
- tikaMetadata.add(fieldName, field.getValue().toString());
- }
-
- SolrContentHandler solrHandler = solrContentHandlerFactory.createSolrContentHandler(tikaMetadata, solrParams, schema);
- try {
- inputStream = TikaInputStream.get(inputStream);
-
- // When an XPath expression is configured, only matching SAX events reach the Solr handler,
- // e.g. xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()"
- ContentHandler sink = (xpathExpr == null)
- ? solrHandler
- : new MatchingContentHandler(solrHandler, PARSER.parse(xpathExpr));
-
- try {
- tikaParser.parse(inputStream, sink, tikaMetadata, context);
- } catch (IOException | TikaException | SAXException parseFailure) {
- throw new MorphlineRuntimeException("Cannot parse", parseFailure);
- }
- } finally {
- if (inputStream != null) {
- Closeables.closeQuietly(inputStream);
- }
- }
-
- SolrInputDocument solrDoc = solrHandler.newDocument();
- LOG.debug("solr doc: {}", solrDoc);
- return getChild().process(toRecord(solrDoc));
- }
-
- /**
- * Selects the Tika parser registered for the first {@code Fields.ATTACHMENT_MIME_TYPE}
- * value of the record. An exact media type match is tried first; after that every
- * registered media type is treated as a range pattern (see {@code isMediaTypeMatch}).
- *
- * @return the matching parser, or null if the record carries no usable MIME type or
- * no registered media type matches it
- */
- private Parser detectParser(Record record) {
- if (!hasAtLeastOneMimeType(record)) {
- return null;
- }
- Object mimeTypeValue = record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); //ExtractingParams.STREAM_TYPE);
- if (mimeTypeValue == null) {
- // The field is present but its first value is null: report "no parser" instead of
- // relying on an assert that is a no-op (followed by an NPE) when assertions are disabled.
- LOG.debug("Command failed because of null MIME type value for record: {}", record);
- return null;
- }
- // toString() rather than a cast, so a non-String field value cannot raise ClassCastException
- String mediaTypeStr = mimeTypeValue.toString();
-
- MediaType mediaType = parseMediaType(mediaTypeStr); // parseMediaType already returns the base type
- Parser parser = mediaTypeToParserMap.get(mediaType); // fast path
- if (parser != null) {
- return parser;
- }
- // wildcard matching
- for (Map.Entry<MediaType, Parser> entry : mediaTypeToParserMap.entrySet()) {
- if (isMediaTypeMatch(mediaType, entry.getKey())) {
- return entry.getValue();
- }
- }
- LOG.debug("No supported MIME type parser found for {}={}", Fields.ATTACHMENT_MIME_TYPE, mediaTypeStr);
- return null;
- }
-
- /**
- * Tells whether the record has a {@code Fields.ATTACHMENT_MIME_TYPE} field;
- * a debug message is logged when it does not.
- */
- private boolean hasAtLeastOneMimeType(Record record) {
- boolean mimeTypePresent = record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE);
- if (mimeTypePresent) {
- return true;
- }
- LOG.debug("Command failed because of missing MIME type for record: {}", record);
- return false;
- }
-
- /**
- * Parses a trimmed, lower-cased ({@code Locale.ROOT}) copy of the given string
- * and returns the base type of the parsed media type.
- * NOTE(review): MediaType.parse presumably returns null for unparseable input,
- * which would surface here as a NullPointerException - confirm against the Tika docs.
- */
- private MediaType parseMediaType(String mediaTypeStr) {
- String normalized = mediaTypeStr.trim().toLowerCase(Locale.ROOT);
- return MediaType.parse(normalized).getBaseType();
- }
-
- /**
- * Returns true if mediaType falls within the given range (pattern), false otherwise.
- * A "*" type or subtype in the pattern matches any value in that position.
- */
- private boolean isMediaTypeMatch(MediaType mediaType, MediaType rangePattern) {
- final String wildcard = "*";
- String patternType = rangePattern.getType();
- if (!patternType.equals(wildcard) && !patternType.equals(mediaType.getType())) {
- return false;
- }
- String patternSubtype = rangePattern.getSubtype();
- return patternSubtype.equals(wildcard) || patternSubtype.equals(mediaType.getSubtype());
- }
-
- /**
- * Instantiates the given factory class reflectively through its public
- * constructor that takes a single {@code Collection} (the date formats).
- *
- * @throws MorphlineCompilationException if that constructor is missing or the instantiation fails
- */
- private static SolrContentHandlerFactory getSolrContentHandlerFactory(
- Class<? extends SolrContentHandlerFactory> factoryClass, Collection<String> dateFormats, Config config) {
- try {
- return factoryClass.getConstructor(Collection.class).newInstance(dateFormats);
- } catch (NoSuchMethodException missingConstructor) {
- String message = "Unable to find valid constructor of type "
- + factoryClass.getName() + " for creating SolrContentHandler";
- throw new MorphlineCompilationException(message, config, missingConstructor);
- } catch (Exception failure) {
- String message = "Unexpected exception when trying to create SolrContentHandlerFactory of type "
- + factoryClass.getName();
- throw new MorphlineCompilationException(message, config, failure);
- }
- }
-
- /** Copies each field of the Solr document, with all of its values, into a new record. */
- private Record toRecord(SolrInputDocument doc) {
- Record result = new Record();
- for (Entry<String, SolrInputField> field : doc.entrySet()) {
- String fieldName = field.getKey();
- SolrInputField solrField = field.getValue();
- result.getFields().putAll(fieldName, solrField.getValues());
- }
- return result;
- }
-
- /**
- * Resolves a configured locale name. A null name yields {@code Locale.ROOT}; a name equal to
- * the {@code toString()} form of an available locale yields that locale; anything else is
- * handed to {@code Locale.Builder#setLanguageTag}.
- *
- * @throws MorphlineCompilationException if the name is rejected as an ill-formed language tag
- */
- @SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
- private Locale getLocale(String name) {
- if (name == null) {
- return Locale.ROOT;
- }
- // legacy form first: compare against Locale#toString() of every installed locale
- for (Locale candidate : Locale.getAvailableLocales()) {
- if (name.equals(candidate.toString())) {
- return candidate;
- }
- }
- Locale.Builder builder = new Locale.Builder();
- try {
- return builder.setLanguageTag(name).build();
- } catch (IllformedLocaleException ex) {
- throw new MorphlineCompilationException("Malformed / non-existent locale: " + name, getConfig(), ex);
- }
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
deleted file mode 100644
index 81f49af..0000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.util.Collection;
-
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.tika.metadata.Metadata;
-
-/**
- * {@link SolrContentHandler} and associated factory that strips non-characters and trims on output.
- * This prevents exceptions on parsing integer fields inside Solr server.
- */
-public class StripNonCharSolrContentHandlerFactory extends SolrContentHandlerFactory {
-
- /**
- * @param dateFormats date formats passed through to the parent factory and to every handler it creates
- */
- public StripNonCharSolrContentHandlerFactory(Collection<String> dateFormats) {
- super(dateFormats);
- }
-
- /** Creates a handler whose field values are stripped of non-characters and trimmed. */
- @Override
- public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
- return new StripNonCharSolrContentHandler(metadata, params, schema, dateFormats);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class StripNonCharSolrContentHandler extends SolrContentHandler {
-
- public StripNonCharSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection<String> dateFormats) {
- super(metadata, params, schema, dateFormats);
- }
-
- /**
- * Strip all non-characters, which can cause SolrReducer problems if present.
- * This is borrowed from Apache Nutch.
- * <p>
- * The input is walked by code point rather than by char, so that the noncharacters
- * at the end of the supplementary planes (which are encoded as surrogate pairs) are
- * recognized as well.
- */
- private static String stripNonCharCodepoints(String input) {
- StringBuilder stripped = new StringBuilder(input.length());
- int i = 0;
- while (i < input.length()) {
- int codePoint = input.codePointAt(i);
- i += Character.charCount(codePoint);
- // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
- // and non-printable control characters except tabulator, new line and carriage return
- if (!isNonCharacter(codePoint) && !isStrippedControl(codePoint)) {
- stripped.appendCodePoint(codePoint);
- }
- }
- return stripped.toString();
- }
-
- /** Returns true for U+FDD0..U+FDEF (both ends included) and for every code point whose low 16 bits are FFFE or FFFF. */
- private static boolean isNonCharacter(int codePoint) {
- return (codePoint & 0xfffe) == 0xfffe
- || (codePoint >= 0xfdd0 && codePoint <= 0xfdef);
- }
-
- /** Returns true for code points below U+0020 other than tabulator, new line and carriage return. */
- private static boolean isStrippedControl(int codePoint) {
- return codePoint < 0x20 && codePoint != 0x9 && codePoint != 0xa && codePoint != 0xd;
- }
-
- /**
- * Applies the parent transformation, removes non-characters and trims the result.
- * Stripping happens before trimming so that whitespace which was adjacent to a removed
- * code point does not survive at either end of the value.
- */
- @Override
- protected String transformValue(String val, SchemaField schemaField) {
- String transformed = super.transformValue(val, schemaField);
- return stripNonCharCodepoints(transformed).trim();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
deleted file mode 100644
index 6e7df59..0000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.util.Collection;
-
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.tika.metadata.Metadata;
-
-/**
- * Factory for a {@link SolrContentHandler} that trims every field value on output.
- * This prevents exceptions on parsing integer fields inside Solr server.
- */
-public class TrimSolrContentHandlerFactory extends SolrContentHandlerFactory {
-
- /**
- * @param dateFormats date formats passed through to the parent factory and to every handler it creates
- */
- public TrimSolrContentHandlerFactory(Collection<String> dateFormats) {
- super(dateFormats);
- }
-
- /** Creates a handler whose transformed field values are trimmed. */
- @Override
- public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
- return new TrimSolrContentHandler(metadata, params, schema, dateFormats);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class TrimSolrContentHandler extends SolrContentHandler {
-
- public TrimSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection<String> dateFormats) {
- super(metadata, params, schema, dateFormats);
- }
-
- /** Runs the parent transformation and trims its result. */
- @Override
- protected String transformValue(String val, SchemaField schemaField) {
- String transformed = super.transformValue(val, schemaField);
- return transformed.trim();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java
deleted file mode 100644
index 0f44a70..0000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Morphlines Solr Cell related code.
- */
-package org.apache.solr.morphlines.cell;
-
-
-
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/java/overview.html
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/overview.html b/solr/contrib/morphlines-cell/src/java/overview.html
deleted file mode 100644
index 3e25367..0000000
--- a/solr/contrib/morphlines-cell/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<body>
-Apache Solr Search Server: Solr Cell Morphline Commands
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/test-files/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/test-files/README.txt b/solr/contrib/morphlines-cell/src/test-files/README.txt
deleted file mode 100644
index 8905df2..0000000
--- a/solr/contrib/morphlines-cell/src/test-files/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-The test files used by this module are located in the morphlines-core module.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
deleted file mode 100644
index e0872b6..0000000
--- a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.io.File;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.lucene.util.Constants;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.handler.extraction.ExtractionDateUtil;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.tika.metadata.Metadata;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
-
- private Map<String,Integer> expectedRecords = new HashMap<>();
- private Map<String, Map<String, Object>> expectedRecordContents = new HashMap<>();
-
- /** Skips every test of this class when running on Windows. */
- @BeforeClass
- public static void beforeClass2() {
- boolean onWindows = Constants.WINDOWS;
- assumeFalse("FIXME: Morphlines currently has issues with Windows paths", onWindows);
- }
-
- /**
- * Registers, per test document, the expected number of output records and the expected
- * field contents, then copies custom-mimetypes.xml into the temp directory.
- */
- @Before
- @Override
- public void setUp() throws Exception {
- super.setUp();
-
- String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- expectedRecords.put(path + "sample-statuses-20120906-141433.avro", 2);
- expectedRecords.put(path + "sample-statuses-20120906-141433", 2);
- expectedRecords.put(path + "sample-statuses-20120906-141433.gz", 2);
- expectedRecords.put(path + "sample-statuses-20120906-141433.bz2", 2);
- expectedRecords.put(path + "cars.csv", 6);
- expectedRecords.put(path + "cars.csv.gz", 6);
- expectedRecords.put(path + "cars.tar.gz", 4);
- expectedRecords.put(path + "cars.tsv", 6);
- expectedRecords.put(path + "cars.ssv", 6);
- expectedRecords.put(path + "test-documents.7z", 9);
- expectedRecords.put(path + "test-documents.cpio", 9);
- expectedRecords.put(path + "test-documents.tar", 9);
- expectedRecords.put(path + "test-documents.tbz2", 9);
- expectedRecords.put(path + "test-documents.tgz", 9);
- expectedRecords.put(path + "test-documents.zip", 9);
- expectedRecords.put(path + "multiline-stacktrace.log", 4);
-
- {
- // NOTE(review): unlike the entries below, these keys are not prefixed with path - confirm this is intended
- Map<String, Object> record = new LinkedHashMap<>();
- record.put("ignored__attachment_mimetype", "image/jpeg");
- record.put("ignored_exif_isospeedratings", "400");
- record.put("ignored_meta_creation_date", "2009-08-11T09:09:45");
- record.put("ignored_tiff_model", "Canon EOS 40D");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put("/testJPEG_EXIF.jpg", record);
- expectedRecordContents.put("/testJPEG_EXIF.jpg.tar", record);
- expectedRecordContents.put("/testJPEG_EXIF.jpg.tar.gz", record);
- }
-
- {
- String file = path + "testWORD_various.doc";
- Map<String, Object> record = new LinkedHashMap<>();
- record.put("ignored__attachment_mimetype", "application/msword");
- record.put("ignored_author", "Michael McCandless");
- record.put("ignored_creation_date", "2011-09-02T10:11:00Z");
- record.put("ignored_title", "");
- record.put("ignored_keywords", "Keyword1 Keyword2");
- record.put("ignored_subject", "Subject is here");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- {
- String file = path + "testPDF.pdf";
- Map<String, Object> record = new LinkedHashMap<>();
- record.put("ignored__attachment_mimetype", "application/pdf");
- record.put("ignored_author", "Bertrand Delacr�taz");
- record.put("ignored_creation_date", "2007-09-15T09:02:31Z");
- record.put("ignored_title", "Apache Tika - Apache Tika");
- record.put("ignored_xmp_creatortool", "Firefox");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- {
- String file = path + "email.eml";
- Map<String, Object> record = new LinkedHashMap<>();
- String name = "Patrick Foo <fo...@cloudera.com>";
- record.put("ignored__attachment_mimetype", "message/rfc822");
- record.put("ignored_author", name);
- //record.put("ignored_content_length", "1068");
- record.put("ignored_creation_date", "2013-11-27T20:01:23Z");
- record.put("ignored_message_from", name);
- record.put("ignored_message_to", name);
- record.put("ignored_creator", name);
- record.put("ignored_dc_creator", name);
- record.put("ignored_dc_title", "Test EML");
- record.put("ignored_dcterms_created", "2013-11-27T20:01:23Z");
- record.put("ignored_meta_author", name);
- record.put("ignored_meta_creation_date", "2013-11-27T20:01:23Z");
- record.put("ignored_subject", "Test EML");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- {
- String file = path + "testEXCEL.xlsx";
- Map<String, Object> record = new LinkedHashMap<>();
- record.put("ignored__attachment_mimetype", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
- record.put("ignored_author", "Keith Bennett");
- record.put("ignored_creation_date", "2007-10-01T16:13:56Z");
- record.put("ignored_title", "Simple Excel document");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
- }
-
- /** Runs the solrCellJPGCompressed morphline over a plain, a gzipped and a tar.gz JPEG. */
- @Test
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-6489")
- public void testSolrCellJPGCompressed() throws Exception {
- morphline = createMorphline("test-morphlines" + File.separator + "solrCellJPGCompressed");
- final String docs = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] inputs = {
- docs + "testJPEG_EXIF.jpg",
- docs + "testJPEG_EXIF.jpg.gz",
- docs + "testJPEG_EXIF.jpg.tar.gz",
- //docs + "jpeg2000.jp2",
- };
- testDocumentTypesInternal(inputs, expectedRecords, expectedRecordContents);
- }
-
- /** Runs the solrCellXML morphline over a single XML test document. */
- @Test
- public void testSolrCellXML() throws Exception {
- morphline = createMorphline("test-morphlines" + File.separator + "solrCellXML");
- final String docs = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] inputs = {
- docs + "testXML2.xml",
- };
- testDocumentTypesInternal(inputs, expectedRecords, expectedRecordContents);
- }
-
- /** Runs the solrCellDocumentTypes morphline over a mix of text, office, image, archive and Avro inputs. */
- @Test
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-6489")
- public void testSolrCellDocumentTypes() throws Exception {
- AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
-
- String tempDirPath = new File(tempDir).getAbsolutePath();
- morphline = createMorphline(tempDirPath + "/test-morphlines/solrCellDocumentTypes");
- final String docs = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] inputs = {
- docs + "testBMPfp.txt",
- docs + "boilerplate.html",
- docs + "NullHeader.docx",
- docs + "testWORD_various.doc",
- docs + "testPDF.pdf",
- docs + "testJPEG_EXIF.jpg",
- docs + "testJPEG_EXIF.jpg.gz",
- docs + "testJPEG_EXIF.jpg.tar.gz",
- docs + "testXML.xml",
- docs + "cars.csv",
-// docs + "cars.tsv",
-// docs + "cars.ssv",
- docs + "cars.csv.gz",
- docs + "cars.tar.gz",
- docs + "sample-statuses-20120906-141433.avro",
- docs + "sample-statuses-20120906-141433",
- docs + "sample-statuses-20120906-141433.gz",
- docs + "sample-statuses-20120906-141433.bz2",
- docs + "email.eml",
- };
- testDocumentTypesInternal(inputs, expectedRecords, expectedRecordContents);
- }
-
- /** Runs the solrCellDocumentTypes morphline over a second batch of office, mail, audio, video and image inputs. */
- @Test
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9220")
- public void testSolrCellDocumentTypes2() throws Exception {
-
- AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
-
- String tempDirPath = new File(tempDir).getAbsolutePath();
- morphline = createMorphline(tempDirPath + "/test-morphlines/solrCellDocumentTypes");
- final String docs = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] inputs = {
- docs + "testPPT_various.ppt",
- docs + "testPPT_various.pptx",
- docs + "testEXCEL.xlsx",
- docs + "testEXCEL.xls",
- docs + "testPages.pages",
- //docs + "testNumbers.numbers",
- //docs + "testKeynote.key",
-
- docs + "testRTFVarious.rtf",
- docs + "complex.mbox",
- docs + "test-outlook.msg",
- docs + "testEMLX.emlx",
- docs + "testRFC822",
- docs + "rsstest.rss",
-// docs + "testDITA.dita",
-
- docs + "testMP3i18n.mp3",
- docs + "testAIFF.aif",
- docs + "testFLAC.flac",
-// docs + "testFLAC.oga",
-// docs + "testVORBIS.ogg",
- docs + "testMP4.m4a",
- docs + "testWAV.wav",
-// docs + "testWMA.wma",
-
- docs + "testFLV.flv",
-// docs + "testWMV.wmv",
-
- docs + "testBMP.bmp",
- docs + "testPNG.png",
- docs + "testPSD.psd",
- docs + "testSVG.svg",
- docs + "testTIFF.tif",
-
-// docs + "test-documents.7z",
-// docs + "test-documents.cpio",
-// docs + "test-documents.tar",
-// docs + "test-documents.tbz2",
-// docs + "test-documents.tgz",
-// docs + "test-documents.zip",
-// docs + "test-zip-of-zip.zip",
-// docs + "testJAR.jar",
-
-// docs + "testKML.kml",
-// docs + "testRDF.rdf",
- docs + "testVISIO.vsd",
-// docs + "testWAR.war",
-// docs + "testWindows-x86-32.exe",
-// docs + "testWINMAIL.dat",
-// docs + "testWMF.wmf",
- };
- testDocumentTypesInternal(inputs, expectedRecords, expectedRecordContents);
- }
-
- /**
- * Test that the ContentHandler properly strips the illegal characters.
- * A metadata value polluted with U+FFFF must come out of the handler as plain "foobar".
- */
- @Test
- public void testTransformValue() {
- String fieldName = "user_name";
- assertFalse("foobar".equals(getFoobarWithNonChars()));
-
- Metadata metadata = new Metadata();
- // load illegal char string into a metadata field and generate a new document,
- // which will cause the ContentHandler to be invoked.
- metadata.set(fieldName, getFoobarWithNonChars());
- StripNonCharSolrContentHandlerFactory contentHandlerFactory =
- new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS);
- IndexSchema schema = h.getCore().getLatestSchema();
- SolrContentHandler contentHandler =
- contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap<>()), schema);
- SolrInputDocument doc = contentHandler.newDocument();
- String foobar = doc.getFieldValue(fieldName).toString();
- // assertEquals reports the actual value when the comparison fails
- assertEquals("foobar", foobar);
- }
-
- /**
- * Builds "foobar" with two U+FFFF characters before "foo", between "foo" and "bar",
- * and after "bar".
- */
- private String getFoobarWithNonChars() {
- String illegalPair = "\uffff\uffff";
- return illegalPair + "foo" + illegalPair + "bar" + illegalPair;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/README.txt b/solr/contrib/morphlines-core/README.txt
deleted file mode 100644
index 0efa467..0000000
--- a/solr/contrib/morphlines-core/README.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Solr Morphlines-Core
-
-*Experimental* - This contrib is currently subject to change in ways that may
-break back compatibility.
-
-This contrib provides a variety of Kite Morphlines features for Solr.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/build.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/build.xml b/solr/contrib/morphlines-core/build.xml
deleted file mode 100644
index 2cf6261..0000000
--- a/solr/contrib/morphlines-core/build.xml
+++ /dev/null
@@ -1,105 +0,0 @@
-<?xml version="1.0"?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<project name="solr-morphlines-core" default="default" xmlns:ivy="antlib:org.apache.ivy.ant">
-
- <description>
- Solr Morphlines commands.
- </description>
-
- <import file="../contrib-build.xml"/>
-
- <solr-contrib-uptodate name="extraction"
- property="solr-extraction.uptodate"
- classpath.property="solr-cell.jar"/>
-
- <target name="compile-solr-extraction" unless="solr-extraction.uptodate">
- <ant dir="${common-solr.dir}/contrib/extraction" target="compile-core" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="resolve-extraction-libs">
- <ant dir="${common-solr.dir}/contrib/extraction" target="resolve" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <path id="classpath.additions">
- <pathelement location="${common-solr.dir}/build/contrib/solr-cell/classes/java"/>
- <fileset dir="${common-solr.dir}/contrib/extraction/lib" excludes="${common.classpath.excludes}"/>
- </path>
-
- <path id="classpath">
- <path refid="solr.base.classpath"/>
- <path refid="classpath.additions"/>
- </path>
-
- <path id="test.classpath">
- <path refid="solr.test.base.classpath"/>
- <path refid="classpath.additions"/>
- <fileset dir="${test.lib.dir}" includes="*.jar"/>
- </path>
-
- <path id="javadoc.classpath">
- <path refid="junit-path"/>
- <path refid="classpath"/>
- <pathelement location="${ant.home}/lib/ant.jar"/>
- <fileset dir=".">
- <exclude name="build/**/*.jar"/>
- <include name="**/lib/*.jar"/>
- </fileset>
- </path>
-
- <!-- TODO: make this nicer like lucene? -->
- <target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,javadocs-extraction,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
- <sequential>
- <mkdir dir="${javadoc.dir}/${name}"/>
- <solr-invoke-javadoc>
- <solrsources>
- <packageset dir="${src.dir}"/>
- </solrsources>
- <links>
- <link href="../solr-solrj"/>
- <link href="../solr-core"/>
- <link href="../solr-cell"/>
- </links>
- </solr-invoke-javadoc>
- <solr-jarify basedir="${javadoc.dir}/${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
- </sequential>
- </target>
-
- <target name="javadocs-extraction">
- <ant dir="${common-solr.dir}/contrib/extraction" target="javadocs" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="resolve" depends="ivy-availability-check,ivy-fail,ivy-configure">
- <sequential>
- <ivy:retrieve conf="compile" type="jar,bundle" sync="${ivy.sync}" log="download-only" symlink="${ivy.symlink}"/>
- <ivy:retrieve conf="test,test.DfsMiniCluster" type="jar,bundle,test" sync="${ivy.sync}" log="download-only" symlink="${ivy.symlink}"
- pattern="${test.lib.dir}/[artifact]-[revision](-[classifier]).[ext]"/>
- </sequential>
- </target>
-
- <target name="compile-core" depends="resolve-extraction-libs, compile-solr-extraction, solr-contrib-build.compile-core"/>
- <target name="dist" depends="common-solr.dist"/>
-
-</project>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/ivy.xml b/solr/contrib/morphlines-core/ivy.xml
deleted file mode 100644
index ad47aec..0000000
--- a/solr/contrib/morphlines-core/ivy.xml
+++ /dev/null
@@ -1,128 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-<ivy-module version="2.0" xmlns:maven="http://ant.apache.org/ivy/maven">
- <info organisation="org.apache.solr" module="morphlines-core" />
- <configurations defaultconfmapping="compile->master;test->master;test.DfsMiniCluster->master">
- <!-- artifacts in the "compile" configuration will go into morphlines-core/lib/ -->
- <conf name="compile" transitive="false" />
- <!-- artifacts in the "test" and "test.DfsMiniCluster" configurations will go into morphlines-core/test-lib/ -->
- <conf name="test" transitive="false" />
- <conf name="test.DfsMiniCluster" transitive="false" />
- </configurations>
-
- <dependencies>
- <dependency org="org.kitesdk" name="kite-morphlines-core" rev="${/org.kitesdk/kite-morphlines-core}" conf="compile;test">
- <artifact name="kite-morphlines-core" ext="jar" />
- <artifact name="kite-morphlines-core" type="test" ext="jar" maven:classifier="tests" />
- </dependency>
-
- <dependency org="org.kitesdk" name="kite-morphlines-avro" rev="${/org.kitesdk/kite-morphlines-avro}" conf="compile" />
-
- <dependency org="io.dropwizard.metrics" name="metrics-core" rev="${/io.dropwizard.metrics/metrics-core}" conf="compile" />
- <dependency org="io.dropwizard.metrics" name="metrics-healthchecks" rev="${/io.dropwizard.metrics/metrics-healthchecks}" conf="compile" />
- <dependency org="com.typesafe" name="config" rev="${/com.typesafe/config}" conf="compile" />
-
- <!-- Test Dependencies -->
-
- <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" rev="${/org.apache.hadoop/hadoop-mapreduce-client-core}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-common" rev="${/org.apache.hadoop/hadoop-yarn-common}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-api" rev="${/org.apache.hadoop/hadoop-yarn-api}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-client" rev="${/org.apache.hadoop/hadoop-yarn-client}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-server-tests" rev="${/org.apache.hadoop/hadoop-yarn-server-tests}" conf="test">
- <artifact name="hadoop-yarn-server-tests" type="test" ext="jar" maven:classifier="tests" />
- </dependency>
- <dependency org="org.apache.hadoop" name="hadoop-yarn-server-common" rev="${/org.apache.hadoop/hadoop-yarn-server-common}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-server-nodemanager" rev="${/org.apache.hadoop/hadoop-yarn-server-nodemanager}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-app" rev="${/org.apache.hadoop/hadoop-mapreduce-client-app}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common" rev="${/org.apache.hadoop/hadoop-mapreduce-client-common}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-hs" rev="${/org.apache.hadoop/hadoop-mapreduce-client-hs}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-server-resourcemanager" rev="${/org.apache.hadoop/hadoop-yarn-server-resourcemanager}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-shuffle" rev="${/org.apache.hadoop/hadoop-mapreduce-client-shuffle}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-yarn-server-web-proxy" rev="${/org.apache.hadoop/hadoop-yarn-server-web-proxy}" conf="test" />
- <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="${/org.apache.hadoop/hadoop-mapreduce-client-jobclient}" conf="test">
- <artifact name="hadoop-mapreduce-client-jobclient" type="jar" ext="jar" />
- <artifact name="hadoop-mapreduce-client-jobclient" type="test" ext="jar" maven:classifier="tests" />
- </dependency>
-
- <dependency org="org.apache.hadoop" name="hadoop-yarn-server-applicationhistoryservice" rev="${/org.apache.hadoop/hadoop-yarn-server-applicationhistoryservice}" conf="test"/>
- <dependency org="org.fusesource.leveldbjni" name="leveldbjni" rev="${/org.fusesource.leveldbjni/leveldbjni}" conf="test"/>
- <dependency org="org.iq80.leveldb" name="leveldb" rev="${/org.iq80.leveldb/leveldb}" conf="test.DfsMiniCluster"/>
- <dependency org="org.iq80.leveldb" name="leveldb-api" rev="${/org.iq80.leveldb/leveldb-api}" conf="test.DfsMiniCluster"/>
- <dependency org="org.apache.curator" name="curator-framework" rev="${/org.apache.curator/curator-framework}" conf="test"/>
- <dependency org="org.apache.curator" name="curator-client" rev="${/org.apache.curator/curator-client}" conf="test"/>
-
- <dependency org="aopalliance" name="aopalliance" rev="${/aopalliance/aopalliance}" conf="test" />
- <dependency org="com.sun.xml.bind" name="jaxb-impl" rev="${/com.sun.xml.bind/jaxb-impl}" conf="test" />
- <dependency org="io.netty" name="netty-all" rev="${/io.netty/netty-all}" conf="test" />
- <dependency org="org.apache.mrunit" name="mrunit" rev="${/org.apache.mrunit/mrunit}" conf="test">
- <artifact name="mrunit" maven:classifier="hadoop2" />
- <exclude org="log4j" module="log4j" />
- </dependency>
-
- <!-- Mocking -->
- <dependency org="org.mockito" name="mockito-core" rev="${/org.mockito/mockito-core}" conf="test"/>
- <dependency org="net.bytebuddy" name="byte-buddy" rev="${/net.bytebuddy/byte-buddy}" conf="test"/>
- <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test"/>
-
- <dependency org="commons-collections" name="commons-collections" rev="${/commons-collections/commons-collections}" conf="test" />
-
- <!-- FasterXML Jackson Dependencies -->
- <dependency org="com.fasterxml.jackson.core" name="jackson-core" rev="${/com.fasterxml.jackson.core/jackson-core}" conf="test" />
- <dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="${/com.fasterxml.jackson.core/jackson-databind}" conf="test" />
- <dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="${/com.fasterxml.jackson.core/jackson-annotations}" conf="test" />
-
- <!-- Codehaus Jackson Dependencies -->
- <dependency org="org.codehaus.jackson" name="jackson-jaxrs" rev="${/org.codehaus.jackson/jackson-jaxrs}" conf="test" />
- <dependency org="org.codehaus.jackson" name="jackson-mapper-asl" rev="${/org.codehaus.jackson/jackson-mapper-asl}" conf="test" />
- <dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${/org.codehaus.jackson/jackson-core-asl}" conf="test" />
-
- <!-- Jersey Dependencies -->
- <dependency org="com.sun.jersey.contribs" name="jersey-guice" rev="${/com.sun.jersey.contribs/jersey-guice}" conf="test" />
- <dependency org="com.sun.jersey" name="jersey-core" rev="${/com.sun.jersey/jersey-core}" conf="test" />
- <dependency org="com.sun.jersey" name="jersey-json" rev="${/com.sun.jersey/jersey-json}" conf="test" />
- <dependency org="com.sun.jersey" name="jersey-server" rev="${/com.sun.jersey/jersey-server}" conf="test" />
- <dependency org="com.sun.jersey" name="jersey-bundle" rev="${/com.sun.jersey/jersey-bundle}" conf="test" />
-
- <!-- Guice Dependencies -->
- <dependency org="com.google.inject" name="guice" rev="${/com.google.inject/guice}" conf="test" />
- <dependency org="com.google.inject.extensions" name="guice-servlet" rev="${/com.google.inject.extensions/guice-servlet}" conf="test" />
- <dependency org="javax.inject" name="javax.inject" rev="${/javax.inject/javax.inject}" conf="test" />
-
- <!-- Avro Dependencies -->
- <dependency org="org.apache.avro" name="avro" rev="${/org.apache.avro/avro}" conf="test" />
- <dependency org="com.thoughtworks.paranamer" name="paranamer" rev="${/com.thoughtworks.paranamer/paranamer}" conf="test" />
- <dependency org="org.xerial.snappy" name="snappy-java" rev="${/org.xerial.snappy/snappy-java}" conf="test" />
-
- <!-- Hadoop DfsMiniCluster Dependencies -->
- <dependency org="org.apache.hadoop" name="hadoop-common" rev="${/org.apache.hadoop/hadoop-common}" conf="test.DfsMiniCluster">
- <artifact name="hadoop-common" type="jar" ext="jar" />
- <artifact name="hadoop-common" type="test" ext="jar" maven:classifier="tests" />
- </dependency>
- <dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="${/org.apache.hadoop/hadoop-hdfs}" conf="test.DfsMiniCluster">
- <artifact name="hadoop-hdfs" type="test" ext="jar" maven:classifier="tests" />
- </dependency>
- <dependency org="org.mortbay.jetty" name="jetty" rev="${/org.mortbay.jetty/jetty}" conf="test.DfsMiniCluster" />
- <dependency org="org.mortbay.jetty" name="jetty-util" rev="${/org.mortbay.jetty/jetty-util}" conf="test.DfsMiniCluster" />
- <dependency org="com.sun.jersey" name="jersey-core" rev="${/com.sun.jersey/jersey-core}" conf="test.DfsMiniCluster" />
- <dependency org="org.apache.htrace" name="htrace-core" rev="${/org.apache.htrace/htrace-core}" conf="test.DfsMiniCluster"/>
-
- <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}" />
-
- </dependencies>
-</ivy-module>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
deleted file mode 100644
index f303024..0000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.io.IOException;
-
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-
-/**
- * A vehicle to load a list of Solr documents into some kind of destination,
- * such as a SolrServer or MapReduce RecordWriter.
- * <p>
- * Loading is grouped into transactions: {@link #beginTransaction()},
- * {@link #load(SolrInputDocument)}, and then either
- * {@link #commitTransaction()} or {@link #rollbackTransaction()}.
- * NOTE(review): the required call order is not stated by this interface;
- * presumably it follows that sequence. Confirm against the implementations.
- */
-public interface DocumentLoader {
-
- /**
- * Begins a transaction.
- *
- * @throws IOException
- * If there is a low-level I/O error.
- * @throws SolrServerException
- * Declared by the signature; the triggering conditions are left to
- * the implementation.
- */
- public void beginTransaction() throws IOException, SolrServerException;
-
- /**
- * Loads the given document into the destination.
- *
- * @param doc
- * The document to load.
- * @throws IOException
- * If there is a low-level I/O error.
- * @throws SolrServerException
- * Declared by the signature; the triggering conditions are left to
- * the implementation.
- */
- public void load(SolrInputDocument doc) throws IOException, SolrServerException;
-
- /**
- * Sends any outstanding documents to the destination and waits for a positive
- * or negative ack (i.e. exception). Depending on the outcome the caller
- * should then commit or roll back the current Flume transaction
- * correspondingly.
- *
- * @throws IOException
- * If there is a low-level I/O error.
- * @throws SolrServerException
- * Declared by the signature; presumably one form of the negative ack
- * described above. Confirm against the implementations.
- */
- public void commitTransaction() throws IOException, SolrServerException;
-
- /**
- * Performs a rollback of all non-committed documents pending.
- * <p>
- * Note that this is not a true rollback as in databases. Content you have
- * previously added may have already been committed due to autoCommit, buffer
- * full, other client performing a commit etc. So this is only a best-effort
- * rollback.
- *
- * @return The {@link UpdateResponse} produced by the rollback.
- * NOTE(review): whether this can be null is not specified here.
- * @throws IOException
- * If there is a low-level I/O error.
- * @throws SolrServerException
- * Declared by the signature; the triggering conditions are left to
- * the implementation.
- */
- public UpdateResponse rollbackTransaction() throws IOException, SolrServerException;
-
- /**
- * Releases allocated resources.
- *
- * @throws IOException
- * If there is a low-level I/O error.
- * @throws SolrServerException
- * Declared by the signature; the triggering conditions are left to
- * the implementation.
- */
- public void shutdown() throws IOException, SolrServerException;
-
- /**
- * Issues a ping request to check if the server is alive.
- *
- * @return The {@link SolrPingResponse} produced by the ping request.
- * @throws IOException
- * If there is a low-level I/O error.
- * @throws SolrServerException
- * Declared by the signature; the triggering conditions are left to
- * the implementation.
- */
- public SolrPingResponse ping() throws IOException, SolrServerException;
-
-}