You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by am...@apache.org on 2016/01/03 18:40:59 UTC

[01/21] incubator-asterixdb git commit: First stage of external data cleanup

Repository: incubator-asterixdb
Updated Branches:
  refs/heads/master 1d5cf6403 -> 284590ed9


http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapter.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapter.java
deleted file mode 100644
index 7dd5130..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapter.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.ServerSocket;
-import java.net.Socket;
-import java.util.logging.Level;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.asterix.external.dataset.adapter.StreamBasedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-public class GenericSocketFeedAdapter extends StreamBasedAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private final int port;
-    private SocketFeedServer socketFeedServer;
-
-    public GenericSocketFeedAdapter(ITupleParserFactory parserFactory, ARecordType outputType, int port,
-            IHyracksTaskContext ctx, int partition) throws AsterixException, IOException {
-        super(parserFactory, outputType, ctx, partition);
-        this.port = port;
-        this.socketFeedServer = new SocketFeedServer(outputType, port);
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        super.start(partition, writer);
-    }
-
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        return socketFeedServer.getInputStream();
-    }
-
-    private static class SocketFeedServer {
-        private ServerSocket serverSocket;
-        private InputStream inputStream;
-
-        public SocketFeedServer(ARecordType outputtype, int port) throws IOException, AsterixException {
-            try {
-                serverSocket = new ServerSocket(port);
-            } catch (Exception e) {
-                if (LOGGER.isLoggable(Level.INFO)) {
-                    LOGGER.info("port: " + port + " unusable ");
-                }
-            }
-            if (LOGGER.isLoggable(Level.INFO)) {
-                LOGGER.info("Feed server configured to use port: " + port);
-            }
-        }
-
-        public InputStream getInputStream() {
-            Socket socket;
-            try {
-                if (LOGGER.isLoggable(Level.INFO)) {
-                    LOGGER.info("waiting for client at " + serverSocket.getLocalPort());
-                }
-                socket = serverSocket.accept();
-                inputStream = socket.getInputStream();
-            } catch (IOException e) {
-                if (LOGGER.isLoggable(Level.SEVERE)) {
-                    LOGGER.severe("Unable to create input stream required for feed ingestion");
-                }
-            }
-            return inputStream;
-        }
-
-        public void stop() throws IOException {
-            try {
-                serverSocket.close();
-            } catch (IOException ioe) {
-                if (LOGGER.isLoggable(Level.WARNING)) {
-                    LOGGER.warning("Unable to close socket at " + serverSocket.getLocalPort());
-                }
-            }
-        }
-
-    }
-
-    @Override
-    public void stop() throws Exception {
-        socketFeedServer.stop();
-    }
-
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PUSH;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        try {
-            this.socketFeedServer = new SocketFeedServer((ARecordType) sourceDatatype, port);
-            return true;
-        } catch (Exception re) {
-            return false;
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapterFactory.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapterFactory.java
deleted file mode 100644
index 5d28f3d..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GenericSocketFeedAdapterFactory.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.commons.lang3.StringUtils;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
-import org.apache.asterix.external.adapter.factory.StreamBasedAdapterFactory;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.util.AsterixRuntimeUtil;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
-import org.apache.hyracks.algebricks.common.utils.Pair;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * Factory class for creating {@link GenericSocketFeedAdapter}. The
- * adapter listens at a port for receiving data (from external world).
- * Data received is transformed into Asterix Data Format (ADM).
- * <p>
- * Configuration keys read by this factory: {@value #KEY_SOCKETS} (required,
- * comma-separated {@code host:port} entries) and {@value #KEY_MODE}
- * (optional, {@code IP} or {@code NC}; defaults to {@code IP}).
- */
-public class GenericSocketFeedAdapterFactory extends StreamBasedAdapterFactory implements IFeedAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private ARecordType outputType;
-
-    /** One (node controller id, port) pair per partition, in configuration order. */
-    private List<Pair<String, Integer>> sockets;
-
-    private Mode mode = Mode.IP;
-
-    public static final String KEY_SOCKETS = "sockets";
-
-    public static final String KEY_MODE = "address-type";
-
-    /** How the host part of a socket entry is interpreted. */
-    public static enum Mode {
-        /** Host part is a node controller id. */
-        NC,
-        /** Host part is an address resolved with {@link InetAddress#getByName(String)}. */
-        IP
-    }
-
-    @Override
-    public String getName() {
-        return "socket_adapter";
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    public List<Pair<String, Integer>> getSockets() {
-        return sockets;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.configuration = configuration;
-        this.configureSockets(configuration);
-        this.configureFormat(outputType);
-        this.outputType = outputType;
-    }
-
-    /** Pins partition {@code i} to the node controller of the {@code i}-th configured socket. */
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        List<String> locations = new ArrayList<String>();
-        for (Pair<String, Integer> socket : sockets) {
-            locations.add(socket.first);
-        }
-        return new AlgebricksAbsolutePartitionConstraint(locations.toArray(new String[] {}));
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        Pair<String, Integer> socket = sockets.get(partition);
-        return new GenericSocketFeedAdapter(parserFactory, outputType, socket.second, ctx, partition);
-    }
-
-    /**
-     * Parses the {@value #KEY_MODE} and {@value #KEY_SOCKETS} entries of the
-     * configuration into {@link #mode} and {@link #sockets}.
-     *
-     * @throws IllegalArgumentException if the sockets parameter is missing, an
-     *             entry is not of the form {@code host:port}, the port is not
-     *             an integer, or the host is not part of the cluster
-     */
-    private void configureSockets(Map<String, String> configuration) throws Exception {
-        sockets = new ArrayList<Pair<String, Integer>>();
-        String modeValue = configuration.get(KEY_MODE);
-        if (modeValue != null) {
-            // Locale.ROOT: the default-locale upper-casing of "ip" is not "IP" everywhere (e.g. Turkish).
-            mode = Mode.valueOf(modeValue.trim().toUpperCase(java.util.Locale.ROOT));
-        }
-        String socketsValue = configuration.get(KEY_SOCKETS);
-        if (socketsValue == null) {
-            throw new IllegalArgumentException("\'sockets\' parameter not specified as part of adapter configuration");
-        }
-        Map<InetAddress, Set<String>> ncMap = AsterixRuntimeUtil.getNodeControllerMap();
-        List<String> ncs = AsterixRuntimeUtil.getAllNodeControllers();
-        String[] socketsArray = socketsValue.split(",");
-        Random random = new Random();
-        for (String socket : socketsArray) {
-            String[] socketTokens = socket.split(":");
-            if (socketTokens.length < 2) {
-                throw new IllegalArgumentException("Invalid socket '" + socket.trim()
-                        + "'; expected an entry of the form <host>:<port>");
-            }
-            String host = socketTokens[0].trim();
-            int port;
-            try {
-                port = Integer.parseInt(socketTokens[1].trim());
-            } catch (NumberFormatException nfe) {
-                throw new IllegalArgumentException("Invalid port in socket '" + socket.trim() + "'", nfe);
-            }
-            Pair<String, Integer> p;
-            if (mode == Mode.NC) {
-                if (!ncs.contains(host)) {
-                    throw new IllegalArgumentException("Invalid NC " + host
-                            + " as it is not part of the AsterixDB cluster. Valid choices are "
-                            + StringUtils.join(ncs, ", "));
-                }
-                p = new Pair<String, Integer>(host, port);
-            } else {
-                Set<String> ncsOnIp = ncMap.get(InetAddress.getByName(host));
-                if (ncsOnIp == null || ncsOnIp.isEmpty()) {
-                    throw new IllegalArgumentException("Invalid host " + host
-                            + " as it is not part of the AsterixDB cluster. Valid choices are "
-                            + StringUtils.join(ncMap.keySet(), ", "));
-                }
-                // Several node controllers may share one address; pick one at random.
-                String[] ncArray = ncsOnIp.toArray(new String[] {});
-                String nc = ncArray[random.nextInt(ncArray.length)];
-                p = new Pair<String, Integer>(nc, port);
-            }
-            sockets.add(p);
-        }
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public InputDataFormat getInputDataFormat() {
-        return InputDataFormat.UNKNOWN;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    /** Always throws: record tracking is disabled, see {@link #isRecordTrackingEnabled()}. */
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        throw new UnsupportedOperationException("Tracking of ingested records not enabled");
-    }
-
-    /** Always throws: this adapter does not read from external files. */
-    public void setFiles(List<ExternalFile> files) throws AlgebricksException {
-        throw new AlgebricksException("files access not supported for this adapter");
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java
deleted file mode 100644
index 679f1af..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-/**
- * Feed adapter that wraps another {@link FileSystemBasedAdapter} and serves
- * that adapter's input stream as its own. Data exchange mode is {@code PULL}
- * and no exception recovery is attempted.
- */
-
-public class RateControlledFileSystemBasedAdapter extends FileSystemBasedAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    /** Adapter whose input stream is handed out by {@link #getInputStream(int)}. */
-    private final FileSystemBasedAdapter coreAdapter;
-
-    /**
-     * @param atype forwarded to the superclass
-     * @param configuration accepted but not used by this constructor
-     * @param coreAdapter adapter that actually provides the input stream
-     * @param format accepted but not used by this constructor
-     * @param parserFactory forwarded to the superclass
-     * @param ctx forwarded to the superclass
-     */
-    public RateControlledFileSystemBasedAdapter(ARecordType atype, Map<String, String> configuration,
-            FileSystemBasedAdapter coreAdapter, String format, ITupleParserFactory parserFactory,
-            IHyracksTaskContext ctx) throws Exception {
-        super(parserFactory, atype, ctx);
-        this.coreAdapter = coreAdapter;
-    }
-
-    /** Delegates to the wrapped adapter for the given partition. */
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        final InputStream delegateStream = coreAdapter.getInputStream(partition);
-        return delegateStream;
-    }
-
-    /** Intentionally a no-op: there is nothing to shut down in this adapter. */
-    @Override
-    public void stop() {
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PULL;
-    }
-
-    /** Never recovers; always reports the exception as unhandled. */
-    @Override
-    public boolean handleException(Exception e) {
-        return false;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
deleted file mode 100644
index a8c77ac..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.adapter.factory.HDFSAdapterFactory;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
-import org.apache.asterix.external.adapter.factory.NCFileSystemAdapterFactory;
-import org.apache.asterix.external.adapter.factory.StreamBasedAdapterFactory;
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * Factory class for creating {@link RateControlledFileSystemBasedAdapter}. The
- * adapter simulates a feed from the contents of a source file. The file can be
- * on the local file system or on HDFS. The feed ends when the content of the
- * source file has been ingested.
- */
-public class RateControlledFileSystemBasedAdapterFactory extends StreamBasedAdapterFactory implements
-        IFeedAdapterFactory {
-    private static final long serialVersionUID = 1L;
-
-    public static final String KEY_FILE_SYSTEM = "fs";
-    public static final String LOCAL_FS = "localfs";
-    public static final String HDFS = "hdfs";
-    public static final String KEY_PATH = "path";
-    public static final String KEY_FORMAT = "format";
-
-    /** Factory of the wrapped file-system adapter, chosen from the {@code fs} setting. */
-    private IAdapterFactory adapterFactory;
-    private String format;
-    private ARecordType atype;
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        FileSystemBasedAdapter coreAdapter = (FileSystemBasedAdapter) adapterFactory.createAdapter(ctx, partition);
-        return new RateControlledFileSystemBasedAdapter(atype, configuration, coreAdapter, format, parserFactory, ctx);
-    }
-
-    @Override
-    public String getName() {
-        return "file_feed";
-    }
-
-    /**
-     * Verifies that every mandatory configuration key is present.
-     *
-     * @throws IllegalArgumentException naming the first missing parameter
-     */
-    private void checkRequiredArgs(Map<String, String> configuration) {
-        if (configuration.get(KEY_FILE_SYSTEM) == null) {
-            throw new IllegalArgumentException(
-                    "File system type not specified. (fs=?) File system could be 'localfs' or 'hdfs'");
-        }
-        if (configuration.get(IAdapterFactory.KEY_TYPE_NAME) == null) {
-            throw new IllegalArgumentException("Record type not specified (type-name=?)");
-        }
-        if (configuration.get(KEY_PATH) == null) {
-            throw new IllegalArgumentException("File path not specified (path=?)");
-        }
-        if (configuration.get(KEY_FORMAT) == null) {
-            throw new IllegalArgumentException("File format not specified (format=?)");
-        }
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    /**
-     * Validates the configuration, selects the underlying file-system adapter
-     * factory and configures both it and the record format.
-     *
-     * @throws IllegalArgumentException if a required parameter is missing
-     * @throws AsterixException if the {@code fs} value is neither {@code localfs} nor {@code hdfs}
-     */
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.configuration = configuration;
-        checkRequiredArgs(configuration);
-        String fileSystem = configuration.get(KEY_FILE_SYSTEM);
-        IAdapterFactory coreFactory;
-        // Both file-system names are matched case-insensitively; previously only
-        // 'localfs' was, so e.g. "HDFS" was rejected while "LocalFS" was accepted.
-        if (fileSystem.equalsIgnoreCase(LOCAL_FS)) {
-            coreFactory = new NCFileSystemAdapterFactory();
-        } else if (fileSystem.equalsIgnoreCase(HDFS)) {
-            coreFactory = new HDFSAdapterFactory();
-        } else {
-            throw new AsterixException("Unsupported file system type " + fileSystem);
-        }
-        this.atype = outputType;
-        format = configuration.get(KEY_FORMAT);
-        adapterFactory = coreFactory;
-        adapterFactory.configure(configuration, outputType);
-        configureFormat(outputType);
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return adapterFactory.getPartitionConstraint();
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return atype;
-    }
-
-    @Override
-    public InputDataFormat getInputDataFormat() {
-        return InputDataFormat.UNKNOWN;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    /** Always throws: record tracking is disabled, see {@link #isRecordTrackingEnabled()}. */
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        throw new UnsupportedOperationException("Tracking of ingested records not enabled");
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapter.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapter.java
deleted file mode 100644
index e537ef7..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapter.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.OutputStream;
-import java.net.Socket;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.hyracks.api.comm.IFrameWriter;
-
-/**
- * Feed adapter that acts as a socket client: it connects to a port on
- * 127.0.0.1 and streams the bytes of a local file over that connection.
- */
-public class SocketClientAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final Logger LOGGER = Logger.getLogger(SocketClientAdapter.class.getName());
-
-    private static final String LOCALHOST = "127.0.0.1";
-
-    /** Pause between connection attempts, in milliseconds. */
-    private static final long RECONNECT_PERIOD = 2000;
-
-    private final String localFile;
-
-    private final int port;
-
-    // volatile: cleared by stop() and polled by the streaming/connect loops,
-    // which may run on a different thread.
-    private volatile boolean continueStreaming = true;
-
-    public SocketClientAdapter(Integer port, String localFile) {
-        this.localFile = localFile;
-        this.port = port;
-    }
-
-    /**
-     * Connects to the receiver and copies the local file to it until the file
-     * is exhausted or {@link #stop()} is called. Returns without streaming if
-     * the adapter is stopped before a connection could be established.
-     */
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        Socket socket = waitForReceiver();
-        if (socket == null) {
-            // Stopped while still waiting for the receiver.
-            return;
-        }
-        // Nested try/finally so the socket is released even when the file cannot
-        // be opened, and the file is released even when closing the socket fails.
-        try {
-            FileInputStream fin = new FileInputStream(new File(localFile));
-            try {
-                OutputStream os = socket.getOutputStream();
-                byte[] chunk = new byte[1024];
-                int read;
-                while (continueStreaming) {
-                    read = fin.read(chunk);
-                    if (read > 0) {
-                        os.write(chunk, 0, read);
-                    } else {
-                        break;
-                    }
-                }
-                if (LOGGER.isLoggable(Level.INFO)) {
-                    LOGGER.info("Finished streaming file " + localFile + " to port [" + port + "]");
-                }
-            } finally {
-                fin.close();
-            }
-        } finally {
-            socket.close();
-        }
-
-    }
-
-    /**
-     * Retries the connection every {@link #RECONNECT_PERIOD} milliseconds until
-     * it succeeds or the adapter is stopped.
-     *
-     * @return the connected socket, or {@code null} if stopped first
-     */
-    private Socket waitForReceiver() throws Exception {
-        while (continueStreaming) {
-            try {
-                return new Socket(LOCALHOST, port);
-            } catch (java.io.IOException e) {
-                // Only I/O failures are retried; anything else (e.g. an invalid
-                // port) cannot be cured by waiting and is left to propagate.
-                if (LOGGER.isLoggable(Level.WARNING)) {
-                    LOGGER.warning("Receiver not ready, would wait for " + (RECONNECT_PERIOD / 1000)
-                            + " seconds before reconnecting");
-                }
-                Thread.sleep(RECONNECT_PERIOD);
-            }
-        }
-        return null;
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PUSH;
-    }
-
-    /** Asks the streaming and connect loops to finish at their next check. */
-    @Override
-    public void stop() throws Exception {
-        continueStreaming = false;
-    }
-
-    /** Never recovers; always reports the exception as unhandled. */
-    @Override
-    public boolean handleException(Exception e) {
-        return false;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapterFactory.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapterFactory.java
deleted file mode 100644
index 3d2f5af..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/SocketClientAdapterFactory.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.algebricks.common.utils.Pair;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public class SocketClientAdapterFactory implements IFeedAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private ARecordType outputType;
-
-    private GenericSocketFeedAdapterFactory genericSocketAdapterFactory;
-
-    private String[] fileSplits;
-
-    public static final String KEY_FILE_SPLITS = "file_splits";
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.outputType = outputType;
-        String fileSplitsValue = configuration.get(KEY_FILE_SPLITS);
-        if (fileSplitsValue == null) {
-            throw new IllegalArgumentException(
-                    "File splits not specified. File split is specified as a comma separated list of paths");
-        }
-        fileSplits = fileSplitsValue.trim().split(",");
-        genericSocketAdapterFactory = new GenericSocketFeedAdapterFactory();
-        genericSocketAdapterFactory.configure(configuration, outputType);
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public String getName() {
-        return "socket_client";
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return genericSocketAdapterFactory.getPartitionConstraint();
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        Pair<String, Integer> socket = genericSocketAdapterFactory.getSockets().get(partition);
-        return new SocketClientAdapter(socket.second, fileSplits[partition]);
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TweetGenerator.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TweetGenerator.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TweetGenerator.java
deleted file mode 100644
index 85195fb..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TweetGenerator.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.tools.external.data.DataGenerator.InitializationInfo;
-import org.apache.asterix.tools.external.data.DataGenerator.TweetMessage;
-import org.apache.asterix.tools.external.data.DataGenerator.TweetMessageIterator;
-
-public class TweetGenerator {
-
-    private static Logger LOGGER = Logger.getLogger(TweetGenerator.class.getName());
-
-    public static final String KEY_DURATION = "duration";
-    public static final String KEY_TPS = "tps";
-    public static final String KEY_VERBOSE = "verbose";
-    public static final String KEY_FIELDS = "fields";
-    public static final int INFINITY = 0;
-
-    private static final int DEFAULT_DURATION = INFINITY;
-
-    private int duration;
-    private TweetMessageIterator tweetIterator = null;
-    private int partition;
-    private long tweetCount = 0;
-    private int frameTweetCount = 0;
-    private int numFlushedTweets = 0;
-    private DataGenerator dataGenerator = null;
-    private ByteBuffer outputBuffer = ByteBuffer.allocate(32 * 1024);
-    private String[] fields;
-    private final List<OutputStream> subscribers;
-    private final Object lock = new Object();
-    private final List<OutputStream> subscribersForRemoval = new ArrayList<OutputStream>();
-
-    public TweetGenerator(Map<String, String> configuration, int partition) throws Exception {
-        this.partition = partition;
-        String value = configuration.get(KEY_DURATION);
-        this.duration = value != null ? Integer.parseInt(value) : DEFAULT_DURATION;
-        dataGenerator = new DataGenerator(new InitializationInfo());
-        tweetIterator = dataGenerator.new TweetMessageIterator(duration);
-        this.fields = configuration.get(KEY_FIELDS) != null ? configuration.get(KEY_FIELDS).split(",") : null;
-        this.subscribers = new ArrayList<OutputStream>();
-    }
-
-    private void writeTweetString(TweetMessage tweetMessage) throws IOException {
-        String tweet = tweetMessage.getAdmEquivalent(fields) + "\n";
-        System.out.println(tweet);
-        tweetCount++;
-        byte[] b = tweet.getBytes();
-        if (outputBuffer.position() + b.length > outputBuffer.limit()) {
-            flush();
-            numFlushedTweets += frameTweetCount;
-            frameTweetCount = 0;
-            outputBuffer.put(b);
-        } else {
-            outputBuffer.put(b);
-        }
-        frameTweetCount++;
-    }
-
-    private void flush() throws IOException {
-        outputBuffer.flip();
-        synchronized (lock) {
-            for (OutputStream os : subscribers) {
-                try {
-                    os.write(outputBuffer.array(), 0, outputBuffer.limit());
-                } catch (Exception e) {
-                    subscribersForRemoval.add(os);
-                }
-            }
-            if (!subscribersForRemoval.isEmpty()) {
-                subscribers.removeAll(subscribersForRemoval);
-                subscribersForRemoval.clear();
-            }
-        }
-        outputBuffer.position(0);
-        outputBuffer.limit(32 * 1024);
-    }
-
-    public boolean generateNextBatch(int numTweets) throws Exception {
-        boolean moreData = tweetIterator.hasNext();
-        if (!moreData) {
-            if (outputBuffer.position() > 0) {
-                flush();
-            }
-            if (LOGGER.isLoggable(Level.INFO)) {
-                LOGGER.info("Reached end of batch. Tweet Count: [" + partition + "]" + tweetCount);
-            }
-            return false;
-        } else {
-            int count = 0;
-            while (count < numTweets) {
-                writeTweetString(tweetIterator.next());
-                count++;
-            }
-            return true;
-        }
-    }
-
-    public int getNumFlushedTweets() {
-        return numFlushedTweets;
-    }
-
-    public void registerSubscriber(OutputStream os) {
-        synchronized (lock) {
-            subscribers.add(os);
-        }
-    }
-
-    public void deregisterSubscribers(OutputStream os) {
-        synchronized (lock) {
-            subscribers.remove(os);
-        }
-    }
-
-    public void close() throws IOException {
-        synchronized (lock) {
-            for (OutputStream os : subscribers) {
-                os.close();
-            }
-        }
-    }
-
-    public boolean isSubscribed() {
-        return !subscribers.isEmpty();
-    }
-
-    public long getTweetCount() {
-        return tweetCount;
-    }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapter.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapter.java
deleted file mode 100644
index fffbc17..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapter.java
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.PipedInputStream;
-import java.io.PipedOutputStream;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.asterix.external.dataset.adapter.StreamBasedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-/**
- * A simulator of the Twitter Firehose. Generates meaningful tweets
- * at a configurable rate
- */
-public class TwitterFirehoseFeedAdapter extends StreamBasedAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final Logger LOGGER = Logger.getLogger(TwitterFirehoseFeedAdapter.class.getName());
-
-    private ExecutorService executorService = Executors.newCachedThreadPool();
-
-    private PipedOutputStream outputStream = new PipedOutputStream();
-
-    private PipedInputStream inputStream = new PipedInputStream(outputStream);
-
-    private final TwitterServer twitterServer;
-
-    public TwitterFirehoseFeedAdapter(Map<String, String> configuration, ITupleParserFactory parserFactory,
-            ARecordType outputtype, IHyracksTaskContext ctx, int partition) throws Exception {
-        super(parserFactory, outputtype, ctx, partition);
-        this.twitterServer = new TwitterServer(configuration, partition, outputtype, outputStream, executorService);
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        twitterServer.start();
-        super.start(partition, writer);
-    }
-
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        return inputStream;
-    }
-
-    private static class TwitterServer {
-        private final DataProvider dataProvider;
-        private final ExecutorService executorService;
-
-        public TwitterServer(Map<String, String> configuration, int partition, ARecordType outputtype, OutputStream os,
-                ExecutorService executorService) throws Exception {
-            dataProvider = new DataProvider(configuration, outputtype, partition, os);
-            this.executorService = executorService;
-        }
-
-        public void stop() throws IOException {
-            dataProvider.stop();
-        }
-
-        public void start() {
-            executorService.execute(dataProvider);
-        }
-
-    }
-
-    private static class DataProvider implements Runnable {
-
-        public static final String KEY_MODE = "mode";
-
-        private TweetGenerator tweetGenerator;
-        private boolean continuePush = true;
-        private int batchSize;
-        private final Mode mode;
-        private final OutputStream os;
-
-        public static enum Mode {
-            AGGRESSIVE,
-            CONTROLLED
-        }
-
-        public DataProvider(Map<String, String> configuration, ARecordType outputtype, int partition, OutputStream os)
-                throws Exception {
-            this.tweetGenerator = new TweetGenerator(configuration, partition);
-            this.tweetGenerator.registerSubscriber(os);
-            this.os = os;
-            mode = configuration.get(KEY_MODE) != null ? Mode.valueOf(configuration.get(KEY_MODE).toUpperCase())
-                    : Mode.AGGRESSIVE;
-            switch (mode) {
-                case CONTROLLED:
-                    String tpsValue = configuration.get(TweetGenerator.KEY_TPS);
-                    if (tpsValue == null) {
-                        throw new IllegalArgumentException("TPS value not configured. use tps=<value>");
-                    }
-                    batchSize = Integer.parseInt(tpsValue);
-                    break;
-                case AGGRESSIVE:
-                    batchSize = 5000;
-                    break;
-            }
-        }
-
-        @Override
-        public void run() {
-            boolean moreData = true;
-            long startBatch;
-            long endBatch;
-
-            while (true) {
-                try {
-                    while (moreData && continuePush) {
-                        switch (mode) {
-                            case AGGRESSIVE:
-                                moreData = tweetGenerator.generateNextBatch(batchSize);
-                                break;
-                            case CONTROLLED:
-                                startBatch = System.currentTimeMillis();
-                                moreData = tweetGenerator.generateNextBatch(batchSize);
-                                endBatch = System.currentTimeMillis();
-                                if (endBatch - startBatch < 1000) {
-                                    Thread.sleep(1000 - (endBatch - startBatch));
-                                }
-                                break;
-                        }
-                    }
-                    os.close();
-                    break;
-                } catch (Exception e) {
-                    if (LOGGER.isLoggable(Level.WARNING)) {
-                        LOGGER.warning("Exception in adaptor " + e.getMessage());
-                    }
-                }
-            }
-        }
-
-        public void stop() {
-            continuePush = false;
-        }
-
-    }
-
-    @Override
-    public void stop() throws Exception {
-        twitterServer.stop();
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PUSH;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        try {
-            twitterServer.stop();
-        } catch (Exception re) {
-            re.printStackTrace();
-            return false;
-        }
-        twitterServer.start();
-        return true;
-    }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapterFactory.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapterFactory.java
deleted file mode 100644
index f7e79f7..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/TwitterFirehoseFeedAdapterFactory.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
-import org.apache.asterix.external.adapter.factory.StreamBasedAdapterFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.util.AsterixClusterProperties;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * Factory class for creating @see{TwitterFirehoseFeedAdapter}. The adapter
- * simulates a twitter firehose with tweets being "pushed" into Asterix at a
- * configurable rate measured in terms of TPS (tweets/second). The stream of
- * tweets lasts for a configurable duration (measured in seconds).
- */
-public class TwitterFirehoseFeedAdapterFactory extends StreamBasedAdapterFactory implements IFeedAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    /**
-     * Degree of parallelism for feed ingestion activity. Defaults to 1. This
-     * determines the count constraint for the ingestion operator.
-     **/
-    private static final String KEY_INGESTION_CARDINALITY = "ingestion-cardinality";
-
-    /**
-     * The absolute locations where ingestion operator instances will be placed.
-     **/
-    private static final String KEY_INGESTION_LOCATIONS = "ingestion-location";
-
-    private ARecordType outputType;
-
-    @Override
-    public String getName() {
-        return "twitter_firehose";
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        configuration.put(AsterixTupleParserFactory.KEY_FORMAT, AsterixTupleParserFactory.FORMAT_ADM);
-        this.configuration = configuration;
-        this.outputType = outputType;
-        this.configureFormat(outputType);
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        String ingestionCardinalityParam = configuration.get(KEY_INGESTION_CARDINALITY);
-        String ingestionLocationParam = configuration.get(KEY_INGESTION_LOCATIONS);
-        String[] locations = null;
-        if (ingestionLocationParam != null) {
-            locations = ingestionLocationParam.split(",");
-        }
-        int count = locations != null ? locations.length : 1;
-        if (ingestionCardinalityParam != null) {
-            count = Integer.parseInt(ingestionCardinalityParam);
-        }
-
-        List<String> chosenLocations = new ArrayList<String>();
-        String[] availableLocations = locations != null ? locations
-                : AsterixClusterProperties.INSTANCE.getParticipantNodes().toArray(new String[] {});
-        for (int i = 0, k = 0; i < count; i++, k = (k + 1) % availableLocations.length) {
-            chosenLocations.add(availableLocations[k]);
-        }
-        return new AlgebricksAbsolutePartitionConstraint(chosenLocations.toArray(new String[] {}));
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        return new TwitterFirehoseFeedAdapter(configuration, parserFactory, outputType, ctx, partition);
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public InputDataFormat getInputDataFormat() {
-        return InputDataFormat.ADM;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        throw new UnsupportedOperationException("Tracking of ingested records not enabled");
-    }
-
-}
\ No newline at end of file


[21/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
First stage of external data cleanup

In this change, different parts of external data were refactored.
The goal was to make it more modular and easier to maintain, to allow
greater flexibility for extension, and to reduce code redundancy.

Change-Id: I04a8c4e494d8d1363992b6fe0bdbe6b2b3b7b767
Reviewed-on: https://asterix-gerrit.ics.uci.edu/566
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <hu...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/commit/284590ed
Tree: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/tree/284590ed
Diff: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/diff/284590ed

Branch: refs/heads/master
Commit: 284590ed9c30349058fdc448852ebb6dc698912f
Parents: 1d5cf64
Author: Abdullah Alamoudi <ba...@gmail.com>
Authored: Sun Jan 3 15:42:18 2016 +0300
Committer: abdullah alamoudi <ba...@gmail.com>
Committed: Sun Jan 3 09:35:48 2016 -0800

----------------------------------------------------------------------
 asterix-algebra/pom.xml                         |    1 -
 asterix-app/pom.xml                             |   12 +-
 .../asterix/aql/translator/QueryTranslator.java |   16 +-
 .../file/ExternalIndexingOperations.java        |  132 +-
 .../org/apache/asterix/file/FeedOperations.java |   13 +-
 .../file/SecondaryIndexOperationsHelper.java    |    2 +-
 .../bootstrap/ExternalLibraryBootstrap.java     |    4 +-
 .../asterix/test/optimizer/OptimizerTest.java   |    4 +-
 .../asterix/test/runtime/ExecutionTest.java     |   10 +-
 .../asterix/test/runtime/ExecutionTestUtil.java |    4 +-
 .../metadata/results/basic/meta15/meta15.1.adm  |   15 +-
 .../common/api/IAsterixAppRuntimeContext.java   |    1 -
 .../common/feeds/BasicMonitoredBuffer.java      |    2 +
 .../feeds/ComputeSideMonitoredBuffer.java       |    2 +
 .../common/feeds/IFramePostProcessor.java       |   28 -
 .../common/feeds/IFramePreprocessor.java        |   26 -
 .../common/feeds/IntakeSideMonitoredBuffer.java |    2 +
 .../asterix/common/feeds/MonitoredBuffer.java   |    2 +
 .../feeds/StorageSideMonitoredBuffer.java       |    2 +
 .../feeds/api/IAdapterRuntimeManager.java       |    4 +-
 .../common/feeds/api/IDataSourceAdapter.java    |   60 +
 .../common/feeds/api/IDatasourceAdapter.java    |   47 -
 .../asterix/common/feeds/api/IFeedAdapter.java  |   60 -
 .../common/feeds/api/IFramePostProcessor.java   |   28 +
 .../common/feeds/api/IFramePreprocessor.java    |   26 +
 .../feeds/api/ITupleTrackingFeedAdapter.java    |    2 +-
 .../common/parse/IAsterixTupleParser.java       |   29 -
 .../common/parse/ITupleForwardPolicy.java       |   48 -
 .../asterix/common/parse/ITupleForwarder.java   |   46 +
 .../apache/asterix/test/aql/TestExecutor.java   |    9 +-
 .../asterix/event/service/ZooKeeperService.java |   39 +-
 asterix-external-data/pom.xml                   |   91 ++
 .../adapter/factory/CNNFeedAdapterFactory.java  |  153 ---
 .../adapter/factory/GenericAdapterFactory.java  |  101 ++
 .../adapter/factory/HDFSAdapterFactory.java     |  343 -----
 .../factory/HDFSIndexingAdapterFactory.java     |  208 ---
 .../adapter/factory/HiveAdapterFactory.java     |  127 --
 .../adapter/factory/IAdapterFactory.java        |   98 --
 .../factory/IControlledAdapterFactory.java      |   36 -
 .../adapter/factory/IFeedAdapterFactory.java    |   29 -
 .../adapter/factory/LookupAdapterFactory.java   |  111 ++
 .../factory/NCFileSystemAdapterFactory.java     |  166 ---
 .../PullBasedAzureTwitterAdapterFactory.java    |  132 --
 .../factory/PullBasedTwitterAdapterFactory.java |  118 --
 .../factory/PushBasedTwitterAdapterFactory.java |  108 --
 .../adapter/factory/RSSFeedAdapterFactory.java  |  114 --
 .../factory/StreamBasedAdapterFactory.java      |   49 -
 .../asterix/external/api/IAdapterFactory.java   |   79 ++
 .../external/api/IDataFlowController.java       |   55 +
 .../asterix/external/api/IDataParser.java       |  127 ++
 .../external/api/IDataParserFactory.java        |   52 +
 .../api/IExternalDataSourceFactory.java         |   64 +
 .../asterix/external/api/IExternalFunction.java |   27 +
 .../asterix/external/api/IExternalIndexer.java  |   53 +
 .../external/api/IExternalScalarFunction.java   |   25 +
 .../asterix/external/api/IFunctionFactory.java  |   25 +
 .../asterix/external/api/IFunctionHelper.java   |   39 +
 .../api/IIndexibleExternalDataSource.java       |   34 +
 .../external/api/IIndexingAdapterFactory.java   |   27 +
 .../external/api/IIndexingDatasource.java       |   25 +
 .../external/api/IInputStreamProvider.java      |   25 +
 .../api/IInputStreamProviderFactory.java        |   26 +
 .../asterix/external/api/IJListAccessor.java    |   31 +
 .../apache/asterix/external/api/IJObject.java   |   37 +
 .../asterix/external/api/IJObjectAccessor.java  |   29 +
 .../asterix/external/api/IJRecordAccessor.java  |   34 +
 .../org/apache/asterix/external/api/IJType.java |   29 +
 .../external/api/ILookupReaderFactory.java      |   27 +
 .../external/api/ILookupRecordReader.java       |   40 +
 .../asterix/external/api/INodeResolver.java     |   37 +
 .../external/api/INodeResolverFactory.java      |   34 +
 .../apache/asterix/external/api/IRawRecord.java |   51 +
 .../asterix/external/api/IRecordDataParser.java |   36 +
 .../external/api/IRecordDataParserFactory.java  |   32 +
 .../external/api/IRecordFlowController.java     |   27 +
 .../asterix/external/api/IRecordReader.java     |   66 +
 .../external/api/IRecordReaderFactory.java      |   29 +
 .../asterix/external/api/IResultCollector.java  |   45 +
 .../asterix/external/api/IStreamDataParser.java |   39 +
 .../external/api/IStreamDataParserFactory.java  |   29 +
 .../external/api/IStreamFlowController.java     |   23 +
 .../dataflow/AbstractDataFlowController.java    |   55 +
 .../dataflow/CounterTimerTupleForwarder.java    |  157 +++
 .../dataflow/FrameFullTupleForwarder.java       |   72 ++
 .../dataflow/IndexingDataFlowController.java    |   40 +
 .../dataflow/RateControlledTupleForwarder.java  |   87 ++
 .../dataflow/RecordDataFlowController.java      |   77 ++
 .../dataflow/StreamDataFlowController.java      |   64 +
 .../dataset/adapter/AzureTweetEntity.java       |   51 -
 .../adapter/AzureTweetMetadataEntity.java       |   95 --
 .../dataset/adapter/ClientBasedFeedAdapter.java |  155 ---
 .../external/dataset/adapter/FeedClient.java    |  174 ---
 .../dataset/adapter/FileSystemBasedAdapter.java |   62 -
 .../dataset/adapter/GenericAdapter.java         |   48 +
 .../external/dataset/adapter/HDFSAdapter.java   |   97 --
 .../dataset/adapter/HDFSIndexingAdapter.java    |   78 --
 .../external/dataset/adapter/HiveAdapter.java   |   49 -
 .../dataset/adapter/IControlledAdapter.java     |   65 -
 .../external/dataset/adapter/IFeedClient.java   |   46 -
 .../dataset/adapter/IFeedClientFactory.java     |   40 -
 .../dataset/adapter/IPullBasedFeedClient.java   |   46 -
 .../external/dataset/adapter/LookupAdapter.java |  162 +++
 .../dataset/adapter/NCFileSystemAdapter.java    |   69 -
 .../dataset/adapter/PullBasedAdapter.java       |  162 ---
 .../adapter/PullBasedAzureFeedClient.java       |  190 ---
 .../adapter/PullBasedAzureTwitterAdapter.java   |  103 --
 .../adapter/PullBasedTwitterAdapter.java        |   80 --
 .../adapter/PullBasedTwitterFeedClient.java     |  105 --
 .../adapter/PushBasedTwitterAdapter.java        |   70 --
 .../adapter/PushBasedTwitterFeedClient.java     |  111 --
 .../dataset/adapter/RSSFeedAdapter.java         |   98 --
 .../external/dataset/adapter/RSSFeedClient.java |  161 ---
 .../dataset/adapter/StreamBasedAdapter.java     |   11 +-
 .../external/feeds/IPullBasedFeedAdapter.java   |   34 -
 .../asterix/external/indexing/ExternalFile.java |   10 +
 .../indexing/ExternalFileIndexAccessor.java     |   59 +-
 .../indexing/FileIndexTupleTranslator.java      |   91 ++
 .../external/indexing/FileOffsetIndexer.java    |   74 ++
 .../external/indexing/IndexingScheduler.java    |  348 +++++
 .../external/indexing/RCRecordIdReader.java     |   43 +
 .../indexing/RecordColumnarIndexer.java         |   84 ++
 .../asterix/external/indexing/RecordId.java     |   54 +
 .../external/indexing/RecordIdReader.java       |   78 ++
 .../indexing/RecordIdReaderFactory.java         |   38 +
 .../dataflow/AbstractIndexingTupleParser.java   |   97 --
 .../AdmOrDelimitedControlledTupleParser.java    |  239 ----
 .../AdmOrDelimitedIndexingTupleParser.java      |  105 --
 .../dataflow/FileIndexTupleTranslator.java      |   95 --
 .../dataflow/HDFSIndexingParserFactory.java     |  140 ---
 .../indexing/dataflow/HDFSLookupAdapter.java    |  187 ---
 .../dataflow/HDFSLookupAdapterFactory.java      |   83 --
 .../dataflow/HDFSObjectTupleParser.java         |   81 --
 .../dataflow/HDFSObjectTupleParserFactory.java  |   69 -
 .../indexing/dataflow/HiveObjectParser.java     |  426 -------
 .../dataflow/IAsterixHDFSRecordParser.java      |   55 -
 .../dataflow/IControlledTupleParser.java        |   44 -
 .../dataflow/IControlledTupleParserFactory.java |   23 -
 .../indexing/dataflow/IndexingScheduler.java    |  348 -----
 .../dataflow/RCFileControlledTupleParser.java   |  197 ---
 .../dataflow/RCFileIndexingTupleParser.java     |   63 -
 .../dataflow/SeqOrTxtControlledTupleParser.java |  190 ---
 .../dataflow/TextOrSeqIndexingTupleParser.java  |   47 -
 .../input/AbstractHDFSLookupInputStream.java    |  117 --
 .../indexing/input/AbstractHDFSReader.java      |  107 --
 .../input/GenericFileAwareRecordReader.java     |  127 --
 .../indexing/input/GenericRecordReader.java     |  135 --
 .../indexing/input/HDFSSeekableLineReader.java  |  232 ----
 .../external/indexing/input/ILookupReader.java  |   24 -
 .../indexing/input/RCFileDataReader.java        |  178 ---
 .../indexing/input/RCFileLookupReader.java      |  106 --
 .../input/SequenceFileLookupInputStream.java    |   75 --
 .../input/SequenceFileLookupReader.java         |  109 --
 .../input/TextFileLookupInputStream.java        |   69 -
 .../indexing/input/TextFileLookupReader.java    |   97 --
 .../indexing/input/TextualDataReader.java       |  234 ----
 .../input/TextualFullScanDataReader.java        |  209 ---
 ...xternalDatasetIndexesOperatorDescriptor.java |  122 --
 ...alDatasetIndexesAbortOperatorDescriptor.java |   56 -
 ...lDatasetIndexesCommitOperatorDescriptor.java |   65 -
 ...DatasetIndexesRecoverOperatorDescriptor.java |   57 -
 ...ternalIndexBulkModifyOperatorDescriptor.java |   69 -
 ...rnalIndexBulkModifyOperatorNodePushable.java |  119 --
 .../ExternalLoopkupOperatorDiscriptor.java      |  141 ---
 .../operators/IndexInfoOperatorDescriptor.java  |  126 --
 .../external/input/HDFSDataSourceFactory.java   |  198 +++
 .../external/input/record/CharArrayRecord.java  |  106 ++
 .../external/input/record/GenericRecord.java    |   62 +
 .../reader/AbstractCharRecordLookupReader.java  |   78 ++
 .../reader/AbstractHDFSLookupRecordReader.java  |  113 ++
 .../reader/AbstractStreamRecordReader.java      |   75 ++
 .../AbstractStreamRecordReaderFactory.java      |   99 ++
 .../input/record/reader/EmptyRecordReader.java  |   56 +
 .../input/record/reader/HDFSRecordReader.java   |  194 +++
 .../input/record/reader/HDFSTextLineReader.java |  234 ++++
 .../input/record/reader/LineRecordReader.java   |  108 ++
 .../reader/LookupReaderFactoryProvider.java     |   44 +
 .../record/reader/QuotedLineRecordReader.java   |  115 ++
 .../input/record/reader/RCLookupReader.java     |   92 ++
 .../input/record/reader/RSSRecordReader.java    |  177 +++
 .../reader/SemiStructuredRecordReader.java      |  147 +++
 .../record/reader/SequenceLookupReader.java     |   71 ++
 .../input/record/reader/TextLookupReader.java   |   64 +
 .../record/reader/TwitterPullRecordReader.java  |  106 ++
 .../record/reader/TwitterPushRecordReader.java  |  122 ++
 .../reader/factory/HDFSLookupReaderFactory.java |   90 ++
 .../reader/factory/LineRecordReaderFactory.java |   55 +
 .../reader/factory/RSSRecordReaderFactory.java  |   87 ++
 .../SemiStructuredRecordReaderFactory.java      |   46 +
 .../factory/TwitterRecordReaderFactory.java     |  136 ++
 .../external/input/stream/AInputStream.java     |   28 +
 .../input/stream/AInputStreamReader.java        |   34 +
 .../external/input/stream/BasicInputStream.java |   86 ++
 .../input/stream/HDFSInputStreamProvider.java   |  118 ++
 .../stream/LocalFSInputStreamProvider.java      |   56 +
 .../input/stream/SocketInputStream.java         |  101 ++
 .../input/stream/SocketInputStreamProvider.java |   36 +
 .../TwitterFirehoseInputStreamProvider.java     |  183 +++
 .../LocalFSInputStreamProviderFactory.java      |  139 ++
 .../SocketInputStreamProviderFactory.java       |  130 ++
 .../TwitterFirehoseStreamProviderFactory.java   |   96 ++
 .../external/library/ExternalFunction.java      |    3 +
 .../library/ExternalFunctionProvider.java       |    3 +
 .../external/library/IExternalFunction.java     |   27 -
 .../library/IExternalScalarFunction.java        |   25 -
 .../external/library/IFunctionFactory.java      |   25 -
 .../external/library/IFunctionHelper.java       |   40 -
 .../external/library/IResultCollector.java      |   45 -
 .../external/library/JTypeObjectFactory.java    |    2 +-
 .../external/library/JavaFunctionHelper.java    |    3 +-
 .../external/library/ResultCollector.java       |    1 +
 .../asterix/external/library/TypeInfo.java      |    2 +-
 .../external/library/java/IJListAccessor.java   |   30 -
 .../asterix/external/library/java/IJObject.java |   37 -
 .../external/library/java/IJObjectAccessor.java |   29 -
 .../external/library/java/IJRecordAccessor.java |   33 -
 .../asterix/external/library/java/IJType.java   |   29 -
 .../external/library/java/JObjectAccessors.java |    4 +
 .../library/java/JObjectPointableVisitor.java   |    4 +
 .../external/library/java/JObjectUtil.java      |    1 +
 .../asterix/external/library/java/JObjects.java |    1 +
 ...xternalDatasetIndexesOperatorDescriptor.java |  121 ++
 .../ExternalBTreeSearchOperatorDescriptor.java  |   60 +
 ...ExternalBTreeSearchOperatorNodePushable.java |   87 ++
 ...alDatasetIndexesAbortOperatorDescriptor.java |   56 +
 ...lDatasetIndexesCommitOperatorDescriptor.java |   65 +
 ...DatasetIndexesRecoverOperatorDescriptor.java |   57 +
 .../ExternalFilesIndexOperatorDescriptor.java   |  165 +++
 ...ternalIndexBulkModifyOperatorDescriptor.java |   69 +
 ...rnalIndexBulkModifyOperatorNodePushable.java |  119 ++
 .../ExternalLookupOperatorDescriptor.java       |  132 ++
 .../ExternalRTreeSearchOperatorDescriptor.java  |   57 +
 ...ExternalRTreeSearchOperatorNodePushable.java |   87 ++
 .../operators/IndexInfoOperatorDescriptor.java  |  126 ++
 .../asterix/external/parser/ADMDataParser.java  | 1148 +++++++++++++++++
 .../external/parser/AbstractDataParser.java     |  522 ++++++++
 .../external/parser/DelimitedDataParser.java    |  208 +++
 .../external/parser/HiveRecordParser.java       |  385 ++++++
 .../asterix/external/parser/RSSParser.java      |   83 ++
 .../asterix/external/parser/TweetParser.java    |  144 +++
 .../parser/factory/ADMDataParserFactory.java    |   56 +
 .../AbstractRecordStreamParserFactory.java      |   51 +
 .../factory/DelimitedDataParserFactory.java     |   95 ++
 .../parser/factory/HiveDataParserFactory.java   |   67 +
 .../parser/factory/RSSParserFactory.java        |   68 +
 .../parser/factory/TweetParserFactory.java      |   67 +
 .../provider/AdapterFactoryProvider.java        |  114 ++
 .../provider/DataflowControllerProvider.java    |   96 ++
 .../provider/DatasourceFactoryProvider.java     |  101 ++
 .../provider/ExternalIndexerProvider.java       |   45 +
 .../provider/ParserFactoryProvider.java         |   65 +
 .../asterix/external/runtime/DataGenerator.java | 1188 ++++++++++++++++++
 .../runtime/GenericSocketFeedAdapter.java       |  118 ++
 .../GenericSocketFeedAdapterFactory.java        |  154 +++
 .../external/runtime/SocketClientAdapter.java   |  106 ++
 .../runtime/SocketClientAdapterFactory.java     |   77 ++
 .../external/runtime/TweetGenerator.java        |  156 +++
 .../asterix/external/util/DNSResolver.java      |   54 -
 .../external/util/DNSResolverFactory.java       |    7 +-
 .../asterix/external/util/DataflowUtils.java    |   69 +
 .../util/ExternalDataCompatibilityUtils.java    |   85 ++
 .../external/util/ExternalDataConstants.java    |  176 +++
 .../util/ExternalDataExceptionUtils.java        |   32 +
 .../external/util/ExternalDataUtils.java        |  222 ++++
 .../apache/asterix/external/util/HDFSUtils.java |  232 ++++
 .../asterix/external/util/INodeResolver.java    |   37 -
 .../external/util/INodeResolverFactory.java     |   34 -
 .../external/util/IdentitiyResolverFactory.java |    3 +
 .../asterix/external/util/IdentityResolver.java |    2 +
 .../asterix/external/util/NodeResolver.java     |   98 ++
 .../src/main/resources/adm.grammar              |   86 ++
 .../external/library/AddHashTagsFactory.java    |    4 +-
 .../external/library/AddHashTagsFunction.java   |    2 +
 .../library/AddHashTagsInPlaceFactory.java      |    2 +
 .../library/AddHashTagsInPlaceFunction.java     |    2 +
 .../external/library/AllTypesFactory.java       |    4 +-
 .../external/library/AllTypesFunction.java      |    2 +
 .../external/library/CapitalFinderFactory.java  |    4 +-
 .../external/library/CapitalFinderFunction.java |    2 +
 .../external/library/EchoDelayFactory.java      |    2 +
 .../external/library/EchoDelayFunction.java     |    2 +
 .../external/library/ParseTweetFactory.java     |    4 +-
 .../external/library/ParseTweetFunction.java    |    2 +
 .../asterix/external/library/SumFactory.java    |    2 +
 .../asterix/external/library/SumFunction.java   |    4 +-
 .../external/library/UpperCaseFactory.java      |    4 +-
 .../external/library/UpperCaseFunction.java     |    2 +
 .../library/adapter/TestTypedAdapter.java       |   13 +-
 .../adapter/TestTypedAdapterFactory.java        |   85 +-
 .../operator/file/ADMDataParserTest.java        |  116 ++
 .../installer/test/AbstractExecutionIT.java     |   16 +-
 .../test/AsterixExternalLibraryIT.java          |   25 +-
 .../test/AsterixInstallerIntegrationUtil.java   |    1 +
 .../installer/test/ClusterExecutionIT.java      |   20 +-
 .../installer/test/ManagixExecutionIT.java      |   17 -
 .../installer/test/ManagixSqlppExecutionIT.java |   13 +-
 .../aql/statement/SubscribeFeedStatement.java   |    4 +-
 .../src/main/resources/Lexer.java               |   27 +-
 .../metadata/bootstrap/MetadataBootstrap.java   |   38 +-
 .../metadata/declared/AqlMetadataProvider.java  |  223 +---
 .../declared/FieldExtractingAdapter.java        |  170 ---
 .../declared/FieldExtractingAdapterFactory.java |   86 --
 .../metadata/declared/PKGeneratingAdapter.java  |  184 ---
 .../declared/PKGeneratingAdapterFactory.java    |   86 --
 .../feeds/AbstractDatasourceAdapter.java        |    4 +-
 .../feeds/AbstractFeedDatasourceAdapter.java    |    4 +-
 .../asterix/metadata/feeds/AdapterExecutor.java |    6 +-
 .../metadata/feeds/AdapterRuntimeManager.java   |   11 +-
 .../ExternalDataScanOperatorDescriptor.java     |    6 +-
 .../feeds/FeedIntakeOperatorDescriptor.java     |   20 +-
 .../feeds/FeedIntakeOperatorNodePushable.java   |   21 +-
 .../apache/asterix/metadata/feeds/FeedUtil.java |   42 +-
 .../metadata/feeds/ITypedAdapterFactory.java    |    2 +-
 .../asterix/formats/base/IDataFormat.java       |   10 +-
 .../asterix/om/util/AsterixRuntimeUtil.java     |    4 +
 asterix-runtime/pom.xml                         |  226 +---
 .../ExternalBTreeSearchOperatorDescriptor.java  |   60 -
 ...ExternalBTreeSearchOperatorNodePushable.java |   87 --
 .../ExternalRTreeSearchOperatorDescriptor.java  |   57 -
 ...ExternalRTreeSearchOperatorNodePushable.java |   87 --
 .../runtime/formats/NonTaggedDataFormat.java    |  162 ++-
 .../runtime/operators/file/ADMDataParser.java   | 1100 ----------------
 .../operators/file/AbstractDataParser.java      |  521 --------
 .../operators/file/AbstractTupleParser.java     |   81 --
 .../file/AsterixTupleParserFactory.java         |  272 ----
 .../file/CounterTimerTupleForwardPolicy.java    |  158 ---
 .../operators/file/DelimitedDataParser.java     |  176 ---
 .../file/FrameFullTupleForwardPolicy.java       |   76 --
 .../runtime/operators/file/IDataParser.java     |   59 -
 .../file/RateContolledParserPolicy.java         |   97 --
 .../file/RateControlledTupleForwardPolicy.java  |   88 --
 asterix-runtime/src/main/resources/adm.grammar  |   86 --
 .../operator/file/ADMDataParserTest.java        |  116 --
 .../tools/external/data/DataGenerator.java      | 1188 ------------------
 .../ExternalFilesIndexOperatorDescriptor.java   |  165 ---
 .../tools/external/data/GULongIDGenerator.java  |   50 -
 .../external/data/GenericSocketFeedAdapter.java |  123 --
 .../data/GenericSocketFeedAdapterFactory.java   |  172 ---
 .../RateControlledFileSystemBasedAdapter.java   |   69 -
 ...ControlledFileSystemBasedAdapterFactory.java |  131 --
 .../external/data/SocketClientAdapter.java      |  110 --
 .../data/SocketClientAdapterFactory.java        |   92 --
 .../tools/external/data/TweetGenerator.java     |  156 ---
 .../data/TwitterFirehoseFeedAdapter.java        |  191 ---
 .../data/TwitterFirehoseFeedAdapterFactory.java |  125 --
 344 files changed, 15151 insertions(+), 16580 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-algebra/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-algebra/pom.xml b/asterix-algebra/pom.xml
index 3e76d43..bf38393 100644
--- a/asterix-algebra/pom.xml
+++ b/asterix-algebra/pom.xml
@@ -112,7 +112,6 @@
 			<groupId>org.apache.asterix</groupId>
 			<artifactId>asterix-external-data</artifactId>
 			<version>0.8.8-SNAPSHOT</version>
-			<scope>compile</scope>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.asterix</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-app/pom.xml b/asterix-app/pom.xml
index 0360529..9b87d9f 100644
--- a/asterix-app/pom.xml
+++ b/asterix-app/pom.xml
@@ -187,7 +187,6 @@
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-client</artifactId>
-            <type>jar</type>
         </dependency>
         <dependency>
             <groupId>org.apache.hadoop</groupId>
@@ -196,6 +195,12 @@
             <scope>test</scope>
         </dependency>
         <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-hdfs</artifactId>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
             <groupId>xerces</groupId>
             <artifactId>xercesImpl</artifactId>
             <version>2.9.1</version>
@@ -233,5 +238,10 @@
             <version>0.8.8-SNAPSHOT</version>
             <scope>compile</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.asterix</groupId>
+            <artifactId>asterix-external-data</artifactId>
+            <version>0.8.8-SNAPSHOT</version>
+        </dependency>
     </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/main/java/org/apache/asterix/aql/translator/QueryTranslator.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/main/java/org/apache/asterix/aql/translator/QueryTranslator.java b/asterix-app/src/main/java/org/apache/asterix/aql/translator/QueryTranslator.java
index 08b92e7..7c055e2 100644
--- a/asterix-app/src/main/java/org/apache/asterix/aql/translator/QueryTranslator.java
+++ b/asterix-app/src/main/java/org/apache/asterix/aql/translator/QueryTranslator.java
@@ -63,7 +63,7 @@ import org.apache.asterix.common.feeds.api.IFeedLifecycleEventSubscriber.FeedLif
 import org.apache.asterix.common.feeds.api.IFeedLifecycleListener.ConnectionLocation;
 import org.apache.asterix.common.functions.FunctionSignature;
 import org.apache.asterix.compiler.provider.ILangCompilationProvider;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.external.indexing.ExternalFile;
 import org.apache.asterix.feeds.CentralFeedManager;
 import org.apache.asterix.feeds.FeedJoint;
@@ -238,13 +238,12 @@ public class QueryTranslator extends AbstractLangTranslator {
 
     /**
      * Compiles and submits for execution a list of AQL statements.
-     *
      * @param hcc
-     *            A Hyracks client connection that is used to submit a jobspec to Hyracks.
+     *        A Hyracks client connection that is used to submit a jobspec to Hyracks.
      * @param hdc
-     *            A Hyracks dataset client object that is used to read the results.
+     *        A Hyracks dataset client object that is used to read the results.
      * @param resultDelivery
-     *            True if the results should be read asynchronously or false if we should wait for results to be read.
+     *        True if the results should be read asynchronously or false if we should wait for results to be read.
      * @return A List<QueryResult> containing a QueryResult instance corresponding to each submitted query.
      * @throws Exception
      */
@@ -2133,7 +2132,7 @@ public class QueryTranslator extends AbstractLangTranslator {
                 FeedId feedId = connectionRequest.getFeedJointKey().getFeedId();
                 PrimaryFeed primaryFeed = (PrimaryFeed) MetadataManager.INSTANCE.getFeed(mdTxnCtx,
                         feedId.getDataverse(), feedId.getFeedName());
-                Pair<JobSpecification, IFeedAdapterFactory> pair = FeedOperations.buildFeedIntakeJobSpec(primaryFeed,
+                Pair<JobSpecification, IAdapterFactory> pair = FeedOperations.buildFeedIntakeJobSpec(primaryFeed,
                         metadataProvider, policyAccessor);
                 // adapter configuration are valid at this stage
                 // register the feed joints (these are auto-de-registered)
@@ -2141,11 +2140,13 @@ public class QueryTranslator extends AbstractLangTranslator {
                     FeedLifecycleListener.INSTANCE.registerFeedJoint(fj);
                 }
                 JobUtils.runJob(hcc, pair.first, false);
-                IFeedAdapterFactory adapterFactory = pair.second;
+                /* TODO: Fix record tracking
+                 * IFeedAdapterFactory adapterFactory = pair.second;
                 if (adapterFactory.isRecordTrackingEnabled()) {
                     FeedLifecycleListener.INSTANCE.registerFeedIntakeProgressTracker(feedConnId,
                             adapterFactory.createIntakeProgressTracker());
                 }
+                */
                 eventSubscriber.assertEvent(FeedLifecycleEvent.FEED_INTAKE_STARTED);
             } else {
                 for (IFeedJoint fj : triple.third) {
@@ -2186,7 +2187,6 @@ public class QueryTranslator extends AbstractLangTranslator {
     /**
      * Generates a subscription request corresponding to a connect feed request. In addition, provides a boolean
      * flag indicating if feed intake job needs to be started (source primary feed not found to be active).
-     *
      * @param dataverse
      * @param feed
      * @param dataset

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java b/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java
index d3fade5..595fe4c 100644
--- a/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java
+++ b/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java
@@ -27,11 +27,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
 import org.apache.asterix.common.api.ILocalResourceMetadata;
 import org.apache.asterix.common.config.AsterixStorageProperties;
 import org.apache.asterix.common.config.DatasetConfig.DatasetType;
@@ -45,16 +40,17 @@ import org.apache.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactor
 import org.apache.asterix.common.ioopcallbacks.LSMBTreeWithBuddyIOOperationCallbackFactory;
 import org.apache.asterix.common.ioopcallbacks.LSMRTreeIOOperationCallbackFactory;
 import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlPrimitiveValueProviderFactory;
-import org.apache.asterix.external.adapter.factory.HDFSAdapterFactory;
-import org.apache.asterix.external.adapter.factory.HDFSIndexingAdapterFactory;
-import org.apache.asterix.external.adapter.factory.HiveAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.external.indexing.ExternalFile;
 import org.apache.asterix.external.indexing.FilesIndexDescription;
 import org.apache.asterix.external.indexing.IndexingConstants;
-import org.apache.asterix.external.indexing.operators.ExternalDatasetIndexesAbortOperatorDescriptor;
-import org.apache.asterix.external.indexing.operators.ExternalDatasetIndexesCommitOperatorDescriptor;
-import org.apache.asterix.external.indexing.operators.ExternalDatasetIndexesRecoverOperatorDescriptor;
-import org.apache.asterix.external.indexing.operators.IndexInfoOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalDatasetIndexesAbortOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalDatasetIndexesCommitOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalDatasetIndexesRecoverOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalFilesIndexOperatorDescriptor;
+import org.apache.asterix.external.operators.IndexInfoOperatorDescriptor;
+import org.apache.asterix.external.provider.AdapterFactoryProvider;
+import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
 import org.apache.asterix.formats.nontagged.AqlTypeTraitProvider;
@@ -73,13 +69,17 @@ import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.om.types.IAType;
 import org.apache.asterix.om.util.AsterixAppContextInfo;
 import org.apache.asterix.om.util.NonTaggedFormatUtil;
-import org.apache.asterix.tools.external.data.ExternalFilesIndexOperatorDescriptor;
 import org.apache.asterix.transaction.management.opcallbacks.SecondaryIndexOperationTrackerProvider;
 import org.apache.asterix.transaction.management.resource.ExternalBTreeLocalResourceMetadata;
 import org.apache.asterix.transaction.management.resource.PersistentLocalResourceFactoryProvider;
 import org.apache.asterix.transaction.management.service.transaction.AsterixRuntimeComponentsProvider;
 import org.apache.asterix.translator.CompiledStatements.CompiledCreateIndexStatement;
 import org.apache.asterix.translator.CompiledStatements.CompiledIndexDropStatement;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
 import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraintHelper;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -107,6 +107,7 @@ public class ExternalIndexingOperations {
 
     public static final List<List<String>> FILE_INDEX_FIELD_NAMES = new ArrayList<List<String>>();
     public static final ArrayList<IAType> FILE_INDEX_FIELD_TYPES = new ArrayList<IAType>();
+
     static {
         FILE_INDEX_FIELD_NAMES.add(new ArrayList<String>(Arrays.asList("")));
         FILE_INDEX_FIELD_TYPES.add(BuiltinType.ASTRING);
@@ -114,9 +115,7 @@ public class ExternalIndexingOperations {
 
     public static boolean isIndexible(ExternalDatasetDetails ds) {
         String adapter = ds.getAdapter();
-        if (adapter.equalsIgnoreCase("hdfs") || adapter.equalsIgnoreCase("hive")
-                || adapter.equalsIgnoreCase("org.apache.asterix.external.dataset.adapter.HDFSAdapter")
-                || adapter.equalsIgnoreCase("org.apache.asterix.external.dataset.adapter.HIVEAdapter")) {
+        if (adapter.equalsIgnoreCase(ExternalDataConstants.ALIAS_HDFS_ADAPTER)) {
             return true;
         }
         return false;
@@ -126,12 +125,6 @@ public class ExternalIndexingOperations {
         return ds.getState() != ExternalDatasetTransactionState.COMMIT;
     }
 
-    public static boolean datasetUsesHiveAdapter(ExternalDatasetDetails ds) {
-        String adapter = ds.getAdapter();
-        return (adapter.equalsIgnoreCase("hive") || adapter
-                .equalsIgnoreCase("org.apache.asterix.external.dataset.adapter.HIVEAdapter"));
-    }
-
     public static boolean isValidIndexName(String datasetName, String indexName) {
         return (!datasetName.concat(IndexingConstants.EXTERNAL_FILE_INDEX_NAME_SUFFIX).equals(indexName));
     }
@@ -154,18 +147,15 @@ public class ExternalIndexingOperations {
         return IndexingConstants.getBuddyBtreeComparatorFactories();
     }
 
-    public static ArrayList<ExternalFile> getSnapshotFromExternalFileSystem(Dataset dataset) throws AlgebricksException {
+    public static ArrayList<ExternalFile> getSnapshotFromExternalFileSystem(Dataset dataset)
+            throws AlgebricksException {
         ArrayList<ExternalFile> files = new ArrayList<ExternalFile>();
         ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
         try {
             // Create the file system object
             FileSystem fs = getFileSystemObject(datasetDetails.getProperties());
-            // If dataset uses hive adapter, add path to the dataset properties
-            if (datasetUsesHiveAdapter(datasetDetails)) {
-                HiveAdapterFactory.populateConfiguration(datasetDetails.getProperties());
-            }
             // Get paths of dataset
-            String path = datasetDetails.getProperties().get(HDFSAdapterFactory.KEY_PATH);
+            String path = datasetDetails.getProperties().get(ExternalDataConstants.KEY_PATH);
             String[] paths = path.split(",");
 
             // Add fileStatuses to files
@@ -176,9 +166,9 @@ public class ExternalIndexingOperations {
                     if (fileStatuses[i].isDirectory()) {
                         listSubFiles(dataset, fs, fileStatuses[i], files);
                     } else {
-                        files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(),
-                                nextFileNumber, fileStatuses[i].getPath().toUri().getPath(), new Date(fileStatuses[i]
-                                        .getModificationTime()), fileStatuses[i].getLen(),
+                        files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), nextFileNumber,
+                                fileStatuses[i].getPath().toUri().getPath(),
+                                new Date(fileStatuses[i].getModificationTime()), fileStatuses[i].getLen(),
                                 ExternalFilePendingOp.PENDING_NO_OP));
                     }
                 }
@@ -216,14 +206,14 @@ public class ExternalIndexingOperations {
 
     public static FileSystem getFileSystemObject(Map<String, String> map) throws IOException {
         Configuration conf = new Configuration();
-        conf.set("fs.default.name", map.get(HDFSAdapterFactory.KEY_HDFS_URL).trim());
-        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
+        conf.set(ExternalDataConstants.KEY_HADOOP_FILESYSTEM_URI, map.get(ExternalDataConstants.KEY_HDFS_URL).trim());
+        conf.set(ExternalDataConstants.KEY_HADOOP_FILESYSTEM_CLASS, DistributedFileSystem.class.getName());
         return FileSystem.get(conf);
     }
 
     public static JobSpecification buildFilesIndexReplicationJobSpec(Dataset dataset,
             ArrayList<ExternalFile> externalFilesSnapshot, AqlMetadataProvider metadataProvider, boolean createIndex)
-            throws MetadataException, AlgebricksException {
+                    throws MetadataException, AlgebricksException {
         JobSpecification spec = JobSpecificationUtils.createJobSpecification();
         IAsterixPropertiesProvider asterixPropertiesProvider = AsterixAppContextInfo.getInstance();
         AsterixStorageProperties storageProperties = asterixPropertiesProvider.getStorageProperties();
@@ -232,20 +222,20 @@ public class ExternalIndexingOperations {
         ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first;
         Map<String, String> mergePolicyFactoryProperties = compactionInfo.second;
         Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
-                .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(),
-                        dataset.getDatasetName(), getFilesIndexName(dataset.getDatasetName()), true);
+                .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(), dataset.getDatasetName(),
+                        getFilesIndexName(dataset.getDatasetName()), true);
         IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
         FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
         ILocalResourceMetadata localResourceMetadata = new ExternalBTreeLocalResourceMetadata(
-                filesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS,
-                filesIndexDescription.FILES_INDEX_COMP_FACTORIES, new int[] { 0 }, false, dataset.getDatasetId(),
-                mergePolicyFactory, mergePolicyFactoryProperties);
+                filesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS, filesIndexDescription.FILES_INDEX_COMP_FACTORIES,
+                new int[] { 0 }, false, dataset.getDatasetId(), mergePolicyFactory, mergePolicyFactoryProperties);
         PersistentLocalResourceFactoryProvider localResourceFactoryProvider = new PersistentLocalResourceFactoryProvider(
                 localResourceMetadata, LocalResource.ExternalBTreeResource);
         ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory(
-                mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider(
-                        dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
-                LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(),
+                mergePolicyFactory, mergePolicyFactoryProperties,
+                new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()),
+                AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE,
+                storageProperties.getBloomFilterFalsePositiveRate(),
                 ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true);
         ExternalFilesIndexOperatorDescriptor externalFilesOp = new ExternalFilesIndexOperatorDescriptor(spec,
                 AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
@@ -260,7 +250,6 @@ public class ExternalIndexingOperations {
 
     /**
      * This method create an indexing operator that index records in HDFS
-     * 
      * @param jobSpec
      * @param itemType
      * @param dataset
@@ -272,10 +261,10 @@ public class ExternalIndexingOperations {
     private static Pair<ExternalDataScanOperatorDescriptor, AlgebricksPartitionConstraint> getExternalDataIndexingOperator(
             JobSpecification jobSpec, IAType itemType, Dataset dataset, List<ExternalFile> files,
             RecordDescriptor indexerDesc, AqlMetadataProvider metadataProvider) throws Exception {
-        HDFSIndexingAdapterFactory adapterFactory = new HDFSIndexingAdapterFactory();
-        adapterFactory.setFiles(files);
-        adapterFactory.configure(((ExternalDatasetDetails) dataset.getDatasetDetails()).getProperties(),
-                (ARecordType) itemType);
+        ExternalDatasetDetails externalDatasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
+        Map<String, String> configuration = externalDatasetDetails.getProperties();
+        IAdapterFactory adapterFactory = AdapterFactoryProvider.getAdapterFactory(externalDatasetDetails.getAdapter(),
+                configuration, (ARecordType) itemType, files, true);
         return new Pair<ExternalDataScanOperatorDescriptor, AlgebricksPartitionConstraint>(
                 new ExternalDataScanOperatorDescriptor(jobSpec, indexerDesc, adapterFactory),
                 adapterFactory.getPartitionConstraint());
@@ -297,7 +286,6 @@ public class ExternalIndexingOperations {
      * deleteedFiles should contain files that are no longer there in the file system
      * appendedFiles should have the new file information of existing files
      * The method should return false in case of zero delta
-     * 
      * @param dataset
      * @param metadataFiles
      * @param addedFiles
@@ -309,7 +297,7 @@ public class ExternalIndexingOperations {
      */
     public static boolean isDatasetUptodate(Dataset dataset, List<ExternalFile> metadataFiles,
             List<ExternalFile> addedFiles, List<ExternalFile> deletedFiles, List<ExternalFile> appendedFiles)
-            throws MetadataException, AlgebricksException {
+                    throws MetadataException, AlgebricksException {
         boolean uptodate = true;
         int newFileNumber = metadataFiles.get(metadataFiles.size() - 1).getFileNumber() + 1;
 
@@ -340,9 +328,10 @@ public class ExternalIndexingOperations {
                     } else {
                         // Same file name, Different file mod date -> delete and add
                         metadataFile.setPendingOp(ExternalFilePendingOp.PENDING_DROP_OP);
-                        deletedFiles.add(new ExternalFile(metadataFile.getDataverseName(), metadataFile
-                                .getDatasetName(), 0, metadataFile.getFileName(), metadataFile.getLastModefiedTime(),
-                                metadataFile.getSize(), ExternalFilePendingOp.PENDING_DROP_OP));
+                        deletedFiles
+                                .add(new ExternalFile(metadataFile.getDataverseName(), metadataFile.getDatasetName(), 0,
+                                        metadataFile.getFileName(), metadataFile.getLastModefiedTime(),
+                                        metadataFile.getSize(), ExternalFilePendingOp.PENDING_DROP_OP));
                         fileSystemFile.setPendingOp(ExternalFilePendingOp.PENDING_ADD_OP);
                         fileSystemFile.setFileNumber(newFileNumber);
                         addedFiles.add(fileSystemFile);
@@ -382,8 +371,8 @@ public class ExternalIndexingOperations {
             if (metadataFile.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP) {
                 metadataFile.setPendingOp(ExternalFilePendingOp.PENDING_DROP_OP);
                 deletedFiles.add(new ExternalFile(metadataFile.getDataverseName(), metadataFile.getDatasetName(),
-                        newFileNumber, metadataFile.getFileName(), metadataFile.getLastModefiedTime(), metadataFile
-                                .getSize(), metadataFile.getPendingOp()));
+                        newFileNumber, metadataFile.getFileName(), metadataFile.getLastModefiedTime(),
+                        metadataFile.getSize(), metadataFile.getPendingOp()));
                 newFileNumber++;
                 uptodate = false;
             }
@@ -421,13 +410,14 @@ public class ExternalIndexingOperations {
                 metadataProvider.getMetadataTxnContext());
         IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec,
                 AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
-                splitsAndConstraint.first, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(
-                        dataset.getDatasetId()), compactionInfo.first, compactionInfo.second,
+                splitsAndConstraint.first,
+                new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()),
+                        compactionInfo.first, compactionInfo.second,
                         new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()),
                         AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE,
                         storageProperties.getBloomFilterFalsePositiveRate(), false, null, null, null, null, !temp));
-        AlgebricksPartitionConstraintHelper
-                .setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second);
+        AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop,
+                splitsAndConstraint.second);
         spec.addRoot(btreeDrop);
 
         return spec;
@@ -443,9 +433,9 @@ public class ExternalIndexingOperations {
             else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_APPEND_OP) {
                 for (ExternalFile appendedFile : appendedFiles) {
                     if (appendedFile.getFileName().equals(file.getFileName())) {
-                        files.add(new ExternalFile(file.getDataverseName(), file.getDatasetName(),
-                                file.getFileNumber(), file.getFileName(), file.getLastModefiedTime(), appendedFile
-                                        .getSize(), ExternalFilePendingOp.PENDING_NO_OP));
+                        files.add(new ExternalFile(file.getDataverseName(), file.getDatasetName(), file.getFileNumber(),
+                                file.getFileName(), file.getLastModefiedTime(), appendedFile.getSize(),
+                                ExternalFilePendingOp.PENDING_NO_OP));
                     }
                 }
             }
@@ -557,8 +547,7 @@ public class ExternalIndexingOperations {
             AsterixStorageProperties storageProperties, JobSpecification spec) {
         return new ExternalBTreeWithBuddyDataflowHelperFactory(mergePolicyFactory, mergePolicyFactoryProperties,
                 new SecondaryIndexOperationTrackerProvider(ds.getDatasetId()),
-                AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
-                LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE,
+                AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE,
                 storageProperties.getBloomFilterFalsePositiveRate(), new int[] { index.getKeyFieldNames().size() },
                 ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(ds), true);
     }
@@ -567,7 +556,7 @@ public class ExternalIndexingOperations {
     private static ExternalRTreeDataflowHelperFactory getRTreeDataflowHelperFactory(Dataset ds, Index index,
             ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyFactoryProperties,
             AsterixStorageProperties storageProperties, AqlMetadataProvider metadataProvider, JobSpecification spec)
-            throws AlgebricksException, AsterixException {
+                    throws AlgebricksException, AsterixException {
         int numPrimaryKeys = getRIDSize(ds);
         List<List<String>> secondaryKeyFields = index.getKeyFieldNames();
         secondaryKeyFields.size();
@@ -594,8 +583,8 @@ public class ExternalIndexingOperations {
                     .getSerializerDeserializer(nestedKeyType);
             secondaryRecFields[i] = keySerde;
 
-            secondaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(
-                    nestedKeyType, true);
+            secondaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE
+                    .getBinaryComparatorFactory(nestedKeyType, true);
             secondaryTypeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(nestedKeyType);
             valueProviderFactories[i] = AqlPrimitiveValueProviderFactory.INSTANCE;
         }
@@ -743,13 +732,14 @@ public class ExternalIndexingOperations {
         ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first;
         Map<String, String> mergePolicyFactoryProperties = compactionInfo.second;
         Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
-                .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(),
-                        dataset.getDatasetName(), getFilesIndexName(dataset.getDatasetName()), true);
+                .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(), dataset.getDatasetName(),
+                        getFilesIndexName(dataset.getDatasetName()), true);
         IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
         ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory(
-                mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider(
-                        dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
-                LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(),
+                mergePolicyFactory, mergePolicyFactoryProperties,
+                new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()),
+                AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE,
+                storageProperties.getBloomFilterFalsePositiveRate(),
                 ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true);
         FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
         LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/main/java/org/apache/asterix/file/FeedOperations.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/main/java/org/apache/asterix/file/FeedOperations.java b/asterix-app/src/main/java/org/apache/asterix/file/FeedOperations.java
index c7bb0e3..cb55c5f 100644
--- a/asterix-app/src/main/java/org/apache/asterix/file/FeedOperations.java
+++ b/asterix-app/src/main/java/org/apache/asterix/file/FeedOperations.java
@@ -33,7 +33,7 @@ import org.apache.asterix.common.feeds.api.IFeedMessage;
 import org.apache.asterix.common.feeds.api.IFeedRuntime.FeedRuntimeType;
 import org.apache.asterix.common.feeds.message.EndFeedMessage;
 import org.apache.asterix.common.feeds.message.ThrottlingEnabledFeedMessage;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.feeds.FeedLifecycleListener;
 import org.apache.asterix.metadata.declared.AqlMetadataProvider;
 import org.apache.asterix.metadata.entities.PrimaryFeed;
@@ -58,23 +58,22 @@ public class FeedOperations {
 
     /**
      * Builds the job spec for ingesting a (primary) feed from its external source via the feed adaptor.
-     * 
      * @param primaryFeed
      * @param metadataProvider
      * @return JobSpecification the Hyracks job specification for receiving data from external source
      * @throws Exception
      */
-    public static Pair<JobSpecification, IFeedAdapterFactory> buildFeedIntakeJobSpec(PrimaryFeed primaryFeed,
+    public static Pair<JobSpecification, IAdapterFactory> buildFeedIntakeJobSpec(PrimaryFeed primaryFeed,
             AqlMetadataProvider metadataProvider, FeedPolicyAccessor policyAccessor) throws Exception {
 
         JobSpecification spec = JobSpecificationUtils.createJobSpecification();
         spec.setFrameSize(FeedConstants.JobConstants.DEFAULT_FRAME_SIZE);
-        IFeedAdapterFactory adapterFactory = null;
+        IAdapterFactory adapterFactory = null;
         IOperatorDescriptor feedIngestor;
         AlgebricksPartitionConstraint ingesterPc;
 
         try {
-            Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IFeedAdapterFactory> t = metadataProvider
+            Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IAdapterFactory> t = metadataProvider
                     .buildFeedIntakeRuntime(spec, primaryFeed, policyAccessor);
             feedIngestor = t.first;
             ingesterPc = t.second;
@@ -90,7 +89,7 @@ public class FeedOperations {
         AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, nullSink, ingesterPc);
         spec.connect(new OneToOneConnectorDescriptor(spec), feedIngestor, 0, nullSink, 0);
         spec.addRoot(nullSink);
-        return new Pair<JobSpecification, IFeedAdapterFactory>(spec, adapterFactory);
+        return new Pair<JobSpecification, IAdapterFactory>(spec, adapterFactory);
     }
 
     public static JobSpecification buildDiscontinueFeedSourceSpec(AqlMetadataProvider metadataProvider, FeedId feedId)
@@ -247,7 +246,7 @@ public class FeedOperations {
     private static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDisconnectFeedMessengerRuntime(
             JobSpecification jobSpec, FeedConnectionId feedConenctionId, List<String> locations,
             FeedRuntimeType sourceFeedRuntimeType, boolean completeDisconnection, FeedId sourceFeedId)
-            throws AlgebricksException {
+                    throws AlgebricksException {
         IFeedMessage feedMessage = new EndFeedMessage(feedConenctionId, sourceFeedRuntimeType, sourceFeedId,
                 completeDisconnection, EndFeedMessage.EndMessageType.DISCONNECT_FEED);
         return buildSendFeedMessageRuntime(jobSpec, feedConenctionId, feedMessage, locations);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java b/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java
index 4eb6944..44af0ff 100644
--- a/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java
+++ b/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java
@@ -38,7 +38,7 @@ import org.apache.asterix.common.transactions.IRecoveryManager.ResourceType;
 import org.apache.asterix.common.transactions.JobId;
 import org.apache.asterix.external.indexing.ExternalFile;
 import org.apache.asterix.external.indexing.IndexingConstants;
-import org.apache.asterix.external.indexing.operators.ExternalIndexBulkModifyOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalIndexBulkModifyOperatorDescriptor;
 import org.apache.asterix.formats.nontagged.AqlBinaryBooleanInspectorImpl;
 import org.apache.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/ExternalLibraryBootstrap.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/ExternalLibraryBootstrap.java b/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/ExternalLibraryBootstrap.java
index 267be3d..01775ab 100755
--- a/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/ExternalLibraryBootstrap.java
+++ b/asterix-app/src/main/java/org/apache/asterix/hyracks/bootstrap/ExternalLibraryBootstrap.java
@@ -46,6 +46,7 @@ import org.apache.asterix.metadata.api.IMetadataEntity;
 import org.apache.asterix.metadata.entities.DatasourceAdapter;
 import org.apache.asterix.metadata.entities.DatasourceAdapter.AdapterType;
 import org.apache.asterix.metadata.entities.Dataverse;
+import org.apache.asterix.metadata.entities.Library;
 import org.apache.asterix.metadata.feeds.AdapterIdentifier;
 import org.apache.asterix.runtime.formats.NonTaggedDataFormat;
 
@@ -221,8 +222,7 @@ public class ExternalLibraryBootstrap {
                 LOGGER.info("Installed adapters contain in library :" + libraryName);
             }
 
-            MetadataManager.INSTANCE.addLibrary(mdTxnCtx,
-                    new org.apache.asterix.metadata.entities.Library(dataverse, libraryName));
+            MetadataManager.INSTANCE.addLibrary(mdTxnCtx, new Library(dataverse, libraryName));
 
             if (LOGGER.isLoggable(Level.INFO)) {
                 LOGGER.info("Added library " + libraryName + "to Metadata");

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java b/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java
index 2f8a910..9eee818 100644
--- a/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java
+++ b/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java
@@ -34,7 +34,7 @@ import org.apache.asterix.common.config.GlobalConfig;
 import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.compiler.provider.AqlCompilationProvider;
 import org.apache.asterix.compiler.provider.ILangCompilationProvider;
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
+import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.IdentitiyResolverFactory;
 import org.apache.asterix.test.base.AsterixTestHelper;
 import org.apache.asterix.test.common.TestHelper;
@@ -80,7 +80,7 @@ public class OptimizerTest {
         AsterixHyracksIntegrationUtil.init(true);
         // Set the node resolver to be the identity resolver that expects node names
         // to be node controller ids; a valid assumption in test environment.
-        System.setProperty(FileSystemBasedAdapter.NODE_RESOLVER_FACTORY_PROPERTY,
+        System.setProperty(ExternalDataConstants.NODE_RESOLVER_FACTORY_PROPERTY,
                 IdentitiyResolverFactory.class.getName());
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTest.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTest.java b/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTest.java
index 7a55c90..922486f 100644
--- a/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTest.java
+++ b/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTest.java
@@ -23,18 +23,14 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.logging.Logger;
 
-import org.apache.asterix.api.common.AsterixHyracksIntegrationUtil;
 import org.apache.asterix.common.config.AsterixTransactionProperties;
 import org.apache.asterix.test.aql.TestExecutor;
 import org.apache.asterix.testframework.context.TestCaseContext;
 import org.apache.asterix.testframework.xml.TestGroup;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.hyracks.api.lifecycle.ILifeCycleComponent;
-import org.apache.hyracks.api.lifecycle.ILifeCycleComponentManager;
-import org.apache.hyracks.control.nc.NodeControllerService;
-import org.apache.hyracks.control.nc.application.NCApplicationContext;
-import org.apache.hyracks.storage.common.buffercache.BufferCache;
-import org.junit.*;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameters;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTestUtil.java
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTestUtil.java b/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTestUtil.java
index b072e55..5e76ecb 100644
--- a/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTestUtil.java
+++ b/asterix-app/src/test/java/org/apache/asterix/test/runtime/ExecutionTestUtil.java
@@ -26,7 +26,7 @@ import java.util.logging.Logger;
 import org.apache.asterix.api.common.AsterixHyracksIntegrationUtil;
 import org.apache.asterix.common.api.IAsterixAppRuntimeContext;
 import org.apache.asterix.common.config.GlobalConfig;
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
+import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.IdentitiyResolverFactory;
 import org.apache.asterix.testframework.xml.TestGroup;
 import org.apache.asterix.testframework.xml.TestSuite;
@@ -64,7 +64,7 @@ public class ExecutionTestUtil {
         // Set the node resolver to be the identity resolver that expects node
         // names
         // to be node controller ids; a valid assumption in test environment.
-        System.setProperty(FileSystemBasedAdapter.NODE_RESOLVER_FACTORY_PROPERTY,
+        System.setProperty(ExternalDataConstants.NODE_RESOLVER_FACTORY_PROPERTY,
                 IdentitiyResolverFactory.class.getName());
 
         FailedGroup = new TestGroup();

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-app/src/test/resources/metadata/results/basic/meta15/meta15.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/metadata/results/basic/meta15/meta15.1.adm b/asterix-app/src/test/resources/metadata/results/basic/meta15/meta15.1.adm
index 8a23d78..c4dde05 100644
--- a/asterix-app/src/test/resources/metadata/results/basic/meta15/meta15.1.adm
+++ b/asterix-app/src/test/resources/metadata/results/basic/meta15/meta15.1.adm
@@ -1,12 +1,3 @@
-{ "DataverseName": "Metadata", "Name": "azure_twitter", "Classname": "org.apache.asterix.external.adapter.factory.PullBasedAzureTwitterAdapterFactory", "Type": "INTERNAL", "Timestamp": "Thu Oct 24 01:39:27 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "cnn_feed", "Classname": "org.apache.asterix.external.adapter.factory.CNNFeedAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "file_feed", "Classname": "org.apache.asterix.tools.external.data.RateControlledFileSystemBasedAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "hdfs", "Classname": "org.apache.asterix.external.adapter.factory.HDFSAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "hive", "Classname": "org.apache.asterix.external.adapter.factory.HiveAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "localfs", "Classname": "org.apache.asterix.external.adapter.factory.NCFileSystemAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "pull_twitter", "Classname": "org.apache.asterix.external.adapter.factory.PullBasedTwitterAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "push_twitter", "Classname": "org.apache.asterix.external.adapter.factory.PushBasedTwitterAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "rss_feed", "Classname": "org.apache.asterix.external.adapter.factory.RSSFeedAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "socket_adapter", "Classname": "org.apache.asterix.tools.external.data.GenericSocketFeedAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
-{ "DataverseName": "Metadata", "Name": "socket_client", "Classname": "org.apache.asterix.tools.external.data.SocketClientAdapterFactory", "Type": "INTERNAL", "Timestamp": "Wed Nov 20 14:45:58 IST 2013" }
-{ "DataverseName": "Metadata", "Name": "twitter_firehose", "Classname": "org.apache.asterix.tools.external.data.TwitterFirehoseFeedAdapterFactory", "Type": "INTERNAL", "Timestamp": "Tue Jul 16 22:38:45 PDT 2013" }
+{ "DataverseName": "Metadata", "Name": "adapter", "Classname": "org.apache.asterix.external.adapter.factory.GenericAdapterFactory", "Type": "INTERNAL", "Timestamp": "Sun Jan 03 15:39:35 AST 2016" }
+{ "DataverseName": "Metadata", "Name": "socket_adapter", "Classname": "org.apache.asterix.external.runtime.GenericSocketFeedAdapterFactory", "Type": "INTERNAL", "Timestamp": "Sun Jan 03 15:39:35 AST 2016" }
+{ "DataverseName": "Metadata", "Name": "socket_client", "Classname": "org.apache.asterix.external.runtime.SocketClientAdapterFactory", "Type": "INTERNAL", "Timestamp": "Sun Jan 03 15:39:35 AST 2016" }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/api/IAsterixAppRuntimeContext.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/api/IAsterixAppRuntimeContext.java b/asterix-common/src/main/java/org/apache/asterix/common/api/IAsterixAppRuntimeContext.java
index cd829e7..b8c3f2f 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/api/IAsterixAppRuntimeContext.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/api/IAsterixAppRuntimeContext.java
@@ -40,7 +40,6 @@ import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.file.IFileMapProvider;
 import org.apache.hyracks.storage.common.file.ILocalResourceRepository;
 import org.apache.hyracks.storage.common.file.IResourceIdFactory;
-import org.apache.hyracks.storage.common.file.ResourceIdFactory;
 
 public interface IAsterixAppRuntimeContext {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/BasicMonitoredBuffer.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/BasicMonitoredBuffer.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/BasicMonitoredBuffer.java
index c5594db..70833fc 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/BasicMonitoredBuffer.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/BasicMonitoredBuffer.java
@@ -21,6 +21,8 @@ package org.apache.asterix.common.feeds;
 import org.apache.asterix.common.feeds.api.IExceptionHandler;
 import org.apache.asterix.common.feeds.api.IFeedMetricCollector;
 import org.apache.asterix.common.feeds.api.IFrameEventCallback;
+import org.apache.asterix.common.feeds.api.IFramePostProcessor;
+import org.apache.asterix.common.feeds.api.IFramePreprocessor;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/ComputeSideMonitoredBuffer.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/ComputeSideMonitoredBuffer.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/ComputeSideMonitoredBuffer.java
index 4ae288d..7ec3fdf 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/ComputeSideMonitoredBuffer.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/ComputeSideMonitoredBuffer.java
@@ -21,6 +21,8 @@ package org.apache.asterix.common.feeds;
 import org.apache.asterix.common.feeds.api.IExceptionHandler;
 import org.apache.asterix.common.feeds.api.IFeedMetricCollector;
 import org.apache.asterix.common.feeds.api.IFrameEventCallback;
+import org.apache.asterix.common.feeds.api.IFramePostProcessor;
+import org.apache.asterix.common.feeds.api.IFramePreprocessor;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePostProcessor.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePostProcessor.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePostProcessor.java
deleted file mode 100644
index 1dfbee9..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePostProcessor.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.feeds;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-
-public interface IFramePostProcessor {
-
-    public void postProcessFrame(ByteBuffer frame, FrameTupleAccessor frameAccessor);
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePreprocessor.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePreprocessor.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePreprocessor.java
deleted file mode 100644
index f602656..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/IFramePreprocessor.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.feeds;
-
-import java.nio.ByteBuffer;
-
-public interface IFramePreprocessor {
-
-    public void preProcess(ByteBuffer frame) throws Exception;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/IntakeSideMonitoredBuffer.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/IntakeSideMonitoredBuffer.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/IntakeSideMonitoredBuffer.java
index ed1e943..10b7ddb 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/IntakeSideMonitoredBuffer.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/IntakeSideMonitoredBuffer.java
@@ -21,6 +21,8 @@ package org.apache.asterix.common.feeds;
 import org.apache.asterix.common.feeds.api.IExceptionHandler;
 import org.apache.asterix.common.feeds.api.IFeedMetricCollector;
 import org.apache.asterix.common.feeds.api.IFrameEventCallback;
+import org.apache.asterix.common.feeds.api.IFramePostProcessor;
+import org.apache.asterix.common.feeds.api.IFramePreprocessor;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/MonitoredBuffer.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/MonitoredBuffer.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/MonitoredBuffer.java
index 5761944..e5a22b5 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/MonitoredBuffer.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/MonitoredBuffer.java
@@ -35,6 +35,8 @@ import org.apache.asterix.common.feeds.api.IFeedMetricCollector.ValueType;
 import org.apache.asterix.common.feeds.api.IFeedRuntime.Mode;
 import org.apache.asterix.common.feeds.api.IFrameEventCallback;
 import org.apache.asterix.common.feeds.api.IFrameEventCallback.FrameEvent;
+import org.apache.asterix.common.feeds.api.IFramePostProcessor;
+import org.apache.asterix.common.feeds.api.IFramePreprocessor;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/StorageSideMonitoredBuffer.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/StorageSideMonitoredBuffer.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/StorageSideMonitoredBuffer.java
index d545b09..4027237 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/StorageSideMonitoredBuffer.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/StorageSideMonitoredBuffer.java
@@ -24,6 +24,8 @@ import org.apache.asterix.common.feeds.FeedConstants.StatisticsConstants;
 import org.apache.asterix.common.feeds.api.IExceptionHandler;
 import org.apache.asterix.common.feeds.api.IFeedMetricCollector;
 import org.apache.asterix.common.feeds.api.IFrameEventCallback;
+import org.apache.asterix.common.feeds.api.IFramePostProcessor;
+import org.apache.asterix.common.feeds.api.IFramePreprocessor;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IAdapterRuntimeManager.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IAdapterRuntimeManager.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IAdapterRuntimeManager.java
index 86e0a73..2eb6caa 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IAdapterRuntimeManager.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IAdapterRuntimeManager.java
@@ -45,14 +45,12 @@ public interface IAdapterRuntimeManager {
 
     /**
      * Start feed ingestion
-     * 
      * @throws Exception
      */
     public void start() throws Exception;
 
     /**
      * Stop feed ingestion.
-     * 
      * @throws Exception
      */
     public void stop() throws Exception;
@@ -65,7 +63,7 @@ public interface IAdapterRuntimeManager {
     /**
      * @return the instance of the feed adapter (an implementation of {@code IFeedAdapter}) in use.
      */
-    public IFeedAdapter getFeedAdapter();
+    public IDataSourceAdapter getFeedAdapter();
 
     /**
      * @return state associated with the AdapterRuntimeManager. See {@code State}.

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDataSourceAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDataSourceAdapter.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDataSourceAdapter.java
new file mode 100644
index 0000000..9dd4e76
--- /dev/null
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDataSourceAdapter.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.common.feeds.api;
+
+import java.io.Serializable;
+
+import org.apache.hyracks.api.comm.IFrameWriter;
+
+/**
+ * A super interface implemented by a data source adapter. An adapter can be
+ * pull based or push based. This interface provides all common APIs that need
+ * to be implemented by each adapter irrespective of the kind of
+ * adapter (pull or push).
+ */
+public interface IDataSourceAdapter extends Serializable {
+
+    /**
+     * Triggers the adapter to begin ingesting data from the external source.
+     * 
+     * @param partition
+     *            The adapter could be running with a degree of parallelism.
+     *            partition corresponds to the i'th parallel instance.
+     * @param writer
+     *            The frame writer instance that the adapter writes frames to.
+     *            The adapter packs the bytes fetched from the external source
+     *            into frames and forwards the frames to an upstream receiving
+     *            operator using this instance of IFrameWriter.
+     * @throws Exception
+     */
+    public void start(int partition, IFrameWriter writer) throws Exception;
+
+    /**
+     * Discontinue the ingestion of data.
+     *
+     * @throws Exception
+     */
+    public boolean stop() throws Exception;
+
+    /**
+     * @param e the throwable to handle
+     * @return true if the ingestion should continue after the exception, false otherwise
+     */
+    public boolean handleException(Throwable e);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDatasourceAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDatasourceAdapter.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDatasourceAdapter.java
deleted file mode 100644
index c1ee0f7..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IDatasourceAdapter.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.feeds.api;
-
-import java.io.Serializable;
-
-import org.apache.hyracks.api.comm.IFrameWriter;
-
-/**
- * A super interface implemented by a data source adapter. An adapter can be a
- * pull based or push based. This interface provides all common APIs that need
- * to be implemented by each adapter irrespective of the the kind of
- * adapter(pull or push).
- */
-public interface IDatasourceAdapter extends Serializable {
-
-    /**
-     * Triggers the adapter to begin ingesting data from the external source.
-     * 
-     * @param partition
-     *            The adapter could be running with a degree of parallelism.
-     *            partition corresponds to the i'th parallel instance.
-     * @param writer
-     *            The instance of frame writer that is used by the adapter to
-     *            write frame to. Adapter packs the fetched bytes (from external source),
-     *            packs them into frames and forwards the frames to an upstream receiving
-     *            operator using the instance of IFrameWriter.
-     * @throws Exception
-     */
-    public void start(int partition, IFrameWriter writer) throws Exception;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFeedAdapter.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFeedAdapter.java
deleted file mode 100644
index 2307285..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFeedAdapter.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.feeds.api;
-
-/**
- * Interface implemented by a feed adapter.
- */
-public interface IFeedAdapter extends IDatasourceAdapter {
-
-    public enum DataExchangeMode {
-        /**
-         * PULL model requires the adaptor to make a separate request each time to receive data
-         **/
-        PULL,
-
-        /**
-         * PUSH mode involves the use o just one initial request (handshake) by the adaptor
-         * to the datasource for setting up the connection and providing any protocol-specific
-         * parameters. Once a connection is established, the data source "pushes" data to the adaptor.
-         **/
-        PUSH
-    }
-
-    /**
-     * Returns the data exchange mode (PULL/PUSH) associated with the flow.
-     * 
-     * @return
-     */
-    public DataExchangeMode getDataExchangeMode();
-
-    /**
-     * Discontinue the ingestion of data and end the feed.
-     * 
-     * @throws Exception
-     */
-    public void stop() throws Exception;
-
-    /**
-     * @param e
-     * @return true if the feed ingestion should continue post the exception else false
-     */
-    public boolean handleException(Exception e);
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePostProcessor.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePostProcessor.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePostProcessor.java
new file mode 100644
index 0000000..ed74037
--- /dev/null
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePostProcessor.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.common.feeds.api;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+
+public interface IFramePostProcessor {
+
+    public void postProcessFrame(ByteBuffer frame, FrameTupleAccessor frameAccessor);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePreprocessor.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePreprocessor.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePreprocessor.java
new file mode 100644
index 0000000..59a6c97
--- /dev/null
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/IFramePreprocessor.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.common.feeds.api;
+
+import java.nio.ByteBuffer;
+
+public interface IFramePreprocessor {
+
+    public void preProcess(ByteBuffer frame) throws Exception;
+}



[13/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalLoopkupOperatorDiscriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalLoopkupOperatorDiscriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalLoopkupOperatorDiscriptor.java
deleted file mode 100644
index ca2e7ca..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalLoopkupOperatorDiscriptor.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import java.nio.ByteBuffer;
-
-import org.apache.asterix.external.adapter.factory.IControlledAdapterFactory;
-import org.apache.asterix.external.dataset.adapter.IControlledAdapter;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.asterix.external.indexing.FilesIndexDescription;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
-import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
-import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelper;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
-import org.apache.hyracks.storage.common.IStorageManagerInterface;
-
-/*
- * This operator is intended for using record ids to access data in external sources
- */
-public class ExternalLoopkupOperatorDiscriptor extends AbstractTreeIndexOperatorDescriptor {
-    private static final long serialVersionUID = 1L;
-    private final IControlledAdapterFactory adapterFactory;
-    private final INullWriterFactory iNullWriterFactory;
-
-    public ExternalLoopkupOperatorDiscriptor(IOperatorDescriptorRegistry spec, IControlledAdapterFactory adapterFactory,
-            RecordDescriptor outRecDesc, ExternalBTreeDataflowHelperFactory externalFilesIndexDataFlowHelperFactory,
-            boolean propagateInput, IIndexLifecycleManagerProvider lcManagerProvider,
-            IStorageManagerInterface storageManager, IFileSplitProvider fileSplitProvider, int datasetId,
-            double bloomFilterFalsePositiveRate, ISearchOperationCallbackFactory searchOpCallbackFactory,
-            boolean retainNull, INullWriterFactory iNullWriterFactory) {
-        super(spec, 1, 1, outRecDesc, storageManager, lcManagerProvider, fileSplitProvider,
-                new FilesIndexDescription().EXTERNAL_FILE_INDEX_TYPE_TRAITS,
-                new FilesIndexDescription().FILES_INDEX_COMP_FACTORIES, FilesIndexDescription.BLOOM_FILTER_FIELDS,
-                externalFilesIndexDataFlowHelperFactory, null, propagateInput, retainNull, iNullWriterFactory, null,
-                searchOpCallbackFactory, null);
-        this.adapterFactory = adapterFactory;
-        this.iNullWriterFactory = iNullWriterFactory;
-    }
-
-    @Override
-    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
-            final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
-                    throws HyracksDataException {
-        // Create a file index accessor to be used for files lookup operations
-        // Note that all file index accessors will use partition 0 since we only have 1 files index per NC 
-        final ExternalFileIndexAccessor fileIndexAccessor = new ExternalFileIndexAccessor(
-                (ExternalBTreeDataflowHelper) dataflowHelperFactory.createIndexDataflowHelper(this, ctx, partition),
-                this);
-        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
-            // The adapter that uses the file index along with the coming tuples to access files in HDFS
-            private final IControlledAdapter adapter = adapterFactory.createAdapter(ctx, fileIndexAccessor,
-                    recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
-            private boolean indexOpen = false;
-            private boolean writerOpen = false;
-
-            @Override
-            public void open() throws HyracksDataException {
-                //Open the file index accessor here
-                fileIndexAccessor.openIndex();
-                indexOpen = true;
-                try {
-                    adapter.initialize(ctx, iNullWriterFactory);
-                } catch (Throwable th) {
-                    // close the files index
-                    fileIndexAccessor.closeIndex();
-                    throw new HyracksDataException(th);
-                }
-                writerOpen = true;
-                writer.open();
-            }
-
-            @Override
-            public void close() throws HyracksDataException {
-                try {
-                    adapter.close(writer);
-                } catch (Throwable th) {
-                    throw new HyracksDataException(th);
-                } finally {
-                    try {
-                        if (indexOpen) {
-                            //close the file index
-                            fileIndexAccessor.closeIndex();
-                        }
-                    } finally {
-                        if (writerOpen) {
-                            writer.close();
-                        }
-                    }
-                }
-            }
-
-            @Override
-            public void fail() throws HyracksDataException {
-                try {
-                    adapter.fail();
-                } catch (Throwable th) {
-                    throw new HyracksDataException(th);
-                } finally {
-                    if (writerOpen) {
-                        writer.fail();
-                    }
-                }
-            }
-
-            @Override
-            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
-                try {
-                    adapter.nextFrame(buffer, writer);
-                } catch (Throwable th) {
-                    throw new HyracksDataException(th);
-                }
-            }
-        };
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/IndexInfoOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/IndexInfoOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/IndexInfoOperatorDescriptor.java
deleted file mode 100644
index 6f367d2..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/IndexInfoOperatorDescriptor.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.ActivityId;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory;
-import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
-import org.apache.hyracks.storage.am.common.api.ITupleFilterFactory;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
-import org.apache.hyracks.storage.common.IStorageManagerInterface;
-import org.apache.hyracks.storage.common.file.ILocalResourceFactoryProvider;
-
-/*
- * This is a hack used to optain multiple index instances in a single operator and it is not actually used as an operator
- */
-public class IndexInfoOperatorDescriptor implements IIndexOperatorDescriptor{
-
-    private static final long serialVersionUID = 1L;
-    private final IFileSplitProvider fileSplitProvider;
-    private final IStorageManagerInterface storageManager;
-    private final IIndexLifecycleManagerProvider lifecycleManagerProvider;
-    public IndexInfoOperatorDescriptor(IFileSplitProvider fileSplitProvider,IStorageManagerInterface storageManager,
-            IIndexLifecycleManagerProvider lifecycleManagerProvider){
-        this.fileSplitProvider = fileSplitProvider;
-        this.lifecycleManagerProvider = lifecycleManagerProvider;
-        this.storageManager = storageManager;
-        
-    }
-
-    @Override
-    public ActivityId getActivityId() {
-        return null;
-    }
-
-    @Override
-    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
-            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
-        return null;
-    }
-
-    @Override
-    public IFileSplitProvider getFileSplitProvider() {
-        return fileSplitProvider;
-    }
-
-    @Override
-    public IStorageManagerInterface getStorageManager() {
-        return storageManager;
-    }
-
-    @Override
-    public IIndexLifecycleManagerProvider getLifecycleManagerProvider() {
-        return lifecycleManagerProvider;
-    }
-
-    @Override
-    public RecordDescriptor getRecordDescriptor() {
-        return null;
-    }
-
-    @Override
-    public IIndexDataflowHelperFactory getIndexDataflowHelperFactory() {
-        return null;
-    }
-
-    @Override
-    public boolean getRetainInput() {
-        return false;
-    }
-
-    @Override
-    public ISearchOperationCallbackFactory getSearchOpCallbackFactory() {
-        return null;
-    }
-
-    @Override
-    public IModificationOperationCallbackFactory getModificationOpCallbackFactory() {
-        return null;
-    }
-
-    @Override
-    public ITupleFilterFactory getTupleFilterFactory() {
-        return null;
-    }
-
-    @Override
-    public ILocalResourceFactoryProvider getLocalResourceFactoryProvider() {
-        return null;
-    }
-
-    @Override
-    public boolean getRetainNull() {
-        return false;
-    }
-
-    @Override
-    public INullWriterFactory getNullWriterFactory() {
-        return null;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
new file mode 100644
index 0000000..7e9fdcb
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IIndexibleExternalDataSource;
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.indexing.IndexingScheduler;
+import org.apache.asterix.external.input.record.reader.HDFSRecordReader;
+import org.apache.asterix.external.input.stream.HDFSInputStreamProvider;
+import org.apache.asterix.external.provider.ExternalIndexerProvider;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.hdfs.dataflow.ConfFactory;
+import org.apache.hyracks.hdfs.dataflow.InputSplitsFactory;
+import org.apache.hyracks.hdfs.scheduler.Scheduler;
+
+/**
+ * Factory for HDFS-backed external data. Depending on the configuration it hands out either input stream
+ * providers or record readers, and it can be restricted to a snapshot of files for indexing operations.
+ */
+public class HDFSDataSourceFactory
+        implements IInputStreamProviderFactory, IRecordReaderFactory<Object>, IIndexibleExternalDataSource {
+
+    protected static final long serialVersionUID = 1L;
+    protected transient AlgebricksPartitionConstraint clusterLocations;
+    // one entry per input split, as returned by the scheduler's getLocationConstraints(..)
+    protected String[] readSchedule;
+    // same length as readSchedule, all false after configure(); shared with every reader/provider created here
+    // NOTE(review): presumably marks splits that were already consumed - confirm against HDFSRecordReader
+    protected boolean read[];
+    protected InputSplitsFactory inputSplitsFactory;
+    protected ConfFactory confFactory;
+    // true once createInputStreamProvider(..) has cached conf, inputSplits and nodeName
+    protected boolean configured = false;
+    // schedulers are static: created once by initialize() and shared by all factory instances
+    protected static Scheduler hdfsScheduler;
+    protected static IndexingScheduler indexingScheduler;
+    // volatile so that the unsynchronized check in configure() observes the write made inside initialize()
+    protected static volatile Boolean initialized = false;
+    protected List<ExternalFile> files;
+    protected Map<String, String> configuration;
+    protected Class<?> recordClass;
+    protected boolean indexingOp = false;
+    private JobConf conf;
+    private InputSplit[] inputSplits;
+    private String nodeName;
+
+    /**
+     * Builds the job configuration, computes the input splits and their read schedule, and (for record
+     * sources) determines the record class by probing the first split.
+     *
+     * @param configuration
+     *            the adapter configuration
+     * @throws Exception
+     *             if the splits cannot be computed or the probe reader fails
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        if (!HDFSDataSourceFactory.initialized) {
+            HDFSDataSourceFactory.initialize();
+        }
+        this.configuration = configuration;
+        JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
+        confFactory = new ConfFactory(conf);
+        clusterLocations = getPartitionConstraint();
+        int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
+        // if files list was set, we restrict the splits to the list
+        InputSplit[] inputSplits;
+        if (files == null) {
+            inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
+        } else {
+            inputSplits = HDFSUtils.getSplits(conf, files);
+        }
+        if (indexingOp) {
+            readSchedule = indexingScheduler.getLocationConstraints(inputSplits);
+        } else {
+            readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
+        }
+        inputSplitsFactory = new InputSplitsFactory(inputSplits);
+        read = new boolean[readSchedule.length];
+        Arrays.fill(read, false);
+        if (!ExternalDataUtils.isDataSourceStreamProvider(configuration)) {
+            // NOTE(review): inputSplits[0] fails with ArrayIndexOutOfBoundsException when there are no splits
+            RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf, Reporter.NULL);
+            try {
+                this.recordClass = reader.createValue().getClass();
+            } finally {
+                // always release the probe reader, even if createValue() throws
+                reader.close();
+            }
+        }
+    }
+
+    // Used to tell the factory to restrict the splits to the intersection between this list and the actual files on hdfs side
+    @Override
+    public void setSnapshot(List<ExternalFile> files, boolean indexingOp) {
+        this.files = files;
+        this.indexingOp = indexingOp;
+    }
+
+    /**
+     * Creates an input stream provider over the configured splits. On first use the job configuration, the
+     * input splits and the node id are obtained and cached in this factory instance; the snapshot file list
+     * (possibly null) is passed on to the provider.
+     *
+     * @throws HyracksDataException
+     *             wrapping any exception raised while preparing or creating the provider
+     */
+    @Override
+    public IInputStreamProvider createInputStreamProvider(IHyracksTaskContext ctx, int partition)
+            throws HyracksDataException {
+        try {
+            if (!configured) {
+                conf = confFactory.getConf();
+                inputSplits = inputSplitsFactory.getSplits();
+                nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
+                configured = true;
+            }
+            return new HDFSInputStreamProvider<Object>(read, inputSplits, readSchedule, nodeName, conf, configuration,
+                    files);
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    /**
+     * Get the cluster locations for this input stream factory. This method specifies on which asterix nodes the
+     * external adapter will run and how many threads per node.
+     *
+     * @return the partition constraint obtained from {@code HDFSUtils.getPartitionConstraints(..)}, which is
+     *         also stored in {@code clusterLocations}
+     */
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() {
+        clusterLocations = HDFSUtils.getPartitionConstraints(clusterLocations);
+        return clusterLocations;
+    }
+
+    /**
+     * This method initializes the schedulers which assign responsibility of reading different logical input
+     * splits from HDFS. It locks on the class object: the previous lock target was the boxed
+     * {@code initialized} flag, which is reassigned inside the critical section and is the JVM-wide shared
+     * {@code Boolean.FALSE} instance, so it did not reliably guard the initialization.
+     */
+    private static void initialize() {
+        synchronized (HDFSDataSourceFactory.class) {
+            if (!initialized) {
+                hdfsScheduler = HDFSUtils.initializeHDFSScheduler();
+                indexingScheduler = HDFSUtils.initializeIndexingHDFSScheduler();
+                initialized = true;
+            }
+        }
+    }
+
+    /**
+     * @return the job configuration produced by the configuration factory
+     * @throws HyracksDataException
+     *             if the configuration factory fails to produce it
+     */
+    public JobConf getJobConf() throws HyracksDataException {
+        return confFactory.getConf();
+    }
+
+    /**
+     * @return {@code STREAM} if the configuration describes a stream provider, {@code RECORDS} otherwise
+     */
+    @Override
+    public DataSourceType getDataSourceType() {
+        return (ExternalDataUtils.isDataSourceStreamProvider(configuration)) ? DataSourceType.STREAM
+                : DataSourceType.RECORDS;
+    }
+
+    /**
+     * Creates a record reader over the configured splits. When a snapshot was set, the reader is also given
+     * the snapshot files and an indexer obtained from the configuration.
+     */
+    @Override
+    public IRecordReader<? extends Writable> createRecordReader(IHyracksTaskContext ctx, int partition)
+            throws Exception {
+        JobConf conf = confFactory.getConf();
+        InputSplit[] inputSplits = inputSplitsFactory.getSplits();
+        String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
+        HDFSRecordReader<Object, Writable> recordReader = new HDFSRecordReader<Object, Writable>(read, inputSplits,
+                readSchedule, nodeName, conf);
+        if (files != null) {
+            recordReader.setSnapshot(files);
+            recordReader.setIndexer(ExternalIndexerProvider.getIndexer(configuration));
+        }
+        recordReader.configure(configuration);
+        return recordReader;
+    }
+
+    /**
+     * @return the value class probed in {@link #configure(Map)}; null if the source is a stream provider or
+     *         the factory was not configured yet
+     */
+    @Override
+    public Class<?> getRecordClass() {
+        return recordClass;
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return true;
+    }
+
+    /**
+     * @return true only if a snapshot was set and it was flagged as an indexing operation
+     */
+    @Override
+    public boolean isIndexingOp() {
+        return (files != null && indexingOp);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
new file mode 100644
index 0000000..fd5c397
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record;
+
+import java.util.Arrays;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.util.ExternalDataConstants;
+
+/**
+ * A raw record backed by a growable {@code char[]} buffer. Only the first {@link #size()} characters of the
+ * buffer returned by {@link #get()} belong to the record.
+ */
+public class CharArrayRecord implements IRawRecord<char[]> {
+
+    private char[] value;
+    private int size;
+
+    public CharArrayRecord(int initialCapacity) {
+        value = new char[initialCapacity];
+        size = 0;
+    }
+
+    public CharArrayRecord() {
+        this(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+    }
+
+    /**
+     * @return the UTF-8 encoding of the record content. Only the first {@code size} characters are encoded,
+     *         so unused or stale buffer capacity never leaks into the result, and the charset is fixed so the
+     *         result does not depend on the platform default.
+     */
+    @Override
+    public byte[] getBytes() {
+        return new String(value, 0, size).getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /**
+     * @return the backing buffer itself (not a copy); it may be longer than {@link #size()}
+     */
+    @Override
+    public char[] get() {
+        return value;
+    }
+
+    @Override
+    public int size() {
+        return size;
+    }
+
+    /**
+     * Replaces the record content with {@code length} characters of {@code recordBuffer} starting at
+     * {@code offset}. The backing buffer is reallocated only if it is too small.
+     */
+    public void setValue(char[] recordBuffer, int offset, int length) {
+        if (value.length < length) {
+            value = new char[length];
+        }
+        System.arraycopy(recordBuffer, offset, value, 0, length);
+        size = length;
+    }
+
+    /**
+     * Replaces the backing buffer with the given array without copying it. The record size is left unchanged.
+     * NOTE(review): callers presumably expect the size to be managed separately - confirm.
+     */
+    public void setValue(char[] value) {
+        this.value = value;
+    }
+
+    // Grows the backing buffer (keeping its content) to 1.25 times the requested length when it is too small
+    private void ensureCapacity(int len) {
+        if (value.length < len) {
+            value = Arrays.copyOf(value, (int) (len * 1.25));
+        }
+    }
+
+    /**
+     * Appends {@code length} characters of {@code recordBuffer}, starting at {@code offset}, to the record.
+     */
+    public void append(char[] recordBuffer, int offset, int length) {
+        ensureCapacity(size + length);
+        System.arraycopy(recordBuffer, offset, value, size, length);
+        size += length;
+    }
+
+    private void appendChar(char c) {
+        ensureCapacity(size + 1);
+        value[size] = c;
+        size++;
+    }
+
+    /**
+     * Makes sure the record ends with a line feed. An empty record becomes a single line feed; previously an
+     * empty record caused an out-of-bounds read of {@code value[-1]}.
+     */
+    public void endRecord() {
+        if (size == 0 || value[size - 1] != ExternalDataConstants.LF) {
+            appendChar(ExternalDataConstants.LF);
+        }
+    }
+
+    /**
+     * Empties the record; the backing buffer is kept for reuse.
+     */
+    @Override
+    public void reset() {
+        size = 0;
+    }
+
+    @Override
+    public String toString() {
+        return String.valueOf(value, 0, size);
+    }
+
+    @Override
+    public Class<char[]> getRecordClass() {
+        return char[].class;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/GenericRecord.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/GenericRecord.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/GenericRecord.java
new file mode 100644
index 0000000..365bc22
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/GenericRecord.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record;
+
+import org.apache.asterix.external.api.IRawRecord;
+
+/**
+ * A raw record that simply wraps an object of type {@code T}. It has no byte representation and no
+ * meaningful size.
+ */
+public class GenericRecord<T> implements IRawRecord<T> {
+
+    private T record;
+
+    /** Creates a wrapper holding no object until {@link #set(Object)} is called. */
+    public GenericRecord() {
+    }
+
+    /** Creates a wrapper around the given object. */
+    public GenericRecord(T record) {
+        this.record = record;
+    }
+
+    /** Replaces the wrapped object. */
+    public void set(T record) {
+        this.record = record;
+    }
+
+    /** @return the wrapped object, or null if none was set */
+    @Override
+    public T get() {
+        return record;
+    }
+
+    /**
+     * @return the runtime class of the wrapped object; throws {@link NullPointerException} if no object is
+     *         currently wrapped
+     */
+    @Override
+    public Class<?> getRecordClass() {
+        return record.getClass();
+    }
+
+    /** @return always null: a generic record has no byte representation */
+    @Override
+    public byte[] getBytes() {
+        return null;
+    }
+
+    /** @return always -1 */
+    @Override
+    public int size() {
+        return -1;
+    }
+
+    /** Does nothing; the wrapped object is kept. */
+    @Override
+    public void reset() {
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractCharRecordLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractCharRecordLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractCharRecordLookupReader.java
new file mode 100644
index 0000000..1b84e7a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractCharRecordLookupReader.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.RecordId;
+import org.apache.asterix.external.input.record.CharArrayRecord;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Base class for HDFS lookup readers that return character records. Subclasses fill {@link #value} in
+ * {@link #readRecord(RecordId)}; its bytes are then decoded as UTF-8 into {@link #record}.
+ */
+public abstract class AbstractCharRecordLookupReader extends AbstractHDFSLookupRecordReader<char[]> {
+
+    protected CharArrayRecord record = new CharArrayRecord();
+    protected Text value = new Text();
+    protected CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
+    protected ByteBuffer reusableByteBuffer = ByteBuffer.allocateDirect(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+    protected CharBuffer reusableCharBuffer = CharBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+
+    public AbstractCharRecordLookupReader(ExternalFileIndexAccessor snapshotAccessor, FileSystem fs,
+            Configuration conf) {
+        super(snapshotAccessor, fs, conf);
+    }
+
+    @Override
+    public Class<?> getRecordClass() throws IOException {
+        return char[].class;
+    }
+
+    /**
+     * Resets the reusable record, lets the subclass read the raw value for {@code rid} and decodes it.
+     *
+     * @return the reusable record instance holding the decoded characters
+     * @throws IOException
+     *             if reading fails or the value is not valid UTF-8
+     */
+    @Override
+    protected IRawRecord<char[]> lookup(RecordId rid) throws IOException {
+        record.reset();
+        readRecord(rid);
+        writeRecord();
+        return record;
+    }
+
+    /**
+     * Reads the raw record identified by {@code rid}.
+     * NOTE(review): implementations are expected to store the bytes in {@link #value} - confirm in subclasses.
+     */
+    protected abstract void readRecord(RecordId rid) throws IOException;
+
+    /**
+     * Decodes the bytes of {@link #value} as UTF-8 and appends the characters to {@link #record}.
+     * The decoder result is checked on every step: malformed or truncated input is reported as a
+     * {@link java.nio.charset.CharacterCodingException} instead of looping forever on bytes that can never be
+     * consumed.
+     */
+    private void writeRecord() throws IOException {
+        final int length = value.getLength();
+        reusableByteBuffer.clear();
+        if (reusableByteBuffer.remaining() < length) {
+            reusableByteBuffer = ByteBuffer.allocateDirect(length + ExternalDataConstants.DEFAULT_BUFFER_INCREMENT);
+        }
+        reusableByteBuffer.put(value.getBytes(), 0, length);
+        reusableByteBuffer.flip();
+        reusableCharBuffer.clear();
+        // every value is a complete, independent byte sequence, so start from a clean decoder state
+        decoder.reset();
+        java.nio.charset.CoderResult result;
+        do {
+            // endOfInput = true: a dangling partial character at the end is an error, not "more to come"
+            result = decoder.decode(reusableByteBuffer, reusableCharBuffer, true);
+            if (result.isError()) {
+                result.throwException();
+            }
+            drainCharBuffer();
+        } while (result.isOverflow());
+        do {
+            result = decoder.flush(reusableCharBuffer);
+            drainCharBuffer();
+        } while (result.isOverflow());
+        record.endRecord();
+    }
+
+    // Moves the characters decoded so far into the record and empties the char buffer for the next round
+    private void drainCharBuffer() {
+        record.append(reusableCharBuffer.array(), 0, reusableCharBuffer.position());
+        reusableCharBuffer.clear();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractHDFSLookupRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractHDFSLookupRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractHDFSLookupRecordReader.java
new file mode 100644
index 0000000..5a20962
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractHDFSLookupRecordReader.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.external.api.ILookupRecordReader;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.RecordId;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * Base class for readers that look up single records in HDFS files by record id. The file a record id refers
+ * to is resolved through a snapshot accessor, and a file whose modification time differs from the snapshot
+ * (or which no longer exists) is treated as replaced: lookups into it return null.
+ */
+public abstract class AbstractHDFSLookupRecordReader<T> implements ILookupRecordReader<T> {
+
+    // id of the file the last lookup referred to; -1 before the first lookup
+    protected int fileId;
+    private ExternalFileIndexAccessor snapshotAccessor;
+    // reusable holder that the snapshot accessor fills for the current file
+    protected ExternalFile file;
+    protected FileSystem fs;
+    protected Configuration conf;
+    // true if the current file does not match its snapshot entry any more
+    protected boolean replaced;
+
+    public AbstractHDFSLookupRecordReader(ExternalFileIndexAccessor snapshotAccessor, FileSystem fs,
+            Configuration conf) {
+        this.snapshotAccessor = snapshotAccessor;
+        this.fs = fs;
+        this.conf = conf;
+        this.fileId = -1;
+        this.file = new ExternalFile();
+    }
+
+    @Override
+    public void configure(Map<String, String> configurations) throws Exception {
+    }
+
+    @Override
+    public final void open() throws HyracksDataException {
+        snapshotAccessor.open();
+    }
+
+    /**
+     * Looks up the record identified by {@code rid}, switching to the file it lives in if that is not the
+     * current one.
+     *
+     * @return the record, or null if the file was replaced or removed since the snapshot was taken
+     */
+    @Override
+    public IRawRecord<T> read(RecordId rid) throws Exception {
+        if (rid.getFileId() != fileId) {
+            switchFile(rid);
+        }
+        return replaced ? null : lookup(rid);
+    }
+
+    // Closes the current file, resolves the file of the given record id and opens it if it is still valid
+    private void switchFile(RecordId rid) throws Exception {
+        closeFile();
+        snapshotAccessor.lookup(rid.getFileId(), file);
+        fileId = rid.getFileId();
+        try {
+            validate();
+            if (replaced) {
+                return;
+            }
+            openFile();
+            // check again after opening: the file may have changed in between
+            validate();
+            if (replaced) {
+                closeFile();
+            }
+        } catch (FileNotFoundException e) {
+            // a missing file is handled like a replaced one
+            replaced = true;
+        }
+    }
+
+    // Sets 'replaced' by comparing the modification time on the file system with the one in the snapshot
+    private void validate() throws IllegalArgumentException, IOException {
+        Path path = new Path(file.getFileName());
+        FileStatus status = fs.getFileStatus(path);
+        replaced = status.getModificationTime() != file.getLastModefiedTime().getTime();
+    }
+
+    protected abstract IRawRecord<T> lookup(RecordId rid) throws IOException;
+
+    protected abstract void openFile() throws IllegalArgumentException, IOException;
+
+    protected abstract void closeFile();
+
+    /**
+     * Closes the current file and then the snapshot accessor; the accessor is closed even if closing the
+     * file fails.
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            closeFile();
+        } finally {
+            snapshotAccessor.close();
+        }
+    }
+
+    @Override
+    public void fail() {
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReader.java
new file mode 100644
index 0000000..3b59b98
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReader.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.api.IIndexingDatasource;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.CharArrayRecord;
+import org.apache.asterix.external.input.stream.AInputStream;
+import org.apache.asterix.external.input.stream.AInputStreamReader;
+import org.apache.asterix.external.util.ExternalDataConstants;
+
+/**
+ * Base class for record readers that produce character records from an input stream. It owns the stream
+ * reader, a reusable record and an input buffer, and optionally carries an indexer.
+ */
+public abstract class AbstractStreamRecordReader implements IRecordReader<char[]>, IIndexingDatasource {
+    protected AInputStreamReader reader;
+    // reusable record and input buffer, both allocated in configure(..)
+    protected CharArrayRecord record;
+    protected char[] inputBuffer;
+    // NOTE(review): not used in this class; presumably buffer bookkeeping for subclasses - confirm
+    protected int bufferLength = 0;
+    protected int bufferPosn = 0;
+    protected IExternalIndexer indexer;
+
+    /**
+     * Allocates the reusable record and an input buffer of the default size.
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        record = new CharArrayRecord();
+        inputBuffer = new char[ExternalDataConstants.DEFAULT_BUFFER_SIZE];
+    }
+
+    /**
+     * Wraps the given stream in the reader this record reader consumes.
+     */
+    public void setInputStream(AInputStream inputStream) throws IOException {
+        this.reader = new AInputStreamReader(inputStream);
+    }
+
+    /**
+     * @return the reusable record instance; this method itself reads nothing
+     */
+    @Override
+    public IRawRecord<char[]> next() throws IOException {
+        return record;
+    }
+
+    @Override
+    public Class<char[]> getRecordClass() {
+        return char[].class;
+    }
+
+    @Override
+    public void setIndexer(IExternalIndexer indexer) {
+        this.indexer = indexer;
+    }
+
+    @Override
+    public IExternalIndexer getIndexer() {
+        return indexer;
+    }
+
+    /**
+     * Closes the underlying stream reader.
+     */
+    @Override
+    public void close() throws IOException {
+        reader.close();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReaderFactory.java
new file mode 100644
index 0000000..c7acb1a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/AbstractStreamRecordReaderFactory.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.api.IIndexibleExternalDataSource;
+import org.apache.asterix.external.api.IIndexingDatasource;
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Base class for record reader factories that read their records from an input stream. Partitioning,
+ * indexing capabilities and snapshots are delegated to the wrapped input stream provider factory.
+ */
+public abstract class AbstractStreamRecordReaderFactory<T>
+        implements IRecordReaderFactory<T>, IIndexibleExternalDataSource {
+
+    private static final long serialVersionUID = 1L;
+    protected IInputStreamProviderFactory inputStreamFactory;
+    protected Map<String, String> configuration;
+
+    /**
+     * Sets the factory that supplies the input streams.
+     *
+     * @return this factory, to allow call chaining
+     */
+    public AbstractStreamRecordReaderFactory<T> setInputStreamFactoryProvider(
+            IInputStreamProviderFactory inputStreamFactory) {
+        this.inputStreamFactory = inputStreamFactory;
+        return this;
+    }
+
+    /**
+     * Stores the configuration, configures the input stream factory and then the concrete reader factory.
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+        inputStreamFactory.configure(configuration);
+        configureStreamReaderFactory(configuration);
+    }
+
+    /** Hook for subclass-specific configuration; called last by {@link #configure(Map)}. */
+    protected abstract void configureStreamReaderFactory(Map<String, String> configuration) throws Exception;
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        return inputStreamFactory.getPartitionConstraint();
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return inputStreamFactory.isIndexible();
+    }
+
+    /**
+     * @return true if the input stream factory is indexible and reports an indexing operation
+     */
+    @Override
+    public boolean isIndexingOp() {
+        return inputStreamFactory.isIndexible()
+                && ((IIndexibleExternalDataSource) inputStreamFactory).isIndexingOp();
+    }
+
+    /**
+     * Forwards the snapshot to the input stream factory, which is cast to
+     * {@link IIndexibleExternalDataSource} without a prior check.
+     */
+    @Override
+    public void setSnapshot(List<ExternalFile> files, boolean indexingOp) throws Exception {
+        IIndexibleExternalDataSource indexibleSource = (IIndexibleExternalDataSource) inputStreamFactory;
+        indexibleSource.setSnapshot(files, indexingOp);
+    }
+
+    /**
+     * Wires a freshly created reader: gives it the input stream of this partition, the indexer of the
+     * stream provider (only for indexing operations, otherwise null) and finally the configuration.
+     *
+     * @return the given reader, fully configured
+     */
+    protected IRecordReader<char[]> configureReader(AbstractStreamRecordReader recordReader, IHyracksTaskContext ctx,
+            int partition) throws Exception {
+        IInputStreamProvider streamProvider = inputStreamFactory.createInputStreamProvider(ctx, partition);
+        boolean indexing = inputStreamFactory.isIndexible()
+                && ((IIndexibleExternalDataSource) inputStreamFactory).isIndexingOp();
+        IExternalIndexer indexer = indexing ? ((IIndexingDatasource) streamProvider).getIndexer() : null;
+        recordReader.setInputStream(streamProvider.getInputStream());
+        recordReader.setIndexer(indexer);
+        recordReader.configure(configuration);
+        return recordReader;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/EmptyRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/EmptyRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/EmptyRecordReader.java
new file mode 100644
index 0000000..e742b1e
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/EmptyRecordReader.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * A {@link RecordReader} that never produces a record: {@code next} always returns
+ * {@code false}, created keys and values are {@code null}, and position and progress are zero.
+ */
+public class EmptyRecordReader<K, V> implements RecordReader<K, V> {
+
+    /** Always {@code null}; this reader creates no key object. */
+    @Override
+    public K createKey() {
+        return null;
+    }
+
+    /** Always {@code null}; this reader creates no value object. */
+    @Override
+    public V createValue() {
+        return null;
+    }
+
+    /** Always {@code false}; the arguments are left untouched. */
+    @Override
+    public boolean next(K key, V value) throws IOException {
+        return false;
+    }
+
+    /** Always zero. */
+    @Override
+    public long getPos() throws IOException {
+        return 0L;
+    }
+
+    /** Always zero. */
+    @Override
+    public float getProgress() throws IOException {
+        return 0.0f;
+    }
+
+    @Override
+    public void close() throws IOException {
+        // nothing to release
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSRecordReader.java
new file mode 100644
index 0000000..d88f967
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSRecordReader.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.api.IIndexingDatasource;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.input.record.GenericRecord;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public class HDFSRecordReader<K, V extends Writable> implements IRecordReader<Writable>, IIndexingDatasource {
+
+    protected RecordReader<K, Writable> reader;
+    protected V value = null;
+    protected K key = null;
+    protected int currentSplitIndex = 0;
+    protected boolean read[];
+    protected InputFormat<?, ?> inputFormat;
+    protected InputSplit[] inputSplits;
+    protected String[] readSchedule;
+    protected String nodeName;
+    protected JobConf conf;
+    protected GenericRecord<Writable> record;
+    // Indexing variables
+    protected IExternalIndexer indexer;
+    protected List<ExternalFile> snapshot;
+    protected FileSystem hdfs;
+
+    public HDFSRecordReader(boolean read[], InputSplit[] inputSplits, String[] readSchedule, String nodeName,
+            JobConf conf) {
+        this.read = read;
+        this.inputSplits = inputSplits;
+        this.readSchedule = readSchedule;
+        this.nodeName = nodeName;
+        this.conf = conf;
+        this.inputFormat = conf.getInputFormat();
+        this.reader = new EmptyRecordReader<K, Writable>();
+    }
+
+    @Override
+    public void close() throws IOException {
+        reader.close();
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        record = new GenericRecord<Writable>();
+        nextInputSplit();
+    }
+
+    @Override
+    public boolean hasNext() throws Exception {
+        if (reader.next(key, value)) {
+            return true;
+        }
+        while (nextInputSplit()) {
+            if (reader.next(key, value)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    @Override
+    public IRawRecord<Writable> next() throws IOException {
+        record.set(value);
+        return record;
+    }
+
+    @Override
+    public Class<? extends Writable> getRecordClass() throws IOException {
+        if (value == null) {
+            if (!nextInputSplit()) {
+                return null;
+            }
+        }
+        return value.getClass();
+    }
+
+    private boolean nextInputSplit() throws IOException {
+        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+            /**
+             * read all the partitions scheduled to the current node
+             */
+            if (readSchedule[currentSplitIndex].equals(nodeName)) {
+                /**
+                 * pick an unread split to read synchronize among
+                 * simultaneous partitions in the same machine
+                 */
+                synchronized (read) {
+                    if (read[currentSplitIndex] == false) {
+                        read[currentSplitIndex] = true;
+                    } else {
+                        continue;
+                    }
+                }
+                if (snapshot != null) {
+                    String fileName = ((FileSplit) (inputSplits[currentSplitIndex])).getPath().toUri().getPath();
+                    FileStatus fileStatus = hdfs.getFileStatus(new Path(fileName));
+                    // Skip if not the same file stored in the files snapshot
+                    if (fileStatus.getModificationTime() != snapshot.get(currentSplitIndex).getLastModefiedTime()
+                            .getTime())
+                        continue;
+                }
+
+                reader.close();
+                reader = getRecordReader(currentSplitIndex);
+                return true;
+            }
+        }
+        return false;
+    }
+
+    @SuppressWarnings("unchecked")
+    private RecordReader<K, Writable> getRecordReader(int splitIndex) throws IOException {
+        reader = (RecordReader<K, Writable>) inputFormat.getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL);
+        if (key == null) {
+            key = reader.createKey();
+            value = (V) reader.createValue();
+        }
+        if (indexer != null) {
+            try {
+                indexer.reset(this);
+            } catch (Exception e) {
+                throw new HyracksDataException(e);
+            }
+        }
+        return reader;
+    }
+
+    @Override
+    public boolean stop() {
+        return false;
+    }
+
+    @Override
+    public IExternalIndexer getIndexer() {
+        return indexer;
+    }
+
+    @Override
+    public void setIndexer(IExternalIndexer indexer) {
+        this.indexer = indexer;
+    }
+
+    public List<ExternalFile> getSnapshot() {
+        return snapshot;
+    }
+
+    public void setSnapshot(List<ExternalFile> snapshot) throws IOException {
+        this.snapshot = snapshot;
+        hdfs = FileSystem.get(conf);
+    }
+
+    public int getCurrentSplitIndex() {
+        return currentSplitIndex;
+    }
+
+    public RecordReader<K, Writable> getReader() {
+        return reader;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSTextLineReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSTextLineReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSTextLineReader.java
new file mode 100644
index 0000000..9466a96
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/HDFSTextLineReader.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Buffered line reader over an HDFS {@link FSDataInputStream} that also tracks the file offset
+ * of the next unread byte, so that {@link #seek(long)} can keep the buffered data when the
+ * target offset is already inside the buffer.
+ */
+public class HDFSTextLineReader {
+    private static final int DEFAULT_BUFFER_SIZE = 32 * 1024;
+    private int bufferSize = DEFAULT_BUFFER_SIZE;
+    private FSDataInputStream reader;
+
+    private byte[] buffer;
+    // the number of bytes of real data in the buffer
+    private int bufferLength = 0;
+    // the current position in the buffer
+    private int bufferPosn = 0;
+
+    // the file offset of the next byte readLine will consume
+    private long currentFilePos = 0L;
+
+    private static final byte CR = '\r';
+    private static final byte LF = '\n';
+
+    public static final String KEY_BUFFER_SIZE = "io.file.buffer.size";
+
+    /**
+     * Create a line reader that reads from the given stream using the
+     * default buffer-size (32k).
+     *
+     * @param in
+     *            The input stream
+     * @throws IOException
+     */
+    public HDFSTextLineReader(FSDataInputStream in) throws IOException {
+        this(in, DEFAULT_BUFFER_SIZE);
+    }
+
+    /**
+     * Create a line reader that reads from the given stream using the
+     * given buffer-size.
+     *
+     * @param in
+     *            The input stream
+     * @param bufferSize
+     *            Size of the read buffer
+     * @throws IOException
+     */
+    public HDFSTextLineReader(FSDataInputStream in, int bufferSize) throws IOException {
+        this.reader = in;
+        this.bufferSize = bufferSize;
+        this.buffer = new byte[this.bufferSize];
+        currentFilePos = in.getPos();
+    }
+
+    /**
+     * Create a line reader with the default buffer-size and no stream attached. A stream must
+     * be supplied through {@link #resetReader(FSDataInputStream)} before reading or seeking.
+     *
+     * @throws IOException
+     */
+    public HDFSTextLineReader() throws IOException {
+        this.bufferSize = DEFAULT_BUFFER_SIZE;
+        this.buffer = new byte[this.bufferSize];
+    }
+
+    /**
+     * Create a line reader that reads from the given stream using the <code>io.file.buffer.size</code> specified in the given <code>Configuration</code>.
+     *
+     * @param in
+     *            input stream
+     * @param conf
+     *            configuration
+     * @throws IOException
+     */
+    public HDFSTextLineReader(FSDataInputStream in, Configuration conf) throws IOException {
+        this(in, conf.getInt(KEY_BUFFER_SIZE, DEFAULT_BUFFER_SIZE));
+    }
+
+    /**
+     * Read one line from the InputStream into the given Text. A line
+     * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
+     * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated
+     * line.
+     *
+     * @param str
+     *            the object to store the given line (without newline)
+     * @param maxLineLength
+     *            the maximum number of bytes to store into str;
+     *            the rest of the line is silently discarded.
+     * @param maxBytesToConsume
+     *            the maximum number of bytes to consume
+     *            in this call. This is only a hint, because if the line crosses
+     *            this threshold, we allow it to happen. It can overshoot
+     *            potentially by as much as one buffer length.
+     * @return the number of bytes read including the (longest) newline
+     *         found.
+     * @throws IOException
+     *             if the underlying stream throws
+     */
+    public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
+        /* We're reading data from in, but the head of the stream may be
+         * already buffered in buffer, so we have several cases:
+         * 1. No newline characters are in the buffer, so we need to copy
+         *    everything and read another buffer from the stream.
+         * 2. An unambiguously terminated line is in buffer, so we just
+         *    copy to str.
+         * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
+         *    in CR.  In this case we copy everything up to CR to str, but
+         *    we also need to see what follows CR: if it's LF, then we
+         *    need consume LF as well, so next call to readLine will read
+         *    from after that.
+         * We use a flag prevCharCR to signal if previous character was CR
+         * and, if it happens to be at the end of the buffer, delay
+         * consuming it until we have a chance to look at the char that
+         * follows.
+         */
+        str.clear();
+        int txtLength = 0; //tracks str.getLength(), as an optimization
+        int newlineLength = 0; //length of terminating newline
+        boolean prevCharCR = false; //true if prev char was CR
+        long bytesConsumed = 0;
+        do {
+            int startPosn = bufferPosn; //starting from where we left off the last time
+            if (bufferPosn >= bufferLength) {
+                startPosn = bufferPosn = 0;
+                if (prevCharCR)
+                    ++bytesConsumed; //account for CR from previous read
+                bufferLength = reader.read(buffer);
+                if (bufferLength <= 0)
+                    break; // EOF
+            }
+            for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
+                if (buffer[bufferPosn] == LF) {
+                    newlineLength = (prevCharCR) ? 2 : 1;
+                    ++bufferPosn; // at next invocation proceed from following byte
+                    break;
+                }
+                if (prevCharCR) { //CR + notLF, we are at notLF
+                    newlineLength = 1;
+                    break;
+                }
+                prevCharCR = (buffer[bufferPosn] == CR);
+            }
+            int readLength = bufferPosn - startPosn;
+            if (prevCharCR && newlineLength == 0)
+                --readLength; //CR at the end of the buffer
+            bytesConsumed += readLength;
+            int appendLength = readLength - newlineLength;
+            if (appendLength > maxLineLength - txtLength) {
+                appendLength = maxLineLength - txtLength;
+            }
+            if (appendLength > 0) {
+                str.append(buffer, startPosn, appendLength);
+                txtLength += appendLength;
+            }
+        } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
+
+        if (bytesConsumed > Integer.MAX_VALUE)
+            throw new IOException("Too many bytes before newline: " + bytesConsumed);
+        // at EOF the last read leaves bufferLength at -1; count it as an empty buffer so the
+        // tracked offset does not end up one byte past the stream position
+        int bufferedBytes = bufferLength > 0 ? bufferLength : 0;
+        currentFilePos = reader.getPos() - bufferedBytes + bufferPosn;
+        return (int) bytesConsumed;
+    }
+
+    /**
+     * Read from the InputStream into the given Text.
+     *
+     * @param str
+     *            the object to store the given line
+     * @param maxLineLength
+     *            the maximum number of bytes to store into str.
+     * @return the number of bytes read including the newline
+     * @throws IOException
+     *             if the underlying stream throws
+     */
+    public int readLine(Text str, int maxLineLength) throws IOException {
+        return readLine(str, maxLineLength, Integer.MAX_VALUE);
+    }
+
+    /**
+     * Read from the InputStream into the given Text.
+     *
+     * @param str
+     *            the object to store the given line
+     * @return the number of bytes read including the newline
+     * @throws IOException
+     *             if the underlying stream throws
+     */
+    public int readLine(Text str) throws IOException {
+        return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
+    }
+
+    /**
+     * Move the read position to the given file offset. The stream itself is repositioned, and
+     * the buffer discarded, unless the offset lies between the current read position and the
+     * end of the buffered data; in that case only the position inside the buffer is advanced.
+     *
+     * @param desired
+     *            the file offset to continue reading from
+     * @throws IOException
+     *             if the underlying stream throws
+     */
+    public void seek(long desired) throws IOException {
+        if (reader.getPos() <= desired || currentFilePos > desired) {
+            // desired position is ahead of stream or before the current position, seek to position
+            reader.seek(desired);
+            bufferLength = 0;
+            bufferPosn = 0;
+            currentFilePos = desired;
+        } else if (currentFilePos < desired) {
+            // desired position is in the buffer
+            int difference = (int) (desired - currentFilePos);
+            bufferPosn += difference;
+            currentFilePos = desired;
+        }
+    }
+
+    public FSDataInputStream getReader() {
+        return reader;
+    }
+
+    /**
+     * Switch to a new stream, discarding any buffered data and continuing from the stream's
+     * current position.
+     *
+     * @param reader
+     *            the stream to read from
+     * @throws IOException
+     *             if the stream position cannot be obtained
+     */
+    public void resetReader(FSDataInputStream reader) throws IOException {
+        this.reader = reader;
+        bufferLength = 0;
+        bufferPosn = 0;
+        currentFilePos = reader.getPos();
+    }
+
+    /**
+     * Close the underlying stream, if one is attached.
+     *
+     * @throws IOException
+     *             if the underlying stream throws
+     */
+    public void close() throws IOException {
+        // the no-arg constructor leaves the stream unset until resetReader is called
+        if (reader != null) {
+            reader.close();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LineRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LineRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LineRecordReader.java
new file mode 100644
index 0000000..9b11df6
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LineRecordReader.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+
+/**
+ * Stream record reader that produces one record per line. A line ends at LF, CR or CR+LF; the
+ * end of the stream also ends a line that has no terminator.
+ */
+public class LineRecordReader extends AbstractStreamRecordReader {
+
+    protected boolean prevCharCR;
+    protected int newlineLength;
+    // number of records returned so far, including a skipped header line
+    protected int recordNumber = 0;
+
+    /**
+     * Reads the next line into {@code record}.
+     *
+     * @return {@code true} if a line was read; {@code false} at end of stream, in which case
+     *         the underlying reader is closed
+     * @throws IOException
+     *             if the underlying reader throws
+     */
+    @Override
+    public boolean hasNext() throws IOException {
+        /* We're reading data from in, but the head of the stream may be
+         * already buffered in buffer, so we have several cases:
+         * 1. No newline characters are in the buffer, so we need to copy
+         *    everything and read another buffer from the stream.
+         * 2. An unambiguously terminated line is in buffer, so we just
+         *    copy to record.
+         * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
+         *    in CR.  In this case we copy everything up to CR to record, but
+         *    we also need to see what follows CR: if it's LF, then we
+         *    need consume LF as well, so next call to readLine will read
+         *    from after that.
+         * We use a flag prevCharCR to signal if previous character was CR
+         * and, if it happens to be at the end of the buffer, delay
+         * consuming it until we have a chance to look at the char that
+         * follows.
+         */
+        newlineLength = 0; //length of terminating newline
+        prevCharCR = false; //true if prev char was CR
+        record.reset();
+        // Total chars appended to the record in this call. The EOF check must look at this and
+        // not at the last chunk alone: a final chunk holding only a CR contributes nothing, and
+        // the data appended before it would otherwise be dropped.
+        int recordLength = 0;
+        do {
+            int startPosn = bufferPosn; //starting from where we left off the last time
+            if (bufferPosn >= bufferLength) {
+                startPosn = bufferPosn = 0;
+                bufferLength = reader.read(inputBuffer);
+                if (bufferLength <= 0) {
+                    if (recordLength > 0) {
+                        // unterminated last line
+                        record.endRecord();
+                        recordNumber++;
+                        return true;
+                    }
+                    reader.close();
+                    return false; //EOF
+                }
+            }
+            for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
+                if (inputBuffer[bufferPosn] == ExternalDataConstants.LF) {
+                    newlineLength = (prevCharCR) ? 2 : 1;
+                    ++bufferPosn; // at next invocation proceed from following byte
+                    break;
+                }
+                if (prevCharCR) { //CR + notLF, we are at notLF
+                    newlineLength = 1;
+                    break;
+                }
+                prevCharCR = (inputBuffer[bufferPosn] == ExternalDataConstants.CR);
+            }
+            int readLength = bufferPosn - startPosn;
+            if (prevCharCR && newlineLength == 0) {
+                --readLength; //CR at the end of the buffer
+            }
+            if (readLength > 0) {
+                record.append(inputBuffer, startPosn, readLength);
+                recordLength += readLength;
+            }
+        } while (newlineLength == 0);
+        recordNumber++;
+        return true;
+    }
+
+    /** Always returns {@code false}. */
+    @Override
+    public boolean stop() {
+        return false;
+    }
+
+    /**
+     * Configures the reader and, when the configuration declares a header, consumes the first
+     * line so that it is not returned as a record.
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        super.configure(configuration);
+        if (ExternalDataUtils.hasHeader(configuration) && hasNext()) {
+            next();
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LookupReaderFactoryProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LookupReaderFactoryProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LookupReaderFactoryProvider.java
new file mode 100644
index 0000000..3a82a68
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/LookupReaderFactoryProvider.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.ILookupReaderFactory;
+import org.apache.asterix.external.input.record.reader.factory.HDFSLookupReaderFactory;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.HDFSUtils;
+
+public class LookupReaderFactoryProvider {
+
+    @SuppressWarnings("rawtypes")
+    public static ILookupReaderFactory getLookupReaderFactory(Map<String, String> configuration) throws Exception {
+        String inputFormat = HDFSUtils.getInputFormatClassName(configuration);
+        if (inputFormat.equals(ExternalDataConstants.CLASS_NAME_TEXT_INPUT_FORMAT)
+                || inputFormat.equals(ExternalDataConstants.CLASS_NAME_SEQUENCE_INPUT_FORMAT)
+                || inputFormat.equals(ExternalDataConstants.CLASS_NAME_RC_INPUT_FORMAT)) {
+            HDFSLookupReaderFactory<Object> readerFactory = new HDFSLookupReaderFactory<Object>();
+            readerFactory.configure(configuration);
+            return readerFactory;
+        } else {
+            throw new AsterixException("Unrecognized external format");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/QuotedLineRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/QuotedLineRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/QuotedLineRecordReader.java
new file mode 100644
index 0000000..668876e
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/QuotedLineRecordReader.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataExceptionUtils;
+
+/**
+ * Line record reader that does not end a record at a line terminator found inside a quoted
+ * section. The quote character is taken from the configuration.
+ */
+public class QuotedLineRecordReader extends LineRecordReader {
+
+    private char quote;
+    private boolean prevCharEscape;
+    private boolean inQuote;
+
+    /**
+     * Reads the quote character from the configuration and then configures the line reader.
+     *
+     * @throws Exception
+     *             an {@link AsterixException} if the quote parameter is missing or is not
+     *             exactly one character long, or whatever the parent configuration throws
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        String quoteString = configuration.get(ExternalDataConstants.KEY_QUOTE);
+        if (quoteString == null || quoteString.length() != 1) {
+            throw new AsterixException(ExternalDataExceptionUtils.incorrectParameterMessage(
+                    ExternalDataConstants.KEY_QUOTE, ExternalDataConstants.PARAMETER_OF_SIZE_ONE, quoteString));
+        }
+        // The quote must be known before super.configure(): the parent may skip a header line
+        // by calling hasNext(), which resolves to the quote-aware override below.
+        this.quote = quoteString.charAt(0);
+        super.configure(configuration);
+    }
+
+    /**
+     * Reads the next record into {@code record}, treating line terminators inside a quoted
+     * section as part of the record.
+     *
+     * @return {@code true} if a record was read, {@code false} at end of stream
+     * @throws IOException
+     *             if the underlying reader throws, or if the stream ends inside a quote
+     */
+    @Override
+    public boolean hasNext() throws IOException {
+        newlineLength = 0;
+        prevCharCR = false;
+        prevCharEscape = false;
+        record.reset();
+        inQuote = false;
+        // Total chars appended to the record in this call. The EOF check must look at this and
+        // not at the last chunk alone: a final chunk holding only a CR contributes nothing, and
+        // the data appended before it would otherwise be dropped.
+        int recordLength = 0;
+        do {
+            int startPosn = bufferPosn;
+            if (bufferPosn >= bufferLength) {
+                startPosn = bufferPosn = 0;
+                bufferLength = reader.read(inputBuffer);
+                if (bufferLength <= 0) {
+                    if (recordLength > 0) {
+                        if (inQuote) {
+                            throw new IOException("malformed input record ended inside quote");
+                        }
+                        record.endRecord();
+                        recordNumber++;
+                        return true;
+                    }
+                    return false;
+                }
+            }
+            for (; bufferPosn < bufferLength; ++bufferPosn) {
+                if (!inQuote) {
+                    if (inputBuffer[bufferPosn] == ExternalDataConstants.LF) {
+                        newlineLength = (prevCharCR) ? 2 : 1;
+                        ++bufferPosn;
+                        break;
+                    }
+                    if (prevCharCR) {
+                        newlineLength = 1;
+                        break;
+                    }
+                    prevCharCR = (inputBuffer[bufferPosn] == ExternalDataConstants.CR);
+                    if (inputBuffer[bufferPosn] == quote) {
+                        if (!prevCharEscape) {
+                            inQuote = true;
+                        }
+                    }
+                    // an escaped escape character does not escape the character after it
+                    if (prevCharEscape) {
+                        prevCharEscape = false;
+                    } else {
+                        prevCharEscape = inputBuffer[bufferPosn] == ExternalDataConstants.ESCAPE;
+                    }
+                } else {
+                    // only look for next quote
+                    if (inputBuffer[bufferPosn] == quote) {
+                        if (!prevCharEscape) {
+                            inQuote = false;
+                        }
+                    }
+                    // NOTE(review): unlike the branch above, an escape character here always
+                    // escapes the next character, even when it was itself escaped - confirm
+                    // this asymmetry is intended
+                    prevCharEscape = inputBuffer[bufferPosn] == ExternalDataConstants.ESCAPE;
+                }
+            }
+            int readLength = bufferPosn - startPosn;
+            if (prevCharCR && newlineLength == 0) {
+                --readLength;
+            }
+            if (readLength > 0) {
+                record.append(inputBuffer, startPosn, readLength);
+                recordLength += readLength;
+            }
+        } while (newlineLength == 0);
+        recordNumber++;
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RCLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RCLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RCLookupReader.java
new file mode 100644
index 0000000..5c33502
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RCLookupReader.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.RecordId;
+import org.apache.asterix.external.input.record.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.RCFile.Reader;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.log4j.Logger;
+
+/**
+ * Lookup reader for RCFile-formatted HDFS files. A record is addressed by a {@link RecordId}
+ * that carries a file offset and a row number relative to that offset; the reader seeks to the
+ * offset (when needed) and then steps forward row by row until the requested row is reached.
+ */
+public class RCLookupReader extends AbstractHDFSLookupRecordReader<BytesRefArrayWritable> {
+
+    private static final Logger LOGGER = Logger.getLogger(RCLookupReader.class.getName());
+
+    // Reusable holders: the same key/value/record instances are handed out on every lookup.
+    private final LongWritable key = new LongWritable();
+    private final BytesRefArrayWritable value = new BytesRefArrayWritable();
+    private final GenericRecord<BytesRefArrayWritable> record = new GenericRecord<BytesRefArrayWritable>();
+
+    // Reader over the currently open file; assigned in openFile().
+    private Reader reader;
+    // Offset of the last lookup, or -1 right after a file has been opened.
+    private long offset;
+    // Row most recently read at the current offset, or -1 when none has been read yet.
+    private int row;
+
+    public RCLookupReader(ExternalFileIndexAccessor snapshotAccessor, FileSystem fs, Configuration conf) {
+        super(snapshotAccessor, fs, conf);
+    }
+
+    @Override
+    public Class<?> getRecordClass() throws IOException {
+        return Writable.class;
+    }
+
+    /**
+     * Positions the underlying reader on the row identified by {@code rid} and returns it.
+     *
+     * @param rid
+     *            identifier holding the offset and row of the wanted record
+     * @return the shared record instance, now wrapping the row that was read
+     * @throws IOException
+     *             if seeking or reading from the underlying reader fails
+     */
+    @Override
+    protected IRawRecord<BytesRefArrayWritable> lookup(RecordId rid) throws IOException {
+        final long requestedOffset = rid.getOffset();
+        if (requestedOffset != offset) {
+            // A different offset was requested: reposition and restart row counting.
+            offset = requestedOffset;
+            if (reader.getPosition() != requestedOffset) {
+                reader.seek(requestedOffset);
+            }
+            reader.resetBuffer();
+            row = -1;
+        }
+
+        // Step forward one row at a time; every intermediate row is materialized on the way.
+        for (; row < rid.getRow(); row++) {
+            reader.next(key);
+            reader.getCurrentRow(value);
+        }
+        record.set(value);
+        return record;
+    }
+
+    /**
+     * Closes the current reader, if any. Failures are logged and otherwise ignored.
+     */
+    @Override
+    protected void closeFile() {
+        if (reader != null) {
+            try {
+                reader.close();
+            } catch (Exception e) {
+                LOGGER.warn("Error closing HDFS file", e);
+            }
+        }
+    }
+
+    /**
+     * Opens a reader over the current file and resets the offset/row bookkeeping.
+     */
+    @Override
+    protected void openFile() throws IllegalArgumentException, IOException {
+        reader = new Reader(fs, new Path(file.getFileName()), conf);
+        row = -1;
+        offset = -1;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RSSRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RSSRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RSSRecordReader.java
new file mode 100644
index 0000000..1c2dc30
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/RSSRecordReader.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.GenericRecord;
+import org.apache.log4j.Logger;
+
+import com.sun.syndication.feed.synd.SyndEntryImpl;
+import com.sun.syndication.feed.synd.SyndFeed;
+import com.sun.syndication.fetcher.FeedFetcher;
+import com.sun.syndication.fetcher.FetcherEvent;
+import com.sun.syndication.fetcher.FetcherException;
+import com.sun.syndication.fetcher.FetcherListener;
+import com.sun.syndication.fetcher.impl.FeedFetcherCache;
+import com.sun.syndication.fetcher.impl.HashMapFeedInfoCache;
+import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
+import com.sun.syndication.io.FeedException;
+
+/**
+ * Record reader that polls a single RSS feed URL and hands out its entries one at a time.
+ * Entries obtained from a fetch are queued; the feed is fetched again only once the queue has
+ * been drained.
+ */
+public class RSSRecordReader implements IRecordReader<SyndEntryImpl> {
+
+    private static final Logger LOGGER = Logger.getLogger(RSSRecordReader.class.getName());
+
+    // Entries fetched from the feed that have not been handed out yet.
+    private final Queue<SyndEntryImpl> rssFeedBuffer = new LinkedList<SyndEntryImpl>();
+    // Single record instance reused for every entry returned by next().
+    private final GenericRecord<SyndEntryImpl> record = new GenericRecord<SyndEntryImpl>();
+    private final URL feedUrl;
+    private final FeedFetcherCache feedInfoCache;
+    private final FeedFetcher fetcher;
+    private final FetcherEventListenerImpl listener;
+    // Set through setModified(); consulted after each fetch to decide whether to buffer entries.
+    private boolean modified = false;
+    // Set by stop(); once true, hasNext() reports false and next() returns null.
+    private boolean done = false;
+
+    /**
+     * Creates a reader for the given feed URL and registers a fetcher event listener.
+     *
+     * @param url
+     *            textual URL of the RSS feed
+     * @throws MalformedURLException
+     *             if {@code url} cannot be parsed as a URL
+     */
+    public RSSRecordReader(String url) throws MalformedURLException {
+        feedUrl = new URL(url);
+        feedInfoCache = HashMapFeedInfoCache.getInstance();
+        fetcher = new HttpURLFeedFetcher(feedInfoCache);
+        listener = new FetcherEventListenerImpl(this, LOGGER);
+        fetcher.addFetcherEventListener(listener);
+    }
+
+    public boolean isModified() {
+        return modified;
+    }
+
+    public void setModified(boolean modified) {
+        this.modified = modified;
+    }
+
+    @Override
+    public void configure(Map<String, String> configurations) throws Exception {
+    }
+
+    @Override
+    public Class<SyndEntryImpl> getRecordClass() throws IOException {
+        return SyndEntryImpl.class;
+    }
+
+    @Override
+    public boolean hasNext() throws Exception {
+        return !done;
+    }
+
+    /**
+     * Returns the next feed entry wrapped in the shared record.
+     *
+     * @return the record, or {@code null} when the reader was stopped or no entry is available
+     * @throws IOException
+     *             wrapping any exception raised while obtaining the entry
+     */
+    @Override
+    public IRawRecord<SyndEntryImpl> next() throws IOException {
+        if (done) {
+            return null;
+        }
+        try {
+            final SyndEntryImpl entry = getNextRSSFeed();
+            if (entry != null) {
+                record.set(entry);
+                return record;
+            }
+            return null;
+        } catch (Exception e) {
+            throw new IOException(e);
+        }
+    }
+
+    @Override
+    public boolean stop() {
+        done = true;
+        return true;
+    }
+
+    /**
+     * Unregisters the fetcher event listener.
+     */
+    @Override
+    public void close() throws IOException {
+        fetcher.removeFetcherEventListener(listener);
+    }
+
+    /**
+     * Takes the head of the buffer, fetching the feed first when the buffer is empty.
+     *
+     * @return the next buffered entry, or {@code null} if the buffer is still empty after fetching
+     */
+    private SyndEntryImpl getNextRSSFeed() throws Exception {
+        if (rssFeedBuffer.isEmpty()) {
+            fetchFeed();
+        }
+        // poll() yields null on an empty queue, which is the "nothing available" answer.
+        return rssFeedBuffer.poll();
+    }
+
+    /**
+     * Retrieves the feed and, when the modified flag is set, appends all of its entries to the
+     * buffer.
+     */
+    @SuppressWarnings("unchecked")
+    private void fetchFeed() throws IllegalArgumentException, IOException, FeedException, FetcherException {
+        // The registered listener receives the fetcher events raised by this call.
+        SyndFeed feed = fetcher.retrieveFeed(feedUrl);
+        if (!modified) {
+            return;
+        }
+        if (LOGGER.isInfoEnabled()) {
+            LOGGER.info(feedUrl + " retrieved");
+            LOGGER.info(feedUrl + " has a title: " + feed.getTitle() + " and contains " + feed.getEntries().size()
+                    + " entries.");
+        }
+        List<? extends SyndEntryImpl> fetchedFeeds = feed.getEntries();
+        rssFeedBuffer.addAll(fetchedFeeds);
+    }
+}
+
+/**
+ * Fetcher listener used by {@link RSSRecordReader}. It logs every fetcher event and keeps the
+ * reader's {@code modified} flag in step with the outcome of the most recent fetch, so that the
+ * reader only buffers entries when the feed was actually retrieved.
+ */
+class FetcherEventListenerImpl implements FetcherListener {
+
+    private final RSSRecordReader reader;
+    private final Logger LOGGER;
+
+    /**
+     * @param reader
+     *            the reader whose modified flag is updated on fetcher events
+     * @param LOGGER
+     *            the logger that event messages are written to
+     */
+    public FetcherEventListenerImpl(RSSRecordReader reader, Logger LOGGER) {
+        this.reader = reader;
+        this.LOGGER = LOGGER;
+    }
+
+    /**
+     * Logs the event and updates the reader: a "retrieved" event marks the feed as modified, an
+     * "unchanged" event clears the flag, and a "polled" event is only logged.
+     *
+     * @see com.sun.syndication.fetcher.FetcherListener#fetcherEvent(com.sun.syndication.fetcher.FetcherEvent)
+     */
+    @Override
+    public void fetcherEvent(FetcherEvent event) {
+        String eventType = event.getEventType();
+        if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(eventType)) {
+            if (LOGGER.isInfoEnabled()) {
+                LOGGER.info("\tEVENT: Feed Polled. URL = " + event.getUrlString());
+            }
+        } else if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(eventType)) {
+            if (LOGGER.isInfoEnabled()) {
+                LOGGER.info("\tEVENT: Feed Retrieved. URL = " + event.getUrlString());
+            }
+            reader.setModified(true);
+        } else if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(eventType)) {
+            if (LOGGER.isInfoEnabled()) {
+                LOGGER.info("\tEVENT: Feed Unchanged. URL = " + event.getUrlString());
+            }
+            // The feed has not changed since the previous fetch: clear the flag so the reader
+            // does not buffer the same entries again.
+            reader.setModified(false);
+        }
+    }
+}



[19/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedAzureTwitterAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedAzureTwitterAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedAzureTwitterAdapterFactory.java
deleted file mode 100644
index fbde1b4..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedAzureTwitterAdapterFactory.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.FeedPolicyAccessor;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.dataset.adapter.PullBasedAzureTwitterAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public class PullBasedAzureTwitterAdapterFactory implements IFeedAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final String INGESTOR_LOCATIONS_KEY = "ingestor-locations";
-    private static final String PARTITIONS_KEY = "partitions";
-    private static final String TABLE_NAME_KEY = "table-name";
-    private static final String ACCOUNT_NAME_KEY = "account-name";
-    private static final String ACCOUNT_KEY_KEY = "account-key";
-
-    private ARecordType outputType;
-    private Map<String, String> configuration;
-    private String tableName;
-    private String azureAccountName;
-    private String azureAccountKey;
-    private String[] locations;
-    private String[] partitions;
-    private FeedPolicyAccessor ingestionPolicy;
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public String getName() {
-        return "azure_twitter";
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        String locationsStr = configuration.get(INGESTOR_LOCATIONS_KEY);
-        if (locationsStr == null) {
-            return null;
-        }
-        String[] locations = locationsStr.split(",");
-        return new AlgebricksAbsolutePartitionConstraint(locations);
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        return new PullBasedAzureTwitterAdapter(azureAccountName, azureAccountKey, tableName, partitions, configuration,
-                ctx, outputType);
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.configuration = configuration;
-        this.outputType = outputType;
-
-        tableName = configuration.get(TABLE_NAME_KEY);
-        if (tableName == null) {
-            throw new AsterixException("You must specify a valid table name");
-        }
-        azureAccountName = configuration.get(ACCOUNT_NAME_KEY);
-        azureAccountKey = configuration.get(ACCOUNT_KEY_KEY);
-        if (azureAccountName == null || azureAccountKey == null) {
-            throw new AsterixException("You must specify a valid Azure account name and key");
-        }
-
-        int nIngestLocations = 1;
-        String locationsStr = configuration.get(INGESTOR_LOCATIONS_KEY);
-        if (locationsStr != null) {
-            locations = locationsStr.split(",");
-            nIngestLocations = locations.length;
-        }
-
-        int nPartitions = 1;
-        String partitionsStr = configuration.get(PARTITIONS_KEY);
-        if (partitionsStr != null) {
-            partitions = partitionsStr.split(",");
-            nPartitions = partitions.length;
-        }
-
-        if (nIngestLocations != nPartitions) {
-            throw new AsterixException("Invalid adapter configuration: number of ingestion-locations ("
-                    + nIngestLocations + ") must be the same as the number of partitions (" + nPartitions + ")");
-        }
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
-    public FeedPolicyAccessor getIngestionPolicy() {
-        return ingestionPolicy;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedTwitterAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedTwitterAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedTwitterAdapterFactory.java
deleted file mode 100644
index 7d2dd73..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PullBasedTwitterAdapterFactory.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.dataset.adapter.PullBasedTwitterAdapter;
-import org.apache.asterix.external.util.TwitterUtil;
-import org.apache.asterix.external.util.TwitterUtil.SearchAPIConstants;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * Factory class for creating an instance of PullBasedTwitterAdapter.
- * This adapter provides the functionality of fetching tweets from Twitter service
- * via pull-based Twitter API.
- */
-public class PullBasedTwitterAdapterFactory implements IFeedAdapterFactory {
-    private static final long serialVersionUID = 1L;
-    private static final Logger LOGGER = Logger.getLogger(PullBasedTwitterAdapterFactory.class.getName());
-
-    public static final String PULL_BASED_TWITTER_ADAPTER_NAME = "pull_twitter";
-
-    private static final String DEFAULT_INTERVAL = "10"; // 10 seconds
-    private static final int INTAKE_CARDINALITY = 1; // degree of parallelism at intake stage
-
-    private ARecordType outputType;
-
-    private Map<String, String> configuration;
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        return new PullBasedTwitterAdapter(configuration, outputType, ctx);
-    }
-
-    @Override
-    public String getName() {
-        return PULL_BASED_TWITTER_ADAPTER_NAME;
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.outputType = outputType;
-        this.configuration = configuration;
-        TwitterUtil.initializeConfigurationWithAuthInfo(configuration);
-
-        if (configuration.get(SearchAPIConstants.QUERY) == null) {
-            throw new AsterixException(
-                    "parameter " + SearchAPIConstants.QUERY + " not specified as part of adaptor configuration");
-        }
-
-        String interval = configuration.get(SearchAPIConstants.INTERVAL);
-        if (interval != null) {
-            try {
-                Integer.parseInt(interval);
-            } catch (NumberFormatException nfe) {
-                throw new IllegalArgumentException(
-                        "parameter " + SearchAPIConstants.INTERVAL + " is defined incorrectly, expecting a number");
-            }
-        } else {
-            configuration.put(SearchAPIConstants.INTERVAL, DEFAULT_INTERVAL);
-            if (LOGGER.isLoggable(Level.WARNING)) {
-                LOGGER.warning(" Parameter " + SearchAPIConstants.INTERVAL + " not defined, using default ("
-                        + DEFAULT_INTERVAL + ")");
-            }
-        }
-
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return new AlgebricksCountPartitionConstraint(INTAKE_CARDINALITY);
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PushBasedTwitterAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PushBasedTwitterAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PushBasedTwitterAdapterFactory.java
deleted file mode 100644
index 5bfdbcf..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/PushBasedTwitterAdapterFactory.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.dataset.adapter.PushBasedTwitterAdapter;
-import org.apache.asterix.external.util.TwitterUtil;
-import org.apache.asterix.external.util.TwitterUtil.AuthenticationConstants;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public class PushBasedTwitterAdapterFactory implements IFeedAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final String NAME = "push_twitter";
-
-    private ARecordType outputType;
-
-    private Map<String, String> configuration;
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public String getName() {
-        return NAME;
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return new AlgebricksCountPartitionConstraint(1);
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        PushBasedTwitterAdapter twitterAdapter = new PushBasedTwitterAdapter(configuration, outputType, ctx);
-        return twitterAdapter;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.outputType = outputType;
-        this.configuration = configuration;
-        TwitterUtil.initializeConfigurationWithAuthInfo(configuration);
-        boolean requiredParamsSpecified = validateConfiguration(configuration);
-        if (!requiredParamsSpecified) {
-            StringBuilder builder = new StringBuilder();
-            builder.append("One or more parameters are missing from adapter configuration\n");
-            builder.append(AuthenticationConstants.OAUTH_CONSUMER_KEY + "\n");
-            builder.append(AuthenticationConstants.OAUTH_CONSUMER_SECRET + "\n");
-            builder.append(AuthenticationConstants.OAUTH_ACCESS_TOKEN + "\n");
-            builder.append(AuthenticationConstants.OAUTH_ACCESS_TOKEN_SECRET + "\n");
-            throw new Exception(builder.toString());
-        }
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
-    private boolean validateConfiguration(Map<String, String> configuration) {
-        String consumerKey = configuration.get(AuthenticationConstants.OAUTH_CONSUMER_KEY);
-        String consumerSecret = configuration.get(AuthenticationConstants.OAUTH_CONSUMER_SECRET);
-        String accessToken = configuration.get(AuthenticationConstants.OAUTH_ACCESS_TOKEN);
-        String tokenSecret = configuration.get(AuthenticationConstants.OAUTH_ACCESS_TOKEN_SECRET);
-
-        if (consumerKey == null || consumerSecret == null || accessToken == null || tokenSecret == null) {
-            return false;
-        }
-        return true;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/RSSFeedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/RSSFeedAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/RSSFeedAdapterFactory.java
deleted file mode 100644
index 4d893fe..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/RSSFeedAdapterFactory.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.FeedPolicyAccessor;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.dataset.adapter.RSSFeedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * Factory class for creating an instance of @see {RSSFeedAdapter}.
- * RSSFeedAdapter provides the functionality of fetching an RSS based feed.
- */
-public class RSSFeedAdapterFactory implements IFeedAdapterFactory {
-    private static final long serialVersionUID = 1L;
-    public static final String RSS_FEED_ADAPTER_NAME = "rss_feed";
-
-    public static final String KEY_RSS_URL = "url";
-    public static final String KEY_INTERVAL = "interval";
-
-    private Map<String, String> configuration;
-    private ARecordType outputType;
-    private List<String> feedURLs = new ArrayList<String>();
-    private FeedPolicyAccessor ingestionPolicy;
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        RSSFeedAdapter rssFeedAdapter = new RSSFeedAdapter(configuration, outputType, ctx);
-        return rssFeedAdapter;
-    }
-
-    @Override
-    public String getName() {
-        return "rss_feed";
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.configuration = configuration;
-        this.outputType = outputType;
-        String rssURLProperty = configuration.get(KEY_RSS_URL);
-        if (rssURLProperty == null) {
-            throw new IllegalArgumentException("no rss url provided");
-        }
-        initializeFeedURLs(rssURLProperty);
-        configurePartitionConstraints();
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return new AlgebricksCountPartitionConstraint(feedURLs.size());
-    }
-
-    private void initializeFeedURLs(String rssURLProperty) {
-        feedURLs.clear();
-        String[] feedURLProperty = rssURLProperty.split(",");
-        for (String feedURL : feedURLProperty) {
-            feedURLs.add(feedURL);
-        }
-    }
-
-    protected void configurePartitionConstraints() {
-
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
-    public FeedPolicyAccessor getIngestionPolicy() {
-        return ingestionPolicy;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/StreamBasedAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
deleted file mode 100644
index c7e582f..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.Map;
-import java.util.logging.Logger;
-
-import org.apache.asterix.external.util.INodeResolver;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-public abstract class StreamBasedAdapterFactory implements IAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-    protected static final Logger LOGGER = Logger.getLogger(StreamBasedAdapterFactory.class.getName());
-
-    protected static INodeResolver nodeResolver;
-
-    protected Map<String, String> configuration;
-    protected ITupleParserFactory parserFactory;
-
-    public abstract InputDataFormat getInputDataFormat();
-
-    protected void configureFormat(IAType sourceDatatype) throws Exception {
-        parserFactory = new AsterixTupleParserFactory(configuration, (ARecordType) sourceDatatype,
-                getInputDataFormat());
-
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IAdapterFactory.java
new file mode 100644
index 0000000..9539278
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IAdapterFactory.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Base interface for IGenericDatasetAdapterFactory and ITypedDatasetAdapterFactory.
+ * Acts as a marker interface indicating that the implementation provides functionality
+ * for creating an adapter.
+ */
+public interface IAdapterFactory extends Serializable {
+
+    /**
+     * Returns the display name corresponding to the Adapter type that is created by the factory.
+     * 
+     * @return the display name
+     */
+    public String getAlias();
+
+    /**
+     * Gets a list of partition constraints. A partition constraint can be a
+     * requirement to execute at a particular location or could be cardinality
+     * constraints indicating the number of instances that need to run in
+     * parallel. For example, an IDataSourceAdapter implementation written for data
+     * residing on the local file system of a node cannot run on any other node
+     * and thus has a location partition constraint. The location partition
+     * constraint can be expressed as a node IP address or a node controller id.
+     * In the former case, the IP address is translated to a node controller id
+     * running on the node with the given IP address.
+     */
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception;
+
+    /**
+     * Creates an instance of IDataSourceAdapter.
+     * 
+     * @param ctx the Hyracks task context
+     * @param partition
+     * @return An instance of IDataSourceAdapter.
+     * @throws Exception
+     */
+    public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception;
+
+    /**
+     * @param configuration
+     * @param outputType
+     * @throws Exception
+     */
+    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception;
+
+    /**
+     * Gets the record type associated with the output of the adapter
+     * 
+     * @return
+     */
+    public ARecordType getAdapterOutputType();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java
new file mode 100644
index 0000000..f5f47ec
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IDataFlowController {
+
+    /**
+     * Order of calls:
+     * 1. Constructor()
+     * 2. if record flow controller
+     * |-a. Set record reader
+     * |-b. Set record parser
+     * else
+     * |-a. Set stream parser
+     * 3. setTupleForwarder(forwarder)
+     * 4. configure(configuration,ctx)
+     * 5. start(writer)
+     */
+
+    public void start(IFrameWriter writer) throws HyracksDataException;
+
+    public boolean stop();
+
+    public boolean handleException(Throwable th);
+
+    public ITupleForwarder getTupleForwarder();
+
+    public void setTupleForwarder(ITupleForwarder forwarder);
+
+    public void configure(Map<String, String> configuration, IHyracksTaskContext ctx) throws IOException;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParser.java
new file mode 100644
index 0000000..a4a5a43
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParser.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.builders.IARecordBuilder;
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.builders.RecordBuilder;
+import org.apache.asterix.builders.UnorderedListBuilder;
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import org.apache.asterix.om.base.AMutableOrderedList;
+import org.apache.asterix.om.base.AMutableRecord;
+import org.apache.asterix.om.base.AMutableUnorderedList;
+import org.apache.asterix.om.base.IACursor;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.AUnorderedListType;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+
+public interface IDataParser {
+
+    /**
+     * @return The supported data sources
+     */
+    public DataSourceType getDataSourceType();
+
+    /**
+     * @param configuration
+     *            a set of configurations that comes from two sources.
+     *            1. The create adapter statement.
+     *            2. The query compiler.
+     * @param recordType
+     *            The expected record type
+     * @throws HyracksDataException
+     * @throws IOException
+     */
+    public void configure(Map<String, String> configuration, ARecordType recordType)
+            throws HyracksDataException, IOException;
+
+    /*
+     * The following two static methods are expensive. Right now, they are used by RSSFeeds and Twitter feed
+     * TODO: Get rid of them
+     */
+    public static void writeRecord(AMutableRecord record, DataOutput dataOutput, IARecordBuilder recordBuilder)
+            throws IOException, AsterixException {
+        ArrayBackedValueStorage fieldValue = new ArrayBackedValueStorage();
+        int numFields = record.getType().getFieldNames().length;
+        for (int pos = 0; pos < numFields; pos++) {
+            fieldValue.reset();
+            IAObject obj = record.getValueByPos(pos);
+            IDataParser.writeObject(obj, fieldValue.getDataOutput());
+            recordBuilder.addField(pos, fieldValue);
+        }
+        recordBuilder.write(dataOutput, true);
+    }
+
+    @SuppressWarnings("unchecked")
+    public static void writeObject(IAObject obj, DataOutput dataOutput) throws IOException, AsterixException {
+        switch (obj.getType().getTypeTag()) {
+            case RECORD: {
+                IARecordBuilder recordBuilder = new RecordBuilder();
+                recordBuilder.reset((ARecordType) obj.getType());
+                recordBuilder.init();
+                writeRecord((AMutableRecord) obj, dataOutput, recordBuilder);
+                break;
+            }
+
+            case ORDEREDLIST: {
+                OrderedListBuilder listBuilder = new OrderedListBuilder();
+                listBuilder.reset((AOrderedListType) ((AMutableOrderedList) obj).getType());
+                IACursor cursor = ((AMutableOrderedList) obj).getCursor();
+                ArrayBackedValueStorage listItemValue = new ArrayBackedValueStorage();
+                while (cursor.next()) {
+                    listItemValue.reset();
+                    IAObject item = cursor.get();
+                    writeObject(item, listItemValue.getDataOutput());
+                    listBuilder.addItem(listItemValue);
+                }
+                listBuilder.write(dataOutput, true);
+                break;
+            }
+
+            case UNORDEREDLIST: {
+                UnorderedListBuilder listBuilder = new UnorderedListBuilder();
+                listBuilder.reset((AUnorderedListType) ((AMutableUnorderedList) obj).getType());
+                IACursor cursor = ((AMutableUnorderedList) obj).getCursor();
+                ArrayBackedValueStorage listItemValue = new ArrayBackedValueStorage();
+                while (cursor.next()) {
+                    listItemValue.reset();
+                    IAObject item = cursor.get();
+                    writeObject(item, listItemValue.getDataOutput());
+                    listBuilder.addItem(listItemValue);
+                }
+                listBuilder.write(dataOutput, true);
+                break;
+            }
+
+            default:
+                AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(obj.getType()).serialize(obj,
+                        dataOutput);
+                break;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
new file mode 100644
index 0000000..5c3845c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.om.types.ARecordType;
+
+public interface IDataParserFactory extends Serializable {
+
+    /**
+     * @return The expected data source type {STREAM or RECORDS}
+     *         The data source type for a parser and a data source must match.
+     *         An instance of IDataParserFactory with RECORDS data source type must implement
+     *         {@code IRecordDataParserFactory<T>}.
+     *         An instance of IDataParserFactory with STREAM data source type must implement IStreamDataParserFactory.
+     * @throws AsterixException
+     */
+    public DataSourceType getDataSourceType() throws AsterixException;
+
+    /**
+     * Configure the data parser factory. The passed map contains key value pairs from the
+     * submitted AQL statement and any additional pairs added by the compiler
+     * @param configuration
+     */
+    public void configure(Map<String, String> configuration) throws Exception;
+
+    /**
+     * Set the record type expected to be produced by parsers created by this factory
+     * @param recordType
+     */
+    public void setRecordType(ARecordType recordType);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalDataSourceFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalDataSourceFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalDataSourceFactory.java
new file mode 100644
index 0000000..580ac99
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalDataSourceFactory.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+
+public interface IExternalDataSourceFactory extends Serializable {
+
+    /**
+     * The data source type indicates whether the data source produces a continuous stream or
+     * a set of records
+     * @author amoudi
+     */
+    public enum DataSourceType {
+        STREAM,
+        RECORDS
+    }
+
+    /**
+     * @return The data source type {STREAM or RECORDS}
+     */
+    public DataSourceType getDataSourceType();
+
+    /**
+     * Specifies on which locations this data source is expected to run.
+     * @return
+     * @throws Exception
+     */
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception;
+
+    /**
+     * Configure the data source factory. The passed map contains key value pairs from the
+     * submitted AQL statement and any additional pairs added by the compiler
+     * @param configuration
+     * @throws Exception
+     */
+    public void configure(Map<String, String> configuration) throws Exception;
+
+    /**
+     * Specify whether the external data source can be indexed
+     * @return
+     */
+    public boolean isIndexible();
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalFunction.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalFunction.java
new file mode 100755
index 0000000..9c25c7f
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalFunction.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+public interface IExternalFunction {
+
+    public void initialize(IFunctionHelper functionHelper) throws Exception;
+
+    public void deinitialize();
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalIndexer.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalIndexer.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalIndexer.java
new file mode 100644
index 0000000..0b4277e
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalIndexer.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.Serializable;
+
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+/**
+ * @author amoudi
+ *         This Interface represents the component responsible for adding record ids to tuples when indexing external data
+ */
+public interface IExternalIndexer extends Serializable {
+
+    /**
+     * This method is called by an indexible datasource when the external source reader has been updated.
+     * This gives the indexer a chance to update its reader-specific values (i.e. file name)
+     * @param reader
+     *        the new reader
+     * @throws Exception
+     */
+    public void reset(IRecordReader<?> reader) throws Exception;
+
+    /**
+     * This method is called by the dataflow controller with each tuple. The indexer is expected to append record ids to the tuple.
+     * @param tb
+     * @throws Exception
+     */
+    public void index(ArrayTupleBuilder tb) throws Exception;
+
+    /**
+     * This method returns the number of fields in the record id. It is used by tuple appender at the initialization step.
+     * @return
+     * @throws Exception
+     */
+    public int getNumberOfFields() throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalScalarFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalScalarFunction.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalScalarFunction.java
new file mode 100755
index 0000000..ecdb833
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IExternalScalarFunction.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+public interface IExternalScalarFunction extends IExternalFunction {
+
+    public void evaluate(IFunctionHelper functionHelper) throws Exception;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionFactory.java
new file mode 100755
index 0000000..5c5bcd0
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionFactory.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+public interface IFunctionFactory {
+
+    public IExternalFunction getExternalFunction();
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionHelper.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionHelper.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionHelper.java
new file mode 100755
index 0000000..ebd0757
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IFunctionHelper.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.library.java.JTypeTag;
+
+import java.io.IOException;
+
+public interface IFunctionHelper {
+
+    public IJObject getArgument(int index);
+
+    public IJObject getResultObject();
+
+    public void setResult(IJObject result) throws IOException, AsterixException;
+
+    public boolean isValidResult();
+
+    public IJObject getObject(JTypeTag jtypeTag);
+
+    public void reset();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexibleExternalDataSource.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexibleExternalDataSource.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexibleExternalDataSource.java
new file mode 100644
index 0000000..fe30b38
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexibleExternalDataSource.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.util.List;
+
+import org.apache.asterix.external.indexing.ExternalFile;
+
+public interface IIndexibleExternalDataSource extends IExternalDataSourceFactory {
+    public void setSnapshot(List<ExternalFile> files, boolean indexingOp) throws Exception;
+
+    /**
+     * Specify whether the external data source is configured for indexing
+     *
+     * @return
+     */
+    public boolean isIndexingOp();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingAdapterFactory.java
new file mode 100644
index 0000000..37cc1cf
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingAdapterFactory.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.util.List;
+
+import org.apache.asterix.external.indexing.ExternalFile;
+
+public interface IIndexingAdapterFactory extends IAdapterFactory {
+    public void setSnapshot(List<ExternalFile> files, boolean indexingOp); // takes the external file list and an indexing flag; declares no checked exception
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingDatasource.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingDatasource.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingDatasource.java
new file mode 100644
index 0000000..ed5e7b5
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IIndexingDatasource.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+public interface IIndexingDatasource {
+    public IExternalIndexer getIndexer(); // returns an IExternalIndexer; presumably the one given to setIndexer - TODO confirm
+    /** Supplies the IExternalIndexer to this data source. */
+    public void setIndexer(IExternalIndexer indexer);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProvider.java
new file mode 100644
index 0000000..3dabb29
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProvider.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.external.input.stream.AInputStream;
+
+public interface IInputStreamProvider {
+    public AInputStream getInputStream() throws Exception; // NOTE(review): unclear whether each call yields a new stream - confirm against implementations
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProviderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProviderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProviderFactory.java
new file mode 100644
index 0000000..3cc31dc
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IInputStreamProviderFactory.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+public interface IInputStreamProviderFactory extends IExternalDataSourceFactory {
+    /** Creates an IInputStreamProvider for the given task context and partition number. */
+    public IInputStreamProvider createInputStreamProvider(IHyracksTaskContext ctx, int partition) throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJListAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJListAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJListAccessor.java
new file mode 100644
index 0000000..70665db
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJListAccessor.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.external.library.java.JObjectPointableVisitor;
+import org.apache.asterix.om.pointables.AListVisitablePointable;
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.om.util.container.IObjectPool;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IJListAccessor {
+    /** Returns an IJObject for a list pointable, given the object pool, the list type and a pointable visitor. */
+    IJObject access(AListVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool, IAType listType,
+            JObjectPointableVisitor pointableVisitor) throws HyracksDataException;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObject.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObject.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObject.java
new file mode 100644
index 0000000..a88f47d
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObject.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.DataOutput;
+
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IJObject {
+    /** @return the ATypeTag of this object */
+    public ATypeTag getTypeTag();
+    /** @return this object as an IAObject */
+    public IAObject getIAObject();
+    /** Serializes this object to dataOutput; writeTypeTag presumably controls whether the type tag is written too (TODO confirm). */
+    public void serialize(DataOutput dataOutput, boolean writeTypeTag) throws HyracksDataException;
+    /** Resets this object; NOTE(review): presumably clears its state for reuse - confirm against implementations. */
+    public void reset() throws AlgebricksException;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObjectAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObjectAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObjectAccessor.java
new file mode 100644
index 0000000..7b10af1
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJObjectAccessor.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.om.pointables.base.IVisitablePointable;
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.om.util.container.IObjectPool;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IJObjectAccessor { // access(): returns an IJObject for the given pointable; despite its name, obj is the object pool
+    IJObject access(IVisitablePointable pointable, IObjectPool<IJObject, IAType> obj) throws HyracksDataException;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJRecordAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJRecordAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJRecordAccessor.java
new file mode 100644
index 0000000..08c5dde
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJRecordAccessor.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.external.library.java.JObjectPointableVisitor;
+import org.apache.asterix.external.library.java.JObjects.JRecord;
+import org.apache.asterix.om.pointables.ARecordVisitablePointable;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.om.util.container.IObjectPool;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IJRecordAccessor {
+    /** Returns a JRecord for a record pointable, given the object pool, the record type and a pointable visitor. */
+    public JRecord access(ARecordVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool,
+            ARecordType recordType, JObjectPointableVisitor pointableVisitor) throws HyracksDataException;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJType.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJType.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJType.java
new file mode 100644
index 0000000..9c0ebae
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IJType.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.ATypeTag;
+
+public interface IJType {
+    /** @return the ATypeTag of this object */
+    public ATypeTag getTypeTag();
+    /** @return this object as an IAObject */
+    public IAObject getIAObject();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupReaderFactory.java
new file mode 100644
index 0000000..c0baead
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupReaderFactory.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+public interface ILookupReaderFactory<T> extends IExternalDataSourceFactory { // T: upper bound on the record type of the readers it creates
+    public ILookupRecordReader<? extends T> createRecordReader(IHyracksTaskContext ctx, int partition,
+            ExternalFileIndexAccessor snapshotAccessor) throws Exception;
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupRecordReader.java
new file mode 100644
index 0000000..fd03a07
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/ILookupRecordReader.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.external.indexing.RecordId;
+
+public interface ILookupRecordReader<T> {
+    /** Configures the reader from a map of string key/value pairs. */
+    public void configure(Map<String, String> configuration) throws Exception;
+    /** @return the class of the records; NOTE(review): presumably the runtime class corresponding to T - confirm */
+    public Class<?> getRecordClass() throws IOException;
+    /** Reads the record identified by rid. IOException is redundant in the throws clause since Exception is also declared. */
+    public IRawRecord<T> read(RecordId rid) throws IOException, Exception;
+    /** Opens the reader; NOTE(review): presumably must be called before read() - confirm. */
+    public void open() throws IOException;
+    /** NOTE(review): presumably called when the lookup fails so the reader can clean up - confirm against callers. */
+    public void fail() throws IOException;
+    /** Closes the reader. */
+    public void close() throws IOException;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolver.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolver.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolver.java
new file mode 100644
index 0000000..831cd69
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolver.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+
+/**
+ * A policy for resolving a name to a node controller id.
+ */
+public interface INodeResolver {
+
+    /**
+     * Resolves a passed-in value to a node controller id.
+     *
+     * @param value
+     *            string to be resolved
+     * @return resolved result (a node controller id)
+     * @throws AsterixException presumably when the value cannot be resolved (TODO confirm against implementations)
+     */
+    public String resolveNode(String value) throws AsterixException;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolverFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolverFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolverFactory.java
new file mode 100644
index 0000000..ce49596
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/INodeResolverFactory.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+/**
+ * Factory for creating instances of {@link INodeResolver}.
+ *
+ * @see INodeResolver
+ */
+public interface INodeResolverFactory {
+
+    /**
+     * Creates an instance of {@link INodeResolver}.
+     *
+     * @return an instance of INodeResolver
+     */
+    public INodeResolver createNodeResolver();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRawRecord.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRawRecord.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRawRecord.java
new file mode 100644
index 0000000..92b500d
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRawRecord.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+/**
+ * This interface represents a raw record that has not been parsed yet.
+ * @param <T> the type of the Java object returned by {@link #get()}
+ */
+public interface IRawRecord<T> {
+    /**
+     * @return the bytes representing this record. This is intended to be used for passing raw records in frames and
+     *         performing lazy deserialization on them. If the record can't be serialized, this method returns null.
+     */
+    public byte[] getBytes();
+
+    /**
+     * @return the Java object of the record.
+     */
+    public T get();
+
+    /**
+     * @return the class of the record objects.
+     */
+    public Class<?> getRecordClass();
+
+    /**
+     * Resets the object to prepare it for another write operation.
+     */
+    public void reset();
+
+    /**
+     * @return the size of the valid bytes of the object. If the object can't be serialized, this method returns -1.
+     */
+    int size();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParser.java
new file mode 100644
index 0000000..cc24847
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParser.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.DataOutput;
+
+public interface IRecordDataParser<T> extends IDataParser {
+    /**
+     * Parses the given raw record.
+     * @param record the raw record to parse; its payload type may be any subtype of T
+     * @param out the DataOutput passed to the parser; presumably receives the parsed form (TODO confirm)
+     * @throws Exception any checked exception raised while parsing
+     */
+    public void parse(IRawRecord<? extends T> record, DataOutput out) throws Exception;
+
+    /**
+     * @return the record class
+     */
+    public Class<? extends T> getRecordClass();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParserFactory.java
new file mode 100644
index 0000000..993d947
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordDataParserFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.IOException;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IRecordDataParserFactory<T> extends IDataParserFactory { // creates IRecordDataParser<T> instances for a task context
+    public IRecordDataParser<T> createRecordParser(IHyracksTaskContext ctx)
+            throws HyracksDataException, AsterixException, IOException;
+    /** @return the record class, a Class object bounded by T */
+    public Class<? extends T> getRecordClass();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordFlowController.java
new file mode 100644
index 0000000..c3bdc56
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordFlowController.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+public interface IRecordFlowController<T> extends IDataFlowController {
+    /** Supplies the IRecordDataParser to this controller. */
+    public void setRecordParser(IRecordDataParser<T> dataParser);
+    /** Supplies the IRecordReader to this controller; unlike setRecordParser, this setter may throw a checked Exception. */
+    public void setRecordReader(IRecordReader<T> recordReader) throws Exception;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReader.java
new file mode 100644
index 0000000..019fe8f
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReader.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * This interface represents a record reader that reads data from an external source as a set of records.
+ * @param <T>
+ */
+public interface IRecordReader<T> extends Closeable {
+
+    /**
+     * Configure the reader with the set of key/value pairs passed by the compiler
+     * @param configuration
+     *        the set of key/value pairs
+     * @throws Exception
+     *         when the reader can't be configured (e.g. due to incorrect configuration or an unreachable source)
+     */
+    public void configure(Map<String, String> configuration) throws Exception;
+
+    /**
+     * @return true if the reader has more records remaining, false otherwise.
+     * @throws Exception
+     *         if an error takes place
+     */
+    public boolean hasNext() throws Exception;
+
+    /**
+     * @return the object representing the next record.
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public IRawRecord<T> next() throws IOException, InterruptedException;
+
+    /**
+     * @return the class of the Java objects representing the records. Used to check compatibility between readers and
+     *         parsers.
+     * @throws IOException
+     */
+    public Class<? extends T> getRecordClass() throws IOException;
+
+    /**
+     * Used to stop the reader from producing more records.
+     * @return true if the connection to the external source has been suspended, false otherwise.
+     */
+    public boolean stop();
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java
new file mode 100644
index 0000000..adb2602
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+public interface IRecordReaderFactory<T> extends IExternalDataSourceFactory {
+
+    public IRecordReader<? extends T> createRecordReader(IHyracksTaskContext ctx, int partition) throws Exception;
+
+    public Class<? extends T> getRecordClass();
+
+}



[17/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/NCFileSystemAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/NCFileSystemAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/NCFileSystemAdapter.java
deleted file mode 100644
index 64c62f7..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/NCFileSystemAdapter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.FileSplit;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-/**
- * Factory class for creating an instance of NCFileSystemAdapter. An
- * NCFileSystemAdapter reads external data residing on the local file system of
- * an NC.
- */
-public class NCFileSystemAdapter extends FileSystemBasedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private final FileSplit[] fileSplits;
-
-    public NCFileSystemAdapter(FileSplit[] fileSplits, ITupleParserFactory parserFactory, IAType atype,
-            IHyracksTaskContext ctx) throws HyracksDataException {
-        super(parserFactory, atype, ctx);
-        this.fileSplits = fileSplits;
-    }
-
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        FileSplit split = fileSplits[partition];
-        File inputFile = split.getLocalFile().getFile();
-        InputStream in;
-        try {
-            in = new FileInputStream(inputFile);
-            return in;
-        } catch (FileNotFoundException e) {
-            throw new IOException(e);
-        }
-    }
-
-    @Override
-    public String getFilename(int partition) {
-        final FileSplit fileSplit = fileSplits[partition];
-        return fileSplit.getNodeName() + ":" + fileSplit.getLocalFile().getFile().getPath();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAdapter.java
deleted file mode 100644
index d6b4ba7..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAdapter.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.external.dataset.adapter.IPullBasedFeedClient.InflowState;
-import org.apache.asterix.external.feeds.FeedPolicyEnforcer;
-import org.apache.asterix.external.feeds.IPullBasedFeedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.comm.IFrame;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-
-/**
- * Acts as an abstract class for all pull-based external data adapters. Captures
- * the common logic for obtaining bytes from an external source and packing them
- * into frames as tuples.
- */
-public abstract class PullBasedAdapter implements IPullBasedFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-    private static final Logger LOGGER = Logger.getLogger(PullBasedAdapter.class.getName());
-    private static final int timeout = 5; // seconds
-
-    protected ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(1);
-    protected IPullBasedFeedClient pullBasedFeedClient;
-    protected ARecordType adapterOutputType;
-    protected boolean continueIngestion = true;
-    protected Map<String, String> configuration;
-
-    private FrameTupleAppender appender;
-    private IFrame frame;
-    private long tupleCount = 0;
-    private final IHyracksTaskContext ctx;
-    private int frameTupleCount = 0;
-
-    protected FeedPolicyEnforcer policyEnforcer;
-
-    public FeedPolicyEnforcer getPolicyEnforcer() {
-        return policyEnforcer;
-    }
-
-    public void setFeedPolicyEnforcer(FeedPolicyEnforcer policyEnforcer) {
-        this.policyEnforcer = policyEnforcer;
-    }
-
-    public abstract IPullBasedFeedClient getFeedClient(int partition) throws Exception;
-
-    public PullBasedAdapter(Map<String, String> configuration, IHyracksTaskContext ctx) {
-        this.ctx = ctx;
-        this.configuration = configuration;
-    }
-
-    public long getIngestedRecordsCount() {
-        return tupleCount;
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        frame = new VSizeFrame(ctx);
-        appender = new FrameTupleAppender(frame);
-
-        pullBasedFeedClient = getFeedClient(partition);
-        InflowState inflowState = null;
-
-        while (continueIngestion) {
-            tupleBuilder.reset();
-            try {
-                // blocking call
-                inflowState = pullBasedFeedClient.nextTuple(tupleBuilder.getDataOutput(), timeout);
-                switch (inflowState) {
-                    case DATA_AVAILABLE:
-                        tupleBuilder.addFieldEndOffset();
-                        appendTupleToFrame(writer);
-                        frameTupleCount++;
-                        break;
-                    case NO_MORE_DATA:
-                        if (LOGGER.isLoggable(Level.INFO)) {
-                            LOGGER.info("Reached end of feed");
-                        }
-                        appender.flush(writer, true);
-                        tupleCount += frameTupleCount;
-                        frameTupleCount = 0;
-                        continueIngestion = false;
-                        break;
-                    case DATA_NOT_AVAILABLE:
-                        if (frameTupleCount > 0) {
-                            appender.flush(writer, true);
-                            tupleCount += frameTupleCount;
-                            frameTupleCount = 0;
-                        }
-                        if (LOGGER.isLoggable(Level.WARNING)) {
-                            LOGGER.warning("Timed out on obtaining data from pull based adapter. Trying again!");
-                        }
-                        break;
-                }
-
-            } catch (Exception failureException) {
-                try {
-                    failureException.printStackTrace();
-                    boolean continueIngestion = policyEnforcer.continueIngestionPostSoftwareFailure(failureException);
-                    if (continueIngestion) {
-                        tupleBuilder.reset();
-                        continue;
-                    } else {
-                        throw failureException;
-                    }
-                } catch (Exception recoveryException) {
-                    throw new Exception(recoveryException);
-                }
-            }
-        }
-    }
-
-    private void appendTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        if (!appender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
-                tupleBuilder.getSize())) {
-            appender.flush(writer, true);
-            if (!appender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
-                    tupleBuilder.getSize())) {
-                throw new IllegalStateException();
-            }
-        }
-    }
-
-    /**
-     * Discontinue the ingestion of data and end the feed.
-     * 
-     * @throws Exception
-     */
-    public void stop() throws Exception {
-        continueIngestion = false;
-    }
-
-    public Map<String, String> getConfiguration() {
-        return configuration;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureFeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureFeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureFeedClient.java
deleted file mode 100644
index 985399b..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureFeedClient.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.json.JSONException;
-import org.json.JSONObject;
-
-import com.microsoft.windowsazure.services.core.storage.CloudStorageAccount;
-import com.microsoft.windowsazure.services.table.client.CloudTableClient;
-import com.microsoft.windowsazure.services.table.client.TableConstants;
-import com.microsoft.windowsazure.services.table.client.TableQuery;
-import com.microsoft.windowsazure.services.table.client.TableQuery.Operators;
-import com.microsoft.windowsazure.services.table.client.TableQuery.QueryComparisons;
-import com.microsoft.windowsazure.services.table.client.TableServiceEntity;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.library.java.JObjects.ByteArrayAccessibleInputStream;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.util.ResettableByteArrayOutputStream;
-import org.apache.asterix.runtime.operators.file.ADMDataParser;
-
-public class PullBasedAzureFeedClient implements IPullBasedFeedClient {
-    private static final Logger LOGGER = Logger.getLogger(PullBasedAzureFeedClient.class.getName());
-
-    private final String tableName;
-    private final ARecordType outputType;
-    private final CloudTableClient ctc;
-    private final TableQuery<? extends TableServiceEntity> tableQuery;
-    private Iterator<? extends TableServiceEntity> entityIt;
-
-    private final Pattern arrayPattern = Pattern.compile("\\[(?<vals>.*)\\]");
-    private final Pattern int32Pattern = Pattern.compile(":(?<int>\\d+)(,|})");
-    private final Pattern doubleWithEndingZeroPattern = Pattern.compile("\\d+\\.(?<zero>0)(,|})");
-
-    private final ResettableByteArrayOutputStream rbaos;
-    private final DataOutputStream dos;
-    private final ADMDataParser adp;
-    private final ByteArrayAccessibleInputStream baais;
-
-    public PullBasedAzureFeedClient(CloudStorageAccount csa, ARecordType outputType, String tableName, String lowKey,
-            String highKey) throws AsterixException {
-        this.tableName = tableName;
-        this.outputType = outputType;
-        this.tableQuery = configureTableQuery(tableName, lowKey, highKey);
-        this.ctc = csa.createCloudTableClient();
-        rbaos = new ResettableByteArrayOutputStream();
-        dos = new DataOutputStream(rbaos);
-        baais = new ByteArrayAccessibleInputStream(rbaos.getByteArray(), 0, 0);
-        adp = new ADMDataParser();
-        adp.initialize(baais, outputType, false);
-    }
-
-    private TableQuery<? extends TableServiceEntity> configureTableQuery(String tableName, String lowKey, String highKey) {
-        TableQuery<? extends TableServiceEntity> baseTQ = TableQuery.from(tableName, classFromString(tableName));
-        if (lowKey != null && highKey != null) {
-            String lowKeyPredicate = TableQuery.generateFilterCondition(TableConstants.PARTITION_KEY,
-                    QueryComparisons.GREATER_THAN_OR_EQUAL, lowKey);
-            String highKeyPredicate = TableQuery.generateFilterCondition(TableConstants.PARTITION_KEY,
-                    QueryComparisons.LESS_THAN_OR_EQUAL, highKey);
-            String partitionPredicate = TableQuery.combineFilters(lowKeyPredicate, Operators.AND, highKeyPredicate);
-            return baseTQ.where(partitionPredicate);
-        }
-
-        return baseTQ;
-    }
-
-    private Class<? extends TableServiceEntity> classFromString(String tableName) {
-        return tableName.equals("Postings") ? AzureTweetEntity.class : AzureTweetMetadataEntity.class;
-    }
-
-    @Override
-    public InflowState nextTuple(DataOutput dataOutput, int timeout) throws AsterixException {
-        if (entityIt == null) {
-            entityIt = ctc.execute(tableQuery).iterator();
-        }
-
-        boolean moreTweets = entityIt.hasNext();
-        if (moreTweets) {
-            String json = null;
-            try {
-                json = getJSONString();
-                byte[] jsonBytes = json.getBytes(StandardCharsets.UTF_8);
-                rbaos.reset();
-                dos.write(jsonBytes, 0, jsonBytes.length);
-                dos.flush();
-                baais.setContent(rbaos.getByteArray(), 0, jsonBytes.length);
-                adp.initialize(baais, outputType, false);
-                adp.parse(dataOutput);
-            } catch (Exception e) {
-                if (json != null) {
-                    if (LOGGER.isLoggable(Level.SEVERE)) {
-                        LOGGER.severe("Record in error: " + json);
-                    }
-                }
-                e.printStackTrace();
-                throw new AsterixException(e);
-            }
-        }
-        return moreTweets ? InflowState.DATA_AVAILABLE : InflowState.NO_MORE_DATA;
-    }
-
-    private String getJSONString() throws JSONException {
-        if (tableName.equals("Postings")) {
-            AzureTweetEntity tweet = (AzureTweetEntity) entityIt.next();
-            JSONObject tjo = new JSONObject(tweet.getJSON().toString());
-            tjo.put("posting_id", tweet.getRowKey());
-            tjo.put("user_id", tweet.getPartitionKey());
-            tjo.remove("id");
-            JSONObject utjo = tjo.getJSONObject("user");
-            utjo.remove("id");
-            tjo.put("user", utjo);
-            return tjo.toString();
-        } else if (tableName.equals("PostingMetadata")) {
-            AzureTweetMetadataEntity tweetMD = (AzureTweetMetadataEntity) entityIt.next();
-            JSONObject tmdjo = new JSONObject();
-            tmdjo.put("posting_id", tweetMD.getRowKey());
-            tmdjo.put("user_id", tweetMD.getPartitionKey());
-            tmdjo.put("created_at", stripTillColon(tweetMD.getCreationTimestamp()).replaceAll("\"", ""));
-            tmdjo.put("posting_type", stripTillColon(tweetMD.getPostingType()));
-            List<Integer> productIdList = Arrays.asList(extractArray(tweetMD.getProductId()));
-            tmdjo.put("product_id", productIdList);
-            if (tweetMD.getEthnicity() != null) {
-                tmdjo.put("ethnicity", new JSONObject(stripTillColon(tweetMD.getEthnicity())));
-            }
-            if (tweetMD.getGender() != null) {
-                tmdjo.put("gender", new JSONObject(stripTillColon(tweetMD.getGender())));
-            }
-            if (tweetMD.getLocation() != null) {
-                String locStr = stripTillColon(tweetMD.getLocation());
-                Matcher m = int32Pattern.matcher(locStr);
-                while (m.find()) {
-                    locStr = locStr.replace(m.group("int"), m.group("int") + ".01");
-                }
-                m = doubleWithEndingZeroPattern.matcher(locStr);
-                while (m.find()) {
-                    locStr = locStr.replace(m.group("zero"), "01");
-                }
-                tmdjo.put("location", new JSONObject(locStr));
-            }
-            if (tweetMD.getSentiment() != null) {
-                tmdjo.put("sentiment", stripTillColon(tweetMD.getSentiment()));
-            }
-            return tmdjo.toString();
-        } else {
-            throw new IllegalArgumentException();
-        }
-    }
-
-    private String stripTillColon(String str) {
-        return str.substring(str.indexOf(':') + 1);
-    }
-
-    private Integer[] extractArray(String str) {
-        Matcher m = arrayPattern.matcher(str);
-        m.find();
-        String[] stringNums = m.group("vals").replaceAll("\\s", "").split(",");
-        Integer[] nums = new Integer[stringNums.length];
-        for (int i = 0; i < nums.length; ++i) {
-            nums[i] = Integer.parseInt(stringNums[i]);
-        }
-        return nums;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureTwitterAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureTwitterAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureTwitterAdapter.java
deleted file mode 100644
index e8cacde..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedAzureTwitterAdapter.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.net.URISyntaxException;
-import java.security.InvalidKeyException;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import com.microsoft.windowsazure.services.core.storage.CloudStorageAccount;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public class PullBasedAzureTwitterAdapter extends PullBasedAdapter implements IDatasourceAdapter {
-    private static final Logger LOGGER = Logger.getLogger(PullBasedAzureTwitterAdapter.class.getName());
-
-    private static final long serialVersionUID = 1L;
-
-    private final CloudStorageAccount csa;
-    private final String connectionString;
-    private final String azureAccountName;
-    private final String azureAccountKey;
-    private final ARecordType outputType;
-    private final String tableName;
-    private final boolean partitioned;
-
-    private String[] lowKeys;
-    private String[] highKeys;
-
-    public PullBasedAzureTwitterAdapter(String accountName, String accountKey, String tableName, String[] partitions,
-            Map<String, String> configuration, IHyracksTaskContext ctx, ARecordType outputType) throws AsterixException {
-        super(configuration, ctx);
-        this.outputType = outputType;
-        if (partitions != null) {
-            partitioned = true;
-            configurePartitions(partitions);
-        } else {
-            partitioned = false;
-        }
-        this.azureAccountName = accountName;
-        this.azureAccountKey = accountKey;
-        this.tableName = tableName;
-
-        connectionString = "DefaultEndpointsProtocol=http;" + "AccountName=" + azureAccountName + ";AccountKey="
-                + azureAccountKey + ";";
-        try {
-            csa = CloudStorageAccount.parse(connectionString);
-        } catch (InvalidKeyException | URISyntaxException e) {
-            throw new AsterixException("You must specify a valid Azure account name and key", e);
-        }
-    }
-
-    private void configurePartitions(String[] partitions) {
-        lowKeys = new String[partitions.length];
-        highKeys = new String[partitions.length];
-        for (int i = 0; i < partitions.length; ++i) {
-            String[] loHi = partitions[i].split(":");
-            lowKeys[i] = loHi[0];
-            highKeys[i] = loHi[1];
-            if (LOGGER.isLoggable(Level.INFO)) {
-                LOGGER.info("Partition " + i + " configured for keys " + lowKeys[i] + " to " + highKeys[i]);
-            }
-        }
-    }
-
-    @Override
-    public IPullBasedFeedClient getFeedClient(int partition) throws Exception {
-        if (partitioned) {
-            return new PullBasedAzureFeedClient(csa, outputType, tableName, lowKeys[partition], highKeys[partition]);
-        }
-        return new PullBasedAzureFeedClient(csa, outputType, tableName, null, null);
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PULL;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        return false;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterAdapter.java
deleted file mode 100644
index 90281b7..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterAdapter.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.CounterTimerTupleForwardPolicy;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * An adapter that provides the functionality of receiving tweets from the
- * Twitter service in the form of ADM formatted records.
- */
-public class PullBasedTwitterAdapter extends ClientBasedFeedAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final int DEFAULT_BATCH_SIZE = 5;
-
-    private ARecordType recordType;
-    private PullBasedTwitterFeedClient tweetClient;
-
-    @Override
-    public IFeedClient getFeedClient(int partition) {
-        return tweetClient;
-    }
-
-    public PullBasedTwitterAdapter(Map<String, String> configuration, ARecordType recordType, IHyracksTaskContext ctx)
-            throws AsterixException {
-        super(configuration, ctx);
-        tweetClient = new PullBasedTwitterFeedClient(ctx, recordType, this);
-    }
-
-    public ARecordType getAdapterOutputType() {
-        return recordType;
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PULL;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        return true;
-    }
-
-    @Override
-    public ITupleForwardPolicy getTupleParserPolicy() {
-        configuration.put(ITupleForwardPolicy.PARSER_POLICY,
-                ITupleForwardPolicy.TupleForwardPolicyType.COUNTER_TIMER_EXPIRED.name());
-        String propValue = configuration.get(CounterTimerTupleForwardPolicy.BATCH_SIZE);
-        if (propValue == null) {
-            configuration.put(CounterTimerTupleForwardPolicy.BATCH_SIZE, "" + DEFAULT_BATCH_SIZE);
-        }
-        return AsterixTupleParserFactory.getTupleParserPolicy(configuration);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterFeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterFeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterFeedClient.java
deleted file mode 100644
index 8b5e1e1..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PullBasedTwitterFeedClient.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.List;
-import java.util.Map;
-
-import twitter4j.Query;
-import twitter4j.QueryResult;
-import twitter4j.Status;
-import twitter4j.Twitter;
-import twitter4j.TwitterException;
-import org.apache.asterix.dataflow.data.nontagged.serde.ARecordSerializerDeserializer;
-import org.apache.asterix.external.util.TweetProcessor;
-import org.apache.asterix.external.util.TwitterUtil;
-import org.apache.asterix.external.util.TwitterUtil.SearchAPIConstants;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * An implementation of @see {PullBasedFeedClient} for the Twitter service. The
- * feed client fetches data from Twitter service by sending request at regular
- * (configurable) interval.
- */
-public class PullBasedTwitterFeedClient extends FeedClient {
-
-    private String keywords;
-    private Query query;
-    private Twitter twitter;
-    private int requestInterval = 5; // seconds
-    private QueryResult result;
-
-    private ARecordType recordType;
-    private int nextTweetIndex = 0;
-    private long lastTweetIdReceived = 0;
-    private TweetProcessor tweetProcessor;
-
-    public PullBasedTwitterFeedClient(IHyracksTaskContext ctx, ARecordType recordType, PullBasedTwitterAdapter adapter) {
-        this.twitter = TwitterUtil.getTwitterService(adapter.getConfiguration());
-        this.recordType = recordType;
-        this.tweetProcessor = new TweetProcessor(recordType);
-        this.recordSerDe = new ARecordSerializerDeserializer(recordType);
-        this.mutableRecord = tweetProcessor.getMutableRecord();
-        this.initialize(adapter.getConfiguration());
-    }
-
-    public ARecordType getRecordType() {
-        return recordType;
-    }
-
-    @Override
-    public InflowState retrieveNextRecord() throws Exception {
-        Status tweet;
-        tweet = getNextTweet();
-        if (tweet == null) {
-            return InflowState.DATA_NOT_AVAILABLE;
-        }
-
-        tweetProcessor.processNextTweet(tweet);
-        return InflowState.DATA_AVAILABLE;
-    }
-
-    private void initialize(Map<String, String> params) {
-        this.keywords = (String) params.get(SearchAPIConstants.QUERY);
-        this.requestInterval = Integer.parseInt((String) params.get(SearchAPIConstants.INTERVAL));
-        this.query = new Query(keywords);
-        this.query.setCount(100);
-    }
-
-    private Status getNextTweet() throws TwitterException, InterruptedException {
-        if (result == null || nextTweetIndex >= result.getTweets().size()) {
-            Thread.sleep(1000 * requestInterval);
-            query.setSinceId(lastTweetIdReceived);
-            result = twitter.search(query);
-            nextTweetIndex = 0;
-        }
-        if (result != null && !result.getTweets().isEmpty()) {
-            List<Status> tw = result.getTweets();
-            Status tweet = tw.get(nextTweetIndex++);
-            if (lastTweetIdReceived < tweet.getId()) {
-                lastTweetIdReceived = tweet.getId();
-            }
-            return tweet;
-        } else {
-            return null;
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterAdapter.java
deleted file mode 100644
index 01839d3..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterAdapter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.CounterTimerTupleForwardPolicy;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public class PushBasedTwitterAdapter extends ClientBasedFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final int DEFAULT_BATCH_SIZE = 50;
-
-    private PushBasedTwitterFeedClient tweetClient;
-
-    public PushBasedTwitterAdapter(Map<String, String> configuration, ARecordType recordType, IHyracksTaskContext ctx) throws AsterixException {
-        super(configuration, ctx);
-        this.configuration = configuration;
-        this.tweetClient = new PushBasedTwitterFeedClient(ctx, recordType, this);
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PUSH;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        return true;
-    }
-
-    @Override
-    public IFeedClient getFeedClient(int partition) throws Exception {
-        return tweetClient;
-    }
-
-    @Override
-    public ITupleForwardPolicy getTupleParserPolicy() {
-        configuration.put(ITupleForwardPolicy.PARSER_POLICY,
-                ITupleForwardPolicy.TupleForwardPolicyType.COUNTER_TIMER_EXPIRED.name());
-        String propValue = configuration.get(CounterTimerTupleForwardPolicy.BATCH_SIZE);
-        if (propValue == null) {
-            configuration.put(CounterTimerTupleForwardPolicy.BATCH_SIZE, "" + DEFAULT_BATCH_SIZE);
-        }
-        return AsterixTupleParserFactory.getTupleParserPolicy(configuration);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterFeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterFeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterFeedClient.java
deleted file mode 100644
index bb40ac9..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/PushBasedTwitterFeedClient.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.concurrent.LinkedBlockingQueue;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.dataflow.data.nontagged.serde.ARecordSerializerDeserializer;
-import org.apache.asterix.external.util.TweetProcessor;
-import org.apache.asterix.external.util.TwitterUtil;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-import twitter4j.FilterQuery;
-import twitter4j.StallWarning;
-import twitter4j.Status;
-import twitter4j.StatusDeletionNotice;
-import twitter4j.StatusListener;
-import twitter4j.TwitterStream;
-
-/**
- * An implementation of @see {PullBasedFeedClient} for the Twitter service. The
- * feed client fetches data from Twitter service by sending request at regular
- * (configurable) interval.
- */
-public class PushBasedTwitterFeedClient extends FeedClient {
-
-    private ARecordType recordType;
-    private TweetProcessor tweetProcessor;
-    private LinkedBlockingQueue<Status> inputQ;
-
-    public PushBasedTwitterFeedClient(IHyracksTaskContext ctx, ARecordType recordType, PushBasedTwitterAdapter adapter)
-            throws AsterixException {
-        this.recordType = recordType;
-        this.tweetProcessor = new TweetProcessor(recordType);
-        this.recordSerDe = new ARecordSerializerDeserializer(recordType);
-        this.mutableRecord = tweetProcessor.getMutableRecord();
-        this.inputQ = new LinkedBlockingQueue<Status>();
-        TwitterStream twitterStream = TwitterUtil.getTwitterStream(adapter.getConfiguration());
-        twitterStream.addListener(new TweetListener(inputQ));
-        FilterQuery query = TwitterUtil.getFilterQuery(adapter.getConfiguration());
-        if (query != null) {
-            twitterStream.filter(query);
-        } else {
-            twitterStream.sample();
-        }
-    }
-
-    public ARecordType getRecordType() {
-        return recordType;
-    }
-
-    private class TweetListener implements StatusListener {
-
-        private LinkedBlockingQueue<Status> inputQ;
-
-        public TweetListener(LinkedBlockingQueue<Status> inputQ) {
-            this.inputQ = inputQ;
-        }
-
-        @Override
-        public void onStatus(Status tweet) {
-            inputQ.add(tweet);
-        }
-
-        @Override
-        public void onException(Exception arg0) {
-
-        }
-
-        @Override
-        public void onDeletionNotice(StatusDeletionNotice arg0) {
-        }
-
-        @Override
-        public void onScrubGeo(long arg0, long arg1) {
-        }
-
-        @Override
-        public void onStallWarning(StallWarning arg0) {
-        }
-
-        @Override
-        public void onTrackLimitationNotice(int arg0) {
-        }
-    }
-
-    @Override
-    public InflowState retrieveNextRecord() throws Exception {
-        Status tweet = inputQ.take();
-        tweetProcessor.processNextTweet(tweet);
-        return InflowState.DATA_AVAILABLE;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedAdapter.java
deleted file mode 100644
index 69cd82c..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedAdapter.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * RSSFeedAdapter provides the functionality of fetching an RSS based feed.
- */
-public class RSSFeedAdapter extends ClientBasedFeedAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private static final String KEY_RSS_URL = "url";
-
-    private List<String> feedURLs = new ArrayList<String>();
-    private String id_prefix = "";
-
-    private IFeedClient rssFeedClient;
-
-    private ARecordType recordType;
-
-    public RSSFeedAdapter(Map<String, String> configuration, ARecordType recordType, IHyracksTaskContext ctx)
-            throws AsterixException {
-        super(configuration, ctx);
-        id_prefix = ctx.getJobletContext().getApplicationContext().getNodeId();
-        this.recordType = recordType;
-        reconfigure(configuration);
-    }
-
-    private void initializeFeedURLs(String rssURLProperty) {
-        feedURLs.clear();
-        String[] feedURLProperty = rssURLProperty.split(",");
-        for (String feedURL : feedURLProperty) {
-            feedURLs.add(feedURL);
-        }
-    }
-
-    protected void reconfigure(Map<String, String> arguments) {
-        String rssURLProperty = configuration.get(KEY_RSS_URL);
-        if (rssURLProperty != null) {
-            initializeFeedURLs(rssURLProperty);
-        }
-    }
-
-    @Override
-    public IFeedClient getFeedClient(int partition) throws Exception {
-        if (rssFeedClient == null) {
-            rssFeedClient = new RSSFeedClient(this, feedURLs.get(partition), id_prefix);
-        }
-        return rssFeedClient;
-    }
-
-    public ARecordType getRecordType() {
-        return recordType;
-    }
-
-    @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PULL;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        return false;
-    }
-
-    @Override
-    public ITupleForwardPolicy getTupleParserPolicy() {
-        return AsterixTupleParserFactory.getTupleParserPolicy(configuration);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedClient.java
deleted file mode 100644
index 0b0d0fb..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/RSSFeedClient.java
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Queue;
-
-import com.sun.syndication.feed.synd.SyndEntryImpl;
-import com.sun.syndication.feed.synd.SyndFeed;
-import com.sun.syndication.fetcher.FeedFetcher;
-import com.sun.syndication.fetcher.FetcherEvent;
-import com.sun.syndication.fetcher.FetcherListener;
-import com.sun.syndication.fetcher.impl.FeedFetcherCache;
-import com.sun.syndication.fetcher.impl.HashMapFeedInfoCache;
-import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
-
-import org.apache.asterix.om.base.AMutableRecord;
-import org.apache.asterix.om.base.AMutableString;
-import org.apache.asterix.om.base.IAObject;
-import org.apache.asterix.om.types.ARecordType;
-
-/**
- * An implementation of @see {PullBasedFeedClient} responsible for
- * fetching from an RSS feed source at regular interval.
- */
-@SuppressWarnings("rawtypes")
-public class RSSFeedClient extends FeedClient {
-
-    private long id = 0;
-    private String idPrefix;
-    private boolean feedModified = false;
-
-    private Queue<SyndEntryImpl> rssFeedBuffer = new LinkedList<SyndEntryImpl>();
-
-    IAObject[] mutableFields;
-
-    private final FeedFetcherCache feedInfoCache;
-    private final FeedFetcher fetcher;
-    private final FetcherEventListenerImpl listener;
-    private final URL feedUrl;
-    private ARecordType recordType;
-    String[] tupleFieldValues;
-
-    public boolean isFeedModified() {
-        return feedModified;
-    }
-
-    public void setFeedModified(boolean feedModified) {
-        this.feedModified = feedModified;
-    }
-
-    public RSSFeedClient(RSSFeedAdapter adapter, String feedURL, String id_prefix) throws MalformedURLException {
-        this.idPrefix = id_prefix;
-        this.feedUrl = new URL(feedURL);
-        feedInfoCache = HashMapFeedInfoCache.getInstance();
-        fetcher = new HttpURLFeedFetcher(feedInfoCache);
-        listener = new FetcherEventListenerImpl(this);
-        fetcher.addFetcherEventListener(listener);
-        mutableFields = new IAObject[] { new AMutableString(null), new AMutableString(null), new AMutableString(null),
-                new AMutableString(null) };
-        recordType = adapter.getRecordType();
-        mutableRecord = new AMutableRecord(recordType, mutableFields);
-        tupleFieldValues = new String[recordType.getFieldNames().length];
-    }
-
-    @Override
-    public InflowState retrieveNextRecord() throws Exception {
-        SyndEntryImpl feedEntry = getNextRSSFeed();
-        if (feedEntry == null) {
-            return InflowState.DATA_NOT_AVAILABLE;
-        }
-        tupleFieldValues[0] = idPrefix + ":" + id;
-        tupleFieldValues[1] = feedEntry.getTitle();
-        tupleFieldValues[2] = feedEntry.getDescription().getValue();
-        tupleFieldValues[3] = feedEntry.getLink();
-        int numFields = recordType.getFieldNames().length;
-        for (int i = 0; i < numFields; i++) {
-            ((AMutableString) mutableFields[i]).setValue(tupleFieldValues[i]);
-            mutableRecord.setValueAtPos(i, mutableFields[i]);
-        }
-        id++;
-        return InflowState.DATA_AVAILABLE;
-    }
-
-    private SyndEntryImpl getNextRSSFeed() throws Exception {
-        if (rssFeedBuffer.isEmpty()) {
-            fetchFeed();
-        }
-        if (rssFeedBuffer.isEmpty()) {
-            return null;
-        } else {
-            return rssFeedBuffer.remove();
-        }
-    }
-
-    @SuppressWarnings("unchecked")
-    private void fetchFeed() {
-        try {
-            // Retrieve the feed.
-            // We will get a Feed Polled Event and then a
-            // Feed Retrieved event (assuming the feed is valid)
-            SyndFeed feed = fetcher.retrieveFeed(feedUrl);
-            if (feedModified) {
-                System.err.println(feedUrl + " retrieved");
-                System.err.println(feedUrl + " has a title: " + feed.getTitle() + " and contains "
-                        + feed.getEntries().size() + " entries.");
-
-                List fetchedFeeds = feed.getEntries();
-                rssFeedBuffer.addAll(fetchedFeeds);
-            }
-        } catch (Exception ex) {
-            System.out.println("ERROR: " + ex.getMessage());
-            ex.printStackTrace();
-        }
-    }
-
-}
-
-class FetcherEventListenerImpl implements FetcherListener {
-
-    private final IFeedClient feedClient;
-
-    public FetcherEventListenerImpl(IFeedClient feedClient) {
-        this.feedClient = feedClient;
-    }
-
-    /**
-     * @see com.sun.syndication.fetcher.FetcherListener#fetcherEvent(com.sun.syndication.fetcher.FetcherEvent)
-     */
-    public void fetcherEvent(FetcherEvent event) {
-        String eventType = event.getEventType();
-        if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(eventType)) {
-            System.err.println("\tEVENT: Feed Polled. URL = " + event.getUrlString());
-        } else if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(eventType)) {
-            System.err.println("\tEVENT: Feed Retrieved. URL = " + event.getUrlString());
-            ((RSSFeedClient) feedClient).setFeedModified(true);
-        } else if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(eventType)) {
-            System.err.println("\tEVENT: Feed Unchanged. URL = " + event.getUrlString());
-            ((RSSFeedClient) feedClient).setFeedModified(true);
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/StreamBasedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/StreamBasedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/StreamBasedAdapter.java
index b436177..3f10dc4 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/StreamBasedAdapter.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/StreamBasedAdapter.java
@@ -23,7 +23,7 @@ import java.io.InputStream;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
 import org.apache.asterix.om.types.IAType;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -31,7 +31,7 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.dataflow.std.file.ITupleParser;
 import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
 
-public abstract class StreamBasedAdapter implements IDatasourceAdapter {
+public abstract class StreamBasedAdapter implements IDataSourceAdapter {
 
     private static final long serialVersionUID = 1L;
 
@@ -43,8 +43,8 @@ public abstract class StreamBasedAdapter implements IDatasourceAdapter {
 
     protected final IAType sourceDatatype;
 
-    public StreamBasedAdapter(ITupleParserFactory parserFactory, IAType sourceDatatype, IHyracksTaskContext ctx, int partition)
-            throws HyracksDataException {
+    public StreamBasedAdapter(ITupleParserFactory parserFactory, IAType sourceDatatype, IHyracksTaskContext ctx,
+            int partition) throws HyracksDataException {
         this.tupleParser = parserFactory.createTupleParser(ctx);
         this.sourceDatatype = sourceDatatype;
     }
@@ -56,7 +56,8 @@ public abstract class StreamBasedAdapter implements IDatasourceAdapter {
             tupleParser.parse(in, writer);
         } else {
             if (LOGGER.isLoggable(Level.WARNING)) {
-                LOGGER.warning("Could not obtain input stream for parsing from adapter " + this + "[" + partition + "]");
+                LOGGER.warning(
+                        "Could not obtain input stream for parsing from adapter " + this + "[" + partition + "]");
             }
         }
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/feeds/IPullBasedFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/feeds/IPullBasedFeedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/feeds/IPullBasedFeedAdapter.java
deleted file mode 100644
index 62052af..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/feeds/IPullBasedFeedAdapter.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.feeds;
-
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-
-public interface IPullBasedFeedAdapter extends IFeedAdapter {
-
-    /**
-     * @return
-     */
-    public FeedPolicyEnforcer getPolicyEnforcer();
-
-    /**
-     * @param feedPolicyEnforcer
-     */
-    public void setFeedPolicyEnforcer(FeedPolicyEnforcer feedPolicyEnforcer);
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFile.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFile.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFile.java
index 3988f1a..533d119 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFile.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFile.java
@@ -40,6 +40,16 @@ public class ExternalFile implements Serializable, Comparable<ExternalFile> {
     private int fileNumber;
     private ExternalFilePendingOp pendingOp;
 
+    public ExternalFile() {
+        this.dataverseName = "";
+        this.datasetName = "";
+        this.fileNumber = -1;
+        this.fileName = "";
+        this.lastModefiedTime = new Date();
+        this.size = 0;
+        this.pendingOp = ExternalFilePendingOp.PENDING_NO_OP;
+    }
+
     public ExternalFile(String dataverseName, String datasetName, int fileNumber, String fileName,
             Date lastModefiedTime, long size, ExternalFilePendingOp pendingOp) {
         this.dataverseName = dataverseName;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFileIndexAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFileIndexAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFileIndexAccessor.java
index b10379b..d94db08 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFileIndexAccessor.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/ExternalFileIndexAccessor.java
@@ -24,7 +24,7 @@ import java.io.DataInputStream;
 import java.io.Serializable;
 import java.util.Date;
 
-import org.apache.asterix.external.indexing.operators.ExternalLoopkupOperatorDiscriptor;
+import org.apache.asterix.external.operators.ExternalLookupOperatorDescriptor;
 import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
 import org.apache.asterix.om.base.ADateTime;
 import org.apache.asterix.om.base.AInt64;
@@ -57,7 +57,7 @@ public class ExternalFileIndexAccessor implements Serializable {
     private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
     private static final long serialVersionUID = 1L;
     private ExternalBTreeDataflowHelper indexDataflowHelper;
-    private ExternalLoopkupOperatorDiscriptor opDesc;
+    private ExternalLookupOperatorDescriptor opDesc;
 
     private IHyracksTaskContext ctx;
     private ExternalBTree index;
@@ -72,39 +72,34 @@ public class ExternalFileIndexAccessor implements Serializable {
     private IIndexCursor fileIndexSearchCursor;
 
     public ExternalFileIndexAccessor(ExternalBTreeDataflowHelper indexDataflowHelper,
-            ExternalLoopkupOperatorDiscriptor opDesc) {
+            ExternalLookupOperatorDescriptor opDesc) {
         this.indexDataflowHelper = indexDataflowHelper;
         this.opDesc = opDesc;
     }
 
-    public void openIndex() throws HyracksDataException {
+    public void open() throws HyracksDataException {
         // Open the index and get the instance
         indexDataflowHelper.open();
         index = (ExternalBTree) indexDataflowHelper.getIndexInstance();
-        try {
-            // Create search key and search predicate objects
-            searchKey = new ArrayTupleReference();
-            searchKeyTupleBuilder = new ArrayTupleBuilder(FilesIndexDescription.FILE_KEY_SIZE);
-            searchKeyTupleBuilder.reset();
-            searchKeyTupleBuilder.addField(intSerde, currentFileNumber);
-            searchKey.reset(searchKeyTupleBuilder.getFieldEndOffsets(), searchKeyTupleBuilder.getByteArray());
-            searchCmp = BTreeUtils.getSearchMultiComparator(index.getComparatorFactories(), searchKey);
-            searchPredicate = new RangePredicate(searchKey, searchKey, true, true, searchCmp, searchCmp);
+        // Create search key and search predicate objects
+        searchKey = new ArrayTupleReference();
+        searchKeyTupleBuilder = new ArrayTupleBuilder(FilesIndexDescription.FILE_KEY_SIZE);
+        searchKeyTupleBuilder.reset();
+        searchKeyTupleBuilder.addField(intSerde, currentFileNumber);
+        searchKey.reset(searchKeyTupleBuilder.getFieldEndOffsets(), searchKeyTupleBuilder.getByteArray());
+        searchCmp = BTreeUtils.getSearchMultiComparator(index.getComparatorFactories(), searchKey);
+        searchPredicate = new RangePredicate(searchKey, searchKey, true, true, searchCmp, searchCmp);
 
-            // create the accessor  and the cursor using the passed version
-            ISearchOperationCallback searchCallback = opDesc.getSearchOpCallbackFactory()
-                    .createSearchOperationCallback(indexDataflowHelper.getResourceID(), ctx);
-            fileIndexAccessor = index.createAccessor(searchCallback, indexDataflowHelper.getVersion());
-            fileIndexSearchCursor = fileIndexAccessor.createSearchCursor(false);
-        } catch (Exception e) {
-            indexDataflowHelper.close();
-            throw new HyracksDataException(e);
-        }
+        // create the accessor  and the cursor using the passed version
+        ISearchOperationCallback searchCallback = opDesc.getSearchOpCallbackFactory()
+                .createSearchOperationCallback(indexDataflowHelper.getResourceID(), ctx);
+        fileIndexAccessor = index.createAccessor(searchCallback, indexDataflowHelper.getVersion());
+        fileIndexSearchCursor = fileIndexAccessor.createSearchCursor(false);
     }
 
-    public void searchForFile(int fileNumber, ExternalFile file) throws Exception {
+    public void lookup(int fileId, ExternalFile file) throws Exception {
         // Set search parameters
-        currentFileNumber.setValue(fileNumber);
+        currentFileNumber.setValue(fileId);
         searchKeyTupleBuilder.reset();
         searchKeyTupleBuilder.addField(intSerde, currentFileNumber);
         searchKey.reset(searchKeyTupleBuilder.getFieldEndOffsets(), searchKeyTupleBuilder.getByteArray());
@@ -122,14 +117,14 @@ public class ExternalFileIndexAccessor implements Serializable {
             ByteArrayInputStream stream = new ByteArrayInputStream(serRecord, recordStartOffset, recordLength);
             DataInput in = new DataInputStream(stream);
             ARecord externalFileRecord = (ARecord) filesIndexDescription.EXTERNAL_FILE_RECORD_SERDE.deserialize(in);
-            setExternalFileFromARecord(externalFileRecord, file);
+            setFile(externalFileRecord, file);
         } else {
             // This should never happen
             throw new HyracksDataException("Was not able to find a file in the files index");
         }
     }
 
-    private void setExternalFileFromARecord(ARecord externalFileRecord, ExternalFile file) {
+    private void setFile(ARecord externalFileRecord, ExternalFile file) {
         file.setFileName(
                 ((AString) externalFileRecord.getValueByPos(FilesIndexDescription.EXTERNAL_FILE_NAME_FIELD_INDEX))
                         .getStringValue());
@@ -140,11 +135,13 @@ public class ExternalFileIndexAccessor implements Serializable {
                         .getChrononTime())));
     }
 
-    public void closeIndex() throws HyracksDataException {
-        try {
-            fileIndexSearchCursor.close();
-        } finally {
-            indexDataflowHelper.close();
+    public void close() throws HyracksDataException {
+        if (index != null) {
+            try {
+                fileIndexSearchCursor.close();
+            } finally {
+                indexDataflowHelper.close();
+            }
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileIndexTupleTranslator.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileIndexTupleTranslator.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileIndexTupleTranslator.java
new file mode 100644
index 0000000..fa22179
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileIndexTupleTranslator.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import java.io.IOException;
+
+import org.apache.asterix.builders.RecordBuilder;
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import org.apache.asterix.om.base.AMutableDateTime;
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.asterix.om.base.AMutableInt64;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+@SuppressWarnings("unchecked")
+public class FileIndexTupleTranslator {
+    private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
+    private ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(
+            filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFieldCount());
+    private RecordBuilder recordBuilder = new RecordBuilder();
+    private ArrayBackedValueStorage fieldValue = new ArrayBackedValueStorage();
+    private AMutableInt32 aInt32 = new AMutableInt32(0);
+    private AMutableInt64 aInt64 = new AMutableInt64(0);
+    private AMutableString aString = new AMutableString(null);
+    private AMutableDateTime aDateTime = new AMutableDateTime(0);
+    private ISerializerDeserializer<IAObject> stringSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ASTRING);
+    private ISerializerDeserializer<IAObject> dateTimeSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ADATETIME);
+    private ISerializerDeserializer<IAObject> longSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT64);
+    private ArrayTupleReference tuple = new ArrayTupleReference();
+
+    public ITupleReference getTupleFromFile(ExternalFile file) throws IOException, AsterixException {
+        tupleBuilder.reset();
+        //File Number
+        aInt32.setValue(file.getFileNumber());
+        filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFields()[0].serialize(aInt32,
+                tupleBuilder.getDataOutput());
+        tupleBuilder.addFieldEndOffset();
+
+        //File Record
+        recordBuilder.reset(filesIndexDescription.EXTERNAL_FILE_RECORD_TYPE);
+        // write field 0 (File Name)
+        fieldValue.reset();
+        aString.setValue(file.getFileName());
+        stringSerde.serialize(aString, fieldValue.getDataOutput());
+        recordBuilder.addField(0, fieldValue);
+
+        //write field 1 (File Size)
+        fieldValue.reset();
+        aInt64.setValue(file.getSize());
+        longSerde.serialize(aInt64, fieldValue.getDataOutput());
+        recordBuilder.addField(1, fieldValue);
+
+        //write field 2 (File Mod Date)
+        fieldValue.reset();
+        aDateTime.setValue(file.getLastModefiedTime().getTime());
+        dateTimeSerde.serialize(aDateTime, fieldValue.getDataOutput());
+        recordBuilder.addField(2, fieldValue);
+
+        //write the record
+        recordBuilder.write(tupleBuilder.getDataOutput(), true);
+        tupleBuilder.addFieldEndOffset();
+        tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
+        return tuple;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileOffsetIndexer.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileOffsetIndexer.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileOffsetIndexer.java
new file mode 100644
index 0000000..932aece
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/FileOffsetIndexer.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import java.io.IOException;
+
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.reader.HDFSRecordReader;
+import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.asterix.om.base.AMutableInt64;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class FileOffsetIndexer implements IExternalIndexer {
+
+    private static final long serialVersionUID = 1L;
+    public static final int NUM_OF_FIELDS = 2;
+    protected AMutableInt32 fileNumber = new AMutableInt32(0);
+    protected AMutableInt64 offset = new AMutableInt64(0);
+    protected RecordReader<?, Writable> recordReader;
+
+    @SuppressWarnings("unchecked")
+    private ISerializerDeserializer<IAObject> intSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT32);
+    @SuppressWarnings("unchecked")
+    private ISerializerDeserializer<IAObject> longSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT64);
+
+    @Override
+    public void reset(IRecordReader<?> reader) throws IOException {
+        //TODO: Make it more generic since we can't assume it is always going to be HDFS records.
+        @SuppressWarnings("unchecked")
+        HDFSRecordReader<?, Writable> hdfsReader = (HDFSRecordReader<?, Writable>) reader;
+        fileNumber.setValue(hdfsReader.getSnapshot().get(hdfsReader.getCurrentSplitIndex()).getFileNumber());
+        recordReader = hdfsReader.getReader();
+        offset.setValue(recordReader.getPos());
+    }
+
+    @Override
+    public void index(ArrayTupleBuilder tb) throws IOException {
+        tb.addField(intSerde, fileNumber);
+        tb.addField(longSerde, offset);
+        // Get position for next index(tb) call
+        offset.setValue(recordReader.getPos());
+    }
+
+    @Override
+    public int getNumberOfFields() {
+        return NUM_OF_FIELDS;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/IndexingScheduler.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/IndexingScheduler.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/IndexingScheduler.java
new file mode 100644
index 0000000..870a6df
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/IndexingScheduler.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Random;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hyracks.api.client.HyracksConnection;
+import org.apache.hyracks.api.client.IHyracksClientConnection;
+import org.apache.hyracks.api.client.NodeControllerInfo;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.HyracksException;
+import org.apache.hyracks.hdfs.scheduler.Scheduler;
+
+public class IndexingScheduler {
+    private static final Logger LOGGER = Logger.getLogger(Scheduler.class.getName());
+
+    /** a list of NCs */
+    private String[] NCs;
+
+    /** a map from ip to NCs */
+    private Map<String, List<String>> ipToNcMapping = new HashMap<String, List<String>>();
+
+    /** a map from the NC name to the index */
+    private Map<String, Integer> ncNameToIndex = new HashMap<String, Integer>();
+
+    /** a map from NC name to the NodeControllerInfo */
+    private Map<String, NodeControllerInfo> ncNameToNcInfos;
+
+    /**
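+     * The constructor of the scheduler.
+     * @param ipAddress the address passed to the Hyracks client connection
+     * @param port the port passed to the Hyracks client connection
+     * @throws HyracksException if connecting or loading the NC infos fails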
+     */
+    public IndexingScheduler(String ipAddress, int port) throws HyracksException {
+        try {
+            IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
+            this.ncNameToNcInfos = hcc.getNodeControllerInfos();
+            loadIPAddressToNCMap(ncNameToNcInfos);
+        } catch (Exception e) {
+            throw new HyracksException(e);
+        }
+    }
+
+    /**
+     * Set location constraints for a file scan operator with a list of file
+     * splits. It tries to assign splits to their local machines fairly.
+     * Locality is more important than fairness.
+     *
+     * @throws HyracksException
+     */
+    public String[] getLocationConstraints(InputSplit[] splits) throws HyracksException {
+        if (splits == null) {
+            /** deal with the case when the splits array is null */
+            return new String[] {};
+        }
+        int[] workloads = new int[NCs.length];
+        Arrays.fill(workloads, 0);
+        String[] locations = new String[splits.length];
+        Map<String, IntWritable> locationToNumOfSplits = new HashMap<String, IntWritable>();
+        /**
+         * upper bound is number of splits
+         */
+        int upperBoundSlots = splits.length;
+
+        try {
+            Random random = new Random(System.currentTimeMillis());
+            boolean scheduled[] = new boolean[splits.length];
+            Arrays.fill(scheduled, false);
+            /**
+             * scan the splits and build the popularity map
+             * give the machines with less local splits more scheduling priority
+             */
+            buildPopularityMap(splits, locationToNumOfSplits);
+            HashMap<String, Integer> locationToNumOfAssignement = new HashMap<String, Integer>();
+            for (String location : locationToNumOfSplits.keySet()) {
+                locationToNumOfAssignement.put(location, 0);
+            }
+            /**
+             * push data-local upper-bounds slots to each machine
+             */
+            scheduleLocalSlots(splits, workloads, locations, upperBoundSlots, random, scheduled, locationToNumOfSplits,
+                    locationToNumOfAssignement);
+
+            int dataLocalCount = 0;
+            for (int i = 0; i < scheduled.length; i++) {
+                if (scheduled[i] == true) {
+                    dataLocalCount++;
+                }
+            }
+            LOGGER.info("Data local rate: "
+                    + (scheduled.length == 0 ? 0.0 : ((float) dataLocalCount / (float) (scheduled.length))));
+            /**
+             * push non-data-local upper-bounds slots to each machine
+             */
+            locationToNumOfAssignement.clear();
+            for (String nc : NCs) {
+                locationToNumOfAssignement.put(nc, 0);
+            }
+            for (int i = 0; i < scheduled.length; i++) {
+                if (scheduled[i]) {
+                    locationToNumOfAssignement.put(locations[i], locationToNumOfAssignement.get(locations[i]) + 1);
+                }
+            }
+
+            scheduleNonLocalSlots(splits, workloads, locations, upperBoundSlots, scheduled, locationToNumOfAssignement);
+            return locations;
+        } catch (IOException e) {
+            throw new HyracksException(e);
+        }
+    }
+
+    /**
+     * Schedule non-local slots to each machine
+     *
+     * @param splits
+     *            The HDFS file splits.
+     * @param workloads
+     *            The current capacity of each machine.
+     * @param locations
+     *            The result schedule.
+     * @param slotLimit
+     *            The maximum slots of each machine.
+     * @param scheduled
+     *            Indicate which slot is scheduled.
+     * @param locationToNumOfAssignement
+     */
+    private void scheduleNonLocalSlots(InputSplit[] splits, final int[] workloads, String[] locations, int slotLimit,
+            boolean[] scheduled, final HashMap<String, Integer> locationToNumOfAssignement)
+                    throws IOException, UnknownHostException {
+
+        PriorityQueue<String> scheduleCadndiates = new PriorityQueue<String>(NCs.length, new Comparator<String>() {
+            @Override
+            public int compare(String s1, String s2) {
+                return locationToNumOfAssignement.get(s1).compareTo(locationToNumOfAssignement.get(s2));
+            }
+
+        });
+
+        for (String nc : NCs) {
+            scheduleCadndiates.add(nc);
+        }
+        /**
+         * schedule non-local file reads
+         */
+        for (int i = 0; i < splits.length; i++) {
+            /** if there is no data-local NC choice, choose the NC with the fewest assignments */
+            if (!scheduled[i]) {
+                String selectedNcName = scheduleCadndiates.remove();
+                if (selectedNcName != null) {
+                    int ncIndex = ncNameToIndex.get(selectedNcName);
+                    workloads[ncIndex]++;
+                    scheduled[i] = true;
+                    locations[i] = selectedNcName;
+                    locationToNumOfAssignement.put(selectedNcName, workloads[ncIndex]);
+                    scheduleCadndiates.add(selectedNcName);
+                }
+            }
+        }
+    }
+
+    /**
+     * Schedule data-local slots to each machine.
+     *
+     * @param splits
+     *            The HDFS file splits.
+     * @param workloads
+     *            The current capacity of each machine.
+     * @param locations
+     *            The result schedule.
+     * @param slots
+     *            The maximum slots of each machine.
+     * @param random
+     *            The random generator.
+     * @param scheduled
+     *            Indicate which slot is scheduled.
+     * @throws IOException
+     * @throws UnknownHostException
+     */
+    private void scheduleLocalSlots(InputSplit[] splits, int[] workloads, String[] locations, int slots, Random random,
+            boolean[] scheduled, final Map<String, IntWritable> locationToNumSplits,
+            final HashMap<String, Integer> locationToNumOfAssignement) throws IOException, UnknownHostException {
+        /** scheduling candidates will be ordered inversely according to their popularity */
+        PriorityQueue<String> scheduleCadndiates = new PriorityQueue<String>(3, new Comparator<String>() {
+            @Override
+            public int compare(String s1, String s2) {
+                int assignmentDifference = locationToNumOfAssignement.get(s1)
+                        .compareTo(locationToNumOfAssignement.get(s2));
+                if (assignmentDifference != 0) {
+                    return assignmentDifference;
+                }
+                return locationToNumSplits.get(s1).compareTo(locationToNumSplits.get(s2));
+            }
+
+        });
+
+        for (int i = 0; i < splits.length; i++) {
+            if (scheduled[i]) {
+                continue;
+            }
+            /**
+             * get the locations of the current split
+             */
+            String[] locs = splits[i].getLocations();
+            if (locs.length > 0) {
+                scheduleCadndiates.clear();
+                for (int j = 0; j < locs.length; j++) {
+                    scheduleCadndiates.add(locs[j]);
+                }
+
+                for (String candidate : scheduleCadndiates) {
+                    /**
+                     * get all the IP addresses from the name
+                     */
+                    InetAddress[] allIps = InetAddress.getAllByName(candidate);
+                    /**
+                     * iterate over all ips
+                     */
+                    for (InetAddress ip : allIps) {
+                        /**
+                         * if the node controller exists
+                         */
+                        if (ipToNcMapping.get(ip.getHostAddress()) != null) {
+                            /**
+                             * set the ncs
+                             */
+                            List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
+                            int arrayPos = random.nextInt(dataLocations.size());
+                            String nc = dataLocations.get(arrayPos);
+                            int pos = ncNameToIndex.get(nc);
+                            /**
+                             * check if the node is already full
+                             */
+                            if (workloads[pos] < slots) {
+                                locations[i] = nc;
+                                workloads[pos]++;
+                                scheduled[i] = true;
+                                locationToNumOfAssignement.put(candidate,
+                                        locationToNumOfAssignement.get(candidate) + 1);
+                                break;
+                            }
+                        }
+                    }
+                    /**
+                     * break the loop for data-locations if the schedule has
+                     * already been found
+                     */
+                    if (scheduled[i] == true) {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Scan the splits once and build a popularity map
+     *
+     * @param splits
+     *            the split array
+     * @param locationToNumOfSplits
+     *            the map to be built
+     * @throws IOException
+     */
+    private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits)
+            throws IOException {
+        for (InputSplit split : splits) {
+            String[] locations = split.getLocations();
+            for (String loc : locations) {
+                IntWritable locCount = locationToNumOfSplits.get(loc);
+                if (locCount == null) {
+                    locCount = new IntWritable(0);
+                    locationToNumOfSplits.put(loc, locCount);
+                }
+                locCount.set(locCount.get() + 1);
+            }
+        }
+    }
+
+    /**
+     * Load the IP-address-to-NC map from the NCNameToNCInfoMap
+     *
+     * @param ncNameToNcInfos
+     * @throws HyracksException
+     */
+    private void loadIPAddressToNCMap(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
+        try {
+            NCs = new String[ncNameToNcInfos.size()];
+            ipToNcMapping.clear();
+            ncNameToIndex.clear();
+            int i = 0;
+
+            /**
+             * build the IP address to NC map
+             */
+            for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
+                String ipAddr = InetAddress.getByAddress(entry.getValue().getNetworkAddress().lookupIpAddress())
+                        .getHostAddress();
+                List<String> matchedNCs = ipToNcMapping.get(ipAddr);
+                if (matchedNCs == null) {
+                    matchedNCs = new ArrayList<String>();
+                    ipToNcMapping.put(ipAddr, matchedNCs);
+                }
+                matchedNCs.add(entry.getKey());
+                NCs[i] = entry.getKey();
+                i++;
+            }
+
+            /**
+             * set up the NC name to index mapping
+             */
+            for (i = 0; i < NCs.length; i++) {
+                ncNameToIndex.put(NCs[i], i);
+            }
+        } catch (Exception e) {
+            throw new HyracksException(e);
+        }
+    }
+}



[08/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/DataGenerator.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/DataGenerator.java b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/DataGenerator.java
new file mode 100644
index 0000000..cbf488c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/DataGenerator.java
@@ -0,0 +1,1188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.runtime;
+
+import java.nio.CharBuffer;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.asterix.external.util.Datatypes;
+
+public class DataGenerator {
+
+    private RandomDateGenerator randDateGen;
+    private RandomNameGenerator randNameGen;
+    private RandomMessageGenerator randMessageGen;
+    private RandomLocationGenerator randLocationGen;
+    private Random random = new Random();
+    private TwitterUser twUser = new TwitterUser();
+    private TweetMessage twMessage = new TweetMessage();
+    private static final String DEFAULT_COUNTRY = "US";
+
+    /**
+     * Creates a generator and immediately configures its random sub-generators
+     * from the supplied initialization settings.
+     *
+     * @param info
+     *            date range and word lists used to set up the sub-generators
+     */
+    public DataGenerator(InitializationInfo info) {
+        this.initialize(info);
+    }
+
+    /**
+     * Iterator that produces randomly generated tweets for a limited amount of
+     * wall-clock time (or forever when the duration equals
+     * {@code TweetGenerator.INFINITY}).
+     * <p>
+     * The iterator hands out the enclosing generator's single reusable
+     * {@link TweetMessage} instance; every call to {@link #next()} overwrites the
+     * previously returned content.
+     */
+    public class TweetMessageIterator implements Iterator<TweetMessage> {
+
+        /** Number of seconds for which tweets are produced. */
+        private final int duration;
+        /** Creation time of this iterator, in milliseconds since the epoch. */
+        private final long startTime;
+        /** Identifier assigned to the most recently generated tweet. */
+        private int tweetId;
+
+        /**
+         * @param duration
+         *            how long (in seconds) the iterator keeps reporting more
+         *            elements, or {@code TweetGenerator.INFINITY} for no limit
+         */
+        public TweetMessageIterator(int duration) {
+            this.duration = duration;
+            this.startTime = System.currentTimeMillis();
+        }
+
+        /**
+         * @return {@code true} while the configured duration has not elapsed
+         */
+        @Override
+        public boolean hasNext() {
+            if (duration == TweetGenerator.INFINITY) {
+                return true;
+            }
+            // Multiply as long: an int product overflows for durations beyond ~24.8 days.
+            return System.currentTimeMillis() - startTime <= duration * 1000L;
+        }
+
+        /**
+         * Generates the next tweet with a fresh random user, message, location
+         * and timestamp.
+         *
+         * @return the shared, re-populated {@link TweetMessage} instance
+         */
+        @Override
+        public TweetMessage next() {
+            tweetId++;
+            getTwitterUser(null);
+            Message message = randMessageGen.getNextRandomMessage();
+            Point location = randLocationGen.getRandomPoint();
+            DateTime sendTime = randDateGen.getNextRandomDatetime();
+            twMessage.reset(tweetId, twUser, location.getLatitude(), location.getLongitude(), sendTime.toString(),
+                    message, DEFAULT_COUNTRY);
+            return twMessage;
+        }
+
+        /**
+         * Does nothing: generated tweets are not backed by a collection, so there
+         * is nothing to remove.
+         */
+        @Override
+        public void remove() {
+            // intentionally a no-op
+        }
+
+    }
+
+    public static class InitializationInfo {
+        public Date startDate = new Date(1, 1, 2005);
+        public Date endDate = new Date(8, 20, 2012);
+        public String[] lastNames = DataGenerator.lastNames;
+        public String[] firstNames = DataGenerator.firstNames;
+        public String[] vendors = DataGenerator.vendors;
+        public String[] jargon = DataGenerator.jargon;
+        public String[] org_list = DataGenerator.org_list;
+    }
+
+    /**
+     * (Re)creates the random date, name, location and message generators.
+     *
+     * @param info
+     *            supplies the date range, the name lists and the vendor/jargon
+     *            vocabularies
+     */
+    public void initialize(InitializationInfo info) {
+        // Bounding box from which random locations are drawn.
+        final int beginLat = 24;
+        final int endLat = 49;
+        final int beginLong = 66;
+        final int endLong = 98;
+
+        this.randDateGen = new RandomDateGenerator(info.startDate, info.endDate);
+        this.randNameGen = new RandomNameGenerator(info.firstNames, info.lastNames);
+        this.randLocationGen = new RandomLocationGenerator(beginLat, endLat, beginLong, endLong);
+        this.randMessageGen = new RandomMessageGenerator(info.vendors, info.jargon);
+    }
+
+    /**
+     * Re-populates the shared {@code twUser} instance with a randomly generated
+     * user. Despite its name this method returns nothing; the result is observed
+     * through the {@code twUser} field.
+     *
+     * @param usernameSuffix
+     *            text appended to the user's display name, or {@code null} to
+     *            use the generated name unchanged
+     */
+    public void getTwitterUser(String usernameSuffix) {
+        final String fullName = randNameGen.getRandomName();
+        final String[] parts = fullName.split(" ");
+        // Screen name = first name + last name + random connector/number suffix.
+        final String screenName = parts[0] + parts[1] + randNameGen.getRandomNameSuffix();
+        final String displayName = (usernameSuffix == null) ? fullName : fullName + usernameSuffix;
+
+        final int friends = random.nextInt(100); // draw from Zipfian
+        final int statuses = random.nextInt(500); // draw from Zipfian
+        final int followers = random.nextInt(200);
+        twUser.reset(screenName, friends, statuses, displayName, followers);
+    }
+
+    public static class RandomDateGenerator {
+
+        private final Date startDate;
+        private final Date endDate;
+        private final Random random = new Random();
+        private final int yearDifference;
+        private Date workingDate;
+        private Date recentDate;
+        private DateTime dateTime;
+
+        public RandomDateGenerator(Date startDate, Date endDate) {
+            this.startDate = startDate;
+            this.endDate = endDate;
+            this.yearDifference = endDate.getYear() - startDate.getYear() + 1;
+            this.workingDate = new Date();
+            this.recentDate = new Date();
+            this.dateTime = new DateTime();
+        }
+
+        public Date getStartDate() {
+            return startDate;
+        }
+
+        public Date getEndDate() {
+            return endDate;
+        }
+
+        public Date getNextRandomDate() {
+            int year = random.nextInt(yearDifference) + startDate.getYear();
+            int month;
+            int day;
+            if (year == endDate.getYear()) {
+                month = random.nextInt(endDate.getMonth()) + 1;
+                if (month == endDate.getMonth()) {
+                    day = random.nextInt(endDate.getDay()) + 1;
+                } else {
+                    day = random.nextInt(28) + 1;
+                }
+            } else {
+                month = random.nextInt(12) + 1;
+                day = random.nextInt(28) + 1;
+            }
+            workingDate.reset(month, day, year);
+            return workingDate;
+        }
+
+        public DateTime getNextRandomDatetime() {
+            Date randomDate = getNextRandomDate();
+            dateTime.reset(randomDate);
+            return dateTime;
+        }
+
+        public Date getNextRecentDate(Date date) {
+            int year = date.getYear()
+                    + (date.getYear() == endDate.getYear() ? 0 : random.nextInt(endDate.getYear() - date.getYear()));
+            int month = (year == endDate.getYear()) ? date.getMonth() == endDate.getMonth() ? (endDate.getMonth())
+                    : (date.getMonth() + random.nextInt(endDate.getMonth() - date.getMonth())) : random.nextInt(12) + 1;
+
+            int day = (year == endDate.getYear()) ? month == endDate.getMonth() ? date.getDay() == endDate.getDay() ? endDate
+                    .getDay() : date.getDay() + random.nextInt(endDate.getDay() - date.getDay())
+                    : random.nextInt(28) + 1
+                    : random.nextInt(28) + 1;
+            recentDate.reset(month, day, year);
+            return recentDate;
+        }
+
+    }
+
+    /**
+     * A {@link Date} extended with a time of day kept as pre-formatted
+     * two-character strings. The time defaults to {@code 10:10:00}.
+     */
+    public static class DateTime extends Date {
+
+        private String hour = "10";
+        private String min = "10";
+        private String sec = "00";
+
+        public DateTime(int month, int day, int year, String hour, String min, String sec) {
+            super(month, day, year);
+            this.hour = hour;
+            this.min = min;
+            this.sec = sec;
+        }
+
+        public DateTime() {
+        }
+
+        /**
+         * Overwrites both the date and the time portion of this instance.
+         */
+        public void reset(int month, int day, int year, String hour, String min, String sec) {
+            // Previously the month was passed to setDay(), so the month was never updated.
+            super.setMonth(month);
+            super.setDay(day);
+            super.setYear(year);
+            this.hour = hour;
+            this.min = min;
+            this.sec = sec;
+        }
+
+        public DateTime(Date date) {
+            super(date.getMonth(), date.getDay(), date.getYear());
+        }
+
+        /**
+         * Copies the month, day and year of {@code date}; the time of day is left
+         * unchanged.
+         */
+        public void reset(Date date) {
+            reset(date.getMonth(), date.getDay(), date.getYear());
+        }
+
+        /**
+         * @param date
+         *            source of the month, day and year
+         * @param hour
+         *            hour of day, rendered with at least two digits
+         * @param min
+         *            minute, rendered with at least two digits
+         * @param sec
+         *            second, rendered with at least two digits
+         */
+        public DateTime(Date date, int hour, int min, int sec) {
+            super(date.getMonth(), date.getDay(), date.getYear());
+            this.hour = twoDigits(hour);
+            this.min = twoDigits(min);
+            this.sec = twoDigits(sec);
+        }
+
+        /**
+         * Left-pads values below ten with a zero. The former inline expression
+         * {@code (v < 10) ? "0" : "" + v} dropped the digit itself for such values
+         * because string concatenation binds tighter than the conditional operator.
+         */
+        private static String twoDigits(int value) {
+            return (value < 10) ? "0" + value : "" + value;
+        }
+
+        /**
+         * @return the value as a double-quoted {@code "yyyy-MM-ddThh:mm:ss"} string
+         */
+        @Override
+        public String toString() {
+            StringBuilder builder = new StringBuilder();
+            builder.append("\"");
+            builder.append(super.getYear());
+            builder.append("-");
+            builder.append(super.getMonth() < 10 ? "0" + super.getMonth() : "" + super.getMonth());
+            builder.append("-");
+            builder.append(super.getDay() < 10 ? "0" + super.getDay() : "" + super.getDay());
+            builder.append("T");
+            builder.append(hour + ":" + min + ":" + sec);
+            builder.append("\"");
+            return builder.toString();
+        }
+    }
+
+    /**
+     * Reusable character buffer holding the text of a generated message together
+     * with the topics the text refers to.
+     */
+    public static class Message {
+
+        /** Initial size of the backing array; it grows when longer text is stored. */
+        private static final int INITIAL_CAPACITY = 500;
+
+        private char[] message = new char[INITIAL_CAPACITY];
+        private List<String> referredTopics;
+        private int length;
+
+        /**
+         * @param m
+         *            characters of the message; they are copied
+         * @param referredTopics
+         *            topics mentioned by the message; the list is stored by
+         *            reference, not copied
+         */
+        public Message(char[] m, List<String> referredTopics) {
+            ensureCapacity(m.length);
+            System.arraycopy(m, 0, message, 0, m.length);
+            length = m.length;
+            this.referredTopics = referredTopics;
+        }
+
+        public Message() {
+            referredTopics = new ArrayList<String>();
+            length = 0;
+        }
+
+        public List<String> getReferredTopics() {
+            return referredTopics;
+        }
+
+        /**
+         * Replaces the content with {@code length} characters of {@code m} starting
+         * at {@code offset}.
+         *
+         * @param referredTopics
+         *            topics mentioned by the new text; stored by reference
+         */
+        public void reset(char[] m, int offset, int length, List<String> referredTopics) {
+            ensureCapacity(length);
+            System.arraycopy(m, offset, message, 0, length);
+            this.length = length;
+            this.referredTopics = referredTopics;
+        }
+
+        /**
+         * Makes sure the backing array can hold {@code required} characters. The
+         * old content is not preserved because every caller overwrites it from
+         * index 0. Without this, text longer than the initial capacity made the
+         * copy fail with an index-out-of-bounds error.
+         */
+        private void ensureCapacity(int required) {
+            if (required > message.length) {
+                message = new char[Math.max(required, message.length * 2)];
+            }
+        }
+
+        /** @return number of valid characters currently stored */
+        public int getLength() {
+            return length;
+        }
+
+        /**
+         * @param index
+         *            position inside the backing array; not checked against
+         *            {@link #getLength()}
+         */
+        public char charAt(int index) {
+            return message[index];
+        }
+
+    }
+
+    /**
+     * Mutable latitude/longitude pair.
+     */
+    public static class Point {
+
+        private float latitude;
+        private float longitude;
+
+        /** Creates a point whose coordinates are both zero. */
+        public Point() {
+        }
+
+        public Point(float latitude, float longitude) {
+            reset(latitude, longitude);
+        }
+
+        /** Overwrites both coordinates. */
+        public void reset(float latitude, float longitude) {
+            this.latitude = latitude;
+            this.longitude = longitude;
+        }
+
+        public float getLatitude() {
+            return this.latitude;
+        }
+
+        public float getLongitude() {
+            return this.longitude;
+        }
+
+        /**
+         * @return the point rendered as {@code point("<latitude>,<longitude>")}
+         */
+        public String toString() {
+            return "point(\"" + latitude + "," + longitude + "\")";
+        }
+    }
+
+    /**
+     * Builds random person names from lists of first and last names.
+     */
+    public static class RandomNameGenerator {
+
+        private String[] firstNames;
+        private String[] lastNames;
+
+        private final Random random = new Random();
+
+        /** Separators that may start a generated name suffix. */
+        private final String[] connectors = { "_", "#", "$", "@" };
+
+        public RandomNameGenerator(String[] firstNames, String[] lastNames) {
+            this.firstNames = firstNames;
+            this.lastNames = lastNames;
+        }
+
+        /**
+         * @return a randomly chosen first name and a randomly chosen last name,
+         *         separated by a single space
+         */
+        public String getRandomName() {
+            final String first = firstNames[random.nextInt(firstNames.length)];
+            final String last = lastNames[random.nextInt(lastNames.length)];
+            return first + " " + last;
+        }
+
+        /**
+         * @return a random connector followed by a random number in {@code [0, 1000)}
+         */
+        public String getRandomNameSuffix() {
+            final String connector = connectors[random.nextInt(connectors.length)];
+            final int number = random.nextInt(1000);
+            return connector + number;
+        }
+    }
+
+    /**
+     * Thin facade that feeds vendor and jargon vocabularies into a
+     * {@link MessageTemplate} and exposes its messages.
+     */
+    public static class RandomMessageGenerator {
+
+        private final MessageTemplate messageTemplate;
+
+        /**
+         * @param vendors
+         *            vendor names that may appear in messages
+         * @param jargon
+         *            jargon terms that may appear in messages
+         */
+        public RandomMessageGenerator(String[] vendors, String[] jargon) {
+            this.messageTemplate = new MessageTemplate(copyToList(vendors), copyToList(jargon));
+        }
+
+        /** Copies the array elements, in order, into a new mutable list. */
+        private static List<String> copyToList(String[] words) {
+            List<String> copy = new ArrayList<String>();
+            for (int i = 0; i < words.length; i++) {
+                copy.add(words[i]);
+            }
+            return copy;
+        }
+
+        /** @return the next message produced by the underlying template */
+        public Message getNextRandomMessage() {
+            return messageTemplate.getNextMessage();
+        }
+    }
+
+    /**
+     * Shared vocabulary and random source for message templates.
+     */
+    public static class AbstractMessageTemplate {
+
+        protected final Random random = new Random();
+
+        // Verbs, chosen according to the sentiment of the message.
+        protected String[] positiveVerbs = { "like", "love" };
+        protected String[] negativeVerbs = { "dislike", "hate", "can't stand" };
+
+        // Adjectives, chosen according to the sentiment of the message.
+        // NOTE: "postiveAdjectives" is misspelt, but subclasses refer to it by this name.
+        protected String[] negativeAdjectives = { "horrible", "bad", "terrible", "OMG" };
+        protected String[] postiveAdjectives = { "good", "awesome", "amazing", "mind-blowing" };
+
+        // Filler words placed between the vendor and the jargon term.
+        protected String[] otherWords = { "the", "its" };
+    }
+
+    /**
+     * Generates sentences of the form
+     * {@code " <verb> <vendor> <the|its> <jargon> is <adjective>[emoticon]"}.
+     * <p>
+     * A single {@link Message} and a single topic list are reused across calls,
+     * so a previously returned message is overwritten by the next call.
+     */
+    public static class MessageTemplate extends AbstractMessageTemplate {
+
+        private List<String> vendors;
+        private List<String> jargon;
+        private CharBuffer buffer;
+        private List<String> referredTopics;
+        private Message message = new Message();
+
+        public MessageTemplate(List<String> vendors, List<String> jargon) {
+            this.vendors = vendors;
+            this.jargon = jargon;
+            this.buffer = CharBuffer.allocate(2500);
+            this.referredTopics = new ArrayList<String>();
+        }
+
+        /**
+         * Composes the next random sentence.
+         *
+         * @return the shared message instance holding the new text and the vendor
+         *         and jargon term it mentions
+         */
+        public Message getNextMessage() {
+            // Rewind the buffer to position 0 with the full 2500-char capacity available.
+            buffer.clear();
+            referredTopics.clear();
+
+            // The sentiment decides which verb/adjective lists and which emoticon apply.
+            final boolean positive = random.nextBoolean();
+            final String[] verbs = positive ? positiveVerbs : negativeVerbs;
+            final String[] adjectives = positive ? postiveAdjectives : negativeAdjectives;
+            final String verb = verbs[random.nextInt(verbs.length)];
+            final String adjective = adjectives[random.nextInt(adjectives.length)];
+
+            buffer.put(" ").put(verb).put(" ");
+
+            final String vendor = vendors.get(random.nextInt(vendors.size()));
+            referredTopics.add(vendor);
+            buffer.put(vendor).put(" ").put(otherWords[random.nextInt(otherWords.length)]).put(" ");
+
+            final String jargonTerm = jargon.get(random.nextInt(jargon.size()));
+            referredTopics.add(jargonTerm);
+            buffer.put(jargonTerm).put(" is ").put(adjective);
+
+            // Roughly every second message ends with an emoticon.
+            if (random.nextBoolean()) {
+                buffer.put(positive ? ":)" : ":(");
+            }
+
+            buffer.flip();
+            message.reset(buffer.array(), 0, buffer.remaining(), referredTopics);
+            return message;
+        }
+    }
+
+    /**
+     * Random sampling helpers.
+     */
+    public static class RandomUtil {
+
+        public static Random random = new Random();
+
+        /**
+         * Picks {@code k} distinct integers from the inclusive range
+         * {@code [0, n]}, in random order.
+         *
+         * @param k
+         *            number of values to pick
+         * @param n
+         *            largest value that may be picked
+         * @return an array of {@code k} distinct values
+         * @throws IllegalArgumentException
+         *             if more values are requested than the range contains;
+         *             without this check the sampling loop would never terminate
+         */
+        public static int[] getKFromN(int k, int n) {
+            // Compare as long so that n + 1 cannot overflow.
+            if (k > 0 && (long) k > (long) n + 1L) {
+                throw new IllegalArgumentException(
+                        "Cannot pick " + k + " distinct values from the range [0, " + n + "]");
+            }
+            int[] result = new int[k];
+            int cnt = 0;
+            HashSet<Integer> values = new HashSet<Integer>();
+            while (cnt < k) {
+                int val = random.nextInt(n + 1);
+                // add() reports whether the value is new, so duplicates are simply redrawn.
+                if (values.add(val)) {
+                    result[cnt++] = val;
+                }
+            }
+            return result;
+        }
+    }
+
+    /**
+     * Draws random points from a latitude/longitude bounding box. The same
+     * {@link Point} instance is returned, with new coordinates, on every call.
+     */
+    public static class RandomLocationGenerator {
+
+        private Random random = new Random();
+
+        private final int beginLat;
+        private final int endLat;
+        private final int beginLong;
+        private final int endLong;
+
+        private Point point;
+
+        public RandomLocationGenerator(int beginLat, int endLat, int beginLong, int endLong) {
+            this.beginLat = beginLat;
+            this.endLat = endLat;
+            this.beginLong = beginLong;
+            this.endLong = endLong;
+            this.point = new Point();
+        }
+
+        /**
+         * Each coordinate is a whole number from {@code [begin, end)} plus a random
+         * number of hundredths.
+         *
+         * @return the shared point holding the new coordinates
+         */
+        public Point getRandomPoint() {
+            final int latWhole = beginLat + random.nextInt(endLat - beginLat);
+            final int latHundredths = random.nextInt(100);
+            final int longWhole = beginLong + random.nextInt(endLong - beginLong);
+            final int longHundredths = random.nextInt(100);
+
+            final float latitude = latWhole + latHundredths / 100f;
+            final float longitude = longWhole + longHundredths / 100f;
+
+            point.reset(latitude, longitude);
+            return point;
+        }
+
+    }
+
+    public static class TweetMessage {
+
+        private static final String[] DEFAULT_FIELDS = new String[] { TweetFields.TWEETID, TweetFields.USER,
+                TweetFields.LATITUDE, TweetFields.LONGITUDE, TweetFields.MESSAGE_TEXT, TweetFields.CREATED_AT,
+                TweetFields.COUNTRY };
+
+        private int id;
+        private TwitterUser user;
+        private double latitude;
+        private double longitude;
+        private String created_at;
+        private Message messageText;
+        private String country;
+
+        public static final class TweetFields {
+            public static final String TWEETID = "id";
+            public static final String USER = "user";
+            public static final String LATITUDE = "latitude";
+            public static final String LONGITUDE = "longitude";
+            public static final String MESSAGE_TEXT = "message_text";
+            public static final String CREATED_AT = "created_at";
+            public static final String COUNTRY = "country";
+
+        }
+
+        public TweetMessage() {
+        }
+
+        public TweetMessage(int tweetid, TwitterUser user, double latitude, double longitude, String created_at,
+                Message messageText, String country) {
+            this.id = tweetid;
+            this.user = user;
+            this.latitude = latitude;
+            this.longitude = longitude;
+            this.created_at = created_at;
+            this.messageText = messageText;
+            this.country = country;
+        }
+
+        public void reset(int tweetid, TwitterUser user, double latitude, double longitude, String created_at,
+                Message messageText, String country) {
+            this.id = tweetid;
+            this.user = user;
+            this.latitude = latitude;
+            this.longitude = longitude;
+            this.created_at = created_at;
+            this.messageText = messageText;
+            this.country = country;
+        }
+
+        public String getAdmEquivalent(String[] fields) {
+            if (fields == null) {
+                fields = DEFAULT_FIELDS;
+            }
+            StringBuilder builder = new StringBuilder();
+            builder.append("{");
+            for (String field : fields) {
+                switch (field) {
+                    case Datatypes.Tweet.ID:
+                        appendFieldName(builder, Datatypes.Tweet.ID);
+                        builder.append("int64(\"" + id + "\")");
+                        break;
+                    case Datatypes.Tweet.USER:
+                        appendFieldName(builder, Datatypes.Tweet.USER);
+                        builder.append(user);
+                        break;
+                    case Datatypes.Tweet.LATITUDE:
+                        appendFieldName(builder, Datatypes.Tweet.LATITUDE);
+                        builder.append(latitude);
+                        break;
+                    case Datatypes.Tweet.LONGITUDE:
+                        appendFieldName(builder, Datatypes.Tweet.LONGITUDE);
+                        builder.append(longitude);
+                        break;
+                    case Datatypes.Tweet.MESSAGE:
+                        appendFieldName(builder, Datatypes.Tweet.MESSAGE);
+                        builder.append("\"");
+                        for (int i = 0; i < messageText.getLength(); i++) {
+                            builder.append(messageText.charAt(i));
+                        }
+                        builder.append("\"");
+                        break;
+                    case Datatypes.Tweet.CREATED_AT:
+                        appendFieldName(builder, Datatypes.Tweet.CREATED_AT);
+                        builder.append(created_at);
+                        break;
+                    case Datatypes.Tweet.COUNTRY:
+                        appendFieldName(builder, Datatypes.Tweet.COUNTRY);
+                        builder.append("\"" + country + "\"");
+                        break;
+                }
+                builder.append(",");
+            }
+            builder.deleteCharAt(builder.length() - 1);
+            builder.append("}");
+            return builder.toString();
+        }
+
+        private void appendFieldName(StringBuilder builder, String fieldName) {
+            builder.append("\"" + fieldName + "\":");
+        }
+
+        public int getTweetid() {
+            return id;
+        }
+
+        public void setTweetid(int tweetid) {
+            this.id = tweetid;
+        }
+
+        public TwitterUser getUser() {
+            return user;
+        }
+
+        public void setUser(TwitterUser user) {
+            this.user = user;
+        }
+
+        public double getLatitude() {
+            return latitude;
+        }
+
+        public String getSendTime() {
+            return created_at;
+        }
+
+        public Message getMessageText() {
+            return messageText;
+        }
+
+        public void setMessageText(Message messageText) {
+            this.messageText = messageText;
+        }
+
+        public String getCountry() {
+            return country;
+        }
+
+    }
+
+    /**
+     * Mutable description of a generated user. The language is always
+     * {@code "en"}.
+     */
+    public static class TwitterUser {
+
+        private String screenName;
+        private String lang = "en";
+        private int friendsCount;
+        private int statusesCount;
+        private String name;
+        private int followersCount;
+
+        /** Creates a user whose attributes are all unset. */
+        public TwitterUser() {
+
+        }
+
+        public TwitterUser(String screenName, int friendsCount, int statusesCount, String name, int followersCount) {
+            reset(screenName, friendsCount, statusesCount, name, followersCount);
+        }
+
+        /** Overwrites every attribute except the language. */
+        public void reset(String screenName, int friendsCount, int statusesCount, String name, int followersCount) {
+            this.screenName = screenName;
+            this.name = name;
+            this.friendsCount = friendsCount;
+            this.statusesCount = statusesCount;
+            this.followersCount = followersCount;
+        }
+
+        public String getScreenName() {
+            return this.screenName;
+        }
+
+        public int getFriendsCount() {
+            return this.friendsCount;
+        }
+
+        public int getStatusesCount() {
+            return this.statusesCount;
+        }
+
+        public String getName() {
+            return this.name;
+        }
+
+        public int getFollowersCount() {
+            return this.followersCount;
+        }
+
+        /**
+         * @return the user rendered as a record with the keys {@code screen_name},
+         *         {@code language}, {@code friends_count}, {@code status_count},
+         *         {@code name} and {@code followers_count}, in that order
+         */
+        public String toString() {
+            return "{"
+                    + "\"screen_name\":" + "\"" + screenName + "\""
+                    + ","
+                    + "\"language\":" + "\"" + lang + "\""
+                    + ","
+                    + "\"friends_count\":" + friendsCount
+                    + ","
+                    + "\"status_count\":" + statusesCount
+                    + ","
+                    + "\"name\":" + "\"" + name + "\""
+                    + ","
+                    + "\"followers_count\":" + followersCount
+                    + "}";
+        }
+
+    }
+
+    /**
+     * Minimal mutable calendar date (month, day, year). Values are stored as
+     * given; no validation is performed.
+     */
+    public static class Date {
+
+        private int day;
+        private int month;
+        private int year;
+
+        /** Creates a date whose month, day and year are all zero. */
+        public Date() {
+        }
+
+        public Date(int month, int day, int year) {
+            reset(month, day, year);
+        }
+
+        /** Overwrites all three components. */
+        public void reset(int month, int day, int year) {
+            this.year = year;
+            this.month = month;
+            this.day = day;
+        }
+
+        public int getDay() {
+            return this.day;
+        }
+
+        public void setDay(int day) {
+            this.day = day;
+        }
+
+        public int getMonth() {
+            return this.month;
+        }
+
+        public void setMonth(int month) {
+            this.month = month;
+        }
+
+        public int getYear() {
+            return this.year;
+        }
+
+        public void setYear(int year) {
+            this.year = year;
+        }
+
+        /**
+         * @return the date rendered as {@code date("<year>-<MM>-<dd>")}, with month
+         *         and day below ten prefixed by a zero
+         */
+        public String toString() {
+            final String monthText = (month < 10) ? "0" + month : "" + month;
+            final String dayText = (day < 10) ? "0" + day : "" + day;
+            return "date" + "(\"" + year + "-" + monthText + "-" + dayText + "\")";
+        }
+    }
+
+    public static String[] lastNames = { "Hoopengarner", "Harrow", "Gardner", "Blyant", "Best", "Buttermore", "Gronko",
+            "Mayers", "Countryman", "Neely", "Ruhl", "Taggart", "Bash", "Cason", "Hil", "Zalack", "Mingle", "Carr",
+            "Rohtin", "Wardle", "Pullman", "Wire", "Kellogg", "Hiles", "Keppel", "Bratton", "Sutton", "Wickes",
+            "Muller", "Friedline", "Llora", "Elizabeth", "Anderson", "Gaskins", "Rifler", "Vinsant", "Stanfield",
+            "Black", "Guest", "Hujsak", "Carter", "Weidemann", "Hays", "Patton", "Hayhurst", "Paynter", "Cressman",
+            "Fiddler", "Evans", "Sherlock", "Woodworth", "Jackson", "Bloise", "Schneider", "Ring", "Kepplinger",
+            "James", "Moon", "Bennett", "Bashline", "Ryals", "Zeal", "Christman", "Milliron", "Nash", "Ewing", "Camp",
+            "Mason", "Richardson", "Bowchiew", "Hahn", "Wilson", "Wood", "Toyley", "Williamson", "Lafortune", "Errett",
+            "Saltser", "Hirleman", "Brindle", "Newbiggin", "Ulery", "Lambert", "Shick", "Kuster", "Moore", "Finck",
+            "Powell", "Jolce", "Townsend", "Sauter", "Cowher", "Wolfe", "Cavalet", "Porter", "Laborde", "Ballou",
+            "Murray", "Stoddard", "Pycroft", "Milne", "King", "Todd", "Staymates", "Hall", "Romanoff", "Keilbach",
+            "Sandford", "Hamilton", "Fye", "Kline", "Weeks", "Mcelroy", "Mccullough", "Bryant", "Hill", "Moore",
+            "Ledgerwood", "Prevatt", "Eckert", "Read", "Hastings", "Doverspike", "Allshouse", "Bryan", "Mccallum",
+            "Lombardi", "Mckendrick", "Cattley", "Barkley", "Steiner", "Finlay", "Priebe", "Armitage", "Hall", "Elder",
+            "Erskine", "Hatcher", "Walker", "Pearsall", "Dunkle", "Haile", "Adams", "Miller", "Newbern", "Basinger",
+            "Fuhrer", "Brinigh", "Mench", "Blackburn", "Bastion", "Mccune", "Bridger", "Hynes", "Quinn", "Courtney",
+            "Geddinge", "Field", "Seelig", "Cable", "Earhart", "Harshman", "Roby", "Beals", "Berry", "Reed", "Hector",
+            "Pittman", "Haverrman", "Kalp", "Briner", "Joghs", "Cowart", "Close", "Wynne", "Harden", "Weldy",
+            "Stephenson", "Hildyard", "Moberly", "Wells", "Mackendoerfer", "Fisher", "Oppie", "Oneal", "Churchill",
+            "Keister", "Alice", "Tavoularis", "Fisher", "Hair", "Burns", "Veith", "Wile", "Fuller", "Fields", "Clark",
+            "Randolph", "Stone", "Mcclymonds", "Holtzer", "Donkin", "Wilkinson", "Rosensteel", "Albright", "Stahl",
+            "Fox", "Kadel", "Houser", "Hanseu", "Henderson", "Davis", "Bicknell", "Swain", "Mercer", "Holdeman",
+            "Enderly", "Caesar", "Margaret", "Munshower", "Elless", "Lucy", "Feufer", "Schofield", "Graham",
+            "Blatenberger", "Benford", "Akers", "Campbell", "Ann", "Sadley", "Ling", "Gongaware", "Schmidt", "Endsley",
+            "Groah", "Flanders", "Reichard", "Lowstetter", "Sandblom", "Griffis", "Basmanoff", "Coveney", "Hawker",
+            "Archibald", "Hutton", "Barnes", "Diegel", "Raybould", "Focell", "Breitenstein", "Murray", "Chauvin",
+            "Busk", "Pheleps", "Teagarden", "Northey", "Baumgartner", "Fleming", "Harris", "Parkinson", "Carpenter",
+            "Whirlow", "Bonner", "Wortman", "Rogers", "Scott", "Lowe", "Mckee", "Huston", "Bullard", "Throckmorton",
+            "Rummel", "Mathews", "Dull", "Saline", "Tue", "Woolery", "Lalty", "Schrader", "Ramsey", "Eisenmann",
+            "Philbrick", "Sybilla", "Wallace", "Fonblanque", "Paul", "Orbell", "Higgens", "Casteel", "Franks",
+            "Demuth", "Eisenman", "Hay", "Robinson", "Fischer", "Hincken", "Wylie", "Leichter", "Bousum",
+            "Littlefield", "Mcdonald", "Greif", "Rhodes", "Wall", "Steele", "Baldwin", "Smith", "Stewart", "Schere",
+            "Mary", "Aultman", "Emrick", "Guess", "Mitchell", "Painter", "Aft", "Hasely", "Weldi", "Loewentsein",
+            "Poorbaugh", "Kepple", "Noton", "Judge", "Jackson", "Style", "Adcock", "Diller", "Marriman", "Johnston",
+            "Children", "Monahan", "Ehret", "Shaw", "Congdon", "Pinney", "Millard", "Crissman", "Tanner", "Rose",
+            "Knisely", "Cypret", "Sommer", "Poehl", "Hardie", "Bender", "Overholt", "Gottwine", "Beach", "Leslie",
+            "Trevithick", "Langston", "Magor", "Shotts", "Howe", "Hunter", "Cross", "Kistler", "Dealtry", "Christner",
+            "Pennington", "Thorley", "Eckhardstein", "Van", "Stroh", "Stough", "Stall", "Beedell", "Shea", "Garland",
+            "Mays", "Pritchard", "Frankenberger", "Rowley", "Lane", "Baum", "Alliman", "Park", "Jardine", "Butler",
+            "Cherry", "Kooser", "Baxter", "Billimek", "Downing", "Hurst", "Wood", "Baird", "Watkins", "Edwards",
+            "Kemerer", "Harding", "Owens", "Eiford", "Keener", "Garneis", "Fiscina", "Mang", "Draudy", "Mills",
+            "Gibson", "Reese", "Todd", "Ramos", "Levett", "Wilks", "Ward", "Mosser", "Dunlap", "Kifer", "Christopher",
+            "Ashbaugh", "Wynter", "Rawls", "Cribbs", "Haynes", "Thigpen", "Schreckengost", "Bishop", "Linton",
+            "Chapman", "James", "Jerome", "Hook", "Omara", "Houston", "Maclagan", "Sandys", "Pickering", "Blois",
+            "Dickson", "Kemble", "Duncan", "Woodward", "Southern", "Henley", "Treeby", "Cram", "Elsas", "Driggers",
+            "Warrick", "Overstreet", "Hindman", "Buck", "Sulyard", "Wentzel", "Swink", "Butt", "Schaeffer",
+            "Hoffhants", "Bould", "Willcox", "Lotherington", "Bagley", "Graff", "White", "Wheeler", "Sloan",
+            "Rodacker", "Hanford", "Jowers", "Kunkle", "Cass", "Powers", "Gilman", "Mcmichaels", "Hobbs", "Herndon",
+            "Prescott", "Smail", "Mcdonald", "Biery", "Orner", "Richards", "Mueller", "Isaman", "Bruxner", "Goodman",
+            "Barth", "Turzanski", "Vorrasi", "Stainforth", "Nehling", "Rahl", "Erschoff", "Greene", "Mckinnon",
+            "Reade", "Smith", "Pery", "Roose", "Greenwood", "Weisgarber", "Curry", "Holts", "Zadovsky", "Parrish",
+            "Putnam", "Munson", "Mcindoe", "Nickolson", "Brooks", "Bollinger", "Stroble", "Siegrist", "Fulton",
+            "Tomey", "Zoucks", "Roberts", "Otis", "Clarke", "Easter", "Johnson", "Fylbrigg", "Taylor", "Swartzbaugh",
+            "Weinstein", "Gadow", "Sayre", "Marcotte", "Wise", "Atweeke", "Mcfall", "Napier", "Eisenhart", "Canham",
+            "Sealis", "Baughman", "Gertraht", "Losey", "Laurence", "Eva", "Pershing", "Kern", "Pirl", "Rega",
+            "Sanborn", "Kanaga", "Sanders", "Anderson", "Dickinson", "Osteen", "Gettemy", "Crom", "Snyder", "Reed",
+            "Laurenzi", "Riggle", "Tillson", "Fowler", "Raub", "Jenner", "Koepple", "Soames", "Goldvogel", "Dimsdale",
+            "Zimmer", "Giesen", "Baker", "Beail", "Mortland", "Bard", "Sanner", "Knopsnider", "Jenkins", "Bailey",
+            "Werner", "Barrett", "Faust", "Agg", "Tomlinson", "Williams", "Little", "Greenawalt", "Wells", "Wilkins",
+            "Gisiko", "Bauerle", "Harrold", "Prechtl", "Polson", "Faast", "Winton", "Garneys", "Peters", "Potter",
+            "Porter", "Tennant", "Eve", "Dugger", "Jones", "Burch", "Cowper", "Whittier" };
+
+    public static String[] firstNames = { "Albert", "Jacquelin", "Dona", "Alia", "Mayme", "Genoveva", "Emma", "Lena",
+            "Melody", "Vilma", "Katelyn", "Jeremy", "Coral", "Leann", "Lita", "Gilda", "Kayla", "Alvina", "Maranda",
+            "Verlie", "Khadijah", "Karey", "Patrice", "Kallie", "Corey", "Mollie", "Daisy", "Melanie", "Sarita",
+            "Nichole", "Pricilla", "Terresa", "Berneice", "Arianne", "Brianne", "Lavinia", "Ulrike", "Lesha", "Adell",
+            "Ardelle", "Marisha", "Laquita", "Karyl", "Maryjane", "Kendall", "Isobel", "Raeann", "Heike", "Barbera",
+            "Norman", "Yasmine", "Nevada", "Mariam", "Edith", "Eugena", "Lovie", "Maren", "Bennie", "Lennie", "Tamera",
+            "Crystal", "Randi", "Anamaria", "Chantal", "Jesenia", "Avis", "Shela", "Randy", "Laurena", "Sharron",
+            "Christiane", "Lorie", "Mario", "Elizabeth", "Reina", "Adria", "Lakisha", "Brittni", "Azzie", "Dori",
+            "Shaneka", "Asuncion", "Katheryn", "Laurice", "Sharita", "Krystal", "Reva", "Inger", "Alpha", "Makeda",
+            "Anabel", "Loni", "Tiara", "Meda", "Latashia", "Leola", "Chin", "Daisey", "Ivory", "Amalia", "Logan",
+            "Tyler", "Kyong", "Carolann", "Maryetta", "Eufemia", "Anya", "Doreatha", "Lorna", "Rutha", "Ehtel",
+            "Debbie", "Chassidy", "Sang", "Christa", "Lottie", "Chun", "Karine", "Peggie", "Amina", "Melany", "Alayna",
+            "Scott", "Romana", "Naomi", "Christiana", "Salena", "Taunya", "Mitsue", "Regina", "Chelsie", "Charity",
+            "Dacia", "Aletha", "Latosha", "Lia", "Tamica", "Chery", "Bianca", "Shu", "Georgianne", "Myriam", "Austin",
+            "Wan", "Mallory", "Jana", "Georgie", "Jenell", "Kori", "Vicki", "Delfina", "June", "Mellisa", "Catherina",
+            "Claudie", "Tynisha", "Dayle", "Enriqueta", "Belen", "Pia", "Sarai", "Rosy", "Renay", "Kacie", "Frieda",
+            "Cayla", "Elissa", "Claribel", "Sabina", "Mackenzie", "Raina", "Cira", "Mitzie", "Aubrey", "Serafina",
+            "Maria", "Katharine", "Esperanza", "Sung", "Daria", "Billye", "Stefanie", "Kasha", "Holly", "Suzanne",
+            "Inga", "Flora", "Andria", "Genevie", "Eladia", "Janet", "Erline", "Renna", "Georgeanna", "Delorse",
+            "Elnora", "Rudy", "Rima", "Leanora", "Letisha", "Love", "Alverta", "Pinkie", "Domonique", "Jeannie",
+            "Jose", "Jacqueline", "Tara", "Lily", "Erna", "Tennille", "Galina", "Tamala", "Kirby", "Nichelle",
+            "Myesha", "Farah", "Santa", "Ludie", "Kenia", "Yee", "Micheline", "Maryann", "Elaina", "Ethelyn",
+            "Emmaline", "Shanell", "Marina", "Nila", "Alane", "Shakira", "Dorris", "Belinda", "Elois", "Barbie",
+            "Carita", "Gisela", "Lura", "Fransisca", "Helga", "Peg", "Leonarda", "Earlie", "Deetta", "Jacquetta",
+            "Blossom", "Kayleigh", "Deloras", "Keshia", "Christinia", "Dulce", "Bernie", "Sheba", "Lashanda", "Tula",
+            "Claretta", "Kary", "Jeanette", "Lupita", "Lenora", "Hisako", "Sherise", "Glynda", "Adela", "Chia",
+            "Sudie", "Mindy", "Caroyln", "Lindsey", "Xiomara", "Mercedes", "Onie", "Loan", "Alexis", "Tommie",
+            "Donette", "Monica", "Soo", "Camellia", "Lavera", "Valery", "Ariana", "Sophia", "Loris", "Ginette",
+            "Marielle", "Tari", "Julissa", "Alesia", "Suzanna", "Emelda", "Erin", "Ladawn", "Sherilyn", "Candice",
+            "Nereida", "Fairy", "Carl", "Joel", "Marilee", "Gracia", "Cordie", "So", "Shanita", "Drew", "Cassie",
+            "Sherie", "Marget", "Norma", "Delois", "Debera", "Chanelle", "Catarina", "Aracely", "Carlene", "Tricia",
+            "Aleen", "Katharina", "Marguerita", "Guadalupe", "Margorie", "Mandie", "Kathe", "Chong", "Sage", "Faith",
+            "Maryrose", "Stephany", "Ivy", "Pauline", "Susie", "Cristen", "Jenifer", "Annette", "Debi", "Karmen",
+            "Luci", "Shayla", "Hope", "Ocie", "Sharie", "Tami", "Breana", "Kerry", "Rubye", "Lashay", "Sondra",
+            "Katrice", "Brunilda", "Cortney", "Yan", "Zenobia", "Penni", "Addie", "Lavona", "Noel", "Anika",
+            "Herlinda", "Valencia", "Bunny", "Tory", "Victoria", "Carrie", "Mikaela", "Wilhelmina", "Chung",
+            "Hortencia", "Gerda", "Wen", "Ilana", "Sibyl", "Candida", "Victorina", "Chantell", "Casie", "Emeline",
+            "Dominica", "Cecila", "Delora", "Miesha", "Nova", "Sally", "Ronald", "Charlette", "Francisca", "Mina",
+            "Jenna", "Loraine", "Felisa", "Lulu", "Page", "Lyda", "Babara", "Flor", "Walter", "Chan", "Sherika",
+            "Kala", "Luna", "Vada", "Syreeta", "Slyvia", "Karin", "Renata", "Robbi", "Glenda", "Delsie", "Lizzie",
+            "Genia", "Caitlin", "Bebe", "Cory", "Sam", "Leslee", "Elva", "Caren", "Kasie", "Leticia", "Shannan",
+            "Vickey", "Sandie", "Kyle", "Chang", "Terrilyn", "Sandra", "Elida", "Marketta", "Elsy", "Tu", "Carman",
+            "Ashlie", "Vernia", "Albertine", "Vivian", "Elba", "Bong", "Margy", "Janetta", "Xiao", "Teofila", "Danyel",
+            "Nickole", "Aleisha", "Tera", "Cleotilde", "Dara", "Paulita", "Isela", "Maricela", "Rozella", "Marivel",
+            "Aurora", "Melissa", "Carylon", "Delinda", "Marvella", "Candelaria", "Deidre", "Tawanna", "Myrtie",
+            "Milagro", "Emilie", "Coretta", "Ivette", "Suzann", "Ammie", "Lucina", "Lory", "Tena", "Eleanor",
+            "Cherlyn", "Tiana", "Brianna", "Myra", "Flo", "Carisa", "Kandi", "Erlinda", "Jacqulyn", "Fermina", "Riva",
+            "Palmira", "Lindsay", "Annmarie", "Tamiko", "Carline", "Amelia", "Quiana", "Lashawna", "Veola", "Belva",
+            "Marsha", "Verlene", "Alex", "Leisha", "Camila", "Mirtha", "Melva", "Lina", "Arla", "Cythia", "Towanda",
+            "Aracelis", "Tasia", "Aurore", "Trinity", "Bernadine", "Farrah", "Deneen", "Ines", "Betty", "Lorretta",
+            "Dorethea", "Hertha", "Rochelle", "Juli", "Shenika", "Yung", "Lavon", "Deeanna", "Nakia", "Lynnette",
+            "Dinorah", "Nery", "Elene", "Carolee", "Mira", "Franchesca", "Lavonda", "Leida", "Paulette", "Dorine",
+            "Allegra", "Keva", "Jeffrey", "Bernardina", "Maryln", "Yoko", "Faviola", "Jayne", "Lucilla", "Charita",
+            "Ewa", "Ella", "Maggie", "Ivey", "Bettie", "Jerri", "Marni", "Bibi", "Sabrina", "Sarah", "Marleen",
+            "Katherin", "Remona", "Jamika", "Antonina", "Oliva", "Lajuana", "Fonda", "Sigrid", "Yael", "Billi",
+            "Verona", "Arminda", "Mirna", "Tesha", "Katheleen", "Bonita", "Kamilah", "Patrica", "Julio", "Shaina",
+            "Mellie", "Denyse", "Deandrea", "Alena", "Meg", "Kizzie", "Krissy", "Karly", "Alleen", "Yahaira", "Lucie",
+            "Karena", "Elaine", "Eloise", "Buena", "Marianela", "Renee", "Nan", "Carolynn", "Windy", "Avril", "Jane",
+            "Vida", "Thea", "Marvel", "Rosaline", "Tifany", "Robena", "Azucena", "Carlota", "Mindi", "Andera", "Jenny",
+            "Courtney", "Lyndsey", "Willette", "Kristie", "Shaniqua", "Tabatha", "Ngoc", "Una", "Marlena", "Louetta",
+            "Vernie", "Brandy", "Jacquelyne", "Jenelle", "Elna", "Erminia", "Ida", "Audie", "Louis", "Marisol",
+            "Shawana", "Harriette", "Karol", "Kitty", "Esmeralda", "Vivienne", "Eloisa", "Iris", "Jeanice", "Cammie",
+            "Jacinda", "Shena", "Floy", "Theda", "Lourdes", "Jayna", "Marg", "Kati", "Tanna", "Rosalyn", "Maxima",
+            "Soon", "Angelika", "Shonna", "Merle", "Kassandra", "Deedee", "Heidi", "Marti", "Renae", "Arleen",
+            "Alfredia", "Jewell", "Carley", "Pennie", "Corina", "Tonisha", "Natividad", "Lilliana", "Darcie", "Shawna",
+            "Angel", "Piedad", "Josefa", "Rebbeca", "Natacha", "Nenita", "Petrina", "Carmon", "Chasidy", "Temika",
+            "Dennise", "Renetta", "Augusta", "Shirlee", "Valeri", "Casimira", "Janay", "Berniece", "Deborah", "Yaeko",
+            "Mimi", "Digna", "Irish", "Cher", "Yong", "Lucila", "Jimmie", "Junko", "Lezlie", "Waneta", "Sandee",
+            "Marquita", "Eura", "Freeda", "Annabell", "Laree", "Jaye", "Wendy", "Toshia", "Kylee", "Aleta", "Emiko",
+            "Clorinda", "Sixta", "Audrea", "Juanita", "Birdie", "Reita", "Latanya", "Nia", "Leora", "Laurine",
+            "Krysten", "Jerrie", "Chantel", "Ira", "Sena", "Andre", "Jann", "Marla", "Precious", "Katy", "Gabrielle",
+            "Yvette", "Brook", "Shirlene", "Eldora", "Laura", "Milda", "Euna", "Jettie", "Debora", "Lise", "Edythe",
+            "Leandra", "Shandi", "Araceli", "Johanne", "Nieves", "Denese", "Carmelita", "Nohemi", "Annice", "Natalie",
+            "Yolande", "Jeffie", "Vashti", "Vickie", "Obdulia", "Youlanda", "Lupe", "Tomoko", "Monserrate", "Domitila",
+            "Etsuko", "Adrienne", "Lakesha", "Melissia", "Odessa", "Meagan", "Veronika", "Jolyn", "Isabelle", "Leah",
+            "Rhiannon", "Gianna", "Audra", "Sommer", "Renate", "Perla", "Thao", "Myong", "Lavette", "Mark", "Emilia",
+            "Ariane", "Karl", "Dorie", "Jacquie", "Mia", "Malka", "Shenita", "Tashina", "Christine", "Cherri", "Roni",
+            "Fran", "Mildred", "Sara", "Clarissa", "Fredia", "Elease", "Samuel", "Earlene", "Vernita", "Mae", "Concha",
+            "Renea", "Tamekia", "Hye", "Ingeborg", "Tessa", "Kelly", "Kristin", "Tam", "Sacha", "Kanisha", "Jillian",
+            "Tiffanie", "Ashlee", "Madelyn", "Donya", "Clementine", "Mickie", "My", "Zena", "Terrie", "Samatha",
+            "Gertie", "Tarra", "Natalia", "Sharlene", "Evie", "Shalon", "Rosalee", "Numbers", "Jodi", "Hattie",
+            "Naoma", "Valene", "Whitley", "Claude", "Alline", "Jeanne", "Camie", "Maragret", "Viola", "Kris", "Marlo",
+            "Arcelia", "Shari", "Jalisa", "Corrie", "Eleonor", "Angelyn", "Merry", "Lauren", "Melita", "Gita",
+            "Elenor", "Aurelia", "Janae", "Lyndia", "Margeret", "Shawanda", "Rolande", "Shirl", "Madeleine", "Celinda",
+            "Jaleesa", "Shemika", "Joye", "Tisa", "Trudie", "Kathrine", "Clarita", "Dinah", "Georgia", "Antoinette",
+            "Janis", "Suzette", "Sherri", "Herta", "Arie", "Hedy", "Cassi", "Audrie", "Caryl", "Jazmine", "Jessica",
+            "Beverly", "Elizbeth", "Marylee", "Londa", "Fredericka", "Argelia", "Nana", "Donnette", "Damaris",
+            "Hailey", "Jamee", "Kathlene", "Glayds", "Lydia", "Apryl", "Verla", "Adam", "Concepcion", "Zelda",
+            "Shonta", "Vernice", "Detra", "Meghann", "Sherley", "Sheri", "Kiyoko", "Margarita", "Adaline", "Mariela",
+            "Velda", "Ailene", "Juliane", "Aiko", "Edyth", "Cecelia", "Shavon", "Florance", "Madeline", "Rheba",
+            "Deann", "Ignacia", "Odelia", "Heide", "Mica", "Jennette", "Maricruz", "Ouida", "Darcy", "Laure",
+            "Justina", "Amada", "Laine", "Cruz", "Sunny", "Francene", "Roxanna", "Nam", "Nancie", "Deanna", "Letty",
+            "Britni", "Kazuko", "Lacresha", "Simon", "Caleb", "Milton", "Colton", "Travis", "Miles", "Jonathan",
+            "Logan", "Rolf", "Emilio", "Roberto", "Marcus", "Tim", "Delmar", "Devon", "Kurt", "Edward", "Jeffrey",
+            "Elvis", "Alfonso", "Blair", "Wm", "Sheldon", "Leonel", "Michal", "Federico", "Jacques", "Leslie",
+            "Augustine", "Hugh", "Brant", "Hong", "Sal", "Modesto", "Curtis", "Jefferey", "Adam", "John", "Glenn",
+            "Vance", "Alejandro", "Refugio", "Lucio", "Demarcus", "Chang", "Huey", "Neville", "Preston", "Bert",
+            "Abram", "Foster", "Jamison", "Kirby", "Erich", "Manual", "Dustin", "Derrick", "Donnie", "Jospeh", "Chris",
+            "Josue", "Stevie", "Russ", "Stanley", "Nicolas", "Samuel", "Waldo", "Jake", "Max", "Ernest", "Reinaldo",
+            "Rene", "Gale", "Morris", "Nathan", "Maximo", "Courtney", "Theodore", "Octavio", "Otha", "Delmer",
+            "Graham", "Dean", "Lowell", "Myles", "Colby", "Boyd", "Adolph", "Jarrod", "Nick", "Mark", "Clinton", "Kim",
+            "Sonny", "Dalton", "Tyler", "Jody", "Orville", "Luther", "Rubin", "Hollis", "Rashad", "Barton", "Vicente",
+            "Ted", "Rick", "Carmine", "Clifton", "Gayle", "Christopher", "Jessie", "Bradley", "Clay", "Theo", "Josh",
+            "Mitchell", "Boyce", "Chung", "Eugenio", "August", "Norbert", "Sammie", "Jerry", "Adan", "Edmundo",
+            "Homer", "Hilton", "Tod", "Kirk", "Emmett", "Milan", "Quincy", "Jewell", "Herb", "Steve", "Carmen",
+            "Bobby", "Odis", "Daron", "Jeremy", "Carl", "Hunter", "Tuan", "Thurman", "Asa", "Brenton", "Shane",
+            "Donny", "Andreas", "Teddy", "Dario", "Cyril", "Hoyt", "Teodoro", "Vincenzo", "Hilario", "Daren",
+            "Agustin", "Marquis", "Ezekiel", "Brendan", "Johnson", "Alden", "Richie", "Granville", "Chad", "Joseph",
+            "Lamont", "Jordon", "Gilberto", "Chong", "Rosendo", "Eddy", "Rob", "Dewitt", "Andre", "Titus", "Russell",
+            "Rigoberto", "Dick", "Garland", "Gabriel", "Hank", "Darius", "Ignacio", "Lazaro", "Johnie", "Mauro",
+            "Edmund", "Trent", "Harris", "Osvaldo", "Marvin", "Judson", "Rodney", "Randall", "Renato", "Richard",
+            "Denny", "Jon", "Doyle", "Cristopher", "Wilson", "Christian", "Jamie", "Roland", "Ken", "Tad", "Romeo",
+            "Seth", "Quinton", "Byron", "Ruben", "Darrel", "Deandre", "Broderick", "Harold", "Ty", "Monroe", "Landon",
+            "Mohammed", "Angel", "Arlen", "Elias", "Andres", "Carlton", "Numbers", "Tony", "Thaddeus", "Issac",
+            "Elmer", "Antoine", "Ned", "Fermin", "Grover", "Benito", "Abdul", "Cortez", "Eric", "Maxwell", "Coy",
+            "Gavin", "Rich", "Andy", "Del", "Giovanni", "Major", "Efren", "Horacio", "Joaquin", "Charles", "Noah",
+            "Deon", "Pasquale", "Reed", "Fausto", "Jermaine", "Irvin", "Ray", "Tobias", "Carter", "Yong", "Jorge",
+            "Brent", "Daniel", "Zane", "Walker", "Thad", "Shaun", "Jaime", "Mckinley", "Bradford", "Nathanial",
+            "Jerald", "Aubrey", "Virgil", "Abel", "Philip", "Chester", "Chadwick", "Dominick", "Britt", "Emmitt",
+            "Ferdinand", "Julian", "Reid", "Santos", "Dwain", "Morgan", "James", "Marion", "Micheal", "Eddie", "Brett",
+            "Stacy", "Kerry", "Dale", "Nicholas", "Darrick", "Freeman", "Scott", "Newton", "Sherman", "Felton",
+            "Cedrick", "Winfred", "Brad", "Fredric", "Dewayne", "Virgilio", "Reggie", "Edgar", "Heriberto", "Shad",
+            "Timmy", "Javier", "Nestor", "Royal", "Lynn", "Irwin", "Ismael", "Jonas", "Wiley", "Austin", "Kieth",
+            "Gonzalo", "Paris", "Earnest", "Arron", "Jarred", "Todd", "Erik", "Maria", "Chauncey", "Neil", "Conrad",
+            "Maurice", "Roosevelt", "Jacob", "Sydney", "Lee", "Basil", "Louis", "Rodolfo", "Rodger", "Roman", "Corey",
+            "Ambrose", "Cristobal", "Sylvester", "Benton", "Franklin", "Marcelo", "Guillermo", "Toby", "Jeramy",
+            "Donn", "Danny", "Dwight", "Clifford", "Valentine", "Matt", "Jules", "Kareem", "Ronny", "Lonny", "Son",
+            "Leopoldo", "Dannie", "Gregg", "Dillon", "Orlando", "Weston", "Kermit", "Damian", "Abraham", "Walton",
+            "Adrian", "Rudolf", "Will", "Les", "Norberto", "Fred", "Tyrone", "Ariel", "Terry", "Emmanuel", "Anderson",
+            "Elton", "Otis", "Derek", "Frankie", "Gino", "Lavern", "Jarod", "Kenny", "Dane", "Keenan", "Bryant",
+            "Eusebio", "Dorian", "Ali", "Lucas", "Wilford", "Jeremiah", "Warner", "Woodrow", "Galen", "Bob",
+            "Johnathon", "Amado", "Michel", "Harry", "Zachery", "Taylor", "Booker", "Hershel", "Mohammad", "Darrell",
+            "Kyle", "Stuart", "Marlin", "Hyman", "Jeffery", "Sidney", "Merrill", "Roy", "Garrett", "Porter", "Kenton",
+            "Giuseppe", "Terrance", "Trey", "Felix", "Buster", "Von", "Jackie", "Linwood", "Darron", "Francisco",
+            "Bernie", "Diego", "Brendon", "Cody", "Marco", "Ahmed", "Antonio", "Vince", "Brooks", "Kendrick", "Ross",
+            "Mohamed", "Jim", "Benny", "Gerald", "Pablo", "Charlie", "Antony", "Werner", "Hipolito", "Minh", "Mel",
+            "Derick", "Armand", "Fidel", "Lewis", "Donnell", "Desmond", "Vaughn", "Guadalupe", "Keneth", "Rodrick",
+            "Spencer", "Chas", "Gus", "Harlan", "Wes", "Carmelo", "Jefferson", "Gerard", "Jarvis", "Haywood", "Hayden",
+            "Sergio", "Gene", "Edgardo", "Colin", "Horace", "Dominic", "Aldo", "Adolfo", "Juan", "Man", "Lenard",
+            "Clement", "Everett", "Hal", "Bryon", "Mason", "Emerson", "Earle", "Laurence", "Columbus", "Lamar",
+            "Douglas", "Ian", "Fredrick", "Marc", "Loren", "Wallace", "Randell", "Noble", "Ricardo", "Rory", "Lindsey",
+            "Boris", "Bill", "Carlos", "Domingo", "Grant", "Craig", "Ezra", "Matthew", "Van", "Rudy", "Danial",
+            "Brock", "Maynard", "Vincent", "Cole", "Damion", "Ellsworth", "Marcel", "Markus", "Rueben", "Tanner",
+            "Reyes", "Hung", "Kennith", "Lindsay", "Howard", "Ralph", "Jed", "Monte", "Garfield", "Avery", "Bernardo",
+            "Malcolm", "Sterling", "Ezequiel", "Kristofer", "Luciano", "Casey", "Rosario", "Ellis", "Quintin",
+            "Trevor", "Miquel", "Jordan", "Arthur", "Carson", "Tyron", "Grady", "Walter", "Jonathon", "Ricky",
+            "Bennie", "Terrence", "Dion", "Dusty", "Roderick", "Isaac", "Rodrigo", "Harrison", "Zack", "Dee", "Devin",
+            "Rey", "Ulysses", "Clint", "Greg", "Dino", "Frances", "Wade", "Franklyn", "Jude", "Bradly", "Salvador",
+            "Rocky", "Weldon", "Lloyd", "Milford", "Clarence", "Alec", "Allan", "Bobbie", "Oswaldo", "Wilfred",
+            "Raleigh", "Shelby", "Willy", "Alphonso", "Arnoldo", "Robbie", "Truman", "Nicky", "Quinn", "Damien",
+            "Lacy", "Marcos", "Parker", "Burt", "Carroll", "Denver", "Buck", "Dong", "Normand", "Billie", "Edwin",
+            "Troy", "Arden", "Rusty", "Tommy", "Kenneth", "Leo", "Claud", "Joel", "Kendall", "Dante", "Milo", "Cruz",
+            "Lucien", "Ramon", "Jarrett", "Scottie", "Deshawn", "Ronnie", "Pete", "Alonzo", "Whitney", "Stefan",
+            "Sebastian", "Edmond", "Enrique", "Branden", "Leonard", "Loyd", "Olin", "Ron", "Rhett", "Frederic",
+            "Orval", "Tyrell", "Gail", "Eli", "Antonia", "Malcom", "Sandy", "Stacey", "Nickolas", "Hosea", "Santo",
+            "Oscar", "Fletcher", "Dave", "Patrick", "Dewey", "Bo", "Vito", "Blaine", "Randy", "Robin", "Winston",
+            "Sammy", "Edwardo", "Manuel", "Valentin", "Stanford", "Filiberto", "Buddy", "Zachariah", "Johnnie",
+            "Elbert", "Paul", "Isreal", "Jerrold", "Leif", "Owen", "Sung", "Junior", "Raphael", "Josef", "Donte",
+            "Allen", "Florencio", "Raymond", "Lauren", "Collin", "Eliseo", "Bruno", "Martin", "Lyndon", "Kurtis",
+            "Salvatore", "Erwin", "Michael", "Sean", "Davis", "Alberto", "King", "Rolland", "Joe", "Tory", "Chase",
+            "Dallas", "Vernon", "Beau", "Terrell", "Reynaldo", "Monty", "Jame", "Dirk", "Florentino", "Reuben", "Saul",
+            "Emory", "Esteban", "Michale", "Claudio", "Jacinto", "Kelley", "Levi", "Andrea", "Lanny", "Wendell",
+            "Elwood", "Joan", "Felipe", "Palmer", "Elmo", "Lawrence", "Hubert", "Rudolph", "Duane", "Cordell",
+            "Everette", "Mack", "Alan", "Efrain", "Trenton", "Bryan", "Tom", "Wilmer", "Clyde", "Chance", "Lou",
+            "Brain", "Justin", "Phil", "Jerrod", "George", "Kris", "Cyrus", "Emery", "Rickey", "Lincoln", "Renaldo",
+            "Mathew", "Luke", "Dwayne", "Alexis", "Jackson", "Gil", "Marty", "Burton", "Emil", "Glen", "Willian",
+            "Clemente", "Keven", "Barney", "Odell", "Reginald", "Aurelio", "Damon", "Ward", "Gustavo", "Harley",
+            "Peter", "Anibal", "Arlie", "Nigel", "Oren", "Zachary", "Scot", "Bud", "Wilbert", "Bart", "Josiah",
+            "Marlon", "Eldon", "Darryl", "Roger", "Anthony", "Omer", "Francis", "Patricia", "Moises", "Chuck",
+            "Waylon", "Hector", "Jamaal", "Cesar", "Julius", "Rex", "Norris", "Ollie", "Isaias", "Quentin", "Graig",
+            "Lyle", "Jeffry", "Karl", "Lester", "Danilo", "Mike", "Dylan", "Carlo", "Ryan", "Leon", "Percy", "Lucius",
+            "Jamel", "Lesley", "Joey", "Cornelius", "Rico", "Arnulfo", "Chet", "Margarito", "Ernie", "Nathanael",
+            "Amos", "Cleveland", "Luigi", "Alfonzo", "Phillip", "Clair", "Elroy", "Alva", "Hans", "Shon", "Gary",
+            "Jesus", "Cary", "Silas", "Keith", "Israel", "Willard", "Randolph", "Dan", "Adalberto", "Claude",
+            "Delbert", "Garry", "Mary", "Larry", "Riley", "Robt", "Darwin", "Barrett", "Steven", "Kelly", "Herschel",
+            "Darnell", "Scotty", "Armando", "Miguel", "Lawerence", "Wesley", "Garth", "Carol", "Micah", "Alvin",
+            "Billy", "Earl", "Pat", "Brady", "Cory", "Carey", "Bernard", "Jayson", "Nathaniel", "Gaylord", "Archie",
+            "Dorsey", "Erasmo", "Angelo", "Elisha", "Long", "Augustus", "Hobert", "Drew", "Stan", "Sherwood",
+            "Lorenzo", "Forrest", "Shawn", "Leigh", "Hiram", "Leonardo", "Gerry", "Myron", "Hugo", "Alvaro", "Leland",
+            "Genaro", "Jamey", "Stewart", "Elden", "Irving", "Olen", "Antone", "Freddy", "Lupe", "Joshua", "Gregory",
+            "Andrew", "Sang", "Wilbur", "Gerardo", "Merlin", "Williams", "Johnny", "Alex", "Tommie", "Jimmy",
+            "Donovan", "Dexter", "Gaston", "Tracy", "Jeff", "Stephen", "Berry", "Anton", "Darell", "Fritz", "Willis",
+            "Noel", "Mariano", "Crawford", "Zoey", "Alex", "Brianna", "Carlie", "Lloyd", "Cal", "Astor", "Randolf",
+            "Magdalene", "Trevelyan", "Terance", "Roy", "Kermit", "Harriett", "Crystal", "Laurinda", "Kiersten",
+            "Phyllida", "Liz", "Bettie", "Rena", "Colten", "Berenice", "Sindy", "Wilma", "Amos", "Candi", "Ritchie",
+            "Dirk", "Kathlyn", "Callista", "Anona", "Flossie", "Sterling", "Calista", "Regan", "Erica", "Jeana",
+            "Keaton", "York", "Nolan", "Daniel", "Benton", "Tommie", "Serenity", "Deanna", "Chas", "Heron", "Marlyn",
+            "Xylia", "Tristin", "Lyndon", "Andriana", "Madelaine", "Maddison", "Leila", "Chantelle", "Audrey",
+            "Connor", "Daley", "Tracee", "Tilda", "Eliot", "Merle", "Linwood", "Kathryn", "Silas", "Alvina",
+            "Phinehas", "Janis", "Alvena", "Zubin", "Gwendolen", "Caitlyn", "Bertram", "Hailee", "Idelle", "Homer",
+            "Jannah", "Delbert", "Rhianna", "Cy", "Jefferson", "Wayland", "Nona", "Tempest", "Reed", "Jenifer",
+            "Ellery", "Nicolina", "Aldous", "Prince", "Lexia", "Vinnie", "Doug", "Alberic", "Kayleen", "Woody",
+            "Rosanne", "Ysabel", "Skyler", "Twyla", "Geordie", "Leta", "Clive", "Aaron", "Scottie", "Celeste", "Chuck",
+            "Erle", "Lallie", "Jaycob", "Ray", "Carrie", "Laurita", "Noreen", "Meaghan", "Ulysses", "Andy", "Drogo",
+            "Dina", "Yasmin", "Mya", "Luvenia", "Urban", "Jacob", "Laetitia", "Sherry", "Love", "Michaela", "Deonne",
+            "Summer", "Brendon", "Sheena", "Mason", "Jayson", "Linden", "Salal", "Darrell", "Diana", "Hudson",
+            "Lennon", "Isador", "Charley", "April", "Ralph", "James", "Mina", "Jolyon", "Laurine", "Monna", "Carita",
+            "Munro", "Elsdon", "Everette", "Radclyffe", "Darrin", "Herbert", "Gawain", "Sheree", "Trudy", "Emmaline",
+            "Kassandra", "Rebecca", "Basil", "Jen", "Don", "Osborne", "Lilith", "Hannah", "Fox", "Rupert", "Paulene",
+            "Darius", "Wally", "Baptist", "Sapphire", "Tia", "Sondra", "Kylee", "Ashton", "Jepson", "Joetta", "Val",
+            "Adela", "Zacharias", "Zola", "Marmaduke", "Shannah", "Posie", "Oralie", "Brittany", "Ernesta", "Raymund",
+            "Denzil", "Daren", "Roosevelt", "Nelson", "Fortune", "Mariel", "Nick", "Jaden", "Upton", "Oz", "Margaux",
+            "Precious", "Albert", "Bridger", "Jimmy", "Nicola", "Rosalynne", "Keith", "Walt", "Della", "Joanna",
+            "Xenia", "Esmeralda", "Major", "Simon", "Rexana", "Stacy", "Calanthe", "Sherley", "Kaitlyn", "Graham",
+            "Ramsey", "Abbey", "Madlyn", "Kelvin", "Bill", "Rue", "Monica", "Caileigh", "Laraine", "Booker", "Jayna",
+            "Greta", "Jervis", "Sherman", "Kendrick", "Tommy", "Iris", "Geffrey", "Kaelea", "Kerr", "Garrick", "Jep",
+            "Audley", "Nic", "Bronte", "Beulah", "Patricia", "Jewell", "Deidra", "Cory", "Everett", "Harper",
+            "Charity", "Godfrey", "Jaime", "Sinclair", "Talbot", "Dayna", "Cooper", "Rosaline", "Jennie", "Eileen",
+            "Latanya", "Corinna", "Roxie", "Caesar", "Charles", "Pollie", "Lindsey", "Sorrel", "Dwight", "Jocelyn",
+            "Weston", "Shyla", "Valorie", "Bessie", "Josh", "Lessie", "Dayton", "Kathi", "Chasity", "Wilton", "Adam",
+            "William", "Ash", "Angela", "Ivor", "Ria", "Jazmine", "Hailey", "Jo", "Silvestra", "Ernie", "Clifford",
+            "Levi", "Matilda", "Quincey", "Camilla", "Delicia", "Phemie", "Laurena", "Bambi", "Lourdes", "Royston",
+            "Chastity", "Lynwood", "Elle", "Brenda", "Phoebe", "Timothy", "Raschelle", "Lilly", "Burt", "Rina",
+            "Rodney", "Maris", "Jaron", "Wilf", "Harlan", "Audra", "Vincent", "Elwyn", "Drew", "Wynter", "Ora",
+            "Lissa", "Virgil", "Xavier", "Chad", "Ollie", "Leyton", "Karolyn", "Skye", "Roni", "Gladys", "Dinah",
+            "Penny", "August", "Osmund", "Whitaker", "Brande", "Cornell", "Phil", "Zara", "Kilie", "Gavin", "Coty",
+            "Randy", "Teri", "Keira", "Pru", "Clemency", "Kelcey", "Nevil", "Poppy", "Gareth", "Christabel", "Bastian",
+            "Wynonna", "Roselyn", "Goddard", "Collin", "Trace", "Neal", "Effie", "Denys", "Virginia", "Richard",
+            "Isiah", "Harrietta", "Gaylord", "Diamond", "Trudi", "Elaine", "Jemmy", "Gage", "Annabel", "Quincy", "Syd",
+            "Marianna", "Philomena", "Aubree", "Kathie", "Jacki", "Kelley", "Bess", "Cecil", "Maryvonne", "Kassidy",
+            "Anselm", "Dona", "Darby", "Jamison", "Daryl", "Darell", "Teal", "Lennie", "Bartholomew", "Katie",
+            "Maybelline", "Kimball", "Elvis", "Les", "Flick", "Harley", "Beth", "Bidelia", "Montague", "Helen", "Ozzy",
+            "Stef", "Debra", "Maxene", "Stefanie", "Russ", "Avril", "Johnathan", "Orson", "Chelsey", "Josephine",
+            "Deshaun", "Wendell", "Lula", "Ferdinanda", "Greg", "Brad", "Kynaston", "Dena", "Russel", "Robertina",
+            "Misti", "Leon", "Anjelica", "Bryana", "Myles", "Judi", "Curtis", "Davin", "Kristia", "Chrysanta",
+            "Hayleigh", "Hector", "Osbert", "Eustace", "Cary", "Tansy", "Cayley", "Maryann", "Alissa", "Ike",
+            "Tranter", "Reina", "Alwilda", "Sidony", "Columbine", "Astra", "Jillie", "Stephania", "Jonah", "Kennedy",
+            "Ferdinand", "Allegria", "Donella", "Kelleigh", "Darian", "Eldreda", "Jayden", "Herbie", "Jake", "Winston",
+            "Vi", "Annie", "Cherice", "Hugo", "Tricia", "Haydee", "Cassarah", "Darden", "Mallory", "Alton", "Hadley",
+            "Romayne", "Lacey", "Ern", "Alayna", "Cecilia", "Seward", "Tilly", "Edgar", "Concordia", "Ibbie", "Dahlia",
+            "Oswin", "Stu", "Brett", "Maralyn", "Kristeen", "Dotty", "Robyn", "Nessa", "Tresha", "Guinevere",
+            "Emerson", "Haze", "Lyn", "Henderson", "Lexa", "Jaylen", "Gail", "Lizette", "Tiara", "Robbie", "Destiny",
+            "Alice", "Livia", "Rosy", "Leah", "Jan", "Zach", "Vita", "Gia", "Micheal", "Rowina", "Alysha", "Bobbi",
+            "Delores", "Osmond", "Karaugh", "Wilbur", "Kasandra", "Renae", "Kaety", "Dora", "Gaye", "Amaryllis",
+            "Katelyn", "Dacre", "Prudence", "Ebony", "Camron", "Jerrold", "Vivyan", "Randall", "Donna", "Misty",
+            "Damon", "Selby", "Esmund", "Rian", "Garry", "Julius", "Raelene", "Clement", "Dom", "Tibby", "Moss",
+            "Millicent", "Gwendoline", "Berry", "Ashleigh", "Lilac", "Quin", "Vere", "Creighton", "Harriet", "Malvina",
+            "Lianne", "Pearle", "Kizzie", "Kara", "Petula", "Jeanie", "Maria", "Pacey", "Victoria", "Huey", "Toni",
+            "Rose", "Wallis", "Diggory", "Josiah", "Delma", "Keysha", "Channing", "Prue", "Lee", "Ryan", "Sidney",
+            "Valerie", "Clancy", "Ezra", "Gilbert", "Clare", "Laz", "Crofton", "Mike", "Annabella", "Tara", "Eldred",
+            "Arthur", "Jaylon", "Peronel", "Paden", "Dot", "Marian", "Amyas", "Alexus", "Esmond", "Abbie", "Stanley",
+            "Brittani", "Vickie", "Errol", "Kimberlee", "Uland", "Ebenezer", "Howie", "Eveline", "Andrea", "Trish",
+            "Hopkin", "Bryanna", "Temperance", "Valarie", "Femie", "Alix", "Terrell", "Lewin", "Lorrin", "Happy",
+            "Micah", "Rachyl", "Sloan", "Gertrude", "Elizabeth", "Dorris", "Andra", "Bram", "Gary", "Jeannine",
+            "Maurene", "Irene", "Yolonda", "Jonty", "Coleen", "Cecelia", "Chantal", "Stuart", "Caris", "Ros",
+            "Kaleigh", "Mirabelle", "Kolby", "Primrose", "Susannah", "Ginny", "Jinny", "Dolly", "Lettice", "Sonny",
+            "Melva", "Ernest", "Garret", "Reagan", "Trenton", "Gallagher", "Edwin", "Nikolas", "Corrie", "Lynette",
+            "Ettie", "Sly", "Debbi", "Eudora", "Brittney", "Tacey", "Marius", "Anima", "Gordon", "Olivia", "Kortney",
+            "Shantel", "Kolleen", "Nevaeh", "Buck", "Sera", "Liliana", "Aric", "Kalyn", "Mick", "Libby", "Ingram",
+            "Alexandria", "Darleen", "Jacklyn", "Hughie", "Tyler", "Aida", "Ronda", "Deemer", "Taryn", "Laureen",
+            "Samantha", "Dave", "Hardy", "Baldric", "Montgomery", "Gus", "Ellis", "Titania", "Luke", "Chase", "Haidee",
+            "Mayra", "Isabell", "Trinity", "Milo", "Abigail", "Tacita", "Meg", "Hervey", "Natasha", "Sadie", "Holden",
+            "Dee", "Mansel", "Perry", "Randi", "Frederica", "Georgina", "Kolour", "Debbie", "Seraphina", "Elspet",
+            "Julyan", "Raven", "Zavia", "Jarvis", "Jaymes", "Grover", "Cairo", "Alea", "Jordon", "Braxton", "Donny",
+            "Rhoda", "Tonya", "Bee", "Alyssia", "Ashlyn", "Reanna", "Lonny", "Arlene", "Deb", "Jane", "Nikole",
+            "Bettina", "Harrison", "Tamzen", "Arielle", "Adelaide", "Faith", "Bridie", "Wilburn", "Fern", "Nan",
+            "Shaw", "Zeke", "Alan", "Dene", "Gina", "Alexa", "Bailey", "Sal", "Tammy", "Maximillian", "America",
+            "Sylvana", "Fitz", "Mo", "Marissa", "Cass", "Eldon", "Wilfrid", "Tel", "Joann", "Kendra", "Tolly",
+            "Leanne", "Ferdie", "Haven", "Lucas", "Marlee", "Cyrilla", "Red", "Phoenix", "Jazmin", "Carin", "Gena",
+            "Lashonda", "Tucker", "Genette", "Kizzy", "Winifred", "Melody", "Keely", "Kaylyn", "Radcliff", "Lettie",
+            "Foster", "Lyndsey", "Nicholas", "Farley", "Louisa", "Dana", "Dortha", "Francine", "Doran", "Bonita",
+            "Hal", "Sawyer", "Reginald", "Aislin", "Nathan", "Baylee", "Abilene", "Ladonna", "Maurine", "Shelly",
+            "Deandre", "Jasmin", "Roderic", "Tiffany", "Amanda", "Verity", "Wilford", "Gayelord", "Whitney", "Demelza",
+            "Kenton", "Alberta", "Kyra", "Tabitha", "Sampson", "Korey", "Lillian", "Edison", "Clayton", "Steph",
+            "Maya", "Dusty", "Jim", "Ronny", "Adrianne", "Bernard", "Harris", "Kiley", "Alexander", "Kisha", "Ethalyn",
+            "Patience", "Briony", "Indigo", "Aureole", "Makenzie", "Molly", "Sherilyn", "Barry", "Laverne", "Hunter",
+            "Rocky", "Tyreek", "Madalyn", "Phyliss", "Chet", "Beatrice", "Faye", "Lavina", "Madelyn", "Tracey",
+            "Gyles", "Patti", "Carlyn", "Stephanie", "Jackalyn", "Larrie", "Kimmy", "Isolda", "Emelina", "Lis",
+            "Zillah", "Cody", "Sheard", "Rufus", "Paget", "Mae", "Rexanne", "Luvinia", "Tamsen", "Rosanna", "Greig",
+            "Stacia", "Mabelle", "Quianna", "Lotus", "Delice", "Bradford", "Angus", "Cosmo", "Earlene", "Adrian",
+            "Arlie", "Noelle", "Sabella", "Isa", "Adelle", "Innocent", "Kirby", "Trixie", "Kenelm", "Nelda", "Melia",
+            "Kendal", "Dorinda", "Placid", "Linette", "Kam", "Sherisse", "Evan", "Ewart", "Janice", "Linton",
+            "Jacaline", "Charissa", "Douglas", "Aileen", "Kemp", "Oli", "Amethyst", "Rosie", "Nigella", "Sherill",
+            "Anderson", "Alanna", "Eric", "Claudia", "Jennifer", "Boniface", "Harriet", "Vernon", "Lucy", "Shawnee",
+            "Gerard", "Cecily", "Romey", "Randall", "Wade", "Lux", "Dawson", "Gregg", "Kade", "Roxanne", "Melinda",
+            "Rolland", "Rowanne", "Fannie", "Isidore", "Melia", "Harvie", "Salal", "Eleonor", "Jacquette", "Lavone",
+            "Shanika", "Tarquin", "Janet", "Josslyn", "Maegan", "Augusta", "Aubree", "Francene", "Martie", "Marisa",
+            "Tyreek", "Tatianna", "Caleb", "Sheridan", "Nellie", "Barbara", "Wat", "Jayla", "Esmaralda", "Graeme",
+            "Lavena", "Jemima", "Nikolas", "Triston", "Portia", "Kyla", "Marcus", "Raeburn", "Jamison", "Earl", "Wren",
+            "Leighton", "Lagina", "Lucasta", "Dina", "Amaranta", "Jessika", "Claud", "Bernard", "Winifred", "Ebba",
+            "Sammi", "Gall", "Chloe", "Ottoline", "Herbert", "Janice", "Gareth", "Channing", "Caleigh", "Kailee",
+            "Ralphie", "Tamzen", "Quincy", "Beaumont", "Albert", "Jadyn", "Violet", "Luanna", "Moriah", "Humbert",
+            "Jed", "Leona", "Hale", "Mitch", "Marlin", "Nivek", "Darwin", "Dirk", "Liliana", "Meadow", "Bernadine",
+            "Jorie", "Peyton", "Astra", "Roscoe", "Gina", "Lovell", "Jewel", "Romayne", "Rosy", "Imogene",
+            "Margaretta", "Lorinda", "Hopkin", "Bobby", "Flossie", "Bennie", "Horatio", "Jonah", "Lyn", "Deana",
+            "Juliana", "Blanch", "Wright", "Kendal", "Woodrow", "Tania", "Austyn", "Val", "Mona", "Charla", "Rudyard",
+            "Pamela", "Raven", "Zena", "Nicola", "Kaelea", "Conor", "Virgil", "Sonnie", "Goodwin", "Christianne",
+            "Linford", "Myron", "Denton", "Charita", "Brody", "Ginnie", "Harrison", "Jeanine", "Quin", "Isolda",
+            "Zoie", "Pearce", "Margie", "Larrie", "Angelina", "Marcia", "Jessamine", "Delilah", "Dick", "Luana",
+            "Delicia", "Lake", "Luvenia", "Vaughan", "Concordia", "Gayelord", "Cheyenne", "Felix", "Dorris", "Pen",
+            "Kristeen", "Parris", "Everitt", "Josephina", "Amy", "Tommie", "Adrian", "April", "Rosaline", "Zachery",
+            "Trace", "Phoebe", "Jenelle", "Kameron", "Katharine", "Media", "Colton", "Tad", "Quianna", "Kerenza",
+            "Greta", "Luvinia", "Pete", "Tonya", "Beckah", "Barbra", "Jon", "Tetty", "Corey", "Sylvana", "Kizzy",
+            "Korey", "Trey", "Haydee", "Penny", "Mandy", "Panda", "Coline", "Ramsey", "Sukie", "Annabel", "Sarina",
+            "Corbin", "Suzanna", "Rob", "Duana", "Shell", "Jason", "Eddy", "Rube", "Roseann", "Celia", "Brianne",
+            "Nerissa", "Jera", "Humphry", "Ashlynn", "Terrence", "Philippina", "Coreen", "Kolour", "Indiana", "Paget",
+            "Marlyn", "Hester", "Isbel", "Ocean", "Harris", "Leslie", "Vere", "Monroe", "Isabelle", "Bertie", "Clitus",
+            "Dave", "Alethea", "Lessie", "Louiza", "Madlyn", "Garland", "Wolf", "Lalo", "Donny", "Amabel", "Tianna",
+            "Louie", "Susie", "Mackenzie", "Renie", "Tess", "Marmaduke", "Gwendolen", "Bettina", "Beatrix", "Esmund",
+            "Minnie", "Carlie", "Barnabas", "Ruthie", "Honour", "Haylie", "Xavior", "Freddie", "Ericka", "Aretha",
+            "Edie", "Madelina", "Anson", "Tabby", "Derrick", "Jocosa", "Deirdre", "Aislin", "Chastity", "Abigail",
+            "Wynonna", "Zo", "Eldon", "Krystine", "Ghislaine", "Zavia", "Nolene", "Marigold", "Kelley", "Sylvester",
+            "Odell", "George", "Laurene", "Franklyn", "Clarice", "Mo", "Dustin", "Debbi", "Lina", "Tony", "Acacia",
+            "Hettie", "Natalee", "Marcie", "Brittany", "Elnora", "Rachel", "Dawn", "Basil", "Christal", "Anjelica",
+            "Fran", "Tawny", "Delroy", "Tameka", "Lillie", "Ceara", "Deanna", "Deshaun", "Ken", "Bradford", "Justina",
+            "Merle", "Draven", "Gretta", "Harriette", "Webster", "Nathaniel", "Anemone", "Coleen", "Ruth", "Chryssa",
+            "Hortensia", "Saffie", "Deonne", "Leopold", "Harlan", "Lea", "Eppie", "Lucinda", "Tilda", "Fanny", "Titty",
+            "Lockie", "Jepson", "Sherisse", "Maralyn", "Ethel", "Sly", "Ebenezer", "Canute", "Ella", "Freeman",
+            "Reuben", "Olivette", "Nona", "Rik", "Amice", "Kristine", "Kathie", "Jayne", "Jeri", "Mckenna", "Bertram",
+            "Kaylee", "Livia", "Gil", "Wallace", "Maryann", "Keeleigh", "Laurinda", "Doran", "Khloe", "Dakota",
+            "Yaron", "Kimberleigh", "Gytha", "Doris", "Marylyn", "Benton", "Linnette", "Esther", "Jakki", "Rowina",
+            "Marian", "Roselyn", "Norbert", "Maggie", "Caesar", "Phinehas", "Jerry", "Jasmine", "Antonette", "Miriam",
+            "Monna", "Maryvonne", "Jacquetta", "Bernetta", "Napier", "Annie", "Gladwin", "Sheldon", "Aric", "Elouise",
+            "Gawain", "Kristia", "Gabe", "Kyra", "Red", "Tod", "Dudley", "Lorraine", "Ryley", "Sabina", "Poppy",
+            "Leland", "Aileen", "Eglantine", "Alicia", "Jeni", "Addy", "Tiffany", "Geffrey", "Lavina", "Collin",
+            "Clover", "Vin", "Jerome", "Doug", "Vincent", "Florence", "Scarlet", "Celeste", "Desdemona", "Tiphanie",
+            "Kassandra", "Ashton", "Madison", "Art", "Magdalene", "Iona", "Josepha", "Anise", "Ferne", "Derek",
+            "Huffie", "Qiana", "Ysabel", "Tami", "Shannah", "Xavier", "Willard", "Winthrop", "Vickie", "Maura",
+            "Placid", "Tiara", "Reggie", "Elissa", "Isa", "Chrysanta", "Jeff", "Bessie", "Terri", "Amilia", "Brett",
+            "Daniella", "Damion", "Carolina", "Maximillian", "Travers", "Benjamin", "Oprah", "Darcy", "Yolanda",
+            "Nicolina", "Crofton", "Jarrett", "Kaitlin", "Shauna", "Keren", "Bevis", "Kalysta", "Sharron", "Alyssa",
+            "Blythe", "Zelma", "Caelie", "Norwood", "Billie", "Patrick", "Gary", "Cambria", "Tylar", "Mason", "Helen",
+            "Melyssa", "Gene", "Gilberta", "Carter", "Herbie", "Harmonie", "Leola", "Eugenia", "Clint", "Pauletta",
+            "Edwyna", "Georgina", "Teal", "Harper", "Izzy", "Dillon", "Kezia", "Evangeline", "Colene", "Madelaine",
+            "Zilla", "Rudy", "Dottie", "Caris", "Morton", "Marge", "Tacey", "Parker", "Troy", "Liza", "Lewin",
+            "Tracie", "Justine", "Dallas", "Linden", "Ray", "Loretta", "Teri", "Elvis", "Diane", "Julianna", "Manfred",
+            "Denise", "Eireen", "Ann", "Kenith", "Linwood", "Kathlyn", "Bernice", "Shelley", "Oswald", "Amedeus",
+            "Homer", "Tanzi", "Ted", "Ralphina", "Hyacinth", "Lotus", "Matthias", "Arlette", "Clark", "Cecil",
+            "Elspeth", "Alvena", "Noah", "Millard", "Brenden", "Cole", "Philipa", "Nina", "Thelma", "Iantha", "Reid",
+            "Jefferson", "Meg", "Elsie", "Shirlee", "Nathan", "Nancy", "Simona", "Racheal", "Carin", "Emory", "Delice",
+            "Kristi", "Karaugh", "Kaety", "Tilly", "Em", "Alanis", "Darrin", "Jerrie", "Hollis", "Cary", "Marly",
+            "Carita", "Jody", "Farley", "Hervey", "Rosalin", "Cuthbert", "Stewart", "Jodene", "Caileigh", "Briscoe",
+            "Dolores", "Sheree", "Eustace", "Nigel", "Detta", "Barret", "Rowland", "Kenny", "Githa", "Zoey", "Adela",
+            "Petronella", "Opal", "Coleman", "Niles", "Cyril", "Dona", "Alberic", "Allannah", "Jules", "Avalon",
+            "Hadley", "Thomas", "Renita", "Calanthe", "Heron", "Shawnda", "Chet", "Malina", "Manny", "Rina", "Frieda",
+            "Eveleen", "Deshawn", "Amos", "Raelene", "Paige", "Molly", "Nannie", "Ileen", "Brendon", "Milford",
+            "Unice", "Rebeccah", "Caedmon", "Gae", "Doreen", "Vivian", "Louis", "Raphael", "Vergil", "Lise", "Glenn",
+            "Karyn", "Terance", "Reina", "Jake", "Gordon", "Wisdom", "Isiah", "Gervase", "Fern", "Marylou", "Roddy",
+            "Justy", "Derick", "Shantelle", "Adam", "Chantel", "Madoline", "Emmerson", "Lexie", "Mickey", "Stephen",
+            "Dane", "Stacee", "Elwin", "Tracey", "Alexandra", "Ricky", "Ian", "Kasey", "Rita", "Alanna", "Georgene",
+            "Deon", "Zavier", "Ophelia", "Deforest", "Lowell", "Zubin", "Hardy", "Osmund", "Tabatha", "Debby",
+            "Katlyn", "Tallulah", "Priscilla", "Braden", "Wil", "Keziah", "Jen", "Aggie", "Korbin", "Lemoine",
+            "Barnaby", "Tranter", "Goldie", "Roderick", "Trina", "Emery", "Pris", "Sidony", "Adelle", "Tate", "Wilf",
+            "Zola", "Brande", "Chris", "Calanthia", "Lilly", "Kaycee", "Lashonda", "Jasmin", "Elijah", "Shantel",
+            "Simon", "Rosalind", "Jarod", "Kaylie", "Corrine", "Joselyn", "Archibald", "Mariabella", "Winton",
+            "Merlin", "Chad", "Ursula", "Kristopher", "Hewie", "Adrianna", "Lyndsay", "Jasmyn", "Tim", "Evette",
+            "Margaret", "Samson", "Bronte", "Terence", "Leila", "Candice", "Tori", "Jamey", "Coriander", "Conrad",
+            "Floyd", "Karen", "Lorin", "Maximilian", "Cairo", "Emily", "Yasmin", "Karolyn", "Bryan", "Lanny",
+            "Kimberly", "Rick", "Chaz", "Krystle", "Lyric", "Laura", "Garrick", "Flip", "Monty", "Brendan",
+            "Ermintrude", "Rayner", "Merla", "Titus", "Marva", "Patricia", "Leone", "Tracy", "Jaqueline", "Hallam",
+            "Delores", "Cressida", "Carlyle", "Leann", "Kelcey", "Laurence", "Ryan", "Reynold", "Mark", "Collyn",
+            "Audie", "Sammy", "Ellery", "Sallie", "Pamelia", "Adolph", "Lydia", "Titania", "Ron", "Bridger", "Aline",
+            "Read", "Kelleigh", "Weldon", "Irving", "Garey", "Diggory", "Evander", "Kylee", "Deidre", "Ormond",
+            "Laurine", "Reannon", "Arline", "Pat"
+
+    };
+
+    /** Telecom-flavoured terms; presumably sampled when generating synthetic text (confirm at call sites). */
+    public static String[] jargon = { "wireless", "signal", "network", "3G", "plan", "touch-screen",
+            "customer-service", "reachability", "voice-command", "shortcut-menu", "customization", "platform", "speed",
+            "voice-clarity", "voicemail-service" };
+
+    /** Lower-case carrier, handset-maker and product names. */
+    public static String[] vendors = { "at&t", "verizon", "t-mobile", "sprint", "motorola", "samsung", "iphone" };
+
+    /** Organisation names, apparently made up; NOTE(review): public mutable array, callers could alter it. */
+    public static String[] org_list = { "Latsonity", "ganjalax", "Zuncan", "Lexitechno", "Hot-tech", "subtam",
+            "Coneflex", "Ganjatax", "physcane", "Tranzap", "Qvohouse", "Zununoing", "jaydax", "Keytech", "goldendexon",
+            "Villa-tech", "Trustbam", "Newcom", "Voltlane", "Ontohothex", "Ranhotfan", "Alphadax", "Transhigh",
+            "kin-ron", "Doublezone", "Solophase", "Vivaace", "silfind", "Basecone", "sonstreet", "Freshfix",
+            "Techitechi", "Kanelectrics", "linedexon", "Goldcity", "Newfase", "Technohow", "Zimcone", "Salthex",
+            "U-ron", "Solfix", "whitestreet", "Xx-technology", "Hexviafind", "over-it", "Strongtone", "Tripplelane",
+            "geomedia", "Scotcity", "Inchex", "Vaiatech", "Striptaxon", "Hatcom", "tresline", "Sanjodax", "freshdox",
+            "Sumlane", "Quadlane", "Newphase", "overtech", "Voltbam", "Icerunin", "Fixdintex", "Hexsanhex", "Statcode",
+            "Greencare", "U-electrics", "Zamcorporation", "Ontotanin", "Tanzimcare", "Groovetex", "Ganjastrip",
+            "Redelectronics", "Dandamace", "Whitemedia", "strongex", "Streettax", "highfax", "Mathtech", "Xx-drill",
+            "Sublamdox", "Unijobam", "Rungozoom", "Fixelectrics", "Villa-dox", "Ransaofan", "Plexlane", "itlab",
+            "Lexicone", "Fax-fax", "Viatechi", "Inchdox", "Kongreen", "Doncare", "Y-geohex", "Opeelectronics",
+            "Medflex", "Dancode", "Roundhex", "Labzatron", "Newhotplus", "Sancone", "Ronholdings", "Quoline",
+            "zoomplus", "Fix-touch", "Codetechno", "Tanzumbam", "Indiex", "Canline" };
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapter.java
new file mode 100644
index 0000000..dcf3b51
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapter.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.runtime;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.util.logging.Level;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.dataset.adapter.StreamBasedAdapter;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
+
+/** Feed adapter whose input is the stream of a client connection accepted on a TCP port. */
+public class GenericSocketFeedAdapter extends StreamBasedAdapter {
+
+    private static final long serialVersionUID = 1L;
+
+    private final int port;
+    private SocketFeedServer socketFeedServer;
+
+    /** Creates the adapter and binds the listening socket; fails with IOException if the port is unusable. */
+    public GenericSocketFeedAdapter(ITupleParserFactory parserFactory, ARecordType outputType, int port,
+            IHyracksTaskContext ctx, int partition) throws AsterixException, IOException {
+        super(parserFactory, outputType, ctx, partition);
+        this.port = port;
+        this.socketFeedServer = new SocketFeedServer(outputType, port);
+    }
+
+    @Override
+    public void start(int partition, IFrameWriter writer) throws Exception {
+        super.start(partition, writer);
+    }
+
+    /** Blocks until a client connects and returns that connection's input stream. */
+    @Override
+    public InputStream getInputStream(int partition) throws IOException {
+        return socketFeedServer.getInputStream();
+    }
+
+    /** Owns the listening socket on behalf of the enclosing adapter. */
+    private static class SocketFeedServer {
+        private final ServerSocket serverSocket;
+
+        public SocketFeedServer(ARecordType outputtype, int port) throws IOException, AsterixException {
+            try {
+                serverSocket = new ServerSocket(port);
+            } catch (IOException e) {
+                // Fail fast: swallowing this left serverSocket null and every later call hit an NPE.
+                throw new IOException("port: " + port + " unusable", e);
+            }
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("Feed server configured to use port: " + port);
+            }
+        }
+
+        /** Waits for the next client; I/O failures propagate instead of yielding a null stream. */
+        public InputStream getInputStream() throws IOException {
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("waiting for client at " + serverSocket.getLocalPort());
+            }
+            try {
+                Socket client = serverSocket.accept();
+                return client.getInputStream();
+            } catch (IOException e) {
+                LOGGER.severe("Unable to create input stream required for feed ingestion");
+                throw e;
+            }
+        }
+
+        /** Closes the listening socket; a failure to close is logged and otherwise ignored. */
+        public void stop() throws IOException {
+            try {
+                serverSocket.close();
+            } catch (IOException ioe) {
+                LOGGER.warning("Unable to close socket at " + serverSocket.getLocalPort());
+            }
+        }
+    }
+
+    @Override
+    public boolean stop() throws Exception {
+        socketFeedServer.stop();
+        return true;
+    }
+
+    /** Replaces the feed server with one freshly bound to the same port; returns whether that worked. */
+    @Override
+    public boolean handleException(Throwable e) {
+        try {
+            socketFeedServer.stop(); // release the port first, otherwise the rebind cannot succeed
+            this.socketFeedServer = new SocketFeedServer((ARecordType) sourceDatatype, port);
+            return true;
+        } catch (Exception re) {
+            return false;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapterFactory.java
new file mode 100644
index 0000000..e19d757
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/GenericSocketFeedAdapterFactory.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.runtime;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.external.api.IAdapterFactory;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.util.AsterixRuntimeUtil;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
+
+/**
+ * Factory class for creating {@link GenericSocketFeedAdapter}. The
+ * adapter listens at a port for receiving data (from external world).
+ * Data received is transformed into Asterix Data Format (ADM).
+ */
+public class GenericSocketFeedAdapterFactory implements IAdapterFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    private ARecordType outputType;
+    /** One (node controller name, port) pair per partition; filled in by configureSockets. */
+    private List<Pair<String, Integer>> sockets;
+    private Mode mode = Mode.IP;
+    private Map<String, String> configuration;
+    // NOTE(review): never assigned in this class, so createAdapter passes null -- confirm this is intended.
+    private ITupleParserFactory parserFactory;
+
+    /** Configuration key: comma-separated {@code host:port} entries. */
+    public static final String KEY_SOCKETS = "sockets";
+    /** Configuration key: how the host part of each entry is interpreted, see {@link Mode}. */
+    public static final String KEY_MODE = "address-type";
+
+    /** NC: the host part names a node controller; IP: it is an address that is mapped to a node controller. */
+    public static enum Mode {
+        NC,
+        IP
+    }
+
+    @Override
+    public String getAlias() {
+        return "socket_adapter";
+    }
+
+    public List<Pair<String, Integer>> getSockets() {
+        return sockets;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
+        this.configuration = configuration;
+        this.configureSockets(configuration);
+        this.configureFormat(outputType);
+        this.outputType = outputType;
+    }
+
+    private void configureFormat(ARecordType outputType2) {
+        // TODO Auto-generated method stub
+    }
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        List<String> locations = new ArrayList<String>();
+        for (Pair<String, Integer> socket : sockets) {
+            locations.add(socket.first);
+        }
+        return new AlgebricksAbsolutePartitionConstraint(locations.toArray(new String[] {}));
+    }
+
+    @Override
+    public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
+        Pair<String, Integer> socket = sockets.get(partition);
+        return new GenericSocketFeedAdapter(parserFactory, outputType, socket.second, ctx, partition);
+    }
+
+    /** Parses KEY_MODE/KEY_SOCKETS into {@link #sockets}; rejects malformed or non-cluster entries. */
+    private void configureSockets(Map<String, String> configuration) throws Exception {
+        sockets = new ArrayList<Pair<String, Integer>>();
+        String modeValue = configuration.get(KEY_MODE);
+        if (modeValue != null) {
+            // Locale.ROOT keeps the enum lookup independent of the JVM's default locale.
+            mode = Mode.valueOf(modeValue.trim().toUpperCase(java.util.Locale.ROOT));
+        }
+        String socketsValue = configuration.get(KEY_SOCKETS);
+        if (socketsValue == null) {
+            throw new IllegalArgumentException("\'sockets\' parameter not specified as part of adapter configuration");
+        }
+        Map<InetAddress, Set<String>> ncMap = AsterixRuntimeUtil.getNodeControllerMap();
+        List<String> ncs = AsterixRuntimeUtil.getAllNodeControllers();
+        Random random = new Random();
+        for (String socket : socketsValue.split(",")) {
+            String[] socketTokens = socket.split(":");
+            if (socketTokens.length < 2) {
+                throw new IllegalArgumentException("Invalid socket '" + socket.trim() + "', expected host:port");
+            }
+            String host = socketTokens[0].trim();
+            int port = Integer.parseInt(socketTokens[1].trim());
+            Pair<String, Integer> p = null;
+            switch (mode) {
+                case IP:
+                    Set<String> ncsOnIp = ncMap.get(InetAddress.getByName(host));
+                    if (ncsOnIp == null || ncsOnIp.isEmpty()) {
+                        throw new IllegalArgumentException("Invalid host " + host
+                                + " as it is not part of the AsterixDB cluster. Valid choices are "
+                                + StringUtils.join(ncMap.keySet(), ", "));
+                    }
+                    String[] ncArray = ncsOnIp.toArray(new String[] {});
+                    p = new Pair<String, Integer>(ncArray[random.nextInt(ncArray.length)], port);
+                    break;
+                case NC:
+                    if (!ncs.contains(host)) {
+                        throw new IllegalArgumentException("Invalid NC " + host
+                                + " as it is not part of the AsterixDB cluster. Valid choices are "
+                                + StringUtils.join(ncs, ", "));
+                    }
+                    p = new Pair<String, Integer>(host, port);
+                    break;
+            }
+            sockets.add(p);
+        }
+    }
+
+    @Override
+    public ARecordType getAdapterOutputType() {
+        return outputType;
+    }
+}
\ No newline at end of file


[20/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/ITupleTrackingFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/ITupleTrackingFeedAdapter.java b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/ITupleTrackingFeedAdapter.java
index 822390a..4067508 100644
--- a/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/ITupleTrackingFeedAdapter.java
+++ b/asterix-common/src/main/java/org/apache/asterix/common/feeds/api/ITupleTrackingFeedAdapter.java
@@ -18,7 +18,7 @@
  */
 package org.apache.asterix.common.feeds.api;
 
-public interface ITupleTrackingFeedAdapter extends IFeedAdapter {
+public interface ITupleTrackingFeedAdapter extends IDataSourceAdapter {
 
     public void tuplePersistedTimeCallback(long timestamp);
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/parse/IAsterixTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/parse/IAsterixTupleParser.java b/asterix-common/src/main/java/org/apache/asterix/common/parse/IAsterixTupleParser.java
deleted file mode 100644
index 87f4c58..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/parse/IAsterixTupleParser.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.parse;
-
-import java.util.Map;
-
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-
-public interface IAsterixTupleParser extends ITupleParser{
-
-    public void configure(Map<String, String> configuration);
-    
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwardPolicy.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwardPolicy.java b/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwardPolicy.java
deleted file mode 100644
index df5a983..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwardPolicy.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.parse;
-
-import java.util.Map;
-
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public interface ITupleForwardPolicy {
-
-    public static final String PARSER_POLICY = "parser-policy";
-    
-    public enum TupleForwardPolicyType {
-        FRAME_FULL,
-        COUNTER_TIMER_EXPIRED,
-        RATE_CONTROLLED
-    }
-
-    public void configure(Map<String, String> configuration);
-
-    public void initialize(IHyracksCommonContext ctx, IFrameWriter frameWriter) throws HyracksDataException;
-
-    public TupleForwardPolicyType getType();
-
-    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException;
-
-    public void close() throws HyracksDataException;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwarder.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwarder.java b/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwarder.java
new file mode 100644
index 0000000..5ee065a
--- /dev/null
+++ b/asterix-common/src/main/java/org/apache/asterix/common/parse/ITupleForwarder.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.common.parse;
+
+import java.util.Map;
+
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksCommonContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+/** Tuple sink bound to a frame writer; the strategy is presumably chosen via FORWARD_POLICY (confirm). */
+public interface ITupleForwarder {
+    /** Configuration key; presumably selects a {@link TupleForwardPolicy} (confirm against implementations). */
+    String FORWARD_POLICY = "forward-policy";
+    /** Names of the available forwarding policies. */
+    enum TupleForwardPolicy {
+        FRAME_FULL,
+        COUNTER_TIMER_EXPIRED,
+        RATE_CONTROLLED
+    }
+    /** Passes the configuration map to the forwarder. */
+    void configure(Map<String, String> configuration);
+    /** Supplies the context and the frame writer the forwarder works with. */
+    void initialize(IHyracksCommonContext ctx, IFrameWriter frameWriter) throws HyracksDataException;
+    /** Hands the tuple held by {@code tb} to the forwarder. */
+    void addTuple(ArrayTupleBuilder tb) throws HyracksDataException;
+    /** Signals that no further tuples will be added (presumed from the name; confirm). */
+    void close() throws HyracksDataException;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
index d8147b6..6afe692 100644
--- a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
+++ b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
@@ -29,7 +29,6 @@ import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
-import java.net.URL;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -63,12 +62,12 @@ public class TestExecutor {
     private static final long MAX_URL_LENGTH = 2000l;
     private static Method managixExecuteMethod = null;
 
-    private static String host;
-    private static int port;
+    private String host;
+    private int port;
 
     public TestExecutor() {
-        this.host = "127.0.0.1";
-        this.port = 19002;
+        host = "127.0.0.1";
+        port = 19002;
     }
 
     public TestExecutor(String host, int port) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-events/src/main/java/org/apache/asterix/event/service/ZooKeeperService.java
----------------------------------------------------------------------
diff --git a/asterix-events/src/main/java/org/apache/asterix/event/service/ZooKeeperService.java b/asterix-events/src/main/java/org/apache/asterix/event/service/ZooKeeperService.java
index 96fb6ec..a10b5ea 100644
--- a/asterix-events/src/main/java/org/apache/asterix/event/service/ZooKeeperService.java
+++ b/asterix-events/src/main/java/org/apache/asterix/event/service/ZooKeeperService.java
@@ -29,6 +29,10 @@ import java.util.List;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.asterix.common.api.IClusterManagementWork.ClusterState;
+import org.apache.asterix.event.error.EventException;
+import org.apache.asterix.event.model.AsterixInstance;
+import org.apache.asterix.installer.schema.conf.Configuration;
 import org.apache.log4j.Logger;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
@@ -38,10 +42,6 @@ import org.apache.zookeeper.Watcher.Event.KeeperState;
 import org.apache.zookeeper.ZooDefs.Ids;
 import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.data.Stat;
-import org.apache.asterix.common.api.IClusterManagementWork.ClusterState;
-import org.apache.asterix.event.error.EventException;
-import org.apache.asterix.event.model.AsterixInstance;
-import org.apache.asterix.installer.schema.conf.Configuration;
 
 public class ZooKeeperService implements ILookupService {
 
@@ -63,6 +63,7 @@ public class ZooKeeperService implements ILookupService {
     private LinkedBlockingQueue<String> msgQ = new LinkedBlockingQueue<String>();
     private ZooKeeperWatcher watcher = new ZooKeeperWatcher(msgQ);
 
+    @Override
     public boolean isRunning(Configuration conf) throws Exception {
         List<String> servers = conf.getZookeeper().getServers().getServer();
         int clientPort = conf.getZookeeper().getClientPort().intValue();
@@ -92,6 +93,7 @@ public class ZooKeeperService implements ILookupService {
         return isRunning;
     }
 
+    @Override
     public void startService(Configuration conf) throws Exception {
         if (LOGGER.isDebugEnabled()) {
             LOGGER.debug("Starting ZooKeeper at " + zkConnectionString);
@@ -107,22 +109,29 @@ public class ZooKeeperService implements ILookupService {
         for (String zkServer : zkServers) {
             cmdBuffer.append(zkServer + " ");
         }
-        Runtime.getRuntime().exec(cmdBuffer.toString());
+        //TODO: Create a better way to interact with zookeeper
+        Process zkProcess = Runtime.getRuntime().exec(cmdBuffer.toString());
+        int output = zkProcess.waitFor();
+        if (output != 0) {
+            throw new Exception("Error starting zookeeper server. output code = " + output);
+        }
         zk = new ZooKeeper(zkConnectionString, ZOOKEEPER_SESSION_TIME_OUT, watcher);
-        String head = msgQ.poll(10, TimeUnit.SECONDS);
+        String head = msgQ.poll(60, TimeUnit.SECONDS);
         if (head == null) {
             StringBuilder msg = new StringBuilder(
                     "Unable to start Zookeeper Service. This could be because of the following reasons.\n");
             msg.append("1) Managix is incorrectly configured. Please run " + "managix validate"
                     + " to run a validation test and correct the errors reported.");
-            msg.append("\n2) If validation in (1) is successful, ensure that java_home parameter is set correctly in Managix configuration ("
-                    + AsterixEventServiceUtil.MANAGIX_CONF_XML + ")");
+            msg.append(
+                    "\n2) If validation in (1) is successful, ensure that java_home parameter is set correctly in Managix configuration ("
+                            + AsterixEventServiceUtil.MANAGIX_CONF_XML + ")");
             throw new Exception(msg.toString());
         }
         msgQ.take();
         createRootIfNotExist();
     }
 
+    @Override
     public void stopService(Configuration conf) throws Exception {
         if (LOGGER.isDebugEnabled()) {
             LOGGER.debug("Stopping ZooKeeper running at " + zkConnectionString);
@@ -141,6 +150,7 @@ public class ZooKeeperService implements ILookupService {
         }
     }
 
+    @Override
     public void writeAsterixInstance(AsterixInstance asterixInstance) throws Exception {
         String instanceBasePath = ASTERIX_INSTANCE_BASE_PATH + File.separator + asterixInstance.getName();
         ByteArrayOutputStream b = new ByteArrayOutputStream();
@@ -166,6 +176,7 @@ public class ZooKeeperService implements ILookupService {
         }
     }
 
+    @Override
     public AsterixInstance getAsterixInstance(String name) throws Exception {
         String path = ASTERIX_INSTANCE_BASE_PATH + File.separator + name;
         Stat stat = zk.exists(ASTERIX_INSTANCE_BASE_PATH + File.separator + name, false);
@@ -176,10 +187,12 @@ public class ZooKeeperService implements ILookupService {
         return readAsterixInstanceObject(asterixInstanceBytes);
     }
 
+    @Override
     public boolean exists(String path) throws Exception {
         return zk.exists(ASTERIX_INSTANCE_BASE_PATH + File.separator + path, false) != null;
     }
 
+    @Override
     public void removeAsterixInstance(String name) throws Exception {
         if (!exists(name)) {
             throw new EventException("Asterix instance by name " + name + " does not exists.");
@@ -195,6 +208,7 @@ public class ZooKeeperService implements ILookupService {
         zk.delete(ASTERIX_INSTANCE_BASE_PATH + File.separator + name, DEFAULT_NODE_VERSION);
     }
 
+    @Override
     public List<AsterixInstance> getAsterixInstances() throws Exception {
         List<String> instanceNames = zk.getChildren(ASTERIX_INSTANCE_BASE_PATH, false);
         List<AsterixInstance> asterixInstances = new ArrayList<AsterixInstance>();
@@ -207,13 +221,14 @@ public class ZooKeeperService implements ILookupService {
         return asterixInstances;
     }
 
-    private AsterixInstance readAsterixInstanceObject(byte[] asterixInstanceBytes) throws IOException,
-            ClassNotFoundException {
+    private AsterixInstance readAsterixInstanceObject(byte[] asterixInstanceBytes)
+            throws IOException, ClassNotFoundException {
         ByteArrayInputStream b = new ByteArrayInputStream(asterixInstanceBytes);
         ObjectInputStream ois = new ObjectInputStream(b);
         return (AsterixInstance) ois.readObject();
     }
 
+    @Override
     public void updateAsterixInstance(AsterixInstance updatedInstance) throws Exception {
         removeAsterixInstance(updatedInstance.getName());
         writeAsterixInstance(updatedInstance);
@@ -249,6 +264,7 @@ class ZooKeeperWatcher implements Watcher {
         this.msgQ = msgQ;
     }
 
+    @Override
     public void process(WatchedEvent wEvent) {
         if (wEvent.getState() == KeeperState.SyncConnected) {
             msgQ.add("connected");
@@ -276,7 +292,8 @@ class ZookeeperUtil {
         List<String> servers = conf.getZookeeper().getServers().getServer();
         int serverId = 1;
         for (String server : servers) {
-            buffer.append("server" + "." + serverId + "=" + server + ":" + leaderConnPort + ":" + leaderElecPort + "\n");
+            buffer.append(
+                    "server" + "." + serverId + "=" + server + ":" + leaderConnPort + ":" + leaderElecPort + "\n");
             serverId++;
         }
         AsterixEventServiceUtil.dumpToFile(zooKeeperConfigPath, buffer.toString());

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-external-data/pom.xml b/asterix-external-data/pom.xml
index 4062b23..867c96b 100644
--- a/asterix-external-data/pom.xml
+++ b/asterix-external-data/pom.xml
@@ -35,6 +35,43 @@
     <build>
         <plugins>
             <plugin>
+                <groupId>org.apache.asterix</groupId>
+                <artifactId>lexer-generator-maven-plugin</artifactId>
+                <version>0.8.8-SNAPSHOT</version>
+                <configuration>
+                    <grammarFile>src/main/resources/adm.grammar</grammarFile>
+                    <outputDir>${project.build.directory}/generated-sources/org/apache/asterix/runtime/operators/file/adm</outputDir>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>generate-lexer</id>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>generate-lexer</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <version>1.9</version>
+                <executions>
+                    <execution>
+                        <id>add-source</id>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>add-source</goal>
+                        </goals>
+                        <configuration>
+                            <sources>
+                                <source>${project.build.directory}/generated-sources/</source>
+                            </sources>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
                 <groupId>org.jvnet.jaxb2.maven2</groupId>
                 <artifactId>maven-jaxb2-plugin</artifactId>
                 <version>0.9.0</version>
@@ -91,6 +128,50 @@
                 </executions>
             </plugin>
         </plugins>
+        <pluginManagement>
+            <plugins>
+                <!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself. The filters below must match the plugin coordinates exactly: generate-lexer is executed (but not on incremental builds) and add-source is ignored.-->
+                <plugin>
+                    <groupId>org.eclipse.m2e</groupId>
+                    <artifactId>lifecycle-mapping</artifactId>
+                    <version>1.0.0</version>
+                    <configuration>
+                        <lifecycleMappingMetadata>
+                            <pluginExecutions>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.apache.asterix</groupId>
+                                        <artifactId>lexer-generator-maven-plugin</artifactId>
+                                        <versionRange>[0.1,)</versionRange>
+                                        <goals>
+                                            <goal>generate-lexer</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <execute>
+                                            <runOnIncremental>false</runOnIncremental>
+                                        </execute>
+                                    </action>
+                                </pluginExecution>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.codehaus.mojo</groupId>
+                                        <artifactId>build-helper-maven-plugin</artifactId>
+                                        <versionRange>[1.7,)</versionRange>
+                                        <goals>
+                                            <goal>add-source</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <ignore />
+                                    </action>
+                                </pluginExecution>
+                            </pluginExecutions>
+                        </lifecycleMappingMetadata>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
     </build>
     <dependencies>
         <dependency>
@@ -139,6 +220,10 @@
             <scope>compile</scope>
         </dependency>
         <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-hdfs</artifactId>
+        </dependency>
+        <dependency>
             <groupId>net.java.dev.rome</groupId>
             <artifactId>rome-fetcher</artifactId>
             <version>1.0.0</version>
@@ -186,5 +271,11 @@
             <artifactId>jdo2-api</artifactId>
             <version>2.3-20090302111651</version>
         </dependency>
+        <dependency>
+            <groupId>com.e-movimento.tinytools</groupId>
+            <artifactId>privilegedaccessor</artifactId>
+            <version>1.2.2</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/CNNFeedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/CNNFeedAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/CNNFeedAdapterFactory.java
deleted file mode 100644
index 8b7b6d5..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/CNNFeedAdapterFactory.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.dataset.adapter.RSSFeedAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * A factory class for creating the @see {CNNFeedAdapter}.
- */
-public class CNNFeedAdapterFactory implements IFeedAdapterFactory {
-    private static final long serialVersionUID = 1L;
-
-    private Map<String, String> configuration;
-
-    private List<String> feedURLs = new ArrayList<String>();
-    private static Map<String, String> topicFeeds = new HashMap<String, String>();
-    private ARecordType recordType;
-    public static final String KEY_RSS_URL = "topic";
-    public static final String KEY_INTERVAL = "interval";
-    public static final String TOP_STORIES = "topstories";
-    public static final String WORLD = "world";
-    public static final String US = "us";
-    public static final String SPORTS = "sports";
-    public static final String BUSINESS = "business";
-    public static final String POLITICS = "politics";
-    public static final String CRIME = "crime";
-    public static final String TECHNOLOGY = "technology";
-    public static final String HEALTH = "health";
-    public static final String ENTERNTAINMENT = "entertainemnt";
-    public static final String TRAVEL = "travel";
-    public static final String LIVING = "living";
-    public static final String VIDEO = "video";
-    public static final String STUDENT = "student";
-    public static final String POPULAR = "popular";
-    public static final String RECENT = "recent";
-
-    private void initTopics() {
-        topicFeeds.put(TOP_STORIES, "http://rss.cnn.com/rss/cnn_topstories.rss");
-        topicFeeds.put(WORLD, "http://rss.cnn.com/rss/cnn_world.rss");
-        topicFeeds.put(US, "http://rss.cnn.com/rss/cnn_us.rss");
-        topicFeeds.put(SPORTS, "http://rss.cnn.com/rss/si_topstories.rss");
-        topicFeeds.put(BUSINESS, "http://rss.cnn.com/rss/money_latest.rss");
-        topicFeeds.put(POLITICS, "http://rss.cnn.com/rss/cnn_allpolitics.rss");
-        topicFeeds.put(CRIME, "http://rss.cnn.com/rss/cnn_crime.rss");
-        topicFeeds.put(TECHNOLOGY, "http://rss.cnn.com/rss/cnn_tech.rss");
-        topicFeeds.put(HEALTH, "http://rss.cnn.com/rss/cnn_health.rss");
-        topicFeeds.put(ENTERNTAINMENT, "http://rss.cnn.com/rss/cnn_showbiz.rss");
-        topicFeeds.put(LIVING, "http://rss.cnn.com/rss/cnn_living.rss");
-        topicFeeds.put(VIDEO, "http://rss.cnn.com/rss/cnn_freevideo.rss");
-        topicFeeds.put(TRAVEL, "http://rss.cnn.com/rss/cnn_travel.rss");
-        topicFeeds.put(STUDENT, "http://rss.cnn.com/rss/cnn_studentnews.rss");
-        topicFeeds.put(POPULAR, "http://rss.cnn.com/rss/cnn_mostpopular.rss");
-        topicFeeds.put(RECENT, "http://rss.cnn.com/rss/cnn_latest.rss");
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        RSSFeedAdapter cnnFeedAdapter = new RSSFeedAdapter(configuration, recordType, ctx);
-        return cnnFeedAdapter;
-    }
-
-    @Override
-    public String getName() {
-        return "cnn_feed";
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.configuration = configuration;
-        String rssURLProperty = configuration.get(KEY_RSS_URL);
-        if (rssURLProperty == null) {
-            throw new IllegalArgumentException("no rss url provided");
-        }
-        initializeFeedURLs(rssURLProperty);
-        this.recordType = outputType;
-    }
-
-    private void initializeFeedURLs(String rssURLProperty) {
-        feedURLs.clear();
-        String[] rssTopics = rssURLProperty.split(",");
-        initTopics();
-        for (String topic : rssTopics) {
-            String feedURL = topicFeeds.get(topic);
-            if (feedURL == null) {
-                throw new IllegalArgumentException(
-                        " unknown topic :" + topic + " please choose from the following " + getValidTopics());
-            }
-            feedURLs.add(feedURL);
-        }
-    }
-
-    private static String getValidTopics() {
-        StringBuilder builder = new StringBuilder();
-        for (String key : topicFeeds.keySet()) {
-            builder.append(key);
-            builder.append(" ");
-        }
-        return new String(builder);
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return new AlgebricksCountPartitionConstraint(feedURLs.size());
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return recordType;
-    }
-
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/GenericAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/GenericAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/GenericAdapterFactory.java
new file mode 100644
index 0000000..2e7158d
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/GenericAdapterFactory.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.adapter.factory;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.external.api.IAdapterFactory;
+import org.apache.asterix.external.api.IDataFlowController;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.api.IExternalDataSourceFactory;
+import org.apache.asterix.external.api.IIndexibleExternalDataSource;
+import org.apache.asterix.external.api.IIndexingAdapterFactory;
+import org.apache.asterix.external.dataset.adapter.GenericAdapter;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.provider.DataflowControllerProvider;
+import org.apache.asterix.external.provider.DatasourceFactoryProvider;
+import org.apache.asterix.external.provider.ParserFactoryProvider;
+import org.apache.asterix.external.util.ExternalDataCompatibilityUtils;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Adapter factory that pairs an {@link IExternalDataSourceFactory} with an {@link IDataParserFactory}.
+ */
+public class GenericAdapterFactory implements IIndexingAdapterFactory, IAdapterFactory {
+
+    private static final long serialVersionUID = 1L;
+    private IExternalDataSourceFactory dataSourceFactory;
+    private IDataParserFactory dataParserFactory;
+    private ARecordType recordType;
+    private Map<String, String> configuration;
+    private List<ExternalFile> files;
+    private boolean indexingOp;
+
+    /** Stores the file snapshot and indexing flag; {@link #configure} forwards them to an indexible data source. */
+    @Override
+    public void setSnapshot(List<ExternalFile> files, boolean indexingOp) {
+        this.files = files;
+        this.indexingOp = indexingOp;
+    }
+
+    @Override
+    public String getAlias() {
+        return ExternalDataConstants.ALIAS_GENERIC_ADAPTER;
+    }
+
+    /** Delegates to the data source factory, which is only assigned by {@link #configure}. */
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        return dataSourceFactory.getPartitionConstraint();
+    }
+
+    /** Runs on each node controller (after serialization-deserialization). */
+    @Override
+    public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
+        return new GenericAdapter(DataflowControllerProvider.getDataflowController(recordType, ctx, partition,
+                dataSourceFactory, dataParserFactory, configuration, indexingOp));
+    }
+
+    /** Resolves both factories from the configuration, configures them, then runs the compatibility validation. */
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
+        this.configuration = configuration;
+        this.recordType = outputType;
+        dataSourceFactory = DatasourceFactoryProvider.getExternalDataSourceFactory(configuration);
+        dataParserFactory = ParserFactoryProvider.getDataParserFactory(configuration);
+        // A snapshot, when one was set, is applied before the data source factory itself is configured.
+        if (dataSourceFactory.isIndexible() && (files != null)) {
+            ((IIndexibleExternalDataSource) dataSourceFactory).setSnapshot(files, indexingOp);
+        }
+        dataSourceFactory.configure(configuration);
+        dataParserFactory.setRecordType(outputType);
+        dataParserFactory.configure(configuration);
+        ExternalDataCompatibilityUtils.validateCompatibility(dataSourceFactory, dataParserFactory);
+    }
+
+    @Override
+    public ARecordType getAdapterOutputType() {
+        return recordType;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSAdapterFactory.java
deleted file mode 100644
index c4a96f4..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSAdapterFactory.java
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.dataset.adapter.HDFSAdapter;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.dataflow.HDFSObjectTupleParserFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.AsterixAppContextInfo;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hadoop.fs.BlockLocation;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.ICCContext;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.exceptions.HyracksException;
-import org.apache.hyracks.hdfs.dataflow.ConfFactory;
-import org.apache.hyracks.hdfs.dataflow.InputSplitsFactory;
-import org.apache.hyracks.hdfs.scheduler.Scheduler;
-
-/**
- * A factory class for creating an instance of HDFSAdapter
- */
-public class HDFSAdapterFactory extends StreamBasedAdapterFactory implements IAdapterFactory {
-    private static final long serialVersionUID = 1L;
-
-    public static final String HDFS_ADAPTER_NAME = "hdfs";
-    public static final String CLUSTER_LOCATIONS = "cluster-locations";
-    public static transient String SCHEDULER = "hdfs-scheduler";
-
-    public static final String KEY_HDFS_URL = "hdfs";
-    public static final String KEY_PATH = "path";
-    public static final String KEY_INPUT_FORMAT = "input-format";
-    public static final String INPUT_FORMAT_TEXT = "text-input-format";
-    public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
-    // New
-    public static final String KEY_PARSER = "parser";
-    public static final String PARSER_HIVE = "hive-parser";
-    public static final String INPUT_FORMAT_RC = "rc-input-format";
-    public static final String FORMAT_BINARY = "binary";
-
-    public static final String KEY_LOCAL_SOCKET_PATH = "local-socket-path";
-
-    // Hadoop property names constants
-    public static final String CLASS_NAME_TEXT_INPUT_FORMAT = "org.apache.hadoop.mapred.TextInputFormat";
-    public static final String CLASS_NAME_SEQUENCE_INPUT_FORMAT = "org.apache.hadoop.mapred.SequenceFileInputFormat";
-    public static final String CLASS_NAME_RC_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
-    public static final String CLASS_NAME_HDFS_FILESYSTEM = "org.apache.hadoop.hdfs.DistributedFileSystem";
-    public static final String KEY_HADOOP_FILESYSTEM_URI = "fs.defaultFS";
-    public static final String KEY_HADOOP_FILESYSTEM_CLASS = "fs.hdfs.impl";
-    public static final String KEY_HADOOP_INPUT_DIR = "mapred.input.dir";
-    public static final String KEY_HADOOP_INPUT_FORMAT = "mapred.input.format.class";
-    public static final String KEY_HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit";
-    public static final String KEY_HADOOP_SOCKET_PATH = "dfs.domain.socket.path";
-
-    private transient AlgebricksPartitionConstraint clusterLocations;
-    private String[] readSchedule;
-    private boolean executed[];
-    private InputSplitsFactory inputSplitsFactory;
-    private ConfFactory confFactory;
-    private IAType atype;
-    private boolean configured = false;
-    public static Scheduler hdfsScheduler;
-    private static boolean initialized = false;
-    protected List<ExternalFile> files;
-
-    private static Scheduler initializeHDFSScheduler() {
-        ICCContext ccContext = AsterixAppContextInfo.getInstance().getCCApplicationContext().getCCContext();
-        Scheduler scheduler = null;
-        try {
-            scheduler = new Scheduler(ccContext.getClusterControllerInfo().getClientNetAddress(),
-                    ccContext.getClusterControllerInfo().getClientNetPort());
-        } catch (HyracksException e) {
-            throw new IllegalStateException("Cannot obtain hdfs scheduler");
-        }
-        return scheduler;
-    }
-
-    protected static final Map<String, String> formatClassNames = initInputFormatMap();
-
-    protected static Map<String, String> initInputFormatMap() {
-        Map<String, String> formatClassNames = new HashMap<String, String>();
-        formatClassNames.put(INPUT_FORMAT_TEXT, CLASS_NAME_TEXT_INPUT_FORMAT);
-        formatClassNames.put(INPUT_FORMAT_SEQUENCE, CLASS_NAME_SEQUENCE_INPUT_FORMAT);
-        formatClassNames.put(INPUT_FORMAT_RC, CLASS_NAME_RC_INPUT_FORMAT);
-        return formatClassNames;
-    }
-
-    public JobConf getJobConf() throws HyracksDataException {
-        return confFactory.getConf();
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        JobConf conf = confFactory.getConf();
-        InputSplit[] inputSplits = inputSplitsFactory.getSplits();
-        String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
-        HDFSAdapter hdfsAdapter = new HDFSAdapter(atype, readSchedule, executed, inputSplits, conf, nodeName,
-                parserFactory, ctx, configuration, files);
-        return hdfsAdapter;
-    }
-
-    @Override
-    public String getName() {
-        return HDFS_ADAPTER_NAME;
-    }
-
-    public static JobConf configureJobConf(Map<String, String> configuration) throws Exception {
-        JobConf conf = new JobConf();
-        String formatClassName = formatClassNames.get(configuration.get(KEY_INPUT_FORMAT).trim());
-        String localShortCircuitSocketPath = configuration.get(KEY_LOCAL_SOCKET_PATH);
-        if (formatClassName == null) {
-            formatClassName = configuration.get(KEY_INPUT_FORMAT).trim();
-        }
-        conf.set(KEY_HADOOP_FILESYSTEM_URI, configuration.get(KEY_HDFS_URL).trim());
-        conf.set(KEY_HADOOP_FILESYSTEM_CLASS, CLASS_NAME_HDFS_FILESYSTEM);
-        conf.setClassLoader(HDFSAdapter.class.getClassLoader());
-        conf.set(KEY_HADOOP_INPUT_DIR, configuration.get(KEY_PATH).trim());
-        conf.set(KEY_HADOOP_INPUT_FORMAT, formatClassName);
-
-        // Enable local short circuit reads if user supplied the parameters
-        if (localShortCircuitSocketPath != null) {
-            conf.set(KEY_HADOOP_SHORT_CIRCUIT, "true");
-            conf.set(KEY_HADOOP_SOCKET_PATH, localShortCircuitSocketPath.trim());
-        }
-        return conf;
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        if (!configured) {
-            throw new IllegalStateException("Adapter factory has not been configured yet");
-        }
-        return clusterLocations;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        if (!initialized) {
-            hdfsScheduler = initializeHDFSScheduler();
-            initialized = true;
-        }
-        this.configuration = configuration;
-        JobConf conf = configureJobConf(configuration);
-        confFactory = new ConfFactory(conf);
-
-        clusterLocations = getClusterLocations();
-        int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
-
-        // if files list was set, we restrict the splits to the list since this dataset is indexed
-        InputSplit[] inputSplits;
-        if (files == null) {
-            inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
-        } else {
-            inputSplits = getSplits(conf);
-        }
-        inputSplitsFactory = new InputSplitsFactory(inputSplits);
-
-        readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
-        executed = new boolean[readSchedule.length];
-        Arrays.fill(executed, false);
-        configured = true;
-
-        atype = outputType;
-        configureFormat(atype);
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    public static AlgebricksPartitionConstraint getClusterLocations() {
-        ArrayList<String> locs = new ArrayList<String>();
-        Map<String, String[]> stores = AsterixAppContextInfo.getInstance().getMetadataProperties().getStores();
-        for (String i : stores.keySet()) {
-            String[] nodeStores = stores.get(i);
-            for (int j = 0; j < nodeStores.length; j++) {
-                //two readers per partition
-                locs.add(i);
-                locs.add(i);
-            }
-        }
-        String[] cluster = new String[locs.size()];
-        cluster = locs.toArray(cluster);
-        return new AlgebricksAbsolutePartitionConstraint(cluster);
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return (ARecordType) atype;
-    }
-
-    @Override
-    public InputDataFormat getInputDataFormat() {
-        return InputDataFormat.UNKNOWN;
-    }
-
-    /*
-     * This method is overridden to do the following:
-     * if data is text data (adm or delimited text), it will use a text tuple parser,
-     * otherwise it will use hdfs record object parser
-     */
-    @Override
-    protected void configureFormat(IAType sourceDatatype) throws Exception {
-        String specifiedFormat = configuration.get(AsterixTupleParserFactory.KEY_FORMAT);
-        if (specifiedFormat == null) {
-            throw new IllegalArgumentException(" Unspecified data format");
-        }
-
-        if (AsterixTupleParserFactory.FORMAT_BINARY.equalsIgnoreCase(specifiedFormat)) {
-            parserFactory = new HDFSObjectTupleParserFactory((ARecordType) atype, this, configuration);
-        } else {
-            InputDataFormat inputFormat = InputDataFormat.UNKNOWN;
-            if (AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT.equalsIgnoreCase(specifiedFormat)) {
-                inputFormat = InputDataFormat.DELIMITED;
-            } else if (AsterixTupleParserFactory.FORMAT_ADM.equalsIgnoreCase(specifiedFormat)) {
-                inputFormat = InputDataFormat.ADM;
-            }
-            parserFactory = new AsterixTupleParserFactory(configuration, (ARecordType) sourceDatatype, inputFormat);
-        }
-
-    }
-
-    /**
-     * Instead of creating the split using the input format, we do it manually
-     * This function returns fileSplits (1 per hdfs file block) irrespective of the number of partitions
-     * and the produced splits only cover intersection between current files in hdfs and files stored internally
-     * in AsterixDB
-     * 1. NoOp means appended file
-     * 2. AddOp means new file
-     * 3. UpdateOp means the delta of a file
-     *
-     * @return
-     * @throws IOException
-     */
-    protected InputSplit[] getSplits(JobConf conf) throws IOException {
-        ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
-        ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
-        // Create file system object
-        try (FileSystem fs = FileSystem.get(conf)) {
-            // Create files splits
-            for (ExternalFile file : files) {
-                Path filePath = new Path(file.getFileName());
-                FileStatus fileStatus;
-                try {
-                    fileStatus = fs.getFileStatus(filePath);
-                } catch (FileNotFoundException e) {
-                    // file was deleted at some point, skip to next file
-                    continue;
-                }
-                if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
-                        && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
-                    // Get its information from HDFS name node
-                    BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
-                    // Create a split per block
-                    for (BlockLocation block : fileBlocks) {
-                        if (block.getOffset() < file.getSize()) {
-                            fileSplits.add(new FileSplit(filePath,
-                                    block.getOffset(), (block.getLength() + block.getOffset()) < file.getSize()
-                                            ? block.getLength() : (file.getSize() - block.getOffset()),
-                                    block.getHosts()));
-                            orderedExternalFiles.add(file);
-                        }
-                    }
-                } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
-                        && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
-                    long oldSize = 0L;
-                    long newSize = file.getSize();
-                    for (int i = 0; i < files.size(); i++) {
-                        if (files.get(i).getFileName() == file.getFileName()
-                                && files.get(i).getSize() != file.getSize()) {
-                            newSize = files.get(i).getSize();
-                            oldSize = file.getSize();
-                            break;
-                        }
-                    }
-
-                    // Get its information from HDFS name node
-                    BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
-                    // Create a split per block
-                    for (BlockLocation block : fileBlocks) {
-                        if (block.getOffset() + block.getLength() > oldSize) {
-                            if (block.getOffset() < newSize) {
-                                // Block interact with delta -> Create a split
-                                long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
-                                long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
-                                        : block.getOffset() + block.getLength() - newSize;
-                                long splitLength = block.getLength() - startCut - endCut;
-                                fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
-                                        block.getHosts()));
-                                orderedExternalFiles.add(file);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        files = orderedExternalFiles;
-        return fileSplits.toArray(new FileSplit[fileSplits.size()]);
-    }
-
-    // Used to tell the factory to restrict the splits to the intersection between this list and the actual files on hdfs side
-    public void setFiles(List<ExternalFile> files) {
-        this.files = files;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
deleted file mode 100644
index 8bf6d93..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.dataset.adapter.HDFSIndexingAdapter;
-import org.apache.asterix.external.indexing.dataflow.HDFSIndexingParserFactory;
-import org.apache.asterix.external.indexing.dataflow.IndexingScheduler;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.AsterixAppContextInfo;
-import org.apache.asterix.om.util.NonTaggedFormatUtil;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.DelimitedDataParser;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-import org.apache.hyracks.api.context.ICCContext;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksException;
-import org.apache.hyracks.dataflow.common.data.parsers.DoubleParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.FloatParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.LongParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
-import org.apache.hyracks.hdfs.dataflow.ConfFactory;
-import org.apache.hyracks.hdfs.dataflow.InputSplitsFactory;
-
-public class HDFSIndexingAdapterFactory extends HDFSAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private transient AlgebricksPartitionConstraint clusterLocations;
-    private String[] readSchedule;
-    private boolean executed[];
-    private InputSplitsFactory inputSplitsFactory;
-    private ConfFactory confFactory;
-    private IAType atype;
-    private boolean configured = false;
-    public static IndexingScheduler hdfsScheduler;
-    private static boolean initialized = false;
-    private Map<String, String> configuration;
-
-    public static final String HDFS_INDEXING_ADAPTER = "hdfs-indexing-adapter";
-
-    private static IndexingScheduler initializeHDFSScheduler() {
-        ICCContext ccContext = AsterixAppContextInfo.getInstance().getCCApplicationContext().getCCContext();
-        IndexingScheduler scheduler = null;
-        try {
-            scheduler = new IndexingScheduler(ccContext.getClusterControllerInfo().getClientNetAddress(),
-                    ccContext.getClusterControllerInfo().getClientNetPort());
-        } catch (HyracksException e) {
-            throw new IllegalStateException("Cannot obtain hdfs scheduler");
-        }
-        return scheduler;
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public String getName() {
-        return HDFS_INDEXING_ADAPTER;
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        if (!configured) {
-            throw new IllegalStateException("Adapter factory has not been configured yet");
-        }
-        return clusterLocations;
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        JobConf conf = confFactory.getConf();
-        InputSplit[] inputSplits = inputSplitsFactory.getSplits();
-        String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
-        ((HDFSIndexingParserFactory) parserFactory).setJobConf(conf);
-        ((HDFSIndexingParserFactory) parserFactory).setArguments(configuration);
-        HDFSIndexingAdapter hdfsIndexingAdapter = new HDFSIndexingAdapter(atype, readSchedule, executed, inputSplits,
-                conf, clusterLocations, files, parserFactory, ctx, nodeName,
-                (String) configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT),
-                (String) configuration.get(AsterixTupleParserFactory.KEY_FORMAT));
-        return hdfsIndexingAdapter;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        if (!initialized) {
-            hdfsScheduler = initializeHDFSScheduler();
-            initialized = true;
-        }
-        this.configuration = configuration;
-        JobConf conf = HDFSAdapterFactory.configureJobConf(configuration);
-        confFactory = new ConfFactory(conf);
-        clusterLocations = getClusterLocations();
-        InputSplit[] inputSplits = getSplits(conf);
-        inputSplitsFactory = new InputSplitsFactory(inputSplits);
-        readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
-        executed = new boolean[readSchedule.length];
-        Arrays.fill(executed, false);
-        configured = true;
-        atype = outputType;
-        // The function below is overwritten to create indexing adapter factory instead of regular adapter factory
-        configureFormat(atype);
-    }
-
-    @Override
-    protected void configureFormat(IAType sourceDatatype) throws Exception {
-
-        char delimiter = AsterixTupleParserFactory.getDelimiter(configuration);
-        char quote = AsterixTupleParserFactory.getQuote(configuration, delimiter);
-
-        parserFactory = new HDFSIndexingParserFactory((ARecordType) atype,
-                configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT),
-                configuration.get(AsterixTupleParserFactory.KEY_FORMAT), delimiter, quote,
-                configuration.get(HDFSAdapterFactory.KEY_PARSER));
-    }
-
-    /**
-     * A static function that creates and return delimited text data parser
-     *
-     * @param recordType
-     *            (the record type to be parsed)
-     * @param delimiter
-     *            (the delimiter value)
-     * @return
-     */
-    public static DelimitedDataParser getDelimitedDataParser(ARecordType recordType, char delimiter, char quote) {
-        int n = recordType.getFieldTypes().length;
-        IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
-        for (int i = 0; i < n; i++) {
-            ATypeTag tag = null;
-            if (recordType.getFieldTypes()[i].getTypeTag() == ATypeTag.UNION) {
-                if (!NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i])) {
-                    throw new NotImplementedException("Non-optional UNION type is not supported.");
-                }
-                tag = ((AUnionType) recordType.getFieldTypes()[i]).getNullableType().getTypeTag();
-            } else {
-                tag = recordType.getFieldTypes()[i].getTypeTag();
-            }
-            if (tag == null) {
-                throw new NotImplementedException("Failed to get the type information for field " + i + ".");
-            }
-            IValueParserFactory vpf = valueParserFactoryMap.get(tag);
-            if (vpf == null) {
-                throw new NotImplementedException("No value parser factory for delimited fields of type " + tag);
-            }
-            fieldParserFactories[i] = vpf;
-        }
-        return new DelimitedDataParser(recordType, fieldParserFactories, delimiter, quote, false);
-    }
-
-    public static AlgebricksPartitionConstraint getClusterLocations() {
-        ArrayList<String> locs = new ArrayList<String>();
-        Map<String, String[]> stores = AsterixAppContextInfo.getInstance().getMetadataProperties().getStores();
-        for (String i : stores.keySet()) {
-            String[] nodeStores = stores.get(i);
-            for (int j = 0; j < nodeStores.length; j++) {
-                locs.add(i);
-            }
-        }
-        String[] cluster = new String[locs.size()];
-        cluster = locs.toArray(cluster);
-        return new AlgebricksAbsolutePartitionConstraint(cluster);
-    }
-
-    private static Map<ATypeTag, IValueParserFactory> valueParserFactoryMap = initializeValueParserFactoryMap();
-
-    private static Map<ATypeTag, IValueParserFactory> initializeValueParserFactoryMap() {
-        Map<ATypeTag, IValueParserFactory> m = new HashMap<ATypeTag, IValueParserFactory>();
-        m.put(ATypeTag.INT32, IntegerParserFactory.INSTANCE);
-        m.put(ATypeTag.FLOAT, FloatParserFactory.INSTANCE);
-        m.put(ATypeTag.DOUBLE, DoubleParserFactory.INSTANCE);
-        m.put(ATypeTag.INT64, LongParserFactory.INSTANCE);
-        m.put(ATypeTag.STRING, UTF8StringParserFactory.INSTANCE);
-        return m;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HiveAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HiveAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HiveAdapterFactory.java
deleted file mode 100644
index 553682e..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/HiveAdapterFactory.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.dataset.adapter.HDFSAdapter;
-import org.apache.asterix.external.dataset.adapter.HiveAdapter;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * A factory class for creating an instance of HiveAdapter
- */
-public class HiveAdapterFactory extends StreamBasedAdapterFactory implements IAdapterFactory {
-    private static final long serialVersionUID = 1L;
-
-    public static final String HIVE_DATABASE = "database";
-    public static final String HIVE_TABLE = "table";
-    public static final String HIVE_HOME = "hive-home";
-    public static final String HIVE_METASTORE_URI = "metastore-uri";
-    public static final String HIVE_WAREHOUSE_DIR = "warehouse-dir";
-    public static final String HIVE_METASTORE_RAWSTORE_IMPL = "rawstore-impl";
-
-    private HDFSAdapterFactory hdfsAdapterFactory;
-    private HDFSAdapter hdfsAdapter;
-    private boolean configured = false;
-    private IAType atype;
-
-    public HiveAdapterFactory() {
-        hdfsAdapterFactory = new HDFSAdapterFactory();
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        hdfsAdapter = (HDFSAdapter) hdfsAdapterFactory.createAdapter(ctx, partition);
-        HiveAdapter hiveAdapter = new HiveAdapter(atype, hdfsAdapter, parserFactory, ctx);
-        return hiveAdapter;
-    }
-
-    @Override
-    public String getName() {
-        return "hive";
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        if (!configured) {
-            populateConfiguration(configuration);
-            hdfsAdapterFactory.configure(configuration, outputType);
-            this.atype = outputType;
-        }
-    }
-
-    public static void populateConfiguration(Map<String, String> configuration) throws Exception {
-        /** configure hive */
-        String database = configuration.get(HIVE_DATABASE);
-        String tablePath = null;
-        if (database == null) {
-            tablePath = configuration.get(HIVE_WAREHOUSE_DIR) + "/" + configuration.get(HIVE_TABLE);
-        } else {
-            tablePath = configuration.get(HIVE_WAREHOUSE_DIR) + "/" + tablePath + ".db" + "/"
-                    + configuration.get(HIVE_TABLE);
-        }
-        configuration.put(HDFSAdapterFactory.KEY_PATH, tablePath);
-        if (!configuration.get(AsterixTupleParserFactory.KEY_FORMAT)
-                .equals(AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT)) {
-            throw new IllegalArgumentException(
-                    "format" + configuration.get(AsterixTupleParserFactory.KEY_FORMAT) + " is not supported");
-        }
-
-        if (!(configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT)
-                || configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT)
-                        .equals(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE))) {
-            throw new IllegalArgumentException(
-                    "file input format" + configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT) + " is not supported");
-        }
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return hdfsAdapterFactory.getPartitionConstraint();
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return (ARecordType) atype;
-    }
-
-    @Override
-    public InputDataFormat getInputDataFormat() {
-        return InputDataFormat.UNKNOWN;
-    }
-
-    public void setFiles(List<ExternalFile> files) {
-        hdfsAdapterFactory.setFiles(files);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IAdapterFactory.java
deleted file mode 100644
index b8005cd..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IAdapterFactory.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-/**
- * Base interface for IGenericDatasetAdapterFactory and ITypedDatasetAdapterFactory.
- * Acts as a marker interface indicating that the implementation provides functionality
- * for creating an adapter.
- */
-public interface IAdapterFactory extends Serializable {
-
-    public static final String KEY_TYPE_NAME = "type-name";
-
-    public enum SupportedOperation {
-        READ,
-        WRITE,
-        READ_WRITE
-    }
-
-    /**
-     * Returns the type of adapter indicating if the adapter can be used for
-     * reading from an external data source or writing to an external data
-     * source or can be used for both purposes.
-     * 
-     * @see SupportedOperation
-     * @return
-     */
-    public SupportedOperation getSupportedOperations();
-
-    /**
-     * Returns the display name corresponding to the Adapter type that is created by the factory.
-     * 
-     * @return the display name
-     */
-    public String getName();
-
-    /**
-     * Gets a list of partition constraints. A partition constraint can be a
-     * requirement to execute at a particular location or could be cardinality
-     * constraints indicating the number of instances that need to run in
-     * parallel. example, a IDatasourceAdapter implementation written for data
-     * residing on the local file system of a node cannot run on any other node
-     * and thus has a location partition constraint. The location partition
-     * constraint can be expressed as a node IP address or a node controller id.
-     * In the former case, the IP address is translated to a node controller id
-     * running on the node with the given IP address.
-     */
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception;
-
-    /**
-     * Creates an instance of IDatasourceAdapter.
-     * 
-     * @param HyracksTaskContext
-     * @param partition
-     * @return An instance of IDatasourceAdapter.
-     * @throws Exception
-     */
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception;
-
-    /**
-     * @param configuration
-     * @param outputType
-     * @throws Exception
-     */
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception;
-
-    /**
-     * Gets the record type associated with the output of the adapter
-     * 
-     * @return
-     */
-    public ARecordType getAdapterOutputType();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IControlledAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IControlledAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IControlledAdapterFactory.java
deleted file mode 100644
index 0de6fad..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IControlledAdapterFactory.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import org.apache.asterix.external.dataset.adapter.IControlledAdapter;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-
-public interface IControlledAdapterFactory extends Serializable {
-    public IControlledAdapter createAdapter(IHyracksTaskContext ctx, ExternalFileIndexAccessor fileIndexAccessor,
-            RecordDescriptor inRecDesc);
-
-    public void configure(IAType atype, boolean propagateInput, int[] ridFields,
-            Map<String, String> adapterConfiguration, boolean retainNull);
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IFeedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IFeedAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IFeedAdapterFactory.java
deleted file mode 100644
index 9358a52..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/IFeedAdapterFactory.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-
-public interface IFeedAdapterFactory extends IAdapterFactory {
-
-    public boolean isRecordTrackingEnabled();
-
-    public IIntakeProgressTracker createIntakeProgressTracker();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/LookupAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/LookupAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/LookupAdapterFactory.java
new file mode 100644
index 0000000..866910b
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/LookupAdapterFactory.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.adapter.factory;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import org.apache.asterix.external.api.ILookupReaderFactory;
+import org.apache.asterix.external.api.ILookupRecordReader;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.dataset.adapter.LookupAdapter;
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.RecordIdReader;
+import org.apache.asterix.external.indexing.RecordIdReaderFactory;
+import org.apache.asterix.external.input.record.reader.LookupReaderFactoryProvider;
+import org.apache.asterix.external.provider.ParserFactoryProvider;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class LookupAdapterFactory<T> implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+    private IRecordDataParserFactory dataParserFactory;
+    private ILookupReaderFactory readerFactory;
+    private ARecordType recordType;
+    private int[] ridFields;
+    private Map<String, String> configuration;
+    private boolean retainInput;
+    private boolean retainNull;
+    private int[] propagatedFields;
+    private INullWriterFactory iNullWriterFactory;
+
+    public LookupAdapterFactory(ARecordType recordType, int[] ridFields, boolean retainInput, boolean retainNull,
+            INullWriterFactory iNullWriterFactory) {
+        this.recordType = recordType;
+        this.ridFields = ridFields;
+        this.retainInput = retainInput;
+        this.retainNull = retainNull;
+        this.iNullWriterFactory = iNullWriterFactory;
+    }
+
+    public LookupAdapter<T> createAdapter(IHyracksTaskContext ctx, int partition, RecordDescriptor inRecDesc,
+            ExternalFileIndexAccessor snapshotAccessor, IFrameWriter writer) throws HyracksDataException {
+        try {
+            IRecordDataParser<T> dataParser = dataParserFactory.createRecordParser(ctx);
+            dataParser.configure(configuration, recordType);
+            ILookupRecordReader<? extends T> reader = readerFactory.createRecordReader(ctx, partition,
+                    snapshotAccessor);
+            reader.configure(configuration);
+            RecordIdReader ridReader = RecordIdReaderFactory.create(configuration, ridFields);
+            configurePropagatedFields(inRecDesc);
+            return new LookupAdapter<T>(dataParser, reader, inRecDesc, ridReader, retainInput, propagatedFields,
+                    retainNull, iNullWriterFactory, ctx, writer);
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+        readerFactory = LookupReaderFactoryProvider.getLookupReaderFactory(configuration);
+        dataParserFactory = (IRecordDataParserFactory<T>) ParserFactoryProvider.getDataParserFactory(configuration);
+        dataParserFactory.setRecordType(recordType);
+        readerFactory.configure(configuration);
+        dataParserFactory.configure(configuration);
+    }
+
+    private void configurePropagatedFields(RecordDescriptor inRecDesc) {
+        int ptr = 0;
+        boolean skip = false;
+        propagatedFields = new int[inRecDesc.getFieldCount() - ridFields.length];
+        for (int i = 0; i < inRecDesc.getFieldCount(); i++) {
+            if (ptr < ridFields.length) {
+                skip = false;
+                for (int j = 0; j < ridFields.length; j++) {
+                    if (ridFields[j] == i) {
+                        ptr++;
+                        skip = true;
+                        break;
+                    }
+                }
+                if (!skip)
+                    propagatedFields[i - ptr] = i;
+            } else {
+                propagatedFields[i - ptr] = i;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java
deleted file mode 100644
index 251d69a..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.adapter.factory;
-
-import java.io.File;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.util.DNSResolverFactory;
-import org.apache.asterix.external.util.INodeResolver;
-import org.apache.asterix.external.util.INodeResolverFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.dataflow.std.file.FileSplit;
-
-/**
- * Factory class for creating an instance of NCFileSystemAdapter. An
- * NCFileSystemAdapter reads external data residing on the local file system of
- * an NC.
- */
-public class NCFileSystemAdapterFactory extends StreamBasedAdapterFactory implements IAdapterFactory {
-    private static final long serialVersionUID = 1L;
-
-    public static final String NC_FILE_SYSTEM_ADAPTER_NAME = "localfs";
-
-    private static final INodeResolver DEFAULT_NODE_RESOLVER = new DNSResolverFactory().createNodeResolver();
-
-    private IAType sourceDatatype;
-    private FileSplit[] fileSplits;
-    private ARecordType outputType;
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        NCFileSystemAdapter fsAdapter = new NCFileSystemAdapter(fileSplits, parserFactory, sourceDatatype, ctx);
-        return fsAdapter;
-    }
-
-    @Override
-    public String getName() {
-        return NC_FILE_SYSTEM_ADAPTER_NAME;
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        this.configuration = configuration;
-        this.outputType = outputType;
-        String[] splits = configuration.get(AsterixTupleParserFactory.KEY_PATH).split(",");
-        IAType sourceDatatype = outputType;
-        configureFileSplits(splits);
-        configureFormat(sourceDatatype);
-
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return configurePartitionConstraint();
-    }
-
-    private void configureFileSplits(String[] splits) throws AsterixException {
-        if (fileSplits == null) {
-            fileSplits = new FileSplit[splits.length];
-            String nodeName;
-            String nodeLocalPath;
-            int count = 0;
-            String trimmedValue;
-            for (String splitPath : splits) {
-                trimmedValue = splitPath.trim();
-                if (!trimmedValue.contains("://")) {
-                    throw new AsterixException(
-                            "Invalid path: " + splitPath + "\nUsage- path=\"Host://Absolute File Path\"");
-                }
-                nodeName = trimmedValue.split(":")[0];
-                nodeLocalPath = trimmedValue.split("://")[1];
-                FileSplit fileSplit = new FileSplit(nodeName, new FileReference(new File(nodeLocalPath)));
-                fileSplits[count++] = fileSplit;
-            }
-        }
-    }
-
-    private AlgebricksPartitionConstraint configurePartitionConstraint() throws AsterixException {
-        String[] locs = new String[fileSplits.length];
-        String location;
-        for (int i = 0; i < fileSplits.length; i++) {
-            location = getNodeResolver().resolveNode(fileSplits[i].getNodeName());
-            locs[i] = location;
-        }
-        return new AlgebricksAbsolutePartitionConstraint(locs);
-    }
-
-    protected INodeResolver getNodeResolver() {
-        if (nodeResolver == null) {
-            nodeResolver = initializeNodeResolver();
-        }
-        return nodeResolver;
-    }
-
-    private static INodeResolver initializeNodeResolver() {
-        INodeResolver nodeResolver = null;
-        String configuredNodeResolverFactory = System
-                .getProperty(AsterixTupleParserFactory.NODE_RESOLVER_FACTORY_PROPERTY);
-        if (configuredNodeResolverFactory != null) {
-            try {
-                nodeResolver = ((INodeResolverFactory) (Class.forName(configuredNodeResolverFactory).newInstance()))
-                        .createNodeResolver();
-
-            } catch (Exception e) {
-                if (LOGGER.isLoggable(Level.WARNING)) {
-                    LOGGER.log(Level.WARNING, "Unable to create node resolver from the configured classname "
-                            + configuredNodeResolverFactory + "\n" + e.getMessage());
-                }
-                nodeResolver = DEFAULT_NODE_RESOLVER;
-            }
-        } else {
-            nodeResolver = DEFAULT_NODE_RESOLVER;
-        }
-        return nodeResolver;
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return outputType;
-    }
-
-    @Override
-    public InputDataFormat getInputDataFormat() {
-        return InputDataFormat.UNKNOWN;
-    }
-
-    public void setFiles(List<ExternalFile> files) throws AlgebricksException {
-        throw new AlgebricksException("can't set files for this Adapter");
-    }
-
-}



[04/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/ADMDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/ADMDataParser.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/ADMDataParser.java
deleted file mode 100644
index 6e4c175..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/ADMDataParser.java
+++ /dev/null
@@ -1,1100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.BitSet;
-import java.util.List;
-
-import org.apache.asterix.builders.AbvsBuilderFactory;
-import org.apache.asterix.builders.IARecordBuilder;
-import org.apache.asterix.builders.IAsterixListBuilder;
-import org.apache.asterix.builders.ListBuilderFactory;
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.builders.RecordBuilderFactory;
-import org.apache.asterix.builders.UnorderedListBuilder;
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.dataflow.data.nontagged.serde.APolygonSerializerDeserializer;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.base.ANull;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.AUnorderedListType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
-import org.apache.asterix.om.types.hierachy.ITypeConvertComputer;
-import org.apache.asterix.om.util.NonTaggedFormatUtil;
-import org.apache.asterix.om.util.container.IObjectPool;
-import org.apache.asterix.om.util.container.ListObjectPool;
-import org.apache.asterix.runtime.operators.file.adm.AdmLexer;
-import org.apache.asterix.runtime.operators.file.adm.AdmLexerException;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.api.IMutableValueStorage;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-
-/**
- * Parser for ADM formatted data.
- */
-public class ADMDataParser extends AbstractDataParser {
-
-    protected AdmLexer admLexer;
-    protected ARecordType recordType;
-    protected boolean datasetRec;
-
-    private int nullableFieldId = 0;
-    private ArrayBackedValueStorage castBuffer = new ArrayBackedValueStorage();
-
-    private IObjectPool<IARecordBuilder, ATypeTag> recordBuilderPool = new ListObjectPool<IARecordBuilder, ATypeTag>(
-            new RecordBuilderFactory());
-    private IObjectPool<IAsterixListBuilder, ATypeTag> listBuilderPool = new ListObjectPool<IAsterixListBuilder, ATypeTag>(
-            new ListBuilderFactory());
-    private IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool = new ListObjectPool<IMutableValueStorage, ATypeTag>(
-            new AbvsBuilderFactory());
-
-    private String mismatchErrorMessage = "Mismatch Type, expecting a value of type ";
-    private String mismatchErrorMessage2 = " got a value of type ";
-
-    static class ParseException extends AsterixException {
-        private static final long serialVersionUID = 1L;
-        private String filename;
-        private int line = -1;
-        private int column = -1;
-
-        public ParseException(String message) {
-            super(message);
-        }
-
-        public ParseException(Throwable cause) {
-            super(cause);
-        }
-
-        public ParseException(String message, Throwable cause) {
-            super(message, cause);
-        }
-
-        public ParseException(Throwable cause, String filename, int line, int column) {
-            super(cause);
-            setLocation(filename, line, column);
-        }
-
-        public void setLocation(String filename, int line, int column) {
-            this.filename = filename;
-            this.line = line;
-            this.column = column;
-        }
-
-        @Override
-        public String getMessage() {
-            StringBuilder msg = new StringBuilder("Parse error");
-            if (filename != null) {
-                msg.append(" in file " + filename);
-            }
-            if (line >= 0) {
-                if (column >= 0) {
-                    msg.append(" at (" + line + ", " + column + ")");
-                } else {
-                    msg.append(" in line " + line);
-                }
-            }
-            return msg.append(": " + super.getMessage()).toString();
-        }
-    }
-
-    public ADMDataParser() {
-        this(null);
-    }
-
-    public ADMDataParser(String filename) {
-        this.filename = filename;
-    }
-
-    @Override
-    public boolean parse(DataOutput out) throws AsterixException {
-        try {
-            resetPools();
-            return parseAdmInstance(recordType, datasetRec, out);
-        } catch (IOException e) {
-            throw new ParseException(e, filename, admLexer.getLine(), admLexer.getColumn());
-        } catch (AdmLexerException e) {
-            throw new AsterixException(e);
-        } catch (ParseException e) {
-            e.setLocation(filename, admLexer.getLine(), admLexer.getColumn());
-            throw e;
-        }
-    }
-
-    @Override
-    public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) throws AsterixException {
-        this.recordType = recordType;
-        this.datasetRec = datasetRec;
-        try {
-            admLexer = new AdmLexer(new java.io.InputStreamReader(in));
-        } catch (IOException e) {
-            throw new ParseException(e);
-        }
-    }
-
-    protected boolean parseAdmInstance(IAType objectType, boolean datasetRec, DataOutput out)
-            throws AsterixException, IOException, AdmLexerException {
-        int token = admLexer.next();
-        if (token == AdmLexer.TOKEN_EOF) {
-            return false;
-        } else {
-            admFromLexerStream(token, objectType, out, datasetRec);
-            return true;
-        }
-    }
-
-    private void admFromLexerStream(int token, IAType objectType, DataOutput out, Boolean datasetRec)
-            throws AsterixException, IOException, AdmLexerException {
-
-        switch (token) {
-            case AdmLexer.TOKEN_NULL_LITERAL: {
-                if (checkType(ATypeTag.NULL, objectType)) {
-                    nullSerde.serialize(ANull.NULL, out);
-                } else {
-                    throw new ParseException("This field can not be null");
-                }
-                break;
-            }
-            case AdmLexer.TOKEN_TRUE_LITERAL: {
-                if (checkType(ATypeTag.BOOLEAN, objectType)) {
-                    booleanSerde.serialize(ABoolean.TRUE, out);
-                } else {
-                    throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
-                }
-                break;
-            }
-            case AdmLexer.TOKEN_BOOLEAN_CONS: {
-                parseConstructor(ATypeTag.BOOLEAN, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_FALSE_LITERAL: {
-                if (checkType(ATypeTag.BOOLEAN, objectType)) {
-                    booleanSerde.serialize(ABoolean.FALSE, out);
-                } else {
-                    throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
-                }
-                break;
-            }
-            case AdmLexer.TOKEN_DOUBLE_LITERAL: {
-                parseToNumericTarget(ATypeTag.DOUBLE, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_DOUBLE_CONS: {
-                parseConstructor(ATypeTag.DOUBLE, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_FLOAT_LITERAL: {
-                parseToNumericTarget(ATypeTag.FLOAT, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_FLOAT_CONS: {
-                parseConstructor(ATypeTag.FLOAT, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT8_LITERAL: {
-                parseAndCastNumeric(ATypeTag.INT8, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT8_CONS: {
-                parseConstructor(ATypeTag.INT8, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT16_LITERAL: {
-                parseAndCastNumeric(ATypeTag.INT16, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT16_CONS: {
-                parseConstructor(ATypeTag.INT16, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT_LITERAL: {
-                // For an INT value without any suffix, we return it as INT64 type value since it is the default integer type.
-                parseAndCastNumeric(ATypeTag.INT64, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT32_LITERAL: {
-                parseAndCastNumeric(ATypeTag.INT32, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT32_CONS: {
-                parseConstructor(ATypeTag.INT32, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT64_LITERAL: {
-                parseAndCastNumeric(ATypeTag.INT64, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INT64_CONS: {
-                parseConstructor(ATypeTag.INT64, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_STRING_LITERAL: {
-                if (checkType(ATypeTag.STRING, objectType)) {
-                    final String tokenImage = admLexer.getLastTokenImage().substring(1,
-                            admLexer.getLastTokenImage().length() - 1);
-                    aString.setValue(admLexer.containsEscapes() ? replaceEscapes(tokenImage) : tokenImage);
-                    stringSerde.serialize(aString, out);
-                } else if (checkType(ATypeTag.UUID, objectType)) {
-                    // Dealing with UUID type that is represented by a string
-                    final String tokenImage = admLexer.getLastTokenImage().substring(1,
-                            admLexer.getLastTokenImage().length() - 1);
-                    aUUID.fromStringToAMuatbleUUID(tokenImage);
-                    uuidSerde.serialize(aUUID, out);
-                } else {
-                    throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
-                }
-                break;
-            }
-            case AdmLexer.TOKEN_STRING_CONS: {
-                parseConstructor(ATypeTag.STRING, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_HEX_CONS:
-            case AdmLexer.TOKEN_BASE64_CONS: {
-                if (checkType(ATypeTag.BINARY, objectType)) {
-                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
-                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
-                            parseToBinaryTarget(token, admLexer.getLastTokenImage(), out);
-                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
-                                break;
-                            }
-                        }
-                    }
-                }
-                throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
-            }
-            case AdmLexer.TOKEN_DATE_CONS: {
-                parseConstructor(ATypeTag.DATE, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_TIME_CONS: {
-                parseConstructor(ATypeTag.TIME, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_DATETIME_CONS: {
-                parseConstructor(ATypeTag.DATETIME, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_INTERVAL_DATE_CONS: {
-                if (checkType(ATypeTag.INTERVAL, objectType)) {
-                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
-                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
-                            parseDateInterval(admLexer.getLastTokenImage(), out);
-                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
-                                break;
-                            }
-                        }
-                    }
-                }
-                throw new ParseException("Wrong interval data parsing for date interval.");
-            }
-            case AdmLexer.TOKEN_INTERVAL_TIME_CONS: {
-                if (checkType(ATypeTag.INTERVAL, objectType)) {
-                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
-                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
-                            parseTimeInterval(admLexer.getLastTokenImage(), out);
-                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
-                                break;
-                            }
-                        }
-                    }
-                }
-                throw new ParseException("Wrong interval data parsing for time interval.");
-            }
-            case AdmLexer.TOKEN_INTERVAL_DATETIME_CONS: {
-                if (checkType(ATypeTag.INTERVAL, objectType)) {
-                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
-                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
-                            parseDateTimeInterval(admLexer.getLastTokenImage(), out);
-                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
-                                break;
-                            }
-                        }
-                    }
-                }
-                throw new ParseException("Wrong interval data parsing for datetime interval.");
-            }
-            case AdmLexer.TOKEN_DURATION_CONS: {
-                parseConstructor(ATypeTag.DURATION, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_YEAR_MONTH_DURATION_CONS: {
-                parseConstructor(ATypeTag.YEARMONTHDURATION, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_DAY_TIME_DURATION_CONS: {
-                parseConstructor(ATypeTag.DAYTIMEDURATION, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_POINT_CONS: {
-                parseConstructor(ATypeTag.POINT, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_POINT3D_CONS: {
-                parseConstructor(ATypeTag.POINT3D, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_CIRCLE_CONS: {
-                parseConstructor(ATypeTag.CIRCLE, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_RECTANGLE_CONS: {
-                parseConstructor(ATypeTag.RECTANGLE, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_LINE_CONS: {
-                parseConstructor(ATypeTag.LINE, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_POLYGON_CONS: {
-                parseConstructor(ATypeTag.POLYGON, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_START_UNORDERED_LIST: {
-                if (checkType(ATypeTag.UNORDEREDLIST, objectType)) {
-                    objectType = getComplexType(objectType, ATypeTag.UNORDEREDLIST);
-                    parseUnorderedList((AUnorderedListType) objectType, out);
-                } else {
-                    throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
-                }
-                break;
-            }
-
-            case AdmLexer.TOKEN_START_ORDERED_LIST: {
-                if (checkType(ATypeTag.ORDEREDLIST, objectType)) {
-                    objectType = getComplexType(objectType, ATypeTag.ORDEREDLIST);
-                    parseOrderedList((AOrderedListType) objectType, out);
-                } else {
-                    throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
-                }
-                break;
-            }
-            case AdmLexer.TOKEN_START_RECORD: {
-                if (checkType(ATypeTag.RECORD, objectType)) {
-                    objectType = getComplexType(objectType, ATypeTag.RECORD);
-                    parseRecord((ARecordType) objectType, out, datasetRec);
-                } else {
-                    throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
-                }
-                break;
-            }
-            case AdmLexer.TOKEN_UUID_CONS: {
-                parseConstructor(ATypeTag.UUID, objectType, out);
-                break;
-            }
-            case AdmLexer.TOKEN_EOF: {
-                break;
-            }
-            default: {
-                throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token) + ".");
-            }
-        }
-
-    }
-
-    private String replaceEscapes(String tokenImage) throws ParseException {
-        char[] chars = tokenImage.toCharArray();
-        int len = chars.length;
-        int readpos = 0;
-        int writepos = 0;
-        int movemarker = 0;
-        while (readpos < len) {
-            if (chars[readpos] == '\\') {
-                moveChars(chars, movemarker, readpos, readpos - writepos);
-                switch (chars[readpos + 1]) {
-                    case '\\':
-                    case '\"':
-                    case '/':
-                        chars[writepos] = chars[readpos + 1];
-                        break;
-                    case 'b':
-                        chars[writepos] = '\b';
-                        break;
-                    case 'f':
-                        chars[writepos] = '\f';
-                        break;
-                    case 'n':
-                        chars[writepos] = '\n';
-                        break;
-                    case 'r':
-                        chars[writepos] = '\r';
-                        break;
-                    case 't':
-                        chars[writepos] = '\t';
-                        break;
-                    case 'u':
-                        chars[writepos] = (char) Integer.parseInt(new String(chars, readpos + 2, 4), 16);
-                        readpos += 4;
-                        break;
-                    default:
-                        throw new ParseException("Illegal escape '\\" + chars[readpos + 1] + "'");
-                }
-                ++readpos;
-                movemarker = readpos + 1;
-            }
-            ++writepos;
-            ++readpos;
-        }
-        moveChars(chars, movemarker, len, readpos - writepos);
-        return new String(chars, 0, len - (readpos - writepos));
-    }
-
-    /**
-     * Copies the characters in {@code [start, end)} to the positions {@code offset} slots
-     * to their left, walking from {@code start} upwards. Does nothing when {@code offset}
-     * is zero.
-     *
-     * @param chars
-     *            the array that is modified in place
-     * @param start
-     *            index of the first character to copy (inclusive)
-     * @param end
-     *            index at which copying stops (exclusive)
-     * @param offset
-     *            distance by which each character is moved towards the front
-     */
-    private static void moveChars(final char[] chars, final int start, final int end, final int offset) {
-        if (offset != 0) {
-            int src = start;
-            while (src < end) {
-                chars[src - offset] = chars[src];
-                src++;
-            }
-        }
-    }
-
-    private IAType getComplexType(IAType aObjectType, ATypeTag tag) {
-        if (aObjectType == null) {
-            return null;
-        }
-
-        if (aObjectType.getTypeTag() == tag) {
-            return aObjectType;
-        }
-
-        if (aObjectType.getTypeTag() == ATypeTag.UNION) {
-            List<IAType> unionList = ((AUnionType) aObjectType).getUnionList();
-            for (int i = 0; i < unionList.size(); i++) {
-                if (unionList.get(i).getTypeTag() == tag) {
-                    return unionList.get(i);
-                }
-            }
-        }
-        return null; // wont get here
-    }
-
-    private ATypeTag getTargetTypeTag(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException {
-        if (aObjectType == null) {
-            return expectedTypeTag;
-        }
-        if (aObjectType.getTypeTag() != ATypeTag.UNION) {
-            final ATypeTag typeTag = aObjectType.getTypeTag();
-            if (ATypeHierarchy.canPromote(expectedTypeTag, typeTag)
-                    || ATypeHierarchy.canDemote(expectedTypeTag, typeTag)) {
-                return typeTag;
-            } else {
-                return null;
-            }
-            //            return ATypeHierarchy.canPromote(expectedTypeTag, typeTag) ? typeTag : null;
-        } else { // union
-            List<IAType> unionList = ((AUnionType) aObjectType).getUnionList();
-            for (IAType t : unionList) {
-                final ATypeTag typeTag = t.getTypeTag();
-                if (ATypeHierarchy.canPromote(expectedTypeTag, typeTag)
-                        || ATypeHierarchy.canDemote(expectedTypeTag, typeTag)) {
-                    return typeTag;
-                }
-            }
-        }
-        return null;
-    }
-
-    private boolean checkType(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException {
-        return getTargetTypeTag(expectedTypeTag, aObjectType) != null;
-    }
-
-    private void parseRecord(ARecordType recType, DataOutput out, Boolean datasetRec)
-            throws IOException, AsterixException, AdmLexerException {
-
-        ArrayBackedValueStorage fieldValueBuffer = getTempBuffer();
-        ArrayBackedValueStorage fieldNameBuffer = getTempBuffer();
-        IARecordBuilder recBuilder = getRecordBuilder();
-
-        BitSet nulls = null;
-        if (datasetRec) {
-            if (recType != null) {
-                nulls = new BitSet(recType.getFieldNames().length);
-                recBuilder.reset(recType);
-            } else {
-                recBuilder.reset(null);
-            }
-        } else if (recType != null) {
-            nulls = new BitSet(recType.getFieldNames().length);
-            recBuilder.reset(recType);
-        } else {
-            recBuilder.reset(null);
-        }
-
-        recBuilder.init();
-        int token;
-        boolean inRecord = true;
-        boolean expectingRecordField = false;
-        boolean first = true;
-
-        Boolean openRecordField = false;
-        int fieldId = 0;
-        IAType fieldType = null;
-        do {
-            token = admLexer.next();
-            switch (token) {
-                case AdmLexer.TOKEN_END_RECORD: {
-                    if (expectingRecordField) {
-                        throw new ParseException("Found END_RECORD while expecting a record field.");
-                    }
-                    inRecord = false;
-                    break;
-                }
-                case AdmLexer.TOKEN_STRING_LITERAL: {
-                    // we've read the name of the field
-                    // now read the content
-                    fieldNameBuffer.reset();
-                    fieldValueBuffer.reset();
-                    expectingRecordField = false;
-
-                    if (recType != null) {
-                        String fldName = admLexer.getLastTokenImage().substring(1,
-                                admLexer.getLastTokenImage().length() - 1);
-                        fieldId = recBuilder.getFieldId(fldName);
-                        if (fieldId < 0 && !recType.isOpen()) {
-                            throw new ParseException("This record is closed, you can not add extra fields !!");
-                        } else if (fieldId < 0 && recType.isOpen()) {
-                            aStringFieldName.setValue(admLexer.getLastTokenImage().substring(1,
-                                    admLexer.getLastTokenImage().length() - 1));
-                            stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
-                            openRecordField = true;
-                            fieldType = null;
-                        } else {
-                            // a closed field
-                            nulls.set(fieldId);
-                            fieldType = recType.getFieldTypes()[fieldId];
-                            openRecordField = false;
-                        }
-                    } else {
-                        aStringFieldName.setValue(
-                                admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1));
-                        stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
-                        openRecordField = true;
-                        fieldType = null;
-                    }
-
-                    token = admLexer.next();
-                    if (token != AdmLexer.TOKEN_COLON) {
-                        throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
-                                + " while expecting \":\".");
-                    }
-
-                    token = admLexer.next();
-                    this.admFromLexerStream(token, fieldType, fieldValueBuffer.getDataOutput(), false);
-                    if (openRecordField) {
-                        if (fieldValueBuffer.getByteArray()[0] != ATypeTag.NULL.serialize()) {
-                            recBuilder.addField(fieldNameBuffer, fieldValueBuffer);
-                        }
-                    } else if (NonTaggedFormatUtil.isOptional(recType)) {
-                        if (fieldValueBuffer.getByteArray()[0] != ATypeTag.NULL.serialize()) {
-                            recBuilder.addField(fieldId, fieldValueBuffer);
-                        }
-                    } else {
-                        recBuilder.addField(fieldId, fieldValueBuffer);
-                    }
-
-                    break;
-                }
-                case AdmLexer.TOKEN_COMMA: {
-                    if (first) {
-                        throw new ParseException("Found COMMA before any record field.");
-                    }
-                    if (expectingRecordField) {
-                        throw new ParseException("Found COMMA while expecting a record field.");
-                    }
-                    expectingRecordField = true;
-                    break;
-                }
-                default: {
-                    throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
-                            + " while parsing record fields.");
-                }
-            }
-            first = false;
-        } while (inRecord);
-
-        if (recType != null) {
-            nullableFieldId = checkNullConstraints(recType, nulls);
-            if (nullableFieldId != -1) {
-                throw new ParseException("Field: " + recType.getFieldNames()[nullableFieldId] + " can not be null");
-            }
-        }
-        recBuilder.write(out, true);
-    }
-
-    private int checkNullConstraints(ARecordType recType, BitSet nulls) {
-        boolean isNull = false;
-        for (int i = 0; i < recType.getFieldTypes().length; i++) {
-            if (nulls.get(i) == false) {
-                IAType type = recType.getFieldTypes()[i];
-                if (type.getTypeTag() != ATypeTag.NULL && type.getTypeTag() != ATypeTag.UNION) {
-                    return i;
-                }
-
-                if (type.getTypeTag() == ATypeTag.UNION) { // union
-                    List<IAType> unionList = ((AUnionType) type).getUnionList();
-                    for (int j = 0; j < unionList.size(); j++) {
-                        if (unionList.get(j).getTypeTag() == ATypeTag.NULL) {
-                            isNull = true;
-                            break;
-                        }
-                    }
-                    if (!isNull) {
-                        return i;
-                    }
-                }
-            }
-        }
-        return -1;
-    }
-
-    private void parseOrderedList(AOrderedListType oltype, DataOutput out)
-            throws IOException, AsterixException, AdmLexerException {
-        ArrayBackedValueStorage itemBuffer = getTempBuffer();
-        OrderedListBuilder orderedListBuilder = (OrderedListBuilder) getOrderedListBuilder();
-
-        IAType itemType = null;
-        if (oltype != null) {
-            itemType = oltype.getItemType();
-        }
-        orderedListBuilder.reset(oltype);
-
-        int token;
-        boolean inList = true;
-        boolean expectingListItem = false;
-        boolean first = true;
-        do {
-            token = admLexer.next();
-            if (token == AdmLexer.TOKEN_END_ORDERED_LIST) {
-                if (expectingListItem) {
-                    throw new ParseException("Found END_COLLECTION while expecting a list item.");
-                }
-                inList = false;
-            } else if (token == AdmLexer.TOKEN_COMMA) {
-                if (first) {
-                    throw new ParseException("Found COMMA before any list item.");
-                }
-                if (expectingListItem) {
-                    throw new ParseException("Found COMMA while expecting a list item.");
-                }
-                expectingListItem = true;
-            } else {
-                expectingListItem = false;
-                itemBuffer.reset();
-
-                admFromLexerStream(token, itemType, itemBuffer.getDataOutput(), false);
-                orderedListBuilder.addItem(itemBuffer);
-            }
-            first = false;
-        } while (inList);
-        orderedListBuilder.write(out, true);
-    }
-
-    private void parseUnorderedList(AUnorderedListType uoltype, DataOutput out)
-            throws IOException, AsterixException, AdmLexerException {
-        ArrayBackedValueStorage itemBuffer = getTempBuffer();
-        UnorderedListBuilder unorderedListBuilder = (UnorderedListBuilder) getUnorderedListBuilder();
-
-        IAType itemType = null;
-
-        if (uoltype != null) {
-            itemType = uoltype.getItemType();
-        }
-        unorderedListBuilder.reset(uoltype);
-
-        int token;
-        boolean inList = true;
-        boolean expectingListItem = false;
-        boolean first = true;
-        do {
-            token = admLexer.next();
-            if (token == AdmLexer.TOKEN_END_RECORD) {
-                if (admLexer.next() == AdmLexer.TOKEN_END_RECORD) {
-                    if (expectingListItem) {
-                        throw new ParseException("Found END_COLLECTION while expecting a list item.");
-                    } else {
-                        inList = false;
-                    }
-                } else {
-                    throw new ParseException("Found END_RECORD while expecting a list item.");
-                }
-            } else if (token == AdmLexer.TOKEN_COMMA) {
-                if (first) {
-                    throw new ParseException("Found COMMA before any list item.");
-                }
-                if (expectingListItem) {
-                    throw new ParseException("Found COMMA while expecting a list item.");
-                }
-                expectingListItem = true;
-            } else {
-                expectingListItem = false;
-                itemBuffer.reset();
-                admFromLexerStream(token, itemType, itemBuffer.getDataOutput(), false);
-                unorderedListBuilder.addItem(itemBuffer);
-            }
-            first = false;
-        } while (inList);
-        unorderedListBuilder.write(out, true);
-    }
-
-    private IARecordBuilder getRecordBuilder() {
-        return recordBuilderPool.allocate(ATypeTag.RECORD);
-    }
-
-    private IAsterixListBuilder getOrderedListBuilder() {
-        return listBuilderPool.allocate(ATypeTag.ORDEREDLIST);
-    }
-
-    private IAsterixListBuilder getUnorderedListBuilder() {
-        return listBuilderPool.allocate(ATypeTag.UNORDEREDLIST);
-    }
-
-    private ArrayBackedValueStorage getTempBuffer() {
-        return (ArrayBackedValueStorage) abvsBuilderPool.allocate(ATypeTag.BINARY);
-    }
-
-    /**
-     * Serializes a quoted binary literal according to the constructor that introduced it.
-     * The first and last characters of {@code tokenImage} (the quotes) are excluded from
-     * the range handed to the hex / base64 parser. Any other token kind is ignored.
-     *
-     * @param lexerToken
-     *            {@code AdmLexer.TOKEN_HEX_CONS} or {@code AdmLexer.TOKEN_BASE64_CONS}
-     * @param tokenImage
-     *            the quoted string literal holding the encoded bytes
-     * @param out
-     *            destination of the serialized binary value
-     */
-    private void parseToBinaryTarget(int lexerToken, String tokenImage, DataOutput out)
-            throws ParseException, HyracksDataException {
-        if (lexerToken == AdmLexer.TOKEN_HEX_CONS) {
-            parseHexBinaryString(tokenImage.toCharArray(), 1, tokenImage.length() - 2, out);
-        } else if (lexerToken == AdmLexer.TOKEN_BASE64_CONS) {
-            parseBase64BinaryString(tokenImage.toCharArray(), 1, tokenImage.length() - 2, out);
-        }
-    }
-
-    /**
-     * Parses the last token image directly as the target type resolved for
-     * {@code typeTag} against {@code objectType}, and writes the result to {@code out}.
-     *
-     * @throws ParseException
-     *             when no target type can be resolved or the target type has no value parser
-     */
-    private void parseToNumericTarget(ATypeTag typeTag, IAType objectType, DataOutput out)
-            throws AsterixException, IOException {
-        final ATypeTag resolvedTag = getTargetTypeTag(typeTag, objectType);
-        boolean parsed = false;
-        if (resolvedTag != null) {
-            parsed = parseValue(admLexer.getLastTokenImage(), resolvedTag, out);
-        }
-        if (!parsed) {
-            throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
-        }
-    }
-
-    /**
-     * Parses the last token image as a value of type {@code typeTag} and, when the resolved
-     * target type differs, converts it to that type before writing it to {@code out}.
-     * <p>
-     * When a conversion is needed the value is first parsed into {@code castBuffer}; the
-     * promote or demote computer then reads it from there (skipping the leading type tag
-     * byte) and writes the converted value to {@code out}.
-     *
-     * @throws ParseException
-     *             when no target type can be resolved or {@code typeTag} has no value parser
-     * @throws AsterixException
-     *             when no convert computer is available for the required cast
-     */
-    private void parseAndCastNumeric(ATypeTag typeTag, IAType objectType, DataOutput out)
-            throws AsterixException, IOException {
-        final ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
-        final boolean needsCast = targetTypeTag != typeTag;
-
-        final DataOutput parseSink;
-        if (needsCast) {
-            castBuffer.reset();
-            parseSink = castBuffer.getDataOutput();
-        } else {
-            parseSink = out;
-        }
-
-        if (targetTypeTag == null || !parseValue(admLexer.getLastTokenImage(), typeTag, parseSink)) {
-            throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
-        }
-        if (!needsCast) {
-            return;
-        }
-
-        // promotion is preferred; demotion is only tried when promotion is not possible
-        final ITypeConvertComputer converter;
-        if (ATypeHierarchy.canPromote(typeTag, targetTypeTag)) {
-            converter = ATypeHierarchy.getTypePromoteComputer(typeTag, targetTypeTag);
-        } else if (ATypeHierarchy.canDemote(typeTag, targetTypeTag)) {
-            converter = ATypeHierarchy.getTypeDemoteComputer(typeTag, targetTypeTag);
-        } else {
-            return;
-        }
-        if (converter == null) {
-            throw new AsterixException("Can't cast the " + typeTag + " type to the " + targetTypeTag + " type.");
-        }
-        // do the conversion; note that the type tag field should be skipped
-        converter.convertType(castBuffer.getByteArray(), castBuffer.getStartOffset() + 1,
-                castBuffer.getLength() - 1, out);
-    }
-
-    /**
-     * Parses a constructor expression of the form {@code (} string-literal {@code )} whose
-     * constructor keyword has already been consumed, and writes the value to {@code out}.
-     * <p>
-     * The quoted argument is parsed as {@code typeTag}. When the target type resolved against
-     * {@code objectType} differs, the value is parsed into {@code castBuffer} first and then
-     * promoted or demoted to the target type, whichever {@code ATypeHierarchy} allows.
-     *
-     * @param typeTag
-     *            the type named by the constructor
-     * @param objectType
-     *            the declared destination type, or {@code null} when there is none
-     * @param out
-     *            destination of the serialized value
-     * @throws ParseException
-     *             when the type does not fit the destination or the constructor syntax is wrong
-     * @throws AsterixException
-     *             when no convert computer is available for the required cast
-     */
-    private void parseConstructor(ATypeTag typeTag, IAType objectType, DataOutput out)
-            throws AsterixException, AdmLexerException, IOException {
-        final ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
-        if (targetTypeTag != null) {
-            DataOutput dataOutput = out;
-            if (targetTypeTag != typeTag) {
-                castBuffer.reset();
-                dataOutput = castBuffer.getDataOutput();
-            }
-            int token = admLexer.next();
-            if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
-                token = admLexer.next();
-                if (token == AdmLexer.TOKEN_STRING_LITERAL) {
-                    final String unquoted = admLexer.getLastTokenImage().substring(1,
-                            admLexer.getLastTokenImage().length() - 1);
-                    if (!parseValue(unquoted, typeTag, dataOutput)) {
-                        throw new ParseException("Missing deserializer method for constructor: "
-                                + AdmLexer.tokenKindToString(token) + ".");
-                    }
-                    token = admLexer.next();
-                    if (token == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
-                        if (targetTypeTag != typeTag) {
-                            // the target may have been selected through promotion or demotion,
-                            // so pick the matching computer instead of assuming promotion
-                            final ITypeConvertComputer converter;
-                            if (ATypeHierarchy.canPromote(typeTag, targetTypeTag)) {
-                                converter = ATypeHierarchy.getTypePromoteComputer(typeTag, targetTypeTag);
-                            } else {
-                                converter = ATypeHierarchy.getTypeDemoteComputer(typeTag, targetTypeTag);
-                            }
-                            if (converter == null) {
-                                throw new AsterixException(
-                                        "Can't cast the " + typeTag + " type to the " + targetTypeTag + " type.");
-                            }
-                            // do the conversion; note that the type tag field should be skipped
-                            converter.convertType(castBuffer.getByteArray(), castBuffer.getStartOffset() + 1,
-                                    castBuffer.getLength() - 1, out);
-                        }
-                        return;
-                    }
-                }
-            }
-        }
-        // objectType is null for untyped destinations; report the constructor type in that case
-        final String expectedName = objectType == null ? String.valueOf(typeTag) : objectType.getTypeName();
-        throw new ParseException(mismatchErrorMessage + expectedName + ". Got " + typeTag + " instead.");
-    }
-
-    /**
-     * Parses {@code unquoted} as a value of the given type and serializes it to {@code out}.
-     *
-     * @param unquoted
-     *            the textual form of the value, without surrounding quotes
-     * @param typeTag
-     *            the type to parse the text as
-     * @param out
-     *            destination of the serialized value
-     * @return {@code true} when {@code typeTag} is one of the types handled here and the value
-     *         was written; {@code false} when the type is not handled (nothing is written)
-     */
-    private boolean parseValue(final String unquoted, ATypeTag typeTag, DataOutput out)
-            throws AsterixException, HyracksDataException, IOException {
-        switch (typeTag) {
-            case BOOLEAN:
-                parseBoolean(unquoted, out);
-                break;
-            case INT8:
-                parseInt8(unquoted, out);
-                break;
-            case INT16:
-                parseInt16(unquoted, out);
-                break;
-            case INT32:
-                parseInt32(unquoted, out);
-                break;
-            case INT64:
-                parseInt64(unquoted, out);
-                break;
-            case FLOAT:
-                aFloat.setValue(Float.parseFloat(unquoted));
-                floatSerde.serialize(aFloat, out);
-                break;
-            case DOUBLE:
-                aDouble.setValue(Double.parseDouble(unquoted));
-                doubleSerde.serialize(aDouble, out);
-                break;
-            case STRING:
-                aString.setValue(unquoted);
-                stringSerde.serialize(aString, out);
-                break;
-            case TIME:
-                parseTime(unquoted, out);
-                break;
-            case DATE:
-                parseDate(unquoted, out);
-                break;
-            case DATETIME:
-                parseDateTime(unquoted, out);
-                break;
-            case DURATION:
-                parseDuration(unquoted, out);
-                break;
-            case DAYTIMEDURATION:
-                parseDateTimeDuration(unquoted, out);
-                break;
-            case YEARMONTHDURATION:
-                parseYearMonthDuration(unquoted, out);
-                break;
-            case POINT:
-                parsePoint(unquoted, out);
-                break;
-            case POINT3D:
-                parse3DPoint(unquoted, out);
-                break;
-            case CIRCLE:
-                parseCircle(unquoted, out);
-                break;
-            case RECTANGLE:
-                parseRectangle(unquoted, out);
-                break;
-            case LINE:
-                parseLine(unquoted, out);
-                break;
-            case POLYGON:
-                APolygonSerializerDeserializer.parse(unquoted, out);
-                break;
-            case UUID:
-                aUUID.fromStringToAMuatbleUUID(unquoted);
-                uuidSerde.serialize(aUUID, out);
-                break;
-            default:
-                // no parser for this type
-                return false;
-        }
-        return true;
-    }
-
-    private void parseBoolean(String bool, DataOutput out) throws AsterixException, HyracksDataException {
-        String errorMessage = "This can not be an instance of boolean";
-        if (bool.equals("true")) {
-            booleanSerde.serialize(ABoolean.TRUE, out);
-        } else if (bool.equals("false")) {
-            booleanSerde.serialize(ABoolean.FALSE, out);
-        } else {
-            throw new ParseException(errorMessage);
-        }
-    }
-
-    private void parseInt8(String int8, DataOutput out) throws AsterixException, HyracksDataException {
-        String errorMessage = "This can not be an instance of int8";
-        boolean positive = true;
-        byte value = 0;
-        int offset = 0;
-
-        if (int8.charAt(offset) == '+') {
-            offset++;
-        } else if (int8.charAt(offset) == '-') {
-            offset++;
-            positive = false;
-        }
-        for (; offset < int8.length(); offset++) {
-            if (int8.charAt(offset) >= '0' && int8.charAt(offset) <= '9') {
-                value = (byte) (value * 10 + int8.charAt(offset) - '0');
-            } else if (int8.charAt(offset) == 'i' && int8.charAt(offset + 1) == '8' && offset + 2 == int8.length()) {
-                break;
-            } else {
-                throw new ParseException(errorMessage);
-            }
-        }
-        if (value < 0) {
-            throw new ParseException(errorMessage);
-        }
-        if (value > 0 && !positive) {
-            value *= -1;
-        }
-        aInt8.setValue(value);
-        int8Serde.serialize(aInt8, out);
-    }
-
-    private void parseInt16(String int16, DataOutput out) throws AsterixException, HyracksDataException {
-        String errorMessage = "This can not be an instance of int16";
-        boolean positive = true;
-        short value = 0;
-        int offset = 0;
-
-        if (int16.charAt(offset) == '+') {
-            offset++;
-        } else if (int16.charAt(offset) == '-') {
-            offset++;
-            positive = false;
-        }
-        for (; offset < int16.length(); offset++) {
-            if (int16.charAt(offset) >= '0' && int16.charAt(offset) <= '9') {
-                value = (short) (value * 10 + int16.charAt(offset) - '0');
-            } else if (int16.charAt(offset) == 'i' && int16.charAt(offset + 1) == '1' && int16.charAt(offset + 2) == '6'
-                    && offset + 3 == int16.length()) {
-                break;
-            } else {
-                throw new ParseException(errorMessage);
-            }
-        }
-        if (value < 0) {
-            throw new ParseException(errorMessage);
-        }
-        if (value > 0 && !positive) {
-            value *= -1;
-        }
-        aInt16.setValue(value);
-        int16Serde.serialize(aInt16, out);
-    }
-
-    private void parseInt32(String int32, DataOutput out) throws AsterixException, HyracksDataException {
-        String errorMessage = "This can not be an instance of int32";
-        boolean positive = true;
-        int value = 0;
-        int offset = 0;
-
-        if (int32.charAt(offset) == '+') {
-            offset++;
-        } else if (int32.charAt(offset) == '-') {
-            offset++;
-            positive = false;
-        }
-        for (; offset < int32.length(); offset++) {
-            if (int32.charAt(offset) >= '0' && int32.charAt(offset) <= '9') {
-                value = (value * 10 + int32.charAt(offset) - '0');
-            } else if (int32.charAt(offset) == 'i' && int32.charAt(offset + 1) == '3' && int32.charAt(offset + 2) == '2'
-                    && offset + 3 == int32.length()) {
-                break;
-            } else {
-                throw new ParseException(errorMessage);
-            }
-        }
-        if (value < 0) {
-            throw new ParseException(errorMessage);
-        }
-        if (value > 0 && !positive) {
-            value *= -1;
-        }
-
-        aInt32.setValue(value);
-        int32Serde.serialize(aInt32, out);
-    }
-
-    private void parseInt64(String int64, DataOutput out) throws AsterixException, HyracksDataException {
-        String errorMessage = "This can not be an instance of int64";
-        boolean positive = true;
-        long value = 0;
-        int offset = 0;
-
-        if (int64.charAt(offset) == '+') {
-            offset++;
-        } else if (int64.charAt(offset) == '-') {
-            offset++;
-            positive = false;
-        }
-        for (; offset < int64.length(); offset++) {
-            if (int64.charAt(offset) >= '0' && int64.charAt(offset) <= '9') {
-                value = (value * 10 + int64.charAt(offset) - '0');
-            } else if (int64.charAt(offset) == 'i' && int64.charAt(offset + 1) == '6' && int64.charAt(offset + 2) == '4'
-                    && offset + 3 == int64.length()) {
-                break;
-            } else {
-                throw new ParseException(errorMessage);
-            }
-        }
-        if (value < 0) {
-            throw new ParseException(errorMessage);
-        }
-        if (value > 0 && !positive) {
-            value *= -1;
-        }
-
-        aInt64.setValue(value);
-        int64Serde.serialize(aInt64, out);
-    }
-
-    /**
-     * Resets the pools before parsing a top-level record.
-     * In this way the elements in those pools can be re-used.
-     */
-    private void resetPools() {
-        listBuilderPool.reset();
-        recordBuilderPool.reset();
-        abvsBuilderPool.reset();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractDataParser.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractDataParser.java
deleted file mode 100644
index 794097f..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractDataParser.java
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.io.DataOutput;
-
-import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
-import org.apache.asterix.om.base.ABinary;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.base.ACircle;
-import org.apache.asterix.om.base.ADate;
-import org.apache.asterix.om.base.ADateTime;
-import org.apache.asterix.om.base.ADayTimeDuration;
-import org.apache.asterix.om.base.ADouble;
-import org.apache.asterix.om.base.ADuration;
-import org.apache.asterix.om.base.AFloat;
-import org.apache.asterix.om.base.AInt16;
-import org.apache.asterix.om.base.AInt32;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.base.AInt8;
-import org.apache.asterix.om.base.AInterval;
-import org.apache.asterix.om.base.ALine;
-import org.apache.asterix.om.base.AMutableBinary;
-import org.apache.asterix.om.base.AMutableCircle;
-import org.apache.asterix.om.base.AMutableDate;
-import org.apache.asterix.om.base.AMutableDateTime;
-import org.apache.asterix.om.base.AMutableDayTimeDuration;
-import org.apache.asterix.om.base.AMutableDouble;
-import org.apache.asterix.om.base.AMutableDuration;
-import org.apache.asterix.om.base.AMutableFloat;
-import org.apache.asterix.om.base.AMutableInt16;
-import org.apache.asterix.om.base.AMutableInt32;
-import org.apache.asterix.om.base.AMutableInt64;
-import org.apache.asterix.om.base.AMutableInt8;
-import org.apache.asterix.om.base.AMutableInterval;
-import org.apache.asterix.om.base.AMutableLine;
-import org.apache.asterix.om.base.AMutablePoint;
-import org.apache.asterix.om.base.AMutablePoint3D;
-import org.apache.asterix.om.base.AMutableRectangle;
-import org.apache.asterix.om.base.AMutableString;
-import org.apache.asterix.om.base.AMutableTime;
-import org.apache.asterix.om.base.AMutableUUID;
-import org.apache.asterix.om.base.AMutableYearMonthDuration;
-import org.apache.asterix.om.base.ANull;
-import org.apache.asterix.om.base.APoint;
-import org.apache.asterix.om.base.APoint3D;
-import org.apache.asterix.om.base.ARectangle;
-import org.apache.asterix.om.base.AString;
-import org.apache.asterix.om.base.ATime;
-import org.apache.asterix.om.base.AUUID;
-import org.apache.asterix.om.base.AYearMonthDuration;
-import org.apache.asterix.om.base.temporal.ADateParserFactory;
-import org.apache.asterix.om.base.temporal.ADurationParserFactory;
-import org.apache.asterix.om.base.temporal.ADurationParserFactory.ADurationParseOption;
-import org.apache.asterix.om.base.temporal.ATimeParserFactory;
-import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.util.bytes.Base64Parser;
-import org.apache.hyracks.util.bytes.HexParser;
-
-/**
- * Base class for data parsers. Includes the common set of definitions for
- * serializers/deserializers for built-in ADM types.
- */
-public abstract class AbstractDataParser implements IDataParser {
-
-    protected AMutableInt8 aInt8 = new AMutableInt8((byte) 0);
-    protected AMutableInt16 aInt16 = new AMutableInt16((short) 0);
-    protected AMutableInt32 aInt32 = new AMutableInt32(0);
-    protected AMutableInt64 aInt64 = new AMutableInt64(0);
-    protected AMutableDouble aDouble = new AMutableDouble(0);
-    protected AMutableFloat aFloat = new AMutableFloat(0);
-    protected AMutableString aString = new AMutableString("");
-    protected AMutableBinary aBinary = new AMutableBinary(null, 0, 0);
-    protected AMutableString aStringFieldName = new AMutableString("");
-    protected AMutableUUID aUUID = new AMutableUUID(0, 0);
-    // For temporal and spatial data types
-    protected AMutableTime aTime = new AMutableTime(0);
-    protected AMutableDateTime aDateTime = new AMutableDateTime(0L);
-    protected AMutableDuration aDuration = new AMutableDuration(0, 0);
-    protected AMutableDayTimeDuration aDayTimeDuration = new AMutableDayTimeDuration(0);
-    protected AMutableYearMonthDuration aYearMonthDuration = new AMutableYearMonthDuration(0);
-    protected AMutablePoint aPoint = new AMutablePoint(0, 0);
-    protected AMutablePoint3D aPoint3D = new AMutablePoint3D(0, 0, 0);
-    protected AMutableCircle aCircle = new AMutableCircle(null, 0);
-    protected AMutableRectangle aRectangle = new AMutableRectangle(null, null);
-    protected AMutablePoint aPoint2 = new AMutablePoint(0, 0);
-    protected AMutableLine aLine = new AMutableLine(null, null);
-    protected AMutableDate aDate = new AMutableDate(0);
-    protected final AMutableInterval aInterval = new AMutableInterval(0L, 0L, (byte) 0);
-
-    // Serializers
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<ADouble> doubleSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ADOUBLE);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AString> stringSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ASTRING);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<ABinary> binarySerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ABINARY);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AFloat> floatSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AFLOAT);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AInt8> int8Serde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT8);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AInt16> int16Serde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT16);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AInt32> int32Serde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT32);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AInt64> int64Serde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT64);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<ABoolean> booleanSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ABOOLEAN);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<ANull> nullSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ANULL);
-
-    protected final HexParser hexParser = new HexParser();
-    protected final Base64Parser base64Parser = new Base64Parser();
-
-    // For UUID, we assume that the format is the string representation of UUID
-    // (xxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx) when parsing the data.
-    // Thus, we need to call UUID.fromStringToAMuatbleUUID() to convert it to the internal representation (two long values).
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AUUID> uuidSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AUUID);
-
-    // To avoid race conditions, the serdes for temporal and spatial data types needs to be one per parser
-    // ^^^^^^^^^^^^^^^^^^^^^^^^ ??? then why all these serdes are static?
-    @SuppressWarnings("unchecked")
-    protected static final ISerializerDeserializer<ATime> timeSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ATIME);
-    @SuppressWarnings("unchecked")
-    protected static final ISerializerDeserializer<ADate> dateSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ADATE);
-    @SuppressWarnings("unchecked")
-    protected static final ISerializerDeserializer<ADateTime> datetimeSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ADATETIME);
-    @SuppressWarnings("unchecked")
-    protected static final ISerializerDeserializer<ADuration> durationSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ADURATION);
-    @SuppressWarnings("unchecked")
-    protected static final ISerializerDeserializer<ADayTimeDuration> dayTimeDurationSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ADAYTIMEDURATION);
-    @SuppressWarnings("unchecked")
-    protected static final ISerializerDeserializer<AYearMonthDuration> yearMonthDurationSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AYEARMONTHDURATION);
-    @SuppressWarnings("unchecked")
-    protected final static ISerializerDeserializer<APoint> pointSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.APOINT);
-    @SuppressWarnings("unchecked")
-    protected final static ISerializerDeserializer<APoint3D> point3DSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.APOINT3D);
-    @SuppressWarnings("unchecked")
-    protected final static ISerializerDeserializer<ACircle> circleSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ACIRCLE);
-    @SuppressWarnings("unchecked")
-    protected final static ISerializerDeserializer<ARectangle> rectangleSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ARECTANGLE);
-    @SuppressWarnings("unchecked")
-    protected final static ISerializerDeserializer<ALine> lineSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ALINE);
-    @SuppressWarnings("unchecked")
-    private static final ISerializerDeserializer<AInterval> intervalSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINTERVAL);
-
-    protected String filename;
-
-    void setFilename(String filename) {
-        this.filename = filename;
-    }
-
-    protected void parseTime(String time, DataOutput out) throws HyracksDataException {
-        int chrononTimeInMs;
-        try {
-            chrononTimeInMs = ATimeParserFactory.parseTimePart(time, 0, time.length());
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-        aTime.setValue(chrononTimeInMs);
-        timeSerde.serialize(aTime, out);
-    }
-
-    protected void parseDate(String date, DataOutput out) throws HyracksDataException {
-        long chrononTimeInMs = 0;
-        try {
-            chrononTimeInMs = ADateParserFactory.parseDatePart(date, 0, date.length());
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-        short temp = 0;
-        if (chrononTimeInMs < 0 && chrononTimeInMs % GregorianCalendarSystem.CHRONON_OF_DAY != 0) {
-            temp = 1;
-        }
-        aDate.setValue((int) (chrononTimeInMs / GregorianCalendarSystem.CHRONON_OF_DAY) - temp);
-        dateSerde.serialize(aDate, out);
-    }
-
-    protected void parseDateTime(String datetime, DataOutput out) throws HyracksDataException {
-        long chrononTimeInMs = 0;
-        try {
-            // +1 if it is negative (-)
-            short timeOffset = (short) ((datetime.charAt(0) == '-') ? 1 : 0);
-
-            timeOffset += 8;
-
-            if (datetime.charAt(timeOffset) != 'T') {
-                timeOffset += 2;
-                if (datetime.charAt(timeOffset) != 'T') {
-                    throw new AlgebricksException("This can not be an instance of datetime: missing T");
-                }
-            }
-            chrononTimeInMs = ADateParserFactory.parseDatePart(datetime, 0, timeOffset);
-            chrononTimeInMs += ATimeParserFactory.parseTimePart(datetime, timeOffset + 1,
-                    datetime.length() - timeOffset - 1);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-        aDateTime.setValue(chrononTimeInMs);
-        datetimeSerde.serialize(aDateTime, out);
-    }
-
-    protected void parseDuration(String duration, DataOutput out) throws HyracksDataException {
-        try {
-            ADurationParserFactory.parseDuration(duration, 0, duration.length(), aDuration, ADurationParseOption.All);
-            durationSerde.serialize(aDuration, out);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    protected void parseDateTimeDuration(String durationString, DataOutput out) throws HyracksDataException {
-        try {
-            ADurationParserFactory.parseDuration(durationString, 0, durationString.length(), aDayTimeDuration,
-                    ADurationParseOption.All);
-            dayTimeDurationSerde.serialize(aDayTimeDuration, out);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    protected void parseYearMonthDuration(String durationString, DataOutput out) throws HyracksDataException {
-        try {
-            ADurationParserFactory.parseDuration(durationString, 0, durationString.length(), aYearMonthDuration,
-                    ADurationParseOption.All);
-            yearMonthDurationSerde.serialize(aYearMonthDuration, out);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    protected void parsePoint(String point, DataOutput out) throws HyracksDataException {
-        try {
-            aPoint.setValue(Double.parseDouble(point.substring(0, point.indexOf(','))),
-                    Double.parseDouble(point.substring(point.indexOf(',') + 1, point.length())));
-            pointSerde.serialize(aPoint, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(point + " can not be an instance of point");
-        }
-    }
-
-    protected void parse3DPoint(String point3d, DataOutput out) throws HyracksDataException {
-        try {
-            int firstCommaIndex = point3d.indexOf(',');
-            int secondCommaIndex = point3d.indexOf(',', firstCommaIndex + 1);
-            aPoint3D.setValue(Double.parseDouble(point3d.substring(0, firstCommaIndex)),
-                    Double.parseDouble(point3d.substring(firstCommaIndex + 1, secondCommaIndex)),
-                    Double.parseDouble(point3d.substring(secondCommaIndex + 1, point3d.length())));
-            point3DSerde.serialize(aPoint3D, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(point3d + " can not be an instance of point3d");
-        }
-    }
-
-    protected void parseCircle(String circle, DataOutput out) throws HyracksDataException {
-        try {
-            String[] parts = circle.split(" ");
-            aPoint.setValue(Double.parseDouble(parts[0].split(",")[0]), Double.parseDouble(parts[0].split(",")[1]));
-            aCircle.setValue(aPoint, Double.parseDouble(parts[1].substring(0, parts[1].length())));
-            circleSerde.serialize(aCircle, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(circle + " can not be an instance of circle");
-        }
-    }
-
-    protected void parseRectangle(String rectangle, DataOutput out) throws HyracksDataException {
-        try {
-            String[] points = rectangle.split(" ");
-            if (points.length != 2) {
-                throw new HyracksDataException("rectangle consists of only 2 points.");
-            }
-            aPoint.setValue(Double.parseDouble(points[0].split(",")[0]), Double.parseDouble(points[0].split(",")[1]));
-            aPoint2.setValue(Double.parseDouble(points[1].split(",")[0]), Double.parseDouble(points[1].split(",")[1]));
-            if (aPoint.getX() > aPoint2.getX() && aPoint.getY() > aPoint2.getY()) {
-                aRectangle.setValue(aPoint2, aPoint);
-            } else if (aPoint.getX() < aPoint2.getX() && aPoint.getY() < aPoint2.getY()) {
-                aRectangle.setValue(aPoint, aPoint2);
-            } else {
-                throw new IllegalArgumentException(
-                        "Rectangle arugment must be either (bottom left point, top right point) or (top right point, bottom left point)");
-            }
-            rectangleSerde.serialize(aRectangle, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(rectangle + " can not be an instance of rectangle");
-        }
-    }
-
-    protected void parseLine(String line, DataOutput out) throws HyracksDataException {
-        try {
-            String[] points = line.split(" ");
-            if (points.length != 2) {
-                throw new HyracksDataException("line consists of only 2 points.");
-            }
-            aPoint.setValue(Double.parseDouble(points[0].split(",")[0]), Double.parseDouble(points[0].split(",")[1]));
-            aPoint2.setValue(Double.parseDouble(points[1].split(",")[0]), Double.parseDouble(points[1].split(",")[1]));
-            aLine.setValue(aPoint, aPoint2);
-            lineSerde.serialize(aLine, out);
-        } catch (HyracksDataException e) {
-            throw new HyracksDataException(line + " can not be an instance of line");
-        }
-    }
-
-    protected void parseHexBinaryString(char[] input, int start, int length, DataOutput out)
-            throws HyracksDataException {
-        hexParser.generateByteArrayFromHexString(input, start, length);
-        aBinary.setValue(hexParser.getByteArray(), 0, hexParser.getLength());
-        binarySerde.serialize(aBinary, out);
-    }
-
-    protected void parseBase64BinaryString(char[] input, int start, int length, DataOutput out)
-            throws HyracksDataException {
-        base64Parser.generatePureByteArrayFromBase64String(input, start, length);
-        aBinary.setValue(base64Parser.getByteArray(), 0, base64Parser.getLength());
-        binarySerde.serialize(aBinary, out);
-    }
-
-    protected void parseDateTimeInterval(String interval, DataOutput out) throws HyracksDataException {
-        long chrononTimeInMsStart = 0;
-        long chrononTimeInMsEnd = 0;
-        try {
-            // the starting point for parsing (so for the accessor)
-            int startOffset = 0;
-            int endOffset, timeSeperatorOffsetInDatetimeString;
-
-            // Get the index for the comma
-            int commaIndex = interval.indexOf(',');
-            if (commaIndex < 1) {
-                throw new AlgebricksException("comma is missing for a string of interval");
-            }
-
-            endOffset = commaIndex - 1;
-            timeSeperatorOffsetInDatetimeString = interval.indexOf('T');
-
-            if (timeSeperatorOffsetInDatetimeString < 0) {
-                throw new AlgebricksException(
-                        "This can not be an instance of interval: missing T for a datetime value.");
-            }
-
-            chrononTimeInMsStart = parseDatePart(interval, startOffset, timeSeperatorOffsetInDatetimeString - 1);
-
-            chrononTimeInMsStart += parseTimePart(interval, timeSeperatorOffsetInDatetimeString + 1, endOffset);
-
-            // Interval End
-            startOffset = commaIndex + 1;
-            endOffset = interval.length() - 1;
-
-            timeSeperatorOffsetInDatetimeString = interval.indexOf('T', startOffset);
-
-            if (timeSeperatorOffsetInDatetimeString < 0) {
-                throw new AlgebricksException(
-                        "This can not be an instance of interval: missing T for a datetime value.");
-            }
-
-            chrononTimeInMsEnd = parseDatePart(interval, startOffset, timeSeperatorOffsetInDatetimeString - 1);
-
-            chrononTimeInMsEnd += parseTimePart(interval, timeSeperatorOffsetInDatetimeString + 1, endOffset);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-
-        try {
-            aInterval.setValue(chrononTimeInMsStart, chrononTimeInMsEnd, ATypeTag.DATETIME.serialize());
-        } catch (AlgebricksException e) {
-            throw new HyracksDataException(e);
-        }
-
-        intervalSerde.serialize(aInterval, out);
-    }
-
-    protected void parseTimeInterval(String interval, DataOutput out) throws HyracksDataException {
-        long chrononTimeInMsStart = 0;
-        long chrononTimeInMsEnd = 0;
-        try {
-            int startOffset = 0;
-            int endOffset;
-
-            // Get the index for the comma
-            int commaIndex = interval.indexOf(',');
-            if (commaIndex < 0) {
-                throw new AlgebricksException("comma is missing for a string of interval");
-            }
-
-            endOffset = commaIndex - 1;
-            // Interval Start
-            chrononTimeInMsStart = parseTimePart(interval, startOffset, endOffset);
-
-            if (chrononTimeInMsStart < 0) {
-                chrononTimeInMsStart += GregorianCalendarSystem.CHRONON_OF_DAY;
-            }
-
-            // Interval End
-            startOffset = commaIndex + 1;
-            endOffset = interval.length() - 1;
-
-            chrononTimeInMsEnd = parseTimePart(interval, startOffset, endOffset);
-            if (chrononTimeInMsEnd < 0) {
-                chrononTimeInMsEnd += GregorianCalendarSystem.CHRONON_OF_DAY;
-            }
-
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-
-        try {
-            aInterval.setValue(chrononTimeInMsStart, chrononTimeInMsEnd, ATypeTag.TIME.serialize());
-        } catch (AlgebricksException e) {
-            throw new HyracksDataException(e);
-        }
-        intervalSerde.serialize(aInterval, out);
-    }
-
-    protected void parseDateInterval(String interval, DataOutput out) throws HyracksDataException {
-        long chrononTimeInMsStart = 0;
-        long chrononTimeInMsEnd = 0;
-        try {
-            // the starting point for parsing (so for the accessor)
-            int startOffset = 0;
-            int endOffset;
-
-            // Get the index for the comma
-            int commaIndex = interval.indexOf(',');
-            if (commaIndex < 1) {
-                throw new AlgebricksException("comma is missing for a string of interval");
-            }
-
-            endOffset = commaIndex - 1;
-            chrononTimeInMsStart = parseDatePart(interval, startOffset, endOffset);
-
-            // Interval End
-            startOffset = commaIndex + 1;
-            endOffset = interval.length() - 1;
-
-            chrononTimeInMsEnd = parseDatePart(interval, startOffset, endOffset);
-
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-
-        try {
-            aInterval.setValue((chrononTimeInMsStart / GregorianCalendarSystem.CHRONON_OF_DAY),
-                    (chrononTimeInMsEnd / GregorianCalendarSystem.CHRONON_OF_DAY), ATypeTag.DATE.serialize());
-        } catch (AlgebricksException e) {
-            throw new HyracksDataException(e);
-        }
-        intervalSerde.serialize(aInterval, out);
-    }
-
-    private long parseDatePart(String interval, int startOffset, int endOffset)
-            throws AlgebricksException, HyracksDataException {
-
-        while (interval.charAt(endOffset) == '"' || interval.charAt(endOffset) == ' ') {
-            endOffset--;
-        }
-
-        while (interval.charAt(startOffset) == '"' || interval.charAt(startOffset) == ' ') {
-            startOffset++;
-        }
-
-        return ADateParserFactory.parseDatePart(interval, startOffset, endOffset - startOffset + 1);
-    }
-
-    private int parseTimePart(String interval, int startOffset, int endOffset)
-            throws AlgebricksException, HyracksDataException {
-
-        while (interval.charAt(endOffset) == '"' || interval.charAt(endOffset) == ' ') {
-            endOffset--;
-        }
-
-        while (interval.charAt(startOffset) == '"' || interval.charAt(startOffset) == ' ') {
-            startOffset++;
-        }
-
-        return ATimeParserFactory.parseTimePart(interval, startOffset, endOffset - startOffset + 1);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractTupleParser.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractTupleParser.java
deleted file mode 100644
index f3199e9..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AbstractTupleParser.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.logging.Logger;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-
-/**
- * An abstract class implementation for ITupleParser. It provides common
- * functionality involved in parsing data in an external format and packing
- * frames with formed tuples.
- */
-public abstract class AbstractTupleParser implements ITupleParser {
-
-    protected static Logger LOGGER = Logger.getLogger(AbstractTupleParser.class.getName());
-
-    protected ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
-    protected DataOutput dos = tb.getDataOutput();
-    protected final ARecordType recType;
-    protected final IHyracksCommonContext ctx;
-
-    public AbstractTupleParser(IHyracksCommonContext ctx, ARecordType recType) throws HyracksDataException {
-        this.recType = recType;
-        this.ctx = ctx;
-    }
-
-    public abstract IDataParser getDataParser();
-
-    public abstract ITupleForwardPolicy getTupleParserPolicy();
-
-    @Override
-    public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
-        IDataParser parser = getDataParser();
-        ITupleForwardPolicy policy = getTupleParserPolicy();
-        try {
-            parser.initialize(in, recType, true);
-            policy.initialize(ctx, writer);
-            while (true) {
-                tb.reset();
-                if (!parser.parse(tb.getDataOutput())) {
-                    break;
-                }
-                tb.addFieldEndOffset();
-                policy.addTuple(tb);
-            }
-            policy.close();
-        } catch (AsterixException ae) {
-            throw new HyracksDataException(ae);
-        } catch (IOException ioe) {
-            throw new HyracksDataException(ioe);
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AsterixTupleParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AsterixTupleParserFactory.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AsterixTupleParserFactory.java
deleted file mode 100644
index 2053a75..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/AsterixTupleParserFactory.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.FeedPolicyAccessor;
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.asterix.common.parse.ITupleForwardPolicy.TupleForwardPolicyType;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.data.parsers.DoubleParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.FloatParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.LongParserFactory;
-import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-public class AsterixTupleParserFactory implements ITupleParserFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    public static enum InputDataFormat {
-        ADM,
-        DELIMITED,
-        UNKNOWN
-    }
-
-    public static final String HAS_HEADER = "has.header";
-    public static final String KEY_FORMAT = "format";
-    public static final String FORMAT_ADM = "adm";
-    public static final String FORMAT_DELIMITED_TEXT = "delimited-text";
-    public static final String FORMAT_BINARY = "binary";
-
-    public static final String KEY_PATH = "path";
-    public static final String KEY_SOURCE_DATATYPE = "type-name";
-    public static final String KEY_DELIMITER = "delimiter";
-    public static final String KEY_PARSER_FACTORY = "parser";
-    public static final String KEY_HEADER = "header";
-    public static final String KEY_QUOTE = "quote";
-    public static final String TIME_TRACKING = "time.tracking";
-    public static final String DEFAULT_QUOTE = "\"";
-    public static final String AT_LEAST_ONE_SEMANTICS = FeedPolicyAccessor.AT_LEAST_ONE_SEMANTICS;
-    public static final String NODE_RESOLVER_FACTORY_PROPERTY = "node.Resolver";
-    public static final String DEFAULT_DELIMITER = ",";
-
-    private static Map<ATypeTag, IValueParserFactory> valueParserFactoryMap = initializeValueParserFactoryMap();
-
-    private static Map<ATypeTag, IValueParserFactory> initializeValueParserFactoryMap() {
-        Map<ATypeTag, IValueParserFactory> m = new HashMap<ATypeTag, IValueParserFactory>();
-        m.put(ATypeTag.INT32, IntegerParserFactory.INSTANCE);
-        m.put(ATypeTag.FLOAT, FloatParserFactory.INSTANCE);
-        m.put(ATypeTag.DOUBLE, DoubleParserFactory.INSTANCE);
-        m.put(ATypeTag.INT64, LongParserFactory.INSTANCE);
-        m.put(ATypeTag.STRING, UTF8StringParserFactory.INSTANCE);
-        return m;
-    }
-
-    private final ARecordType recordType;
-    private final Map<String, String> configuration;
-    private final InputDataFormat inputDataFormat;
-
-    public AsterixTupleParserFactory(Map<String, String> configuration, ARecordType recType, InputDataFormat dataFormat) {
-        this.recordType = recType;
-        this.configuration = configuration;
-        this.inputDataFormat = dataFormat;
-    }
-
-    @Override
-    public ITupleParser createTupleParser(IHyracksCommonContext ctx) throws HyracksDataException {
-        ITupleParser tupleParser = null;
-        try {
-            String parserFactoryClassname = (String) configuration.get(KEY_PARSER_FACTORY);
-            ITupleParserFactory parserFactory = null;
-            if (parserFactoryClassname != null) {
-                parserFactory = (ITupleParserFactory) Class.forName(parserFactoryClassname).newInstance();
-                tupleParser = parserFactory.createTupleParser(ctx);
-            } else {
-                IDataParser dataParser = null;
-                dataParser = createDataParser(ctx);
-                ITupleForwardPolicy policy = getTupleParserPolicy(configuration);
-                policy.configure(configuration);
-                tupleParser = new GenericTupleParser(ctx, recordType, dataParser, policy);
-            }
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-        return tupleParser;
-    }
-
-    private static class GenericTupleParser extends AbstractTupleParser {
-
-        private final IDataParser dataParser;
-
-        private final ITupleForwardPolicy policy;
-
-        public GenericTupleParser(IHyracksCommonContext ctx, ARecordType recType, IDataParser dataParser,
-                ITupleForwardPolicy policy) throws HyracksDataException {
-            super(ctx, recType);
-            this.dataParser = dataParser;
-            this.policy = policy;
-        }
-
-        @Override
-        public IDataParser getDataParser() {
-            return dataParser;
-        }
-
-        @Override
-        public ITupleForwardPolicy getTupleParserPolicy() {
-            return policy;
-        }
-
-    }
-
-    private IDataParser createDataParser(IHyracksCommonContext ctx) throws Exception {
-        IDataParser dataParser = null;
-        switch (inputDataFormat) {
-            case ADM:
-                dataParser = new ADMDataParser();
-                break;
-            case DELIMITED:
-                dataParser = configureDelimitedDataParser(ctx);
-                break;
-            case UNKNOWN:
-                String specifiedFormat = (String) configuration.get(KEY_FORMAT);
-                if (specifiedFormat == null) {
-                    throw new IllegalArgumentException(" Unspecified data format");
-                } else {
-                    if (FORMAT_ADM.equalsIgnoreCase(specifiedFormat.toUpperCase())) {
-                        dataParser = new ADMDataParser();
-                    } else if (FORMAT_DELIMITED_TEXT.equalsIgnoreCase(specifiedFormat.toUpperCase())) {
-                        dataParser = configureDelimitedDataParser(ctx);
-                    } else {
-                        throw new IllegalArgumentException(" format " + configuration.get(KEY_FORMAT)
-                                + " not supported");
-                    }
-                }
-        }
-        return dataParser;
-    }
-
-    public static ITupleForwardPolicy getTupleParserPolicy(Map<String, String> configuration) {
-        ITupleForwardPolicy policy = null;
-        ITupleForwardPolicy.TupleForwardPolicyType policyType = null;
-        String propValue = configuration.get(ITupleForwardPolicy.PARSER_POLICY);
-        if (propValue == null) {
-            policyType = TupleForwardPolicyType.FRAME_FULL;
-        } else {
-            policyType = TupleForwardPolicyType.valueOf(propValue.trim().toUpperCase());
-        }
-        switch (policyType) {
-            case FRAME_FULL:
-                policy = new FrameFullTupleForwardPolicy();
-                break;
-            case COUNTER_TIMER_EXPIRED:
-                policy = new CounterTimerTupleForwardPolicy();
-                break;
-            case RATE_CONTROLLED:
-                policy = new RateControlledTupleForwardPolicy();
-                break;
-        }
-        return policy;
-    }
-
-    private IDataParser configureDelimitedDataParser(IHyracksCommonContext ctx) throws AsterixException {
-        IValueParserFactory[] valueParserFactories = getValueParserFactories();
-        Character delimiter = getDelimiter(configuration);
-        char quote = getQuote(configuration, delimiter);
-        boolean hasHeader = hasHeader();
-        return new DelimitedDataParser(recordType, valueParserFactories, delimiter, quote, hasHeader);
-    }
-  
-
-    private boolean hasHeader() {
-        String value = configuration.get(KEY_HEADER);
-        if (value != null) {
-            return Boolean.valueOf(value);
-        }
-        return false;
-    }
-
-    private IValueParserFactory[] getValueParserFactories() {
-        int n = recordType.getFieldTypes().length;
-        IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
-        for (int i = 0; i < n; i++) {
-            ATypeTag tag = null;
-            if (recordType.getFieldTypes()[i].getTypeTag() == ATypeTag.UNION) {
-                List<IAType> unionTypes = ((AUnionType) recordType.getFieldTypes()[i]).getUnionList();
-                if (unionTypes.size() != 2 && unionTypes.get(0).getTypeTag() != ATypeTag.NULL) {
-                    throw new NotImplementedException("Non-optional UNION type is not supported.");
-                }
-                tag = unionTypes.get(1).getTypeTag();
-            } else {
-                tag = recordType.getFieldTypes()[i].getTypeTag();
-            }
-            if (tag == null) {
-                throw new NotImplementedException("Failed to get the type information for field " + i + ".");
-            }
-            IValueParserFactory vpf = valueParserFactoryMap.get(tag);
-            if (vpf == null) {
-                throw new NotImplementedException("No value parser factory for delimited fields of type " + tag);
-            }
-            fieldParserFactories[i] = vpf;
-        }
-        return fieldParserFactories;
-    }
-
-    // Get a delimiter from the given configuration
-    public static char getDelimiter(Map<String, String> configuration) throws AsterixException {
-        String delimiterValue = configuration.get(AsterixTupleParserFactory.KEY_DELIMITER);
-        if (delimiterValue == null) {
-            delimiterValue = AsterixTupleParserFactory.DEFAULT_DELIMITER;
-        } else if (delimiterValue.length() != 1) {
-            throw new AsterixException("'" + delimiterValue
-                    + "' is not a valid delimiter. The length of a delimiter should be 1.");
-        }
-        return delimiterValue.charAt(0);
-    }
-
-    // Get a quote from the given configuration when the delimiter is given
-    // Need to pass delimiter to check whether they share the same character
-    public static char getQuote(Map<String, String> configuration, char delimiter) throws AsterixException {
-        String quoteValue = configuration.get(AsterixTupleParserFactory.KEY_QUOTE);
-        if (quoteValue == null) {
-            quoteValue = AsterixTupleParserFactory.DEFAULT_QUOTE;
-        } else if (quoteValue.length() != 1) {
-            throw new AsterixException("'" + quoteValue + "' is not a valid quote. The length of a quote should be 1.");
-        }
-
-        // Since delimiter (char type value) can't be null,
-        // we only check whether delimiter and quote use the same character
-        if (quoteValue.charAt(0) == delimiter) {
-            throw new AsterixException("Quote '" + quoteValue + "' cannot be used with the delimiter '" + delimiter
-                    + "'. ");
-        }
-
-        return quoteValue.charAt(0);
-    }
-
-    // Get the header flag
-    public static boolean getHasHeader(Map<String, String> configuration) {
-        return Boolean.parseBoolean(configuration.get(AsterixTupleParserFactory.KEY_HEADER));
-    }
-
-}



[03/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/CounterTimerTupleForwardPolicy.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/CounterTimerTupleForwardPolicy.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/CounterTimerTupleForwardPolicy.java
deleted file mode 100644
index c06caef..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/CounterTimerTupleForwardPolicy.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.util.Map;
-import java.util.Timer;
-import java.util.TimerTask;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.hyracks.api.comm.IFrame;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
-
-public class CounterTimerTupleForwardPolicy implements ITupleForwardPolicy {
-
-    public static final String BATCH_SIZE = "batch-size";
-    public static final String BATCH_INTERVAL = "batch-interval";
-
-    private static final Logger LOGGER = Logger.getLogger(CounterTimerTupleForwardPolicy.class.getName());
-   
-    private FrameTupleAppender appender;
-    private IFrame frame;
-    private IFrameWriter writer;
-    private int batchSize;
-    private long batchInterval;
-    private int tuplesInFrame = 0;
-    private TimeBasedFlushTask flushTask;
-    private Timer timer;
-    private Object lock = new Object();
-    private boolean activeTimer = false;
-
-    public void configure(Map<String, String> configuration) {
-        String propValue = (String) configuration.get(BATCH_SIZE);
-        if (propValue != null) {
-            batchSize = Integer.parseInt(propValue);
-        } else {
-            batchSize = -1;
-        }
-
-        propValue = (String) configuration.get(BATCH_INTERVAL);
-        if (propValue != null) {
-            batchInterval = Long.parseLong(propValue);
-            activeTimer = true;
-        }
-    }
-
-    public void initialize(IHyracksCommonContext ctx, IFrameWriter writer) throws HyracksDataException {
-        this.appender = new FrameTupleAppender();
-        this.frame = new VSizeFrame(ctx);
-        appender.reset(frame, true);
-        this.writer = writer;
-        if (activeTimer) {
-            this.timer = new Timer();
-            this.flushTask = new TimeBasedFlushTask(writer, lock);
-            timer.scheduleAtFixedRate(flushTask, 0, batchInterval);
-        }
-    }
-
-    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
-        if (activeTimer) {
-            synchronized (lock) {
-                addTupleToFrame(tb);
-            }
-        } else {
-            addTupleToFrame(tb);
-        }
-        tuplesInFrame++;
-    }
-
-    private void addTupleToFrame(ArrayTupleBuilder tb) throws HyracksDataException {
-        if (tuplesInFrame == batchSize || !appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-            if (LOGGER.isLoggable(Level.INFO)) {
-                LOGGER.info("flushing frame containg (" + tuplesInFrame + ") tuples");
-            }
-            FrameUtils.flushFrame(frame.getBuffer(), writer);
-            tuplesInFrame = 0;
-            appender.reset(frame, true);
-            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-                throw new IllegalStateException();
-            }
-        }
-    }
-
-    public void close() throws HyracksDataException {
-        if (appender.getTupleCount() > 0) {
-            if (activeTimer) {
-                synchronized (lock) {
-                    FrameUtils.flushFrame(frame.getBuffer(), writer);
-                }
-            } else {
-                FrameUtils.flushFrame(frame.getBuffer(), writer);
-            }
-        }
-
-        if (timer != null) {
-            timer.cancel();
-        }
-    }
-
-    private class TimeBasedFlushTask extends TimerTask {
-
-        private IFrameWriter writer;
-        private final Object lock;
-
-        public TimeBasedFlushTask(IFrameWriter writer, Object lock) {
-            this.writer = writer;
-            this.lock = lock;
-        }
-
-        @Override
-        public void run() {
-            try {
-                if (tuplesInFrame > 0) {
-                    if (LOGGER.isLoggable(Level.INFO)) {
-                        LOGGER.info("TTL expired flushing frame (" + tuplesInFrame + ")");
-                    }
-                    synchronized (lock) {
-                        FrameUtils.flushFrame(frame.getBuffer(), writer);
-                        appender.reset(frame, true);
-                        tuplesInFrame = 0;
-                    }
-                }
-            } catch (HyracksDataException e) {
-                e.printStackTrace();
-            }
-        }
-
-    }
-
-    @Override
-    public TupleForwardPolicyType getType() {
-        return TupleForwardPolicyType.COUNTER_TIMER_EXPIRED;
-    }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/DelimitedDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/DelimitedDataParser.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/DelimitedDataParser.java
deleted file mode 100644
index ca7c5c4..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/DelimitedDataParser.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-
-import org.apache.asterix.builders.IARecordBuilder;
-import org.apache.asterix.builders.RecordBuilder;
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.dataflow.data.nontagged.serde.ANullSerializerDeserializer;
-import org.apache.asterix.om.base.AMutableString;
-import org.apache.asterix.om.base.ANull;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.util.NonTaggedFormatUtil;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
-import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
-import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
-
-public class DelimitedDataParser extends AbstractDataParser implements IDataParser {
-
-    protected final IValueParserFactory[] valueParserFactories;
-    protected final char fieldDelimiter;
-    protected final char quote;
-    protected final boolean hasHeader;
-    protected final ARecordType recordType;
-    private IARecordBuilder recBuilder;
-    private ArrayBackedValueStorage fieldValueBuffer;
-    private DataOutput fieldValueBufferOutput;
-    private IValueParser[] valueParsers;
-    private FieldCursorForDelimitedDataParser cursor;
-    private byte[] fieldTypeTags;
-    private int[] fldIds;
-    private ArrayBackedValueStorage[] nameBuffers;
-    private boolean areAllNullFields;
-
-    public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter,
-            char quote, boolean hasHeader) {
-        this.recordType = recordType;
-        this.valueParserFactories = valueParserFactories;
-        this.fieldDelimiter = fieldDelimter;
-        this.quote = quote;
-        this.hasHeader = hasHeader;
-    }
-
-    @Override
-    public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) throws AsterixException,
-            IOException {
-
-        valueParsers = new IValueParser[valueParserFactories.length];
-        for (int i = 0; i < valueParserFactories.length; ++i) {
-            valueParsers[i] = valueParserFactories[i].createValueParser();
-        }
-
-        fieldValueBuffer = new ArrayBackedValueStorage();
-        fieldValueBufferOutput = fieldValueBuffer.getDataOutput();
-        recBuilder = new RecordBuilder();
-        recBuilder.reset(recordType);
-        recBuilder.init();
-
-        int n = recordType.getFieldNames().length;
-        fieldTypeTags = new byte[n];
-        for (int i = 0; i < n; i++) {
-            ATypeTag tag = recordType.getFieldTypes()[i].getTypeTag();
-            fieldTypeTags[i] = tag.serialize();
-        }
-
-        fldIds = new int[n];
-        nameBuffers = new ArrayBackedValueStorage[n];
-        AMutableString str = new AMutableString(null);
-        for (int i = 0; i < n; i++) {
-            String name = recordType.getFieldNames()[i];
-            fldIds[i] = recBuilder.getFieldId(name);
-            if (fldIds[i] < 0) {
-                if (!recordType.isOpen()) {
-                    throw new HyracksDataException("Illegal field " + name + " in closed type " + recordType);
-                } else {
-                    nameBuffers[i] = new ArrayBackedValueStorage();
-                    fieldNameToBytes(name, str, nameBuffers[i]);
-                }
-            }
-        }
-
-        cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote);
-    }
-
-    @Override
-    public boolean parse(DataOutput out) throws AsterixException, IOException {
-        if (hasHeader && cursor.recordCount == 0) {
-            // Consume all fields of first record
-            cursor.nextRecord();
-            while (cursor.nextField());
-        }
-        while (cursor.nextRecord()) {
-            recBuilder.reset(recordType);
-            recBuilder.init();
-            areAllNullFields = true;
-
-            for (int i = 0; i < valueParsers.length; ++i) {
-                if (!cursor.nextField()) {
-                    break;
-                }
-                fieldValueBuffer.reset();
-
-                if (cursor.fStart == cursor.fEnd && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
-                        && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) {
-                    // if the field is empty and the type is optional, insert
-                    // NULL. Note that string type can also process empty field as an
-                    // empty string
-                    if (!NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i])) {
-                        throw new AsterixException("At record: " + cursor.recordCount + " - Field " + cursor.fieldCount
-                                + " is not an optional type so it cannot accept null value. ");
-                    }
-                    fieldValueBufferOutput.writeByte(ATypeTag.NULL.serialize());
-                    ANullSerializerDeserializer.INSTANCE.serialize(ANull.NULL, out);
-                } else {
-                    fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
-                    // Eliminate doule quotes in the field that we are going to parse
-                    if (cursor.isDoubleQuoteIncludedInThisField) {
-                        cursor.eliminateDoubleQuote(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart);
-                        cursor.fEnd -= cursor.doubleQuoteCount;
-                        cursor.isDoubleQuoteIncludedInThisField = false;
-                    }
-                    valueParsers[i].parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart,
-                            fieldValueBufferOutput);
-                    areAllNullFields = false;
-                }
-                if (fldIds[i] < 0) {
-                    recBuilder.addField(nameBuffers[i], fieldValueBuffer);
-                } else {
-                    recBuilder.addField(fldIds[i], fieldValueBuffer);
-                }
-            }
-
-            if (!areAllNullFields) {
-                recBuilder.write(out, true);
-                return true;
-            }
-        }
-        return false;
-    }
-
-    protected void fieldNameToBytes(String fieldName, AMutableString str, ArrayBackedValueStorage buffer)
-            throws HyracksDataException {
-        buffer.reset();
-        DataOutput out = buffer.getDataOutput();
-        str.setValue(fieldName);
-        try {
-            stringSerde.serialize(str, out);
-        } catch (IOException e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/FrameFullTupleForwardPolicy.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/FrameFullTupleForwardPolicy.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/FrameFullTupleForwardPolicy.java
deleted file mode 100644
index e22180c..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/FrameFullTupleForwardPolicy.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.util.Map;
-
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.hyracks.api.comm.IFrame;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
-
-public class FrameFullTupleForwardPolicy implements ITupleForwardPolicy {
-
-	private FrameTupleAppender appender;
-	private IFrame frame;
-	private IFrameWriter writer;
-
-	public void configure(Map<String, String> configuration) {
-		// no-op
-	}
-
-	public void initialize(IHyracksCommonContext ctx, IFrameWriter writer)
-			throws HyracksDataException {
-		this.appender = new FrameTupleAppender();
-		this.frame = new VSizeFrame(ctx);
-		this.writer = writer;
-		appender.reset(frame, true);
-	}
-
-	public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
-		boolean success = appender.append(tb.getFieldEndOffsets(),
-				tb.getByteArray(), 0, tb.getSize());
-		if (!success) {
-			FrameUtils.flushFrame(frame.getBuffer(), writer);
-			appender.reset(frame, true);
-			success = appender.append(tb.getFieldEndOffsets(),
-					tb.getByteArray(), 0, tb.getSize());
-			if (!success) {
-				throw new IllegalStateException();
-			}
-		}
-	}
-
-	public void close() throws HyracksDataException {
-		if (appender.getTupleCount() > 0) {
-			FrameUtils.flushFrame(frame.getBuffer(), writer);
-		}
-
-	}
-
-	@Override
-	public TupleForwardPolicyType getType() {
-		return TupleForwardPolicyType.FRAME_FULL;
-	}
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/IDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/IDataParser.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/IDataParser.java
deleted file mode 100644
index ba90e6c..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/IDataParser.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.om.types.ARecordType;
-
-/**
- * Interface implemented by a parser
- */
-public interface IDataParser {
-
-    /**
-     * Initialize the parser prior to actual parsing.
-     * 
-     * @param in
-     *            input stream to be parsed
-     * @param recordType
-     *            record type associated with input data
-     * @param datasetRec
-     *            boolean flag set to true if input data represents dataset
-     *            records.
-     * @throws AsterixException
-     * @throws IOException
-     */
-    public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) throws AsterixException,
-            IOException;
-
-    /**
-     * Parse data from source input stream and output ADM records.
-     * 
-     * @param out
-     *            DataOutput instance that for writing the parser output.
-     * @return
-     * @throws AsterixException
-     * @throws IOException
-     */
-    public boolean parse(DataOutput out) throws AsterixException, IOException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateContolledParserPolicy.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateContolledParserPolicy.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateContolledParserPolicy.java
deleted file mode 100644
index 7b5d331..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateContolledParserPolicy.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.util.Map;
-
-import org.apache.asterix.common.parse.ITupleParserPolicy;
-import org.apache.hyracks.api.comm.IFrame;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
-
-public class RateContolledParserPolicy implements ITupleParserPolicy {
-
-    protected FrameTupleAppender appender;
-    protected IFrame  frame;
-    private IFrameWriter writer;
-    private long interTupleInterval;
-    private boolean delayConfigured;
-
-    public static final String INTER_TUPLE_INTERVAL = "tuple-interval";
-
-    public RateContolledParserPolicy() {
-
-    }
-
-    public TupleParserPolicy getType() {
-        return ITupleParserPolicy.TupleParserPolicy.FRAME_FULL;
-    }
-
- 
-    @Override
-    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
-        if (delayConfigured) {
-            try {
-                Thread.sleep(interTupleInterval);
-            } catch (InterruptedException e) {
-                throw new HyracksDataException(e);
-            }
-        }
-        boolean success = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-        if (!success) {
-            FrameUtils.flushFrame(frame.getBuffer(), writer);
-            appender.reset(frame, true);
-            success = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-            if (!success) {
-                throw new IllegalStateException();
-            }
-        }
-        appender.reset(frame, true);
-    }
-
-    @Override
-    public void close() throws HyracksDataException {
-        if (appender.getTupleCount() > 0) {
-            FrameUtils.flushFrame(frame.getBuffer(), writer);
-        }
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration) throws HyracksDataException {
-        String propValue = configuration.get(INTER_TUPLE_INTERVAL);
-        if (propValue != null) {
-            interTupleInterval = Long.parseLong(propValue);
-        } else {
-            interTupleInterval = 0;
-        }
-        delayConfigured = interTupleInterval != 0;
-        
-    }
-
-    @Override
-    public void initialize(IHyracksTaskContext ctx, IFrameWriter writer) throws HyracksDataException {
-        this.appender = new FrameTupleAppender();
-        this.frame = new VSizeFrame(ctx);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateControlledTupleForwardPolicy.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateControlledTupleForwardPolicy.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateControlledTupleForwardPolicy.java
deleted file mode 100644
index c5af720..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/operators/file/RateControlledTupleForwardPolicy.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operators.file;
-
-import java.util.Map;
-
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.hyracks.api.comm.IFrame;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
-
-public class RateControlledTupleForwardPolicy implements ITupleForwardPolicy {
-
-    private FrameTupleAppender appender;
-    private IFrame frame;
-    private IFrameWriter writer;
-    private long interTupleInterval;
-    private boolean delayConfigured;
-
-    public static final String INTER_TUPLE_INTERVAL = "tuple-interval";
-
-    public void configure(Map<String, String> configuration) {
-        String propValue = configuration.get(INTER_TUPLE_INTERVAL);
-        if (propValue != null) {
-            interTupleInterval = Long.parseLong(propValue);
-        }
-        delayConfigured = interTupleInterval != 0;
-    }
-
-    public void initialize(IHyracksCommonContext ctx, IFrameWriter writer) throws HyracksDataException {
-        this.appender = new FrameTupleAppender();
-        this.frame = new VSizeFrame(ctx);
-        this.writer = writer;
-        appender.reset(frame, true);
-    }
-
-    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
-        if (delayConfigured) {
-            try {
-                Thread.sleep(interTupleInterval);
-            } catch (InterruptedException e) {
-                e.printStackTrace();
-            }
-        }
-        boolean success = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-        if (!success) {
-            FrameUtils.flushFrame(frame.getBuffer(), writer);
-            appender.reset(frame, true);
-            success = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-            if (!success) {
-                throw new IllegalStateException();
-            }
-        }
-    }
-
-    public void close() throws HyracksDataException {
-        if (appender.getTupleCount() > 0) {
-            FrameUtils.flushFrame(frame.getBuffer(), writer);
-        }
-
-    }
-
-    @Override
-    public TupleForwardPolicyType getType() {
-        return TupleForwardPolicyType.RATE_CONTROLLED;
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/resources/adm.grammar
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/resources/adm.grammar b/asterix-runtime/src/main/resources/adm.grammar
deleted file mode 100644
index 1910436..0000000
--- a/asterix-runtime/src/main/resources/adm.grammar
+++ /dev/null
@@ -1,86 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# LEXER GENERATOR configuration file
-# ---------------------------------------
-# Place *first* the generic configuration
-# then list your grammar.
-
-PACKAGE:          org.apache.asterix.runtime.operators.file.adm
-LEXER_NAME:       AdmLexer
-
-TOKENS:
-
-BOOLEAN_CONS   = string(boolean)
-INT8_CONS      = string(int8)
-INT16_CONS     = string(int16)
-INT32_CONS     = string(int32)
-INT64_CONS     = string(int64)
-INT64_CONS     = string(int)
-FLOAT_CONS     = string(float)
-DOUBLE_CONS    = string(double)
-DATE_CONS      = string(date)
-DATETIME_CONS  = string(datetime)
-DURATION_CONS  = string(duration)
-STRING_CONS    = string(string)
-HEX_CONS       = string(hex)
-BASE64_CONS    = string(base64)
-POINT_CONS     = string(point)
-POINT3D_CONS   = string(point3d)
-LINE_CONS      = string(line)
-POLYGON_CONS   = string(polygon)
-RECTANGLE_CONS = string(rectangle)
-CIRCLE_CONS    = string(circle)
-TIME_CONS      = string(time)
-INTERVAL_TIME_CONS      = string(interval-time)
-INTERVAL_DATE_CONS      = string(interval-date)
-INTERVAL_DATETIME_CONS  = string(interval-datetime)
-YEAR_MONTH_DURATION_CONS = string(year-month-duration)
-DAY_TIME_DURATION_CONS   = string(day-time-duration)
-UUID_CONS      = string(uuid)
-
-NULL_LITERAL   = string(null)
-TRUE_LITERAL   = string(true)
-FALSE_LITERAL  = string(false)
-
-CONSTRUCTOR_OPEN     = char(()
-CONSTRUCTOR_CLOSE    = char())
-START_RECORD         = char({)
-END_RECORD           = char(})
-COMMA                = char(\,)
-COLON                = char(:)
-START_ORDERED_LIST   = char([)
-END_ORDERED_LIST     = char(])
-START_UNORDERED_LIST = string({{)
-# END_UNORDERED_LIST  = }} is recognized as a double END_RECORD token
-
-STRING_LITERAL       = char("), anythingUntil(")
-
-INT_LITERAL          = signOrNothing(), digitSequence()
-INT8_LITERAL         = token(INT_LITERAL), string(i8)
-INT16_LITERAL        = token(INT_LITERAL), string(i16)
-INT32_LITERAL        = token(INT_LITERAL), string(i32)
-INT64_LITERAL        = token(INT_LITERAL), string(i64)
-
-@EXPONENT            = caseInsensitiveChar(e), signOrNothing(), digitSequence()
-
-DOUBLE_LITERAL		 = signOrNothing(), char(.), digitSequence()
-DOUBLE_LITERAL		 = signOrNothing(), digitSequence(), char(.), digitSequence()
-DOUBLE_LITERAL		 = signOrNothing(), digitSequence(), char(.), digitSequence(), token(@EXPONENT)
-DOUBLE_LITERAL		 = signOrNothing(), digitSequence(), token(@EXPONENT)
-
-FLOAT_LITERAL		 = token(DOUBLE_LITERAL), caseInsensitiveChar(f)

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java b/asterix-runtime/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
deleted file mode 100644
index e23e255..0000000
--- a/asterix-runtime/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.operator.file;
-
-import java.io.ByteArrayOutputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.asterix.om.base.AMutableInterval;
-import org.apache.asterix.runtime.operators.file.ADMDataParser;
-import org.junit.Assert;
-import org.junit.Test;
-
-import junit.extensions.PA;
-
-public class ADMDataParserTest {
-
-    @Test
-    public void test() {
-        String[] dateIntervals = { "-9537-08-04, 9656-06-03", "-9537-04-04, 9656-06-04", "-9537-10-04, 9626-09-05" };
-        AMutableInterval[] parsedDateIntervals = new AMutableInterval[] {
-                new AMutableInterval(-4202630, 2807408, (byte) 17), new AMutableInterval(-4202752, 2807409, (byte) 17),
-                new AMutableInterval(-4202569, 2796544, (byte) 17), };
-
-        String[] timeIntervals = { "12:04:45.689Z, 12:41:59.002Z", "12:10:45.169Z, 15:37:48.736Z",
-                "04:16:42.321Z, 12:22:56.816Z" };
-        AMutableInterval[] parsedTimeIntervals = new AMutableInterval[] {
-                new AMutableInterval(43485689, 45719002, (byte) 18),
-                new AMutableInterval(43845169, 56268736, (byte) 18),
-                new AMutableInterval(15402321, 44576816, (byte) 18), };
-
-        String[] dateTimeIntervals = { "-2640-10-11T17:32:15.675Z, 4104-02-01T05:59:11.902Z",
-                "0534-12-08T08:20:31.487Z, 6778-02-16T22:40:21.653Z",
-                "2129-12-12T13:18:35.758Z, 8647-07-01T13:10:19.691Z" };
-        AMutableInterval[] parsedDateTimeIntervals = new AMutableInterval[] {
-                new AMutableInterval(-145452954464325L, 67345192751902L, (byte) 16),
-                new AMutableInterval(-45286270768513L, 151729886421653L, (byte) 16),
-                new AMutableInterval(5047449515758L, 210721439419691L, (byte) 16) };
-
-        Thread[] threads = new Thread[16];
-        AtomicInteger errorCount = new AtomicInteger(0);
-        for (int i = 0; i < threads.length; ++i) {
-            threads[i] = new Thread(new Runnable() {
-                ADMDataParser parser = new ADMDataParser();
-                ByteArrayOutputStream bos = new ByteArrayOutputStream();
-                DataOutput dos = new DataOutputStream(bos);
-
-                @Override
-                public void run() {
-                    try {
-                        int round = 0;
-                        while (round++ < 10000) {
-                            // Test parseDateInterval.
-                            for (int index = 0; index < dateIntervals.length; ++index) {
-                                PA.invokeMethod(parser, "parseDateInterval(java.lang.String, java.io.DataOutput)",
-                                        dateIntervals[index], dos);
-                                AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
-                                Assert.assertTrue(aInterval.equals(parsedDateIntervals[index]));
-                            }
-
-                            // Tests parseTimeInterval.
-                            for (int index = 0; index < timeIntervals.length; ++index) {
-                                PA.invokeMethod(parser, "parseTimeInterval(java.lang.String, java.io.DataOutput)",
-                                        timeIntervals[index], dos);
-                                AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
-                                Assert.assertTrue(aInterval.equals(parsedTimeIntervals[index]));
-                            }
-
-                            // Tests parseDateTimeInterval.
-                            for (int index = 0; index < dateTimeIntervals.length; ++index) {
-                                PA.invokeMethod(parser, "parseDateTimeInterval(java.lang.String, java.io.DataOutput)",
-                                        dateTimeIntervals[index], dos);
-                                AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
-                                Assert.assertTrue(aInterval.equals(parsedDateTimeIntervals[index]));
-                            }
-                        }
-                    } catch (Exception e) {
-                        errorCount.incrementAndGet();
-                        e.printStackTrace();
-                    }
-                }
-            });
-            // Kicks off test threads.
-            threads[i].start();
-        }
-
-        // Joins all the threads.
-        try {
-            for (int i = 0; i < threads.length; ++i) {
-                threads[i].join();
-            }
-        } catch (InterruptedException e) {
-            throw new IllegalStateException(e);
-        }
-        // Asserts no failure.
-        Assert.assertTrue(errorCount.get() == 0);
-    }
-
-}



[02/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/DataGenerator.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/DataGenerator.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/DataGenerator.java
deleted file mode 100644
index 43db6c4..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/DataGenerator.java
+++ /dev/null
@@ -1,1188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.nio.CharBuffer;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.asterix.external.util.Datatypes;
-
-public class DataGenerator {
-
-    private RandomDateGenerator randDateGen;
-    private RandomNameGenerator randNameGen;
-    private RandomMessageGenerator randMessageGen;
-    private RandomLocationGenerator randLocationGen;
-    private Random random = new Random();
-    private TwitterUser twUser = new TwitterUser();
-    private TweetMessage twMessage = new TweetMessage();
-    private static final String DEFAULT_COUNTRY = "US";
-
-    public DataGenerator(InitializationInfo info) {
-        initialize(info);
-    }
-
-    public class TweetMessageIterator implements Iterator<TweetMessage> {
-
-        private final int duration;
-        private long startTime = 0;
-        private int tweetId;
-
-        public TweetMessageIterator(int duration) {
-            this.duration = duration;
-            this.startTime = System.currentTimeMillis();
-        }
-
-        @Override
-        public boolean hasNext() {
-            if (duration == TweetGenerator.INFINITY) {
-                return true;
-            }
-            return System.currentTimeMillis() - startTime <= duration * 1000;
-        }
-
-        @Override
-        public TweetMessage next() {
-            tweetId++;
-            TweetMessage msg = null;
-            getTwitterUser(null);
-            Message message = randMessageGen.getNextRandomMessage();
-            Point location = randLocationGen.getRandomPoint();
-            DateTime sendTime = randDateGen.getNextRandomDatetime();
-            twMessage.reset(tweetId, twUser, location.getLatitude(), location.getLongitude(), sendTime.toString(),
-                    message, DEFAULT_COUNTRY);
-            msg = twMessage;
-            return msg;
-        }
-
-        @Override
-        public void remove() {
-            // TODO Auto-generated method stub
-
-        }
-
-    }
-
-    public static class InitializationInfo {
-        public Date startDate = new Date(1, 1, 2005);
-        public Date endDate = new Date(8, 20, 2012);
-        public String[] lastNames = DataGenerator.lastNames;
-        public String[] firstNames = DataGenerator.firstNames;
-        public String[] vendors = DataGenerator.vendors;
-        public String[] jargon = DataGenerator.jargon;
-        public String[] org_list = DataGenerator.org_list;
-    }
-
-    public void initialize(InitializationInfo info) {
-        randDateGen = new RandomDateGenerator(info.startDate, info.endDate);
-        randNameGen = new RandomNameGenerator(info.firstNames, info.lastNames);
-        randLocationGen = new RandomLocationGenerator(24, 49, 66, 98);
-        randMessageGen = new RandomMessageGenerator(info.vendors, info.jargon);
-    }
-
-    public void getTwitterUser(String usernameSuffix) {
-        String suggestedName = randNameGen.getRandomName();
-        String[] nameComponents = suggestedName.split(" ");
-        String screenName = nameComponents[0] + nameComponents[1] + randNameGen.getRandomNameSuffix();
-        String name = suggestedName;
-        if (usernameSuffix != null) {
-            name = name + usernameSuffix;
-        }
-        int numFriends = random.nextInt((int) (100)); // draw from Zipfian
-        int statusesCount = random.nextInt(500); // draw from Zipfian
-        int followersCount = random.nextInt((int) (200));
-        twUser.reset(screenName, numFriends, statusesCount, name, followersCount);
-    }
-
-    public static class RandomDateGenerator {
-
-        private final Date startDate;
-        private final Date endDate;
-        private final Random random = new Random();
-        private final int yearDifference;
-        private Date workingDate;
-        private Date recentDate;
-        private DateTime dateTime;
-
-        public RandomDateGenerator(Date startDate, Date endDate) {
-            this.startDate = startDate;
-            this.endDate = endDate;
-            this.yearDifference = endDate.getYear() - startDate.getYear() + 1;
-            this.workingDate = new Date();
-            this.recentDate = new Date();
-            this.dateTime = new DateTime();
-        }
-
-        public Date getStartDate() {
-            return startDate;
-        }
-
-        public Date getEndDate() {
-            return endDate;
-        }
-
-        public Date getNextRandomDate() {
-            int year = random.nextInt(yearDifference) + startDate.getYear();
-            int month;
-            int day;
-            if (year == endDate.getYear()) {
-                month = random.nextInt(endDate.getMonth()) + 1;
-                if (month == endDate.getMonth()) {
-                    day = random.nextInt(endDate.getDay()) + 1;
-                } else {
-                    day = random.nextInt(28) + 1;
-                }
-            } else {
-                month = random.nextInt(12) + 1;
-                day = random.nextInt(28) + 1;
-            }
-            workingDate.reset(month, day, year);
-            return workingDate;
-        }
-
-        public DateTime getNextRandomDatetime() {
-            Date randomDate = getNextRandomDate();
-            dateTime.reset(randomDate);
-            return dateTime;
-        }
-
-        public Date getNextRecentDate(Date date) {
-            int year = date.getYear()
-                    + (date.getYear() == endDate.getYear() ? 0 : random.nextInt(endDate.getYear() - date.getYear()));
-            int month = (year == endDate.getYear()) ? date.getMonth() == endDate.getMonth() ? (endDate.getMonth())
-                    : (date.getMonth() + random.nextInt(endDate.getMonth() - date.getMonth())) : random.nextInt(12) + 1;
-
-            int day = (year == endDate.getYear()) ? month == endDate.getMonth() ? date.getDay() == endDate.getDay() ? endDate
-                    .getDay() : date.getDay() + random.nextInt(endDate.getDay() - date.getDay())
-                    : random.nextInt(28) + 1
-                    : random.nextInt(28) + 1;
-            recentDate.reset(month, day, year);
-            return recentDate;
-        }
-
-    }
-
-    /**
-     * A {@link Date} extended with a time of day. Hour, minute and second are kept as already
-     * formatted strings and default to {@code 10:10:00}.
-     */
-    public static class DateTime extends Date {
-
-        private String hour = "10";
-        private String min = "10";
-        private String sec = "00";
-
-        /**
-         * Creates a date-time from explicit date parts and pre-formatted time strings.
-         */
-        public DateTime(int month, int day, int year, String hour, String min, String sec) {
-            super(month, day, year);
-            this.hour = hour;
-            this.min = min;
-            this.sec = sec;
-        }
-
-        /**
-         * Creates a date-time with the default time of day and an unset date.
-         */
-        public DateTime() {
-        }
-
-        /**
-         * Overwrites every component of this date-time.
-         *
-         * @param month
-         *            the new month
-         * @param day
-         *            the new day of month
-         * @param year
-         *            the new year
-         * @param hour
-         *            the new hour, already formatted
-         * @param min
-         *            the new minute, already formatted
-         * @param sec
-         *            the new second, already formatted
-         */
-        public void reset(int month, int day, int year, String hour, String min, String sec) {
-            // The month must go through setMonth; routing it through setDay would leave the month stale.
-            super.setMonth(month);
-            super.setDay(day);
-            super.setYear(year);
-            this.hour = hour;
-            this.min = min;
-            this.sec = sec;
-        }
-
-        /**
-         * Creates a date-time on the given date with the default time of day.
-         */
-        public DateTime(Date date) {
-            super(date.getMonth(), date.getDay(), date.getYear());
-        }
-
-        /**
-         * Copies month, day and year from {@code date}; the time of day is left untouched.
-         */
-        public void reset(Date date) {
-            reset(date.getMonth(), date.getDay(), date.getYear());
-        }
-
-        /**
-         * Creates a date-time on the given date with a numeric time of day. Each time component is
-         * zero-padded to two characters.
-         */
-        public DateTime(Date date, int hour, int min, int sec) {
-            super(date.getMonth(), date.getDay(), date.getYear());
-            this.hour = twoDigits(hour);
-            this.min = twoDigits(min);
-            this.sec = twoDigits(sec);
-        }
-
-        /**
-         * Prefixes values below 10 with a single "0". The parentheses matter: without them the
-         * conditional operator would swallow the concatenation and drop the digit.
-         */
-        private static String twoDigits(int value) {
-            return (value < 10 ? "0" : "") + value;
-        }
-
-        /**
-         * @return this date-time as a double-quoted {@code "yyyy-MM-ddThh:mm:ss"} string
-         */
-        @Override
-        public String toString() {
-            StringBuilder builder = new StringBuilder();
-            builder.append("\"");
-            builder.append(super.getYear());
-            builder.append("-");
-            builder.append(twoDigits(super.getMonth()));
-            builder.append("-");
-            builder.append(twoDigits(super.getDay()));
-            builder.append("T");
-            builder.append(hour + ":" + min + ":" + sec);
-            builder.append("\"");
-            return builder.toString();
-        }
-    }
-
-    /**
-     * A reusable character buffer holding one message together with the topics it refers to.
-     * The backing array starts at 500 characters and is enlarged whenever a longer message is stored.
-     */
-    public static class Message {
-
-        private char[] message = new char[500];
-        private List<String> referredTopics;
-        private int length;
-
-        /**
-         * Creates a message holding a copy of all characters in {@code m}.
-         *
-         * @param m
-         *            the message characters
-         * @param referredTopics
-         *            the topics mentioned by the message; the list is stored by reference, not copied
-         */
-        public Message(char[] m, List<String> referredTopics) {
-            ensureCapacity(m.length);
-            System.arraycopy(m, 0, message, 0, m.length);
-            length = m.length;
-            this.referredTopics = referredTopics;
-        }
-
-        /**
-         * Creates an empty message with an empty topic list.
-         */
-        public Message() {
-            referredTopics = new ArrayList<String>();
-            length = 0;
-        }
-
-        public List<String> getReferredTopics() {
-            return referredTopics;
-        }
-
-        /**
-         * Replaces the content of this message.
-         *
-         * @param m
-         *            the array to copy characters from
-         * @param offset
-         *            index in {@code m} of the first character to copy
-         * @param length
-         *            number of characters to copy
-         * @param referredTopics
-         *            the topics mentioned by the message; the list is stored by reference, not copied
-         */
-        public void reset(char[] m, int offset, int length, List<String> referredTopics) {
-            ensureCapacity(length);
-            System.arraycopy(m, offset, message, 0, length);
-            this.length = length;
-            this.referredTopics = referredTopics;
-        }
-
-        /**
-         * Replaces the backing array with a larger one when {@code required} characters do not fit.
-         * The old content is not carried over because every caller overwrites it right away.
-         */
-        private void ensureCapacity(int required) {
-            if (required > message.length) {
-                message = new char[required];
-            }
-        }
-
-        public int getLength() {
-            return length;
-        }
-
-        /**
-         * @return the character stored at {@code index} of the backing array; the index is not
-         *         checked against {@link #getLength()}
-         */
-        public char charAt(int index) {
-            return message[index];
-        }
-
-    }
-
-    /**
-     * A mutable latitude/longitude pair that prints itself as {@code point("lat,long")}.
-     */
-    public static class Point {
-
-        private float latitude;
-        private float longitude;
-
-        /**
-         * Creates a point with both coordinates at their default value of zero.
-         */
-        public Point() {
-        }
-
-        public Point(float latitude, float longitude) {
-            this.latitude = latitude;
-            this.longitude = longitude;
-        }
-
-        /**
-         * Overwrites both coordinates so the instance can be reused.
-         */
-        public void reset(float latitude, float longitude) {
-            this.latitude = latitude;
-            this.longitude = longitude;
-        }
-
-        public float getLatitude() {
-            return latitude;
-        }
-
-        public float getLongitude() {
-            return longitude;
-        }
-
-        @Override
-        public String toString() {
-            return "point(\"" + latitude + "," + longitude + "\")";
-        }
-    }
-
-    /**
-     * Builds random "first last" names from two arrays and random suffixes made of a connector
-     * character followed by a number below 1000.
-     */
-    public static class RandomNameGenerator {
-
-        private final String[] connectors = new String[] { "_", "#", "$", "@" };
-
-        private final Random random = new Random();
-
-        private String[] firstNames;
-        private String[] lastNames;
-
-        public RandomNameGenerator(String[] firstNames, String[] lastNames) {
-            this.firstNames = firstNames;
-            this.lastNames = lastNames;
-        }
-
-        /**
-         * @return a randomly chosen first name and a randomly chosen last name joined by one space
-         */
-        public String getRandomName() {
-            // The first name is drawn before the last name.
-            String first = firstNames[random.nextInt(firstNames.length)];
-            String last = lastNames[random.nextInt(lastNames.length)];
-            return first + " " + last;
-        }
-
-        /**
-         * @return one of the connectors followed by a random integer in the range 0-999
-         */
-        public String getRandomNameSuffix() {
-            String connector = connectors[random.nextInt(connectors.length)];
-            int number = random.nextInt(1000);
-            return connector + number;
-        }
-    }
-
-    /**
-     * Thin front end that turns vendor and jargon arrays into lists and hands them to a
-     * {@link MessageTemplate}, which produces the messages.
-     */
-    public static class RandomMessageGenerator {
-
-        private final MessageTemplate messageTemplate;
-
-        public RandomMessageGenerator(String[] vendors, String[] jargon) {
-            this.messageTemplate = new MessageTemplate(copyToList(vendors), copyToList(jargon));
-        }
-
-        /**
-         * Copies the array elements, in order, into a new mutable list.
-         */
-        private static List<String> copyToList(String[] items) {
-            List<String> copy = new ArrayList<String>(items.length);
-            for (int i = 0; i < items.length; i++) {
-                copy.add(items[i]);
-            }
-            return copy;
-        }
-
-        /**
-         * @return whatever the underlying template produces for its next message
-         */
-        public Message getNextRandomMessage() {
-            return messageTemplate.getNextMessage();
-        }
-    }
-
-    /**
-     * Holds the random source and the word lists that message templates draw from.
-     */
-    public static class AbstractMessageTemplate {
-
-        protected final Random random = new Random();
-
-        // Verbs, split by sentiment.
-        protected String[] positiveVerbs = { "like", "love" };
-        protected String[] negativeVerbs = { "dislike", "hate", "can't stand" };
-
-        // Adjectives, split by sentiment. The spelling of "postiveAdjectives" is kept because
-        // subclasses refer to the field by this name.
-        protected String[] negativeAdjectives = { "horrible", "bad", "terrible", "OMG" };
-        protected String[] postiveAdjectives = { "good", "awesome", "amazing", "mind-blowing" };
-
-        // Filler words.
-        protected String[] otherWords = { "the", "its" };
-    }
-
-    /**
-     * Produces short opinion sentences of the form
-     * {@code " <verb> <vendor> <the|its> <jargon> is <adjective>"}, optionally followed by an emoticon.
-     * One {@link Message} instance and one topic list are reused for every generated message.
-     */
-    public static class MessageTemplate extends AbstractMessageTemplate {
-
-        private List<String> vendors;
-        private List<String> jargon;
-        private CharBuffer buffer;
-        private List<String> referredTopics;
-        private Message message = new Message();
-
-        public MessageTemplate(List<String> vendors, List<String> jargon) {
-            this.vendors = vendors;
-            this.jargon = jargon;
-            this.buffer = CharBuffer.allocate(2500);
-            this.referredTopics = new ArrayList<String>();
-        }
-
-        /**
-         * Composes the next random sentence into the shared buffer and loads it into the shared message.
-         *
-         * @return the reused {@link Message}; its topic list holds the chosen vendor and jargon term
-         */
-        public Message getNextMessage() {
-            // Rewind to position 0 with the limit at the full capacity of 2500.
-            buffer.clear();
-            referredTopics.clear();
-
-            final boolean positive = random.nextBoolean();
-            final String[] verbs;
-            final String[] adjectives;
-            if (positive) {
-                verbs = positiveVerbs;
-                adjectives = postiveAdjectives;
-            } else {
-                verbs = negativeVerbs;
-                adjectives = negativeAdjectives;
-            }
-            final String chosenVerb = verbs[random.nextInt(verbs.length)];
-            final String chosenAdjective = adjectives[random.nextInt(adjectives.length)];
-
-            buffer.put(" ").put(chosenVerb).put(" ");
-
-            final String chosenVendor = vendors.get(random.nextInt(vendors.size()));
-            referredTopics.add(chosenVendor);
-            buffer.append(chosenVendor).append(" ").append(otherWords[random.nextInt(otherWords.length)])
-                    .append(" ");
-
-            final String chosenJargon = jargon.get(random.nextInt(jargon.size()));
-            referredTopics.add(chosenJargon);
-            buffer.append(chosenJargon).append(" is ").append(chosenAdjective);
-
-            if (random.nextBoolean()) {
-                buffer.append(positive ? ":)" : ":(");
-            }
-
-            // After flip() the limit equals the number of characters written.
-            buffer.flip();
-            message.reset(buffer.array(), 0, buffer.limit(), referredTopics);
-            return message;
-        }
-    }
-
-    /**
-     * Random sampling helpers.
-     */
-    public static class RandomUtil {
-
-        public static Random random = new Random();
-
-        /**
-         * Draws {@code k} distinct integers from the inclusive range {@code 0..n}.
-         *
-         * @param k
-         *            how many distinct values to draw
-         * @param n
-         *            the largest value that may be drawn
-         * @return the drawn values, in the order they were drawn
-         * @throws IllegalArgumentException
-         *             if more values are requested than the range {@code 0..n} contains
-         */
-        public static int[] getKFromN(int k, int n) {
-            // Only n + 1 distinct values exist; asking for more could never terminate.
-            // The sum is done in long so that n == Integer.MAX_VALUE does not overflow.
-            if (k > 0 && k > (long) n + 1) {
-                throw new IllegalArgumentException("Cannot pick " + k + " distinct values from the range 0.." + n);
-            }
-            int[] result = new int[k];
-            int cnt = 0;
-            HashSet<Integer> values = new HashSet<Integer>();
-            while (cnt < k) {
-                int val = random.nextInt(n + 1);
-                // add() reports false for a value that was already drawn; just draw again.
-                if (values.add(val)) {
-                    result[cnt++] = val;
-                }
-            }
-            return result;
-        }
-    }
-
-    /**
-     * Generates random points inside a latitude/longitude box given by integer bounds.
-     * A single {@link Point} instance is reused for every result.
-     */
-    public static class RandomLocationGenerator {
-
-        private Random random = new Random();
-
-        private final int beginLat;
-        private final int endLat;
-        private final int beginLong;
-        private final int endLong;
-
-        private Point point;
-
-        public RandomLocationGenerator(int beginLat, int endLat, int beginLong, int endLong) {
-            this.beginLat = beginLat;
-            this.endLat = endLat;
-            this.beginLong = beginLong;
-            this.endLong = endLong;
-            this.point = new Point();
-        }
-
-        /**
-         * @return the reused {@link Point}, reset to a freshly drawn latitude and longitude
-         */
-        public Point getRandomPoint() {
-            // Latitude is drawn before longitude.
-            float latitude = randomCoordinate(beginLat, endLat);
-            float longitude = randomCoordinate(beginLong, endLong);
-            point.reset(latitude, longitude);
-            return point;
-        }
-
-        /**
-         * Draws a whole part in {@code [begin, end)} plus a fraction of 0-99 hundredths.
-         */
-        private float randomCoordinate(int begin, int end) {
-            int whole = begin + random.nextInt(end - begin);
-            int hundredths = random.nextInt(100);
-            return whole + hundredths / 100f;
-        }
-
-    }
-
-    public static class TweetMessage {
-
-        private static final String[] DEFAULT_FIELDS = new String[] { TweetFields.TWEETID, TweetFields.USER,
-                TweetFields.LATITUDE, TweetFields.LONGITUDE, TweetFields.MESSAGE_TEXT, TweetFields.CREATED_AT,
-                TweetFields.COUNTRY };
-
-        private int id;
-        private TwitterUser user;
-        private double latitude;
-        private double longitude;
-        private String created_at;
-        private Message messageText;
-        private String country;
-
-        public static final class TweetFields {
-            public static final String TWEETID = "id";
-            public static final String USER = "user";
-            public static final String LATITUDE = "latitude";
-            public static final String LONGITUDE = "longitude";
-            public static final String MESSAGE_TEXT = "message_text";
-            public static final String CREATED_AT = "created_at";
-            public static final String COUNTRY = "country";
-
-        }
-
-        public TweetMessage() {
-        }
-
-        public TweetMessage(int tweetid, TwitterUser user, double latitude, double longitude, String created_at,
-                Message messageText, String country) {
-            this.id = tweetid;
-            this.user = user;
-            this.latitude = latitude;
-            this.longitude = longitude;
-            this.created_at = created_at;
-            this.messageText = messageText;
-            this.country = country;
-        }
-
-        public void reset(int tweetid, TwitterUser user, double latitude, double longitude, String created_at,
-                Message messageText, String country) {
-            this.id = tweetid;
-            this.user = user;
-            this.latitude = latitude;
-            this.longitude = longitude;
-            this.created_at = created_at;
-            this.messageText = messageText;
-            this.country = country;
-        }
-
-        public String getAdmEquivalent(String[] fields) {
-            if (fields == null) {
-                fields = DEFAULT_FIELDS;
-            }
-            StringBuilder builder = new StringBuilder();
-            builder.append("{");
-            for (String field : fields) {
-                switch (field) {
-                    case Datatypes.Tweet.ID:
-                        appendFieldName(builder, Datatypes.Tweet.ID);
-                        builder.append("int64(\"" + id + "\")");
-                        break;
-                    case Datatypes.Tweet.USER:
-                        appendFieldName(builder, Datatypes.Tweet.USER);
-                        builder.append(user);
-                        break;
-                    case Datatypes.Tweet.LATITUDE:
-                        appendFieldName(builder, Datatypes.Tweet.LATITUDE);
-                        builder.append(latitude);
-                        break;
-                    case Datatypes.Tweet.LONGITUDE:
-                        appendFieldName(builder, Datatypes.Tweet.LONGITUDE);
-                        builder.append(longitude);
-                        break;
-                    case Datatypes.Tweet.MESSAGE:
-                        appendFieldName(builder, Datatypes.Tweet.MESSAGE);
-                        builder.append("\"");
-                        for (int i = 0; i < messageText.getLength(); i++) {
-                            builder.append(messageText.charAt(i));
-                        }
-                        builder.append("\"");
-                        break;
-                    case Datatypes.Tweet.CREATED_AT:
-                        appendFieldName(builder, Datatypes.Tweet.CREATED_AT);
-                        builder.append(created_at);
-                        break;
-                    case Datatypes.Tweet.COUNTRY:
-                        appendFieldName(builder, Datatypes.Tweet.COUNTRY);
-                        builder.append("\"" + country + "\"");
-                        break;
-                }
-                builder.append(",");
-            }
-            builder.deleteCharAt(builder.length() - 1);
-            builder.append("}");
-            return builder.toString();
-        }
-
-        private void appendFieldName(StringBuilder builder, String fieldName) {
-            builder.append("\"" + fieldName + "\":");
-        }
-
-        public int getTweetid() {
-            return id;
-        }
-
-        public void setTweetid(int tweetid) {
-            this.id = tweetid;
-        }
-
-        public TwitterUser getUser() {
-            return user;
-        }
-
-        public void setUser(TwitterUser user) {
-            this.user = user;
-        }
-
-        public double getLatitude() {
-            return latitude;
-        }
-
-        public String getSendTime() {
-            return created_at;
-        }
-
-        public Message getMessageText() {
-            return messageText;
-        }
-
-        public void setMessageText(Message messageText) {
-            this.messageText = messageText;
-        }
-
-        public String getCountry() {
-            return country;
-        }
-
-    }
-
-    /**
-     * A mutable user record that prints itself as a brace-delimited list of quoted keys and values.
-     * The language is always "en".
-     */
-    public static class TwitterUser {
-
-        private String screenName;
-        private String lang = "en";
-        private int friendsCount;
-        private int statusesCount;
-        private String name;
-        private int followersCount;
-
-        public TwitterUser() {
-        }
-
-        public TwitterUser(String screenName, int friendsCount, int statusesCount, String name, int followersCount) {
-            this.screenName = screenName;
-            this.friendsCount = friendsCount;
-            this.statusesCount = statusesCount;
-            this.name = name;
-            this.followersCount = followersCount;
-        }
-
-        /**
-         * Overwrites every field except the language so the instance can be reused.
-         */
-        public void reset(String screenName, int friendsCount, int statusesCount, String name, int followersCount) {
-            this.screenName = screenName;
-            this.friendsCount = friendsCount;
-            this.statusesCount = statusesCount;
-            this.name = name;
-            this.followersCount = followersCount;
-        }
-
-        public String getScreenName() {
-            return screenName;
-        }
-
-        public int getFriendsCount() {
-            return friendsCount;
-        }
-
-        public int getStatusesCount() {
-            return statusesCount;
-        }
-
-        public String getName() {
-            return name;
-        }
-
-        public int getFollowersCount() {
-            return followersCount;
-        }
-
-        /**
-         * @return the record text, with keys in the order screen_name, language, friends_count,
-         *         status_count, name, followers_count
-         */
-        @Override
-        public String toString() {
-            return new StringBuilder("{")
-                    .append("\"screen_name\":\"").append(screenName).append("\"")
-                    .append(",\"language\":\"").append(lang).append("\"")
-                    .append(",\"friends_count\":").append(friendsCount)
-                    .append(",\"status_count\":").append(statusesCount)
-                    .append(",\"name\":\"").append(name).append("\"")
-                    .append(",\"followers_count\":").append(followersCount)
-                    .append("}")
-                    .toString();
-        }
-
-    }
-
-    /**
-     * A mutable month/day/year triple that prints itself as {@code date("yyyy-MM-dd")}.
-     * No validation is applied to any component.
-     */
-    public static class Date {
-
-        private int day;
-        private int month;
-        private int year;
-
-        /**
-         * Creates a date with all components at their default value of zero.
-         */
-        public Date() {
-        }
-
-        public Date(int month, int day, int year) {
-            this.month = month;
-            this.day = day;
-            this.year = year;
-        }
-
-        /**
-         * Overwrites all three components so the instance can be reused.
-         */
-        public void reset(int month, int day, int year) {
-            this.month = month;
-            this.day = day;
-            this.year = year;
-        }
-
-        public int getDay() {
-            return day;
-        }
-
-        public void setDay(int day) {
-            this.day = day;
-        }
-
-        public int getMonth() {
-            return month;
-        }
-
-        public void setMonth(int month) {
-            this.month = month;
-        }
-
-        public int getYear() {
-            return year;
-        }
-
-        public void setYear(int year) {
-            this.year = year;
-        }
-
-        /**
-         * @return {@code date("<year>-<month>-<day>")}, with month and day prefixed by "0" when below 10
-         */
-        @Override
-        public String toString() {
-            return "date(\"" + year + "-" + pad(month) + "-" + pad(day) + "\")";
-        }
-
-        /**
-         * Prefixes values below 10 with a single "0"; other values are printed unchanged.
-         */
-        private static String pad(int value) {
-            return value < 10 ? "0" + value : String.valueOf(value);
-        }
-    }
-
-    /**
-     * Last-name strings. Entries are not unique (for example "Moore" and "Hall" each appear more than once).
-     * NOTE(review): presumably paired with {@code firstNames} as input to {@code RandomNameGenerator};
-     * the call site is not visible here, so confirm against the caller.
-     */
-    public static String[] lastNames = { "Hoopengarner", "Harrow", "Gardner", "Blyant", "Best", "Buttermore", "Gronko",
-            "Mayers", "Countryman", "Neely", "Ruhl", "Taggart", "Bash", "Cason", "Hil", "Zalack", "Mingle", "Carr",
-            "Rohtin", "Wardle", "Pullman", "Wire", "Kellogg", "Hiles", "Keppel", "Bratton", "Sutton", "Wickes",
-            "Muller", "Friedline", "Llora", "Elizabeth", "Anderson", "Gaskins", "Rifler", "Vinsant", "Stanfield",
-            "Black", "Guest", "Hujsak", "Carter", "Weidemann", "Hays", "Patton", "Hayhurst", "Paynter", "Cressman",
-            "Fiddler", "Evans", "Sherlock", "Woodworth", "Jackson", "Bloise", "Schneider", "Ring", "Kepplinger",
-            "James", "Moon", "Bennett", "Bashline", "Ryals", "Zeal", "Christman", "Milliron", "Nash", "Ewing", "Camp",
-            "Mason", "Richardson", "Bowchiew", "Hahn", "Wilson", "Wood", "Toyley", "Williamson", "Lafortune", "Errett",
-            "Saltser", "Hirleman", "Brindle", "Newbiggin", "Ulery", "Lambert", "Shick", "Kuster", "Moore", "Finck",
-            "Powell", "Jolce", "Townsend", "Sauter", "Cowher", "Wolfe", "Cavalet", "Porter", "Laborde", "Ballou",
-            "Murray", "Stoddard", "Pycroft", "Milne", "King", "Todd", "Staymates", "Hall", "Romanoff", "Keilbach",
-            "Sandford", "Hamilton", "Fye", "Kline", "Weeks", "Mcelroy", "Mccullough", "Bryant", "Hill", "Moore",
-            "Ledgerwood", "Prevatt", "Eckert", "Read", "Hastings", "Doverspike", "Allshouse", "Bryan", "Mccallum",
-            "Lombardi", "Mckendrick", "Cattley", "Barkley", "Steiner", "Finlay", "Priebe", "Armitage", "Hall", "Elder",
-            "Erskine", "Hatcher", "Walker", "Pearsall", "Dunkle", "Haile", "Adams", "Miller", "Newbern", "Basinger",
-            "Fuhrer", "Brinigh", "Mench", "Blackburn", "Bastion", "Mccune", "Bridger", "Hynes", "Quinn", "Courtney",
-            "Geddinge", "Field", "Seelig", "Cable", "Earhart", "Harshman", "Roby", "Beals", "Berry", "Reed", "Hector",
-            "Pittman", "Haverrman", "Kalp", "Briner", "Joghs", "Cowart", "Close", "Wynne", "Harden", "Weldy",
-            "Stephenson", "Hildyard", "Moberly", "Wells", "Mackendoerfer", "Fisher", "Oppie", "Oneal", "Churchill",
-            "Keister", "Alice", "Tavoularis", "Fisher", "Hair", "Burns", "Veith", "Wile", "Fuller", "Fields", "Clark",
-            "Randolph", "Stone", "Mcclymonds", "Holtzer", "Donkin", "Wilkinson", "Rosensteel", "Albright", "Stahl",
-            "Fox", "Kadel", "Houser", "Hanseu", "Henderson", "Davis", "Bicknell", "Swain", "Mercer", "Holdeman",
-            "Enderly", "Caesar", "Margaret", "Munshower", "Elless", "Lucy", "Feufer", "Schofield", "Graham",
-            "Blatenberger", "Benford", "Akers", "Campbell", "Ann", "Sadley", "Ling", "Gongaware", "Schmidt", "Endsley",
-            "Groah", "Flanders", "Reichard", "Lowstetter", "Sandblom", "Griffis", "Basmanoff", "Coveney", "Hawker",
-            "Archibald", "Hutton", "Barnes", "Diegel", "Raybould", "Focell", "Breitenstein", "Murray", "Chauvin",
-            "Busk", "Pheleps", "Teagarden", "Northey", "Baumgartner", "Fleming", "Harris", "Parkinson", "Carpenter",
-            "Whirlow", "Bonner", "Wortman", "Rogers", "Scott", "Lowe", "Mckee", "Huston", "Bullard", "Throckmorton",
-            "Rummel", "Mathews", "Dull", "Saline", "Tue", "Woolery", "Lalty", "Schrader", "Ramsey", "Eisenmann",
-            "Philbrick", "Sybilla", "Wallace", "Fonblanque", "Paul", "Orbell", "Higgens", "Casteel", "Franks",
-            "Demuth", "Eisenman", "Hay", "Robinson", "Fischer", "Hincken", "Wylie", "Leichter", "Bousum",
-            "Littlefield", "Mcdonald", "Greif", "Rhodes", "Wall", "Steele", "Baldwin", "Smith", "Stewart", "Schere",
-            "Mary", "Aultman", "Emrick", "Guess", "Mitchell", "Painter", "Aft", "Hasely", "Weldi", "Loewentsein",
-            "Poorbaugh", "Kepple", "Noton", "Judge", "Jackson", "Style", "Adcock", "Diller", "Marriman", "Johnston",
-            "Children", "Monahan", "Ehret", "Shaw", "Congdon", "Pinney", "Millard", "Crissman", "Tanner", "Rose",
-            "Knisely", "Cypret", "Sommer", "Poehl", "Hardie", "Bender", "Overholt", "Gottwine", "Beach", "Leslie",
-            "Trevithick", "Langston", "Magor", "Shotts", "Howe", "Hunter", "Cross", "Kistler", "Dealtry", "Christner",
-            "Pennington", "Thorley", "Eckhardstein", "Van", "Stroh", "Stough", "Stall", "Beedell", "Shea", "Garland",
-            "Mays", "Pritchard", "Frankenberger", "Rowley", "Lane", "Baum", "Alliman", "Park", "Jardine", "Butler",
-            "Cherry", "Kooser", "Baxter", "Billimek", "Downing", "Hurst", "Wood", "Baird", "Watkins", "Edwards",
-            "Kemerer", "Harding", "Owens", "Eiford", "Keener", "Garneis", "Fiscina", "Mang", "Draudy", "Mills",
-            "Gibson", "Reese", "Todd", "Ramos", "Levett", "Wilks", "Ward", "Mosser", "Dunlap", "Kifer", "Christopher",
-            "Ashbaugh", "Wynter", "Rawls", "Cribbs", "Haynes", "Thigpen", "Schreckengost", "Bishop", "Linton",
-            "Chapman", "James", "Jerome", "Hook", "Omara", "Houston", "Maclagan", "Sandys", "Pickering", "Blois",
-            "Dickson", "Kemble", "Duncan", "Woodward", "Southern", "Henley", "Treeby", "Cram", "Elsas", "Driggers",
-            "Warrick", "Overstreet", "Hindman", "Buck", "Sulyard", "Wentzel", "Swink", "Butt", "Schaeffer",
-            "Hoffhants", "Bould", "Willcox", "Lotherington", "Bagley", "Graff", "White", "Wheeler", "Sloan",
-            "Rodacker", "Hanford", "Jowers", "Kunkle", "Cass", "Powers", "Gilman", "Mcmichaels", "Hobbs", "Herndon",
-            "Prescott", "Smail", "Mcdonald", "Biery", "Orner", "Richards", "Mueller", "Isaman", "Bruxner", "Goodman",
-            "Barth", "Turzanski", "Vorrasi", "Stainforth", "Nehling", "Rahl", "Erschoff", "Greene", "Mckinnon",
-            "Reade", "Smith", "Pery", "Roose", "Greenwood", "Weisgarber", "Curry", "Holts", "Zadovsky", "Parrish",
-            "Putnam", "Munson", "Mcindoe", "Nickolson", "Brooks", "Bollinger", "Stroble", "Siegrist", "Fulton",
-            "Tomey", "Zoucks", "Roberts", "Otis", "Clarke", "Easter", "Johnson", "Fylbrigg", "Taylor", "Swartzbaugh",
-            "Weinstein", "Gadow", "Sayre", "Marcotte", "Wise", "Atweeke", "Mcfall", "Napier", "Eisenhart", "Canham",
-            "Sealis", "Baughman", "Gertraht", "Losey", "Laurence", "Eva", "Pershing", "Kern", "Pirl", "Rega",
-            "Sanborn", "Kanaga", "Sanders", "Anderson", "Dickinson", "Osteen", "Gettemy", "Crom", "Snyder", "Reed",
-            "Laurenzi", "Riggle", "Tillson", "Fowler", "Raub", "Jenner", "Koepple", "Soames", "Goldvogel", "Dimsdale",
-            "Zimmer", "Giesen", "Baker", "Beail", "Mortland", "Bard", "Sanner", "Knopsnider", "Jenkins", "Bailey",
-            "Werner", "Barrett", "Faust", "Agg", "Tomlinson", "Williams", "Little", "Greenawalt", "Wells", "Wilkins",
-            "Gisiko", "Bauerle", "Harrold", "Prechtl", "Polson", "Faast", "Winton", "Garneys", "Peters", "Potter",
-            "Porter", "Tennant", "Eve", "Dugger", "Jones", "Burch", "Cowper", "Whittier" };
-
-    public static String[] firstNames = { "Albert", "Jacquelin", "Dona", "Alia", "Mayme", "Genoveva", "Emma", "Lena",
-            "Melody", "Vilma", "Katelyn", "Jeremy", "Coral", "Leann", "Lita", "Gilda", "Kayla", "Alvina", "Maranda",
-            "Verlie", "Khadijah", "Karey", "Patrice", "Kallie", "Corey", "Mollie", "Daisy", "Melanie", "Sarita",
-            "Nichole", "Pricilla", "Terresa", "Berneice", "Arianne", "Brianne", "Lavinia", "Ulrike", "Lesha", "Adell",
-            "Ardelle", "Marisha", "Laquita", "Karyl", "Maryjane", "Kendall", "Isobel", "Raeann", "Heike", "Barbera",
-            "Norman", "Yasmine", "Nevada", "Mariam", "Edith", "Eugena", "Lovie", "Maren", "Bennie", "Lennie", "Tamera",
-            "Crystal", "Randi", "Anamaria", "Chantal", "Jesenia", "Avis", "Shela", "Randy", "Laurena", "Sharron",
-            "Christiane", "Lorie", "Mario", "Elizabeth", "Reina", "Adria", "Lakisha", "Brittni", "Azzie", "Dori",
-            "Shaneka", "Asuncion", "Katheryn", "Laurice", "Sharita", "Krystal", "Reva", "Inger", "Alpha", "Makeda",
-            "Anabel", "Loni", "Tiara", "Meda", "Latashia", "Leola", "Chin", "Daisey", "Ivory", "Amalia", "Logan",
-            "Tyler", "Kyong", "Carolann", "Maryetta", "Eufemia", "Anya", "Doreatha", "Lorna", "Rutha", "Ehtel",
-            "Debbie", "Chassidy", "Sang", "Christa", "Lottie", "Chun", "Karine", "Peggie", "Amina", "Melany", "Alayna",
-            "Scott", "Romana", "Naomi", "Christiana", "Salena", "Taunya", "Mitsue", "Regina", "Chelsie", "Charity",
-            "Dacia", "Aletha", "Latosha", "Lia", "Tamica", "Chery", "Bianca", "Shu", "Georgianne", "Myriam", "Austin",
-            "Wan", "Mallory", "Jana", "Georgie", "Jenell", "Kori", "Vicki", "Delfina", "June", "Mellisa", "Catherina",
-            "Claudie", "Tynisha", "Dayle", "Enriqueta", "Belen", "Pia", "Sarai", "Rosy", "Renay", "Kacie", "Frieda",
-            "Cayla", "Elissa", "Claribel", "Sabina", "Mackenzie", "Raina", "Cira", "Mitzie", "Aubrey", "Serafina",
-            "Maria", "Katharine", "Esperanza", "Sung", "Daria", "Billye", "Stefanie", "Kasha", "Holly", "Suzanne",
-            "Inga", "Flora", "Andria", "Genevie", "Eladia", "Janet", "Erline", "Renna", "Georgeanna", "Delorse",
-            "Elnora", "Rudy", "Rima", "Leanora", "Letisha", "Love", "Alverta", "Pinkie", "Domonique", "Jeannie",
-            "Jose", "Jacqueline", "Tara", "Lily", "Erna", "Tennille", "Galina", "Tamala", "Kirby", "Nichelle",
-            "Myesha", "Farah", "Santa", "Ludie", "Kenia", "Yee", "Micheline", "Maryann", "Elaina", "Ethelyn",
-            "Emmaline", "Shanell", "Marina", "Nila", "Alane", "Shakira", "Dorris", "Belinda", "Elois", "Barbie",
-            "Carita", "Gisela", "Lura", "Fransisca", "Helga", "Peg", "Leonarda", "Earlie", "Deetta", "Jacquetta",
-            "Blossom", "Kayleigh", "Deloras", "Keshia", "Christinia", "Dulce", "Bernie", "Sheba", "Lashanda", "Tula",
-            "Claretta", "Kary", "Jeanette", "Lupita", "Lenora", "Hisako", "Sherise", "Glynda", "Adela", "Chia",
-            "Sudie", "Mindy", "Caroyln", "Lindsey", "Xiomara", "Mercedes", "Onie", "Loan", "Alexis", "Tommie",
-            "Donette", "Monica", "Soo", "Camellia", "Lavera", "Valery", "Ariana", "Sophia", "Loris", "Ginette",
-            "Marielle", "Tari", "Julissa", "Alesia", "Suzanna", "Emelda", "Erin", "Ladawn", "Sherilyn", "Candice",
-            "Nereida", "Fairy", "Carl", "Joel", "Marilee", "Gracia", "Cordie", "So", "Shanita", "Drew", "Cassie",
-            "Sherie", "Marget", "Norma", "Delois", "Debera", "Chanelle", "Catarina", "Aracely", "Carlene", "Tricia",
-            "Aleen", "Katharina", "Marguerita", "Guadalupe", "Margorie", "Mandie", "Kathe", "Chong", "Sage", "Faith",
-            "Maryrose", "Stephany", "Ivy", "Pauline", "Susie", "Cristen", "Jenifer", "Annette", "Debi", "Karmen",
-            "Luci", "Shayla", "Hope", "Ocie", "Sharie", "Tami", "Breana", "Kerry", "Rubye", "Lashay", "Sondra",
-            "Katrice", "Brunilda", "Cortney", "Yan", "Zenobia", "Penni", "Addie", "Lavona", "Noel", "Anika",
-            "Herlinda", "Valencia", "Bunny", "Tory", "Victoria", "Carrie", "Mikaela", "Wilhelmina", "Chung",
-            "Hortencia", "Gerda", "Wen", "Ilana", "Sibyl", "Candida", "Victorina", "Chantell", "Casie", "Emeline",
-            "Dominica", "Cecila", "Delora", "Miesha", "Nova", "Sally", "Ronald", "Charlette", "Francisca", "Mina",
-            "Jenna", "Loraine", "Felisa", "Lulu", "Page", "Lyda", "Babara", "Flor", "Walter", "Chan", "Sherika",
-            "Kala", "Luna", "Vada", "Syreeta", "Slyvia", "Karin", "Renata", "Robbi", "Glenda", "Delsie", "Lizzie",
-            "Genia", "Caitlin", "Bebe", "Cory", "Sam", "Leslee", "Elva", "Caren", "Kasie", "Leticia", "Shannan",
-            "Vickey", "Sandie", "Kyle", "Chang", "Terrilyn", "Sandra", "Elida", "Marketta", "Elsy", "Tu", "Carman",
-            "Ashlie", "Vernia", "Albertine", "Vivian", "Elba", "Bong", "Margy", "Janetta", "Xiao", "Teofila", "Danyel",
-            "Nickole", "Aleisha", "Tera", "Cleotilde", "Dara", "Paulita", "Isela", "Maricela", "Rozella", "Marivel",
-            "Aurora", "Melissa", "Carylon", "Delinda", "Marvella", "Candelaria", "Deidre", "Tawanna", "Myrtie",
-            "Milagro", "Emilie", "Coretta", "Ivette", "Suzann", "Ammie", "Lucina", "Lory", "Tena", "Eleanor",
-            "Cherlyn", "Tiana", "Brianna", "Myra", "Flo", "Carisa", "Kandi", "Erlinda", "Jacqulyn", "Fermina", "Riva",
-            "Palmira", "Lindsay", "Annmarie", "Tamiko", "Carline", "Amelia", "Quiana", "Lashawna", "Veola", "Belva",
-            "Marsha", "Verlene", "Alex", "Leisha", "Camila", "Mirtha", "Melva", "Lina", "Arla", "Cythia", "Towanda",
-            "Aracelis", "Tasia", "Aurore", "Trinity", "Bernadine", "Farrah", "Deneen", "Ines", "Betty", "Lorretta",
-            "Dorethea", "Hertha", "Rochelle", "Juli", "Shenika", "Yung", "Lavon", "Deeanna", "Nakia", "Lynnette",
-            "Dinorah", "Nery", "Elene", "Carolee", "Mira", "Franchesca", "Lavonda", "Leida", "Paulette", "Dorine",
-            "Allegra", "Keva", "Jeffrey", "Bernardina", "Maryln", "Yoko", "Faviola", "Jayne", "Lucilla", "Charita",
-            "Ewa", "Ella", "Maggie", "Ivey", "Bettie", "Jerri", "Marni", "Bibi", "Sabrina", "Sarah", "Marleen",
-            "Katherin", "Remona", "Jamika", "Antonina", "Oliva", "Lajuana", "Fonda", "Sigrid", "Yael", "Billi",
-            "Verona", "Arminda", "Mirna", "Tesha", "Katheleen", "Bonita", "Kamilah", "Patrica", "Julio", "Shaina",
-            "Mellie", "Denyse", "Deandrea", "Alena", "Meg", "Kizzie", "Krissy", "Karly", "Alleen", "Yahaira", "Lucie",
-            "Karena", "Elaine", "Eloise", "Buena", "Marianela", "Renee", "Nan", "Carolynn", "Windy", "Avril", "Jane",
-            "Vida", "Thea", "Marvel", "Rosaline", "Tifany", "Robena", "Azucena", "Carlota", "Mindi", "Andera", "Jenny",
-            "Courtney", "Lyndsey", "Willette", "Kristie", "Shaniqua", "Tabatha", "Ngoc", "Una", "Marlena", "Louetta",
-            "Vernie", "Brandy", "Jacquelyne", "Jenelle", "Elna", "Erminia", "Ida", "Audie", "Louis", "Marisol",
-            "Shawana", "Harriette", "Karol", "Kitty", "Esmeralda", "Vivienne", "Eloisa", "Iris", "Jeanice", "Cammie",
-            "Jacinda", "Shena", "Floy", "Theda", "Lourdes", "Jayna", "Marg", "Kati", "Tanna", "Rosalyn", "Maxima",
-            "Soon", "Angelika", "Shonna", "Merle", "Kassandra", "Deedee", "Heidi", "Marti", "Renae", "Arleen",
-            "Alfredia", "Jewell", "Carley", "Pennie", "Corina", "Tonisha", "Natividad", "Lilliana", "Darcie", "Shawna",
-            "Angel", "Piedad", "Josefa", "Rebbeca", "Natacha", "Nenita", "Petrina", "Carmon", "Chasidy", "Temika",
-            "Dennise", "Renetta", "Augusta", "Shirlee", "Valeri", "Casimira", "Janay", "Berniece", "Deborah", "Yaeko",
-            "Mimi", "Digna", "Irish", "Cher", "Yong", "Lucila", "Jimmie", "Junko", "Lezlie", "Waneta", "Sandee",
-            "Marquita", "Eura", "Freeda", "Annabell", "Laree", "Jaye", "Wendy", "Toshia", "Kylee", "Aleta", "Emiko",
-            "Clorinda", "Sixta", "Audrea", "Juanita", "Birdie", "Reita", "Latanya", "Nia", "Leora", "Laurine",
-            "Krysten", "Jerrie", "Chantel", "Ira", "Sena", "Andre", "Jann", "Marla", "Precious", "Katy", "Gabrielle",
-            "Yvette", "Brook", "Shirlene", "Eldora", "Laura", "Milda", "Euna", "Jettie", "Debora", "Lise", "Edythe",
-            "Leandra", "Shandi", "Araceli", "Johanne", "Nieves", "Denese", "Carmelita", "Nohemi", "Annice", "Natalie",
-            "Yolande", "Jeffie", "Vashti", "Vickie", "Obdulia", "Youlanda", "Lupe", "Tomoko", "Monserrate", "Domitila",
-            "Etsuko", "Adrienne", "Lakesha", "Melissia", "Odessa", "Meagan", "Veronika", "Jolyn", "Isabelle", "Leah",
-            "Rhiannon", "Gianna", "Audra", "Sommer", "Renate", "Perla", "Thao", "Myong", "Lavette", "Mark", "Emilia",
-            "Ariane", "Karl", "Dorie", "Jacquie", "Mia", "Malka", "Shenita", "Tashina", "Christine", "Cherri", "Roni",
-            "Fran", "Mildred", "Sara", "Clarissa", "Fredia", "Elease", "Samuel", "Earlene", "Vernita", "Mae", "Concha",
-            "Renea", "Tamekia", "Hye", "Ingeborg", "Tessa", "Kelly", "Kristin", "Tam", "Sacha", "Kanisha", "Jillian",
-            "Tiffanie", "Ashlee", "Madelyn", "Donya", "Clementine", "Mickie", "My", "Zena", "Terrie", "Samatha",
-            "Gertie", "Tarra", "Natalia", "Sharlene", "Evie", "Shalon", "Rosalee", "Numbers", "Jodi", "Hattie",
-            "Naoma", "Valene", "Whitley", "Claude", "Alline", "Jeanne", "Camie", "Maragret", "Viola", "Kris", "Marlo",
-            "Arcelia", "Shari", "Jalisa", "Corrie", "Eleonor", "Angelyn", "Merry", "Lauren", "Melita", "Gita",
-            "Elenor", "Aurelia", "Janae", "Lyndia", "Margeret", "Shawanda", "Rolande", "Shirl", "Madeleine", "Celinda",
-            "Jaleesa", "Shemika", "Joye", "Tisa", "Trudie", "Kathrine", "Clarita", "Dinah", "Georgia", "Antoinette",
-            "Janis", "Suzette", "Sherri", "Herta", "Arie", "Hedy", "Cassi", "Audrie", "Caryl", "Jazmine", "Jessica",
-            "Beverly", "Elizbeth", "Marylee", "Londa", "Fredericka", "Argelia", "Nana", "Donnette", "Damaris",
-            "Hailey", "Jamee", "Kathlene", "Glayds", "Lydia", "Apryl", "Verla", "Adam", "Concepcion", "Zelda",
-            "Shonta", "Vernice", "Detra", "Meghann", "Sherley", "Sheri", "Kiyoko", "Margarita", "Adaline", "Mariela",
-            "Velda", "Ailene", "Juliane", "Aiko", "Edyth", "Cecelia", "Shavon", "Florance", "Madeline", "Rheba",
-            "Deann", "Ignacia", "Odelia", "Heide", "Mica", "Jennette", "Maricruz", "Ouida", "Darcy", "Laure",
-            "Justina", "Amada", "Laine", "Cruz", "Sunny", "Francene", "Roxanna", "Nam", "Nancie", "Deanna", "Letty",
-            "Britni", "Kazuko", "Lacresha", "Simon", "Caleb", "Milton", "Colton", "Travis", "Miles", "Jonathan",
-            "Logan", "Rolf", "Emilio", "Roberto", "Marcus", "Tim", "Delmar", "Devon", "Kurt", "Edward", "Jeffrey",
-            "Elvis", "Alfonso", "Blair", "Wm", "Sheldon", "Leonel", "Michal", "Federico", "Jacques", "Leslie",
-            "Augustine", "Hugh", "Brant", "Hong", "Sal", "Modesto", "Curtis", "Jefferey", "Adam", "John", "Glenn",
-            "Vance", "Alejandro", "Refugio", "Lucio", "Demarcus", "Chang", "Huey", "Neville", "Preston", "Bert",
-            "Abram", "Foster", "Jamison", "Kirby", "Erich", "Manual", "Dustin", "Derrick", "Donnie", "Jospeh", "Chris",
-            "Josue", "Stevie", "Russ", "Stanley", "Nicolas", "Samuel", "Waldo", "Jake", "Max", "Ernest", "Reinaldo",
-            "Rene", "Gale", "Morris", "Nathan", "Maximo", "Courtney", "Theodore", "Octavio", "Otha", "Delmer",
-            "Graham", "Dean", "Lowell", "Myles", "Colby", "Boyd", "Adolph", "Jarrod", "Nick", "Mark", "Clinton", "Kim",
-            "Sonny", "Dalton", "Tyler", "Jody", "Orville", "Luther", "Rubin", "Hollis", "Rashad", "Barton", "Vicente",
-            "Ted", "Rick", "Carmine", "Clifton", "Gayle", "Christopher", "Jessie", "Bradley", "Clay", "Theo", "Josh",
-            "Mitchell", "Boyce", "Chung", "Eugenio", "August", "Norbert", "Sammie", "Jerry", "Adan", "Edmundo",
-            "Homer", "Hilton", "Tod", "Kirk", "Emmett", "Milan", "Quincy", "Jewell", "Herb", "Steve", "Carmen",
-            "Bobby", "Odis", "Daron", "Jeremy", "Carl", "Hunter", "Tuan", "Thurman", "Asa", "Brenton", "Shane",
-            "Donny", "Andreas", "Teddy", "Dario", "Cyril", "Hoyt", "Teodoro", "Vincenzo", "Hilario", "Daren",
-            "Agustin", "Marquis", "Ezekiel", "Brendan", "Johnson", "Alden", "Richie", "Granville", "Chad", "Joseph",
-            "Lamont", "Jordon", "Gilberto", "Chong", "Rosendo", "Eddy", "Rob", "Dewitt", "Andre", "Titus", "Russell",
-            "Rigoberto", "Dick", "Garland", "Gabriel", "Hank", "Darius", "Ignacio", "Lazaro", "Johnie", "Mauro",
-            "Edmund", "Trent", "Harris", "Osvaldo", "Marvin", "Judson", "Rodney", "Randall", "Renato", "Richard",
-            "Denny", "Jon", "Doyle", "Cristopher", "Wilson", "Christian", "Jamie", "Roland", "Ken", "Tad", "Romeo",
-            "Seth", "Quinton", "Byron", "Ruben", "Darrel", "Deandre", "Broderick", "Harold", "Ty", "Monroe", "Landon",
-            "Mohammed", "Angel", "Arlen", "Elias", "Andres", "Carlton", "Numbers", "Tony", "Thaddeus", "Issac",
-            "Elmer", "Antoine", "Ned", "Fermin", "Grover", "Benito", "Abdul", "Cortez", "Eric", "Maxwell", "Coy",
-            "Gavin", "Rich", "Andy", "Del", "Giovanni", "Major", "Efren", "Horacio", "Joaquin", "Charles", "Noah",
-            "Deon", "Pasquale", "Reed", "Fausto", "Jermaine", "Irvin", "Ray", "Tobias", "Carter", "Yong", "Jorge",
-            "Brent", "Daniel", "Zane", "Walker", "Thad", "Shaun", "Jaime", "Mckinley", "Bradford", "Nathanial",
-            "Jerald", "Aubrey", "Virgil", "Abel", "Philip", "Chester", "Chadwick", "Dominick", "Britt", "Emmitt",
-            "Ferdinand", "Julian", "Reid", "Santos", "Dwain", "Morgan", "James", "Marion", "Micheal", "Eddie", "Brett",
-            "Stacy", "Kerry", "Dale", "Nicholas", "Darrick", "Freeman", "Scott", "Newton", "Sherman", "Felton",
-            "Cedrick", "Winfred", "Brad", "Fredric", "Dewayne", "Virgilio", "Reggie", "Edgar", "Heriberto", "Shad",
-            "Timmy", "Javier", "Nestor", "Royal", "Lynn", "Irwin", "Ismael", "Jonas", "Wiley", "Austin", "Kieth",
-            "Gonzalo", "Paris", "Earnest", "Arron", "Jarred", "Todd", "Erik", "Maria", "Chauncey", "Neil", "Conrad",
-            "Maurice", "Roosevelt", "Jacob", "Sydney", "Lee", "Basil", "Louis", "Rodolfo", "Rodger", "Roman", "Corey",
-            "Ambrose", "Cristobal", "Sylvester", "Benton", "Franklin", "Marcelo", "Guillermo", "Toby", "Jeramy",
-            "Donn", "Danny", "Dwight", "Clifford", "Valentine", "Matt", "Jules", "Kareem", "Ronny", "Lonny", "Son",
-            "Leopoldo", "Dannie", "Gregg", "Dillon", "Orlando", "Weston", "Kermit", "Damian", "Abraham", "Walton",
-            "Adrian", "Rudolf", "Will", "Les", "Norberto", "Fred", "Tyrone", "Ariel", "Terry", "Emmanuel", "Anderson",
-            "Elton", "Otis", "Derek", "Frankie", "Gino", "Lavern", "Jarod", "Kenny", "Dane", "Keenan", "Bryant",
-            "Eusebio", "Dorian", "Ali", "Lucas", "Wilford", "Jeremiah", "Warner", "Woodrow", "Galen", "Bob",
-            "Johnathon", "Amado", "Michel", "Harry", "Zachery", "Taylor", "Booker", "Hershel", "Mohammad", "Darrell",
-            "Kyle", "Stuart", "Marlin", "Hyman", "Jeffery", "Sidney", "Merrill", "Roy", "Garrett", "Porter", "Kenton",
-            "Giuseppe", "Terrance", "Trey", "Felix", "Buster", "Von", "Jackie", "Linwood", "Darron", "Francisco",
-            "Bernie", "Diego", "Brendon", "Cody", "Marco", "Ahmed", "Antonio", "Vince", "Brooks", "Kendrick", "Ross",
-            "Mohamed", "Jim", "Benny", "Gerald", "Pablo", "Charlie", "Antony", "Werner", "Hipolito", "Minh", "Mel",
-            "Derick", "Armand", "Fidel", "Lewis", "Donnell", "Desmond", "Vaughn", "Guadalupe", "Keneth", "Rodrick",
-            "Spencer", "Chas", "Gus", "Harlan", "Wes", "Carmelo", "Jefferson", "Gerard", "Jarvis", "Haywood", "Hayden",
-            "Sergio", "Gene", "Edgardo", "Colin", "Horace", "Dominic", "Aldo", "Adolfo", "Juan", "Man", "Lenard",
-            "Clement", "Everett", "Hal", "Bryon", "Mason", "Emerson", "Earle", "Laurence", "Columbus", "Lamar",
-            "Douglas", "Ian", "Fredrick", "Marc", "Loren", "Wallace", "Randell", "Noble", "Ricardo", "Rory", "Lindsey",
-            "Boris", "Bill", "Carlos", "Domingo", "Grant", "Craig", "Ezra", "Matthew", "Van", "Rudy", "Danial",
-            "Brock", "Maynard", "Vincent", "Cole", "Damion", "Ellsworth", "Marcel", "Markus", "Rueben", "Tanner",
-            "Reyes", "Hung", "Kennith", "Lindsay", "Howard", "Ralph", "Jed", "Monte", "Garfield", "Avery", "Bernardo",
-            "Malcolm", "Sterling", "Ezequiel", "Kristofer", "Luciano", "Casey", "Rosario", "Ellis", "Quintin",
-            "Trevor", "Miquel", "Jordan", "Arthur", "Carson", "Tyron", "Grady", "Walter", "Jonathon", "Ricky",
-            "Bennie", "Terrence", "Dion", "Dusty", "Roderick", "Isaac", "Rodrigo", "Harrison", "Zack", "Dee", "Devin",
-            "Rey", "Ulysses", "Clint", "Greg", "Dino", "Frances", "Wade", "Franklyn", "Jude", "Bradly", "Salvador",
-            "Rocky", "Weldon", "Lloyd", "Milford", "Clarence", "Alec", "Allan", "Bobbie", "Oswaldo", "Wilfred",
-            "Raleigh", "Shelby", "Willy", "Alphonso", "Arnoldo", "Robbie", "Truman", "Nicky", "Quinn", "Damien",
-            "Lacy", "Marcos", "Parker", "Burt", "Carroll", "Denver", "Buck", "Dong", "Normand", "Billie", "Edwin",
-            "Troy", "Arden", "Rusty", "Tommy", "Kenneth", "Leo", "Claud", "Joel", "Kendall", "Dante", "Milo", "Cruz",
-            "Lucien", "Ramon", "Jarrett", "Scottie", "Deshawn", "Ronnie", "Pete", "Alonzo", "Whitney", "Stefan",
-            "Sebastian", "Edmond", "Enrique", "Branden", "Leonard", "Loyd", "Olin", "Ron", "Rhett", "Frederic",
-            "Orval", "Tyrell", "Gail", "Eli", "Antonia", "Malcom", "Sandy", "Stacey", "Nickolas", "Hosea", "Santo",
-            "Oscar", "Fletcher", "Dave", "Patrick", "Dewey", "Bo", "Vito", "Blaine", "Randy", "Robin", "Winston",
-            "Sammy", "Edwardo", "Manuel", "Valentin", "Stanford", "Filiberto", "Buddy", "Zachariah", "Johnnie",
-            "Elbert", "Paul", "Isreal", "Jerrold", "Leif", "Owen", "Sung", "Junior", "Raphael", "Josef", "Donte",
-            "Allen", "Florencio", "Raymond", "Lauren", "Collin", "Eliseo", "Bruno", "Martin", "Lyndon", "Kurtis",
-            "Salvatore", "Erwin", "Michael", "Sean", "Davis", "Alberto", "King", "Rolland", "Joe", "Tory", "Chase",
-            "Dallas", "Vernon", "Beau", "Terrell", "Reynaldo", "Monty", "Jame", "Dirk", "Florentino", "Reuben", "Saul",
-            "Emory", "Esteban", "Michale", "Claudio", "Jacinto", "Kelley", "Levi", "Andrea", "Lanny", "Wendell",
-            "Elwood", "Joan", "Felipe", "Palmer", "Elmo", "Lawrence", "Hubert", "Rudolph", "Duane", "Cordell",
-            "Everette", "Mack", "Alan", "Efrain", "Trenton", "Bryan", "Tom", "Wilmer", "Clyde", "Chance", "Lou",
-            "Brain", "Justin", "Phil", "Jerrod", "George", "Kris", "Cyrus", "Emery", "Rickey", "Lincoln", "Renaldo",
-            "Mathew", "Luke", "Dwayne", "Alexis", "Jackson", "Gil", "Marty", "Burton", "Emil", "Glen", "Willian",
-            "Clemente", "Keven", "Barney", "Odell", "Reginald", "Aurelio", "Damon", "Ward", "Gustavo", "Harley",
-            "Peter", "Anibal", "Arlie", "Nigel", "Oren", "Zachary", "Scot", "Bud", "Wilbert", "Bart", "Josiah",
-            "Marlon", "Eldon", "Darryl", "Roger", "Anthony", "Omer", "Francis", "Patricia", "Moises", "Chuck",
-            "Waylon", "Hector", "Jamaal", "Cesar", "Julius", "Rex", "Norris", "Ollie", "Isaias", "Quentin", "Graig",
-            "Lyle", "Jeffry", "Karl", "Lester", "Danilo", "Mike", "Dylan", "Carlo", "Ryan", "Leon", "Percy", "Lucius",
-            "Jamel", "Lesley", "Joey", "Cornelius", "Rico", "Arnulfo", "Chet", "Margarito", "Ernie", "Nathanael",
-            "Amos", "Cleveland", "Luigi", "Alfonzo", "Phillip", "Clair", "Elroy", "Alva", "Hans", "Shon", "Gary",
-            "Jesus", "Cary", "Silas", "Keith", "Israel", "Willard", "Randolph", "Dan", "Adalberto", "Claude",
-            "Delbert", "Garry", "Mary", "Larry", "Riley", "Robt", "Darwin", "Barrett", "Steven", "Kelly", "Herschel",
-            "Darnell", "Scotty", "Armando", "Miguel", "Lawerence", "Wesley", "Garth", "Carol", "Micah", "Alvin",
-            "Billy", "Earl", "Pat", "Brady", "Cory", "Carey", "Bernard", "Jayson", "Nathaniel", "Gaylord", "Archie",
-            "Dorsey", "Erasmo", "Angelo", "Elisha", "Long", "Augustus", "Hobert", "Drew", "Stan", "Sherwood",
-            "Lorenzo", "Forrest", "Shawn", "Leigh", "Hiram", "Leonardo", "Gerry", "Myron", "Hugo", "Alvaro", "Leland",
-            "Genaro", "Jamey", "Stewart", "Elden", "Irving", "Olen", "Antone", "Freddy", "Lupe", "Joshua", "Gregory",
-            "Andrew", "Sang", "Wilbur", "Gerardo", "Merlin", "Williams", "Johnny", "Alex", "Tommie", "Jimmy",
-            "Donovan", "Dexter", "Gaston", "Tracy", "Jeff", "Stephen", "Berry", "Anton", "Darell", "Fritz", "Willis",
-            "Noel", "Mariano", "Crawford", "Zoey", "Alex", "Brianna", "Carlie", "Lloyd", "Cal", "Astor", "Randolf",
-            "Magdalene", "Trevelyan", "Terance", "Roy", "Kermit", "Harriett", "Crystal", "Laurinda", "Kiersten",
-            "Phyllida", "Liz", "Bettie", "Rena", "Colten", "Berenice", "Sindy", "Wilma", "Amos", "Candi", "Ritchie",
-            "Dirk", "Kathlyn", "Callista", "Anona", "Flossie", "Sterling", "Calista", "Regan", "Erica", "Jeana",
-            "Keaton", "York", "Nolan", "Daniel", "Benton", "Tommie", "Serenity", "Deanna", "Chas", "Heron", "Marlyn",
-            "Xylia", "Tristin", "Lyndon", "Andriana", "Madelaine", "Maddison", "Leila", "Chantelle", "Audrey",
-            "Connor", "Daley", "Tracee", "Tilda", "Eliot", "Merle", "Linwood", "Kathryn", "Silas", "Alvina",
-            "Phinehas", "Janis", "Alvena", "Zubin", "Gwendolen", "Caitlyn", "Bertram", "Hailee", "Idelle", "Homer",
-            "Jannah", "Delbert", "Rhianna", "Cy", "Jefferson", "Wayland", "Nona", "Tempest", "Reed", "Jenifer",
-            "Ellery", "Nicolina", "Aldous", "Prince", "Lexia", "Vinnie", "Doug", "Alberic", "Kayleen", "Woody",
-            "Rosanne", "Ysabel", "Skyler", "Twyla", "Geordie", "Leta", "Clive", "Aaron", "Scottie", "Celeste", "Chuck",
-            "Erle", "Lallie", "Jaycob", "Ray", "Carrie", "Laurita", "Noreen", "Meaghan", "Ulysses", "Andy", "Drogo",
-            "Dina", "Yasmin", "Mya", "Luvenia", "Urban", "Jacob", "Laetitia", "Sherry", "Love", "Michaela", "Deonne",
-            "Summer", "Brendon", "Sheena", "Mason", "Jayson", "Linden", "Salal", "Darrell", "Diana", "Hudson",
-            "Lennon", "Isador", "Charley", "April", "Ralph", "James", "Mina", "Jolyon", "Laurine", "Monna", "Carita",
-            "Munro", "Elsdon", "Everette", "Radclyffe", "Darrin", "Herbert", "Gawain", "Sheree", "Trudy", "Emmaline",
-            "Kassandra", "Rebecca", "Basil", "Jen", "Don", "Osborne", "Lilith", "Hannah", "Fox", "Rupert", "Paulene",
-            "Darius", "Wally", "Baptist", "Sapphire", "Tia", "Sondra", "Kylee", "Ashton", "Jepson", "Joetta", "Val",
-            "Adela", "Zacharias", "Zola", "Marmaduke", "Shannah", "Posie", "Oralie", "Brittany", "Ernesta", "Raymund",
-            "Denzil", "Daren", "Roosevelt", "Nelson", "Fortune", "Mariel", "Nick", "Jaden", "Upton", "Oz", "Margaux",
-            "Precious", "Albert", "Bridger", "Jimmy", "Nicola", "Rosalynne", "Keith", "Walt", "Della", "Joanna",
-            "Xenia", "Esmeralda", "Major", "Simon", "Rexana", "Stacy", "Calanthe", "Sherley", "Kaitlyn", "Graham",
-            "Ramsey", "Abbey", "Madlyn", "Kelvin", "Bill", "Rue", "Monica", "Caileigh", "Laraine", "Booker", "Jayna",
-            "Greta", "Jervis", "Sherman", "Kendrick", "Tommy", "Iris", "Geffrey", "Kaelea", "Kerr", "Garrick", "Jep",
-            "Audley", "Nic", "Bronte", "Beulah", "Patricia", "Jewell", "Deidra", "Cory", "Everett", "Harper",
-            "Charity", "Godfrey", "Jaime", "Sinclair", "Talbot", "Dayna", "Cooper", "Rosaline", "Jennie", "Eileen",
-            "Latanya", "Corinna", "Roxie", "Caesar", "Charles", "Pollie", "Lindsey", "Sorrel", "Dwight", "Jocelyn",
-            "Weston", "Shyla", "Valorie", "Bessie", "Josh", "Lessie", "Dayton", "Kathi", "Chasity", "Wilton", "Adam",
-            "William", "Ash", "Angela", "Ivor", "Ria", "Jazmine", "Hailey", "Jo", "Silvestra", "Ernie", "Clifford",
-            "Levi", "Matilda", "Quincey", "Camilla", "Delicia", "Phemie", "Laurena", "Bambi", "Lourdes", "Royston",
-            "Chastity", "Lynwood", "Elle", "Brenda", "Phoebe", "Timothy", "Raschelle", "Lilly", "Burt", "Rina",
-            "Rodney", "Maris", "Jaron", "Wilf", "Harlan", "Audra", "Vincent", "Elwyn", "Drew", "Wynter", "Ora",
-            "Lissa", "Virgil", "Xavier", "Chad", "Ollie", "Leyton", "Karolyn", "Skye", "Roni", "Gladys", "Dinah",
-            "Penny", "August", "Osmund", "Whitaker", "Brande", "Cornell", "Phil", "Zara", "Kilie", "Gavin", "Coty",
-            "Randy", "Teri", "Keira", "Pru", "Clemency", "Kelcey", "Nevil", "Poppy", "Gareth", "Christabel", "Bastian",
-            "Wynonna", "Roselyn", "Goddard", "Collin", "Trace", "Neal", "Effie", "Denys", "Virginia", "Richard",
-            "Isiah", "Harrietta", "Gaylord", "Diamond", "Trudi", "Elaine", "Jemmy", "Gage", "Annabel", "Quincy", "Syd",
-            "Marianna", "Philomena", "Aubree", "Kathie", "Jacki", "Kelley", "Bess", "Cecil", "Maryvonne", "Kassidy",
-            "Anselm", "Dona", "Darby", "Jamison", "Daryl", "Darell", "Teal", "Lennie", "Bartholomew", "Katie",
-            "Maybelline", "Kimball", "Elvis", "Les", "Flick", "Harley", "Beth", "Bidelia", "Montague", "Helen", "Ozzy",
-            "Stef", "Debra", "Maxene", "Stefanie", "Russ", "Avril", "Johnathan", "Orson", "Chelsey", "Josephine",
-            "Deshaun", "Wendell", "Lula", "Ferdinanda", "Greg", "Brad", "Kynaston", "Dena", "Russel", "Robertina",
-            "Misti", "Leon", "Anjelica", "Bryana", "Myles", "Judi", "Curtis", "Davin", "Kristia", "Chrysanta",
-            "Hayleigh", "Hector", "Osbert", "Eustace", "Cary", "Tansy", "Cayley", "Maryann", "Alissa", "Ike",
-            "Tranter", "Reina", "Alwilda", "Sidony", "Columbine", "Astra", "Jillie", "Stephania", "Jonah", "Kennedy",
-            "Ferdinand", "Allegria", "Donella", "Kelleigh", "Darian", "Eldreda", "Jayden", "Herbie", "Jake", "Winston",
-            "Vi", "Annie", "Cherice", "Hugo", "Tricia", "Haydee", "Cassarah", "Darden", "Mallory", "Alton", "Hadley",
-            "Romayne", "Lacey", "Ern", "Alayna", "Cecilia", "Seward", "Tilly", "Edgar", "Concordia", "Ibbie", "Dahlia",
-            "Oswin", "Stu", "Brett", "Maralyn", "Kristeen", "Dotty", "Robyn", "Nessa", "Tresha", "Guinevere",
-            "Emerson", "Haze", "Lyn", "Henderson", "Lexa", "Jaylen", "Gail", "Lizette", "Tiara", "Robbie", "Destiny",
-            "Alice", "Livia", "Rosy", "Leah", "Jan", "Zach", "Vita", "Gia", "Micheal", "Rowina", "Alysha", "Bobbi",
-            "Delores", "Osmond", "Karaugh", "Wilbur", "Kasandra", "Renae", "Kaety", "Dora", "Gaye", "Amaryllis",
-            "Katelyn", "Dacre", "Prudence", "Ebony", "Camron", "Jerrold", "Vivyan", "Randall", "Donna", "Misty",
-            "Damon", "Selby", "Esmund", "Rian", "Garry", "Julius", "Raelene", "Clement", "Dom", "Tibby", "Moss",
-            "Millicent", "Gwendoline", "Berry", "Ashleigh", "Lilac", "Quin", "Vere", "Creighton", "Harriet", "Malvina",
-            "Lianne", "Pearle", "Kizzie", "Kara", "Petula", "Jeanie", "Maria", "Pacey", "Victoria", "Huey", "Toni",
-            "Rose", "Wallis", "Diggory", "Josiah", "Delma", "Keysha", "Channing", "Prue", "Lee", "Ryan", "Sidney",
-            "Valerie", "Clancy", "Ezra", "Gilbert", "Clare", "Laz", "Crofton", "Mike", "Annabella", "Tara", "Eldred",
-            "Arthur", "Jaylon", "Peronel", "Paden", "Dot", "Marian", "Amyas", "Alexus", "Esmond", "Abbie", "Stanley",
-            "Brittani", "Vickie", "Errol", "Kimberlee", "Uland", "Ebenezer", "Howie", "Eveline", "Andrea", "Trish",
-            "Hopkin", "Bryanna", "Temperance", "Valarie", "Femie", "Alix", "Terrell", "Lewin", "Lorrin", "Happy",
-            "Micah", "Rachyl", "Sloan", "Gertrude", "Elizabeth", "Dorris", "Andra", "Bram", "Gary", "Jeannine",
-            "Maurene", "Irene", "Yolonda", "Jonty", "Coleen", "Cecelia", "Chantal", "Stuart", "Caris", "Ros",
-            "Kaleigh", "Mirabelle", "Kolby", "Primrose", "Susannah", "Ginny", "Jinny", "Dolly", "Lettice", "Sonny",
-            "Melva", "Ernest", "Garret", "Reagan", "Trenton", "Gallagher", "Edwin", "Nikolas", "Corrie", "Lynette",
-            "Ettie", "Sly", "Debbi", "Eudora", "Brittney", "Tacey", "Marius", "Anima", "Gordon", "Olivia", "Kortney",
-            "Shantel", "Kolleen", "Nevaeh", "Buck", "Sera", "Liliana", "Aric", "Kalyn", "Mick", "Libby", "Ingram",
-            "Alexandria", "Darleen", "Jacklyn", "Hughie", "Tyler", "Aida", "Ronda", "Deemer", "Taryn", "Laureen",
-            "Samantha", "Dave", "Hardy", "Baldric", "Montgomery", "Gus", "Ellis", "Titania", "Luke", "Chase", "Haidee",
-            "Mayra", "Isabell", "Trinity", "Milo", "Abigail", "Tacita", "Meg", "Hervey", "Natasha", "Sadie", "Holden",
-            "Dee", "Mansel", "Perry", "Randi", "Frederica", "Georgina", "Kolour", "Debbie", "Seraphina", "Elspet",
-            "Julyan", "Raven", "Zavia", "Jarvis", "Jaymes", "Grover", "Cairo", "Alea", "Jordon", "Braxton", "Donny",
-            "Rhoda", "Tonya", "Bee", "Alyssia", "Ashlyn", "Reanna", "Lonny", "Arlene", "Deb", "Jane", "Nikole",
-            "Bettina", "Harrison", "Tamzen", "Arielle", "Adelaide", "Faith", "Bridie", "Wilburn", "Fern", "Nan",
-            "Shaw", "Zeke", "Alan", "Dene", "Gina", "Alexa", "Bailey", "Sal", "Tammy", "Maximillian", "America",
-            "Sylvana", "Fitz", "Mo", "Marissa", "Cass", "Eldon", "Wilfrid", "Tel", "Joann", "Kendra", "Tolly",
-            "Leanne", "Ferdie", "Haven", "Lucas", "Marlee", "Cyrilla", "Red", "Phoenix", "Jazmin", "Carin", "Gena",
-            "Lashonda", "Tucker", "Genette", "Kizzy", "Winifred", "Melody", "Keely", "Kaylyn", "Radcliff", "Lettie",
-            "Foster", "Lyndsey", "Nicholas", "Farley", "Louisa", "Dana", "Dortha", "Francine", "Doran", "Bonita",
-            "Hal", "Sawyer", "Reginald", "Aislin", "Nathan", "Baylee", "Abilene", "Ladonna", "Maurine", "Shelly",
-            "Deandre", "Jasmin", "Roderic", "Tiffany", "Amanda", "Verity", "Wilford", "Gayelord", "Whitney", "Demelza",
-            "Kenton", "Alberta", "Kyra", "Tabitha", "Sampson", "Korey", "Lillian", "Edison", "Clayton", "Steph",
-            "Maya", "Dusty", "Jim", "Ronny", "Adrianne", "Bernard", "Harris", "Kiley", "Alexander", "Kisha", "Ethalyn",
-            "Patience", "Briony", "Indigo", "Aureole", "Makenzie", "Molly", "Sherilyn", "Barry", "Laverne", "Hunter",
-            "Rocky", "Tyreek", "Madalyn", "Phyliss", "Chet", "Beatrice", "Faye", "Lavina", "Madelyn", "Tracey",
-            "Gyles", "Patti", "Carlyn", "Stephanie", "Jackalyn", "Larrie", "Kimmy", "Isolda", "Emelina", "Lis",
-            "Zillah", "Cody", "Sheard", "Rufus", "Paget", "Mae", "Rexanne", "Luvinia", "Tamsen", "Rosanna", "Greig",
-            "Stacia", "Mabelle", "Quianna", "Lotus", "Delice", "Bradford", "Angus", "Cosmo", "Earlene", "Adrian",
-            "Arlie", "Noelle", "Sabella", "Isa", "Adelle", "Innocent", "Kirby", "Trixie", "Kenelm", "Nelda", "Melia",
-            "Kendal", "Dorinda", "Placid", "Linette", "Kam", "Sherisse", "Evan", "Ewart", "Janice", "Linton",
-            "Jacaline", "Charissa", "Douglas", "Aileen", "Kemp", "Oli", "Amethyst", "Rosie", "Nigella", "Sherill",
-            "Anderson", "Alanna", "Eric", "Claudia", "Jennifer", "Boniface", "Harriet", "Vernon", "Lucy", "Shawnee",
-            "Gerard", "Cecily", "Romey", "Randall", "Wade", "Lux", "Dawson", "Gregg", "Kade", "Roxanne", "Melinda",
-            "Rolland", "Rowanne", "Fannie", "Isidore", "Melia", "Harvie", "Salal", "Eleonor", "Jacquette", "Lavone",
-            "Shanika", "Tarquin", "Janet", "Josslyn", "Maegan", "Augusta", "Aubree", "Francene", "Martie", "Marisa",
-            "Tyreek", "Tatianna", "Caleb", "Sheridan", "Nellie", "Barbara", "Wat", "Jayla", "Esmaralda", "Graeme",
-            "Lavena", "Jemima", "Nikolas", "Triston", "Portia", "Kyla", "Marcus", "Raeburn", "Jamison", "Earl", "Wren",
-            "Leighton", "Lagina", "Lucasta", "Dina", "Amaranta", "Jessika", "Claud", "Bernard", "Winifred", "Ebba",
-            "Sammi", "Gall", "Chloe", "Ottoline", "Herbert", "Janice", "Gareth", "Channing", "Caleigh", "Kailee",
-            "Ralphie", "Tamzen", "Quincy", "Beaumont", "Albert", "Jadyn", "Violet", "Luanna", "Moriah", "Humbert",
-            "Jed", "Leona", "Hale", "Mitch", "Marlin", "Nivek", "Darwin", "Dirk", "Liliana", "Meadow", "Bernadine",
-            "Jorie", "Peyton", "Astra", "Roscoe", "Gina", "Lovell", "Jewel", "Romayne", "Rosy", "Imogene",
-            "Margaretta", "Lorinda", "Hopkin", "Bobby", "Flossie", "Bennie", "Horatio", "Jonah", "Lyn", "Deana",
-            "Juliana", "Blanch", "Wright", "Kendal", "Woodrow", "Tania", "Austyn", "Val", "Mona", "Charla", "Rudyard",
-            "Pamela", "Raven", "Zena", "Nicola", "Kaelea", "Conor", "Virgil", "Sonnie", "Goodwin", "Christianne",
-            "Linford", "Myron", "Denton", "Charita", "Brody", "Ginnie", "Harrison", "Jeanine", "Quin", "Isolda",
-            "Zoie", "Pearce", "Margie", "Larrie", "Angelina", "Marcia", "Jessamine", "Delilah", "Dick", "Luana",
-            "Delicia", "Lake", "Luvenia", "Vaughan", "Concordia", "Gayelord", "Cheyenne", "Felix", "Dorris", "Pen",
-            "Kristeen", "Parris", "Everitt", "Josephina", "Amy", "Tommie", "Adrian", "April", "Rosaline", "Zachery",
-            "Trace", "Phoebe", "Jenelle", "Kameron", "Katharine", "Media", "Colton", "Tad", "Quianna", "Kerenza",
-            "Greta", "Luvinia", "Pete", "Tonya", "Beckah", "Barbra", "Jon", "Tetty", "Corey", "Sylvana", "Kizzy",
-            "Korey", "Trey", "Haydee", "Penny", "Mandy", "Panda", "Coline", "Ramsey", "Sukie", "Annabel", "Sarina",
-            "Corbin", "Suzanna", "Rob", "Duana", "Shell", "Jason", "Eddy", "Rube", "Roseann", "Celia", "Brianne",
-            "Nerissa", "Jera", "Humphry", "Ashlynn", "Terrence", "Philippina", "Coreen", "Kolour", "Indiana", "Paget",
-            "Marlyn", "Hester", "Isbel", "Ocean", "Harris", "Leslie", "Vere", "Monroe", "Isabelle", "Bertie", "Clitus",
-            "Dave", "Alethea", "Lessie", "Louiza", "Madlyn", "Garland", "Wolf", "Lalo", "Donny", "Amabel", "Tianna",
-            "Louie", "Susie", "Mackenzie", "Renie", "Tess", "Marmaduke", "Gwendolen", "Bettina", "Beatrix", "Esmund",
-            "Minnie", "Carlie", "Barnabas", "Ruthie", "Honour", "Haylie", "Xavior", "Freddie", "Ericka", "Aretha",
-            "Edie", "Madelina", "Anson", "Tabby", "Derrick", "Jocosa", "Deirdre", "Aislin", "Chastity", "Abigail",
-            "Wynonna", "Zo", "Eldon", "Krystine", "Ghislaine", "Zavia", "Nolene", "Marigold", "Kelley", "Sylvester",
-            "Odell", "George", "Laurene", "Franklyn", "Clarice", "Mo", "Dustin", "Debbi", "Lina", "Tony", "Acacia",
-            "Hettie", "Natalee", "Marcie", "Brittany", "Elnora", "Rachel", "Dawn", "Basil", "Christal", "Anjelica",
-            "Fran", "Tawny", "Delroy", "Tameka", "Lillie", "Ceara", "Deanna", "Deshaun", "Ken", "Bradford", "Justina",
-            "Merle", "Draven", "Gretta", "Harriette", "Webster", "Nathaniel", "Anemone", "Coleen", "Ruth", "Chryssa",
-            "Hortensia", "Saffie", "Deonne", "Leopold", "Harlan", "Lea", "Eppie", "Lucinda", "Tilda", "Fanny", "Titty",
-            "Lockie", "Jepson", "Sherisse", "Maralyn", "Ethel", "Sly", "Ebenezer", "Canute", "Ella", "Freeman",
-            "Reuben", "Olivette", "Nona", "Rik", "Amice", "Kristine", "Kathie", "Jayne", "Jeri", "Mckenna", "Bertram",
-            "Kaylee", "Livia", "Gil", "Wallace", "Maryann", "Keeleigh", "Laurinda", "Doran", "Khloe", "Dakota",
-            "Yaron", "Kimberleigh", "Gytha", "Doris", "Marylyn", "Benton", "Linnette", "Esther", "Jakki", "Rowina",
-            "Marian", "Roselyn", "Norbert", "Maggie", "Caesar", "Phinehas", "Jerry", "Jasmine", "Antonette", "Miriam",
-            "Monna", "Maryvonne", "Jacquetta", "Bernetta", "Napier", "Annie", "Gladwin", "Sheldon", "Aric", "Elouise",
-            "Gawain", "Kristia", "Gabe", "Kyra", "Red", "Tod", "Dudley", "Lorraine", "Ryley", "Sabina", "Poppy",
-            "Leland", "Aileen", "Eglantine", "Alicia", "Jeni", "Addy", "Tiffany", "Geffrey", "Lavina", "Collin",
-            "Clover", "Vin", "Jerome", "Doug", "Vincent", "Florence", "Scarlet", "Celeste", "Desdemona", "Tiphanie",
-            "Kassandra", "Ashton", "Madison", "Art", "Magdalene", "Iona", "Josepha", "Anise", "Ferne", "Derek",
-            "Huffie", "Qiana", "Ysabel", "Tami", "Shannah", "Xavier", "Willard", "Winthrop", "Vickie", "Maura",
-            "Placid", "Tiara", "Reggie", "Elissa", "Isa", "Chrysanta", "Jeff", "Bessie", "Terri", "Amilia", "Brett",
-            "Daniella", "Damion", "Carolina", "Maximillian", "Travers", "Benjamin", "Oprah", "Darcy", "Yolanda",
-            "Nicolina", "Crofton", "Jarrett", "Kaitlin", "Shauna", "Keren", "Bevis", "Kalysta", "Sharron", "Alyssa",
-            "Blythe", "Zelma", "Caelie", "Norwood", "Billie", "Patrick", "Gary", "Cambria", "Tylar", "Mason", "Helen",
-            "Melyssa", "Gene", "Gilberta", "Carter", "Herbie", "Harmonie", "Leola", "Eugenia", "Clint", "Pauletta",
-            "Edwyna", "Georgina", "Teal", "Harper", "Izzy", "Dillon", "Kezia", "Evangeline", "Colene", "Madelaine",
-            "Zilla", "Rudy", "Dottie", "Caris", "Morton", "Marge", "Tacey", "Parker", "Troy", "Liza", "Lewin",
-            "Tracie", "Justine", "Dallas", "Linden", "Ray", "Loretta", "Teri", "Elvis", "Diane", "Julianna", "Manfred",
-            "Denise", "Eireen", "Ann", "Kenith", "Linwood", "Kathlyn", "Bernice", "Shelley", "Oswald", "Amedeus",
-            "Homer", "Tanzi", "Ted", "Ralphina", "Hyacinth", "Lotus", "Matthias", "Arlette", "Clark", "Cecil",
-            "Elspeth", "Alvena", "Noah", "Millard", "Brenden", "Cole", "Philipa", "Nina", "Thelma", "Iantha", "Reid",
-            "Jefferson", "Meg", "Elsie", "Shirlee", "Nathan", "Nancy", "Simona", "Racheal", "Carin", "Emory", "Delice",
-            "Kristi", "Karaugh", "Kaety", "Tilly", "Em", "Alanis", "Darrin", "Jerrie", "Hollis", "Cary", "Marly",
-            "Carita", "Jody", "Farley", "Hervey", "Rosalin", "Cuthbert", "Stewart", "Jodene", "Caileigh", "Briscoe",
-            "Dolores", "Sheree", "Eustace", "Nigel", "Detta", "Barret", "Rowland", "Kenny", "Githa", "Zoey", "Adela",
-            "Petronella", "Opal", "Coleman", "Niles", "Cyril", "Dona", "Alberic", "Allannah", "Jules", "Avalon",
-            "Hadley", "Thomas", "Renita", "Calanthe", "Heron", "Shawnda", "Chet", "Malina", "Manny", "Rina", "Frieda",
-            "Eveleen", "Deshawn", "Amos", "Raelene", "Paige", "Molly", "Nannie", "Ileen", "Brendon", "Milford",
-            "Unice", "Rebeccah", "Caedmon", "Gae", "Doreen", "Vivian", "Louis", "Raphael", "Vergil", "Lise", "Glenn",
-            "Karyn", "Terance", "Reina", "Jake", "Gordon", "Wisdom", "Isiah", "Gervase", "Fern", "Marylou", "Roddy",
-            "Justy", "Derick", "Shantelle", "Adam", "Chantel", "Madoline", "Emmerson", "Lexie", "Mickey", "Stephen",
-            "Dane", "Stacee", "Elwin", "Tracey", "Alexandra", "Ricky", "Ian", "Kasey", "Rita", "Alanna", "Georgene",
-            "Deon", "Zavier", "Ophelia", "Deforest", "Lowell", "Zubin", "Hardy", "Osmund", "Tabatha", "Debby",
-            "Katlyn", "Tallulah", "Priscilla", "Braden", "Wil", "Keziah", "Jen", "Aggie", "Korbin", "Lemoine",
-            "Barnaby", "Tranter", "Goldie", "Roderick", "Trina", "Emery", "Pris", "Sidony", "Adelle", "Tate", "Wilf",
-            "Zola", "Brande", "Chris", "Calanthia", "Lilly", "Kaycee", "Lashonda", "Jasmin", "Elijah", "Shantel",
-            "Simon", "Rosalind", "Jarod", "Kaylie", "Corrine", "Joselyn", "Archibald", "Mariabella", "Winton",
-            "Merlin", "Chad", "Ursula", "Kristopher", "Hewie", "Adrianna", "Lyndsay", "Jasmyn", "Tim", "Evette",
-            "Margaret", "Samson", "Bronte", "Terence", "Leila", "Candice", "Tori", "Jamey", "Coriander", "Conrad",
-            "Floyd", "Karen", "Lorin", "Maximilian", "Cairo", "Emily", "Yasmin", "Karolyn", "Bryan", "Lanny",
-            "Kimberly", "Rick", "Chaz", "Krystle", "Lyric", "Laura", "Garrick", "Flip", "Monty", "Brendan",
-            "Ermintrude", "Rayner", "Merla", "Titus", "Marva", "Patricia", "Leone", "Tracy", "Jaqueline", "Hallam",
-            "Delores", "Cressida", "Carlyle", "Leann", "Kelcey", "Laurence", "Ryan", "Reynold", "Mark", "Collyn",
-            "Audie", "Sammy", "Ellery", "Sallie", "Pamelia", "Adolph", "Lydia", "Titania", "Ron", "Bridger", "Aline",
-            "Read", "Kelleigh", "Weldon", "Irving", "Garey", "Diggory", "Evander", "Kylee", "Deidre", "Ormond",
-            "Laurine", "Reannon", "Arline", "Pat"
-
-    };
-
-    // Fixed vocabulary of telecom-flavoured terms ("wireless", "signal", "3G", ...).
-    // NOTE(review): the array is public, static and non-final, so any caller can replace or
-    // mutate it; how it is consumed is not visible in this part of the file.
-    public static String[] jargon = { "wireless", "signal", "network", "3G", "plan", "touch-screen",
-            "customer-service", "reachability", "voice-command", "shortcut-menu", "customization", "platform", "speed",
-            "voice-clarity", "voicemail-service" };
-
-    // Fixed list of lower-case carrier / handset brand names.
-    public static String[] vendors = { "at&t", "verizon", "t-mobile", "sprint", "motorola", "samsung", "iphone" };
-
-    // Fixed list of made-up organisation names; capitalisation is intentionally left exactly
-    // as found (some entries start with a lower-case letter).
-    public static String[] org_list = { "Latsonity", "ganjalax", "Zuncan", "Lexitechno", "Hot-tech", "subtam",
-            "Coneflex", "Ganjatax", "physcane", "Tranzap", "Qvohouse", "Zununoing", "jaydax", "Keytech", "goldendexon",
-            "Villa-tech", "Trustbam", "Newcom", "Voltlane", "Ontohothex", "Ranhotfan", "Alphadax", "Transhigh",
-            "kin-ron", "Doublezone", "Solophase", "Vivaace", "silfind", "Basecone", "sonstreet", "Freshfix",
-            "Techitechi", "Kanelectrics", "linedexon", "Goldcity", "Newfase", "Technohow", "Zimcone", "Salthex",
-            "U-ron", "Solfix", "whitestreet", "Xx-technology", "Hexviafind", "over-it", "Strongtone", "Tripplelane",
-            "geomedia", "Scotcity", "Inchex", "Vaiatech", "Striptaxon", "Hatcom", "tresline", "Sanjodax", "freshdox",
-            "Sumlane", "Quadlane", "Newphase", "overtech", "Voltbam", "Icerunin", "Fixdintex", "Hexsanhex", "Statcode",
-            "Greencare", "U-electrics", "Zamcorporation", "Ontotanin", "Tanzimcare", "Groovetex", "Ganjastrip",
-            "Redelectronics", "Dandamace", "Whitemedia", "strongex", "Streettax", "highfax", "Mathtech", "Xx-drill",
-            "Sublamdox", "Unijobam", "Rungozoom", "Fixelectrics", "Villa-dox", "Ransaofan", "Plexlane", "itlab",
-            "Lexicone", "Fax-fax", "Viatechi", "Inchdox", "Kongreen", "Doncare", "Y-geohex", "Opeelectronics",
-            "Medflex", "Dancode", "Roundhex", "Labzatron", "Newhotplus", "Sancone", "Ronholdings", "Quoline",
-            "zoomplus", "Fix-touch", "Codetechno", "Tanzumbam", "Indiex", "Canline" };
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/ExternalFilesIndexOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/ExternalFilesIndexOperatorDescriptor.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/ExternalFilesIndexOperatorDescriptor.java
deleted file mode 100644
index bc73eba..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/ExternalFilesIndexOperatorDescriptor.java
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.FilesIndexDescription;
-import org.apache.asterix.external.indexing.dataflow.FileIndexTupleTranslator;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable;
-import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.storage.am.btree.impls.BTree;
-import org.apache.hyracks.storage.am.common.api.IIndex;
-import org.apache.hyracks.storage.am.common.api.IIndexBulkLoader;
-import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper;
-import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import org.apache.hyracks.storage.am.common.api.IndexException;
-import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
-import org.apache.hyracks.storage.am.lsm.btree.impls.ExternalBTree;
-import org.apache.hyracks.storage.am.lsm.btree.impls.ExternalBTree.LSMTwoPCBTreeBulkLoader;
-import org.apache.hyracks.storage.common.IStorageManagerInterface;
-import org.apache.hyracks.storage.common.file.ILocalResourceFactoryProvider;
-
-/**
- * This operator is intended solely for the replicated files index of an external dataset.
- * Depending on {@code createNewIndex} it either creates the index and bulk-loads it with the
- * given files, or bulk-modifies the existing index, creating a hidden transaction component
- * which might later be committed or deleted by another operator.
- *
- * @author alamouda
- */
-public class ExternalFilesIndexOperatorDescriptor extends AbstractTreeIndexOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-    private final boolean createNewIndex;
-    private final List<ExternalFile> files;
-
-    /**
-     * @param files
-     *            the files to add to (or, per their pending operation, remove from) the index
-     * @param createNewIndex
-     *            {@code true} to create and bulk-load a fresh index, {@code false} to bulk-modify
-     *            the existing one
-     */
-    public ExternalFilesIndexOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
-            IFileSplitProvider fileSplitProvider, IIndexDataflowHelperFactory dataflowHelperFactory,
-            ILocalResourceFactoryProvider localResourceFactoryProvider, List<ExternalFile> files,
-            boolean createNewIndex) {
-        super(spec, 0, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider,
-                new FilesIndexDescription().EXTERNAL_FILE_INDEX_TYPE_TRAITS,
-                new FilesIndexDescription().FILES_INDEX_COMP_FACTORIES, FilesIndexDescription.BLOOM_FILTER_FIELDS,
-                dataflowHelperFactory, null, false, false, null, localResourceFactoryProvider,
-                NoOpOperationCallbackFactory.INSTANCE, NoOpOperationCallbackFactory.INSTANCE);
-        this.createNewIndex = createNewIndex;
-        this.files = files;
-    }
-
-    /**
-     * Builds the runtime for one partition. The returned pushable has no inputs and no outputs;
-     * all of its work happens in {@code initialize()}.
-     */
-    @Override
-    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
-            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
-        final IIndexDataflowHelper indexHelper = getIndexDataflowHelperFactory().createIndexDataflowHelper(this, ctx,
-                partition);
-        return new AbstractOperatorNodePushable() {
-
-            @Override
-            public void initialize() throws HyracksDataException {
-                FileIndexTupleTranslator filesTupleTranslator = new FileIndexTupleTranslator();
-                if (createNewIndex) {
-                    createAndBulkLoad(filesTupleTranslator);
-                } else {
-                    bulkModify(filesTupleTranslator);
-                }
-            }
-
-            /** Creates the index, opens it and bulk-loads one tuple per file. */
-            private void createAndBulkLoad(FileIndexTupleTranslator filesTupleTranslator)
-                    throws HyracksDataException {
-                indexHelper.create();
-                indexHelper.open();
-                try {
-                    IIndex index = indexHelper.getIndexInstance();
-                    IIndexBulkLoader bulkLoader = index.createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false,
-                            files.size(), false);
-                    for (ExternalFile file : files) {
-                        bulkLoader.add(filesTupleTranslator.getTupleFromFile(file));
-                    }
-                    bulkLoader.end();
-                } catch (IndexException | IOException | AsterixException e) {
-                    throw new HyracksDataException(e);
-                } finally {
-                    indexHelper.close();
-                }
-            }
-
-            /**
-             * Opens the existing index and applies each file's pending operation through a
-             * transaction bulk loader; the loader is aborted if anything fails.
-             */
-            private void bulkModify(FileIndexTupleTranslator filesTupleTranslator) throws HyracksDataException {
-                indexHelper.open();
-                LSMTwoPCBTreeBulkLoader bulkLoader = null;
-                try {
-                    // Fetched inside the try so that the helper is closed even if this call fails.
-                    IIndex index = indexHelper.getIndexInstance();
-                    bulkLoader = (LSMTwoPCBTreeBulkLoader) ((ExternalBTree) index).createTransactionBulkLoader(
-                            BTree.DEFAULT_FILL_FACTOR, false, files.size(), false);
-                    // The files must be ordered according to their numbers
-                    for (ExternalFile file : files) {
-                        switch (file.getPendingOp()) {
-                            case PENDING_ADD_OP:
-                            case PENDING_APPEND_OP:
-                                bulkLoader.add(filesTupleTranslator.getTupleFromFile(file));
-                                break;
-                            case PENDING_DROP_OP:
-                                bulkLoader.delete(filesTupleTranslator.getTupleFromFile(file));
-                                break;
-                            default:
-                                // Any other pending-op value leaves the index untouched.
-                                break;
-                        }
-                    }
-                    bulkLoader.end();
-                } catch (IndexException | IOException | AsterixException e) {
-                    if (bulkLoader != null) {
-                        bulkLoader.abort();
-                    }
-                    throw new HyracksDataException(e);
-                } finally {
-                    indexHelper.close();
-                }
-            }
-
-            @Override
-            public void deinitialize() throws HyracksDataException {
-            }
-
-            @Override
-            public int getInputArity() {
-                return 0;
-            }
-
-            @Override
-            public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
-                    throws HyracksDataException {
-            }
-
-            @Override
-            public IFrameWriter getInputFrameWriter(int index) {
-                return null;
-            }
-
-        };
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GULongIDGenerator.java
----------------------------------------------------------------------
diff --git a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GULongIDGenerator.java b/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GULongIDGenerator.java
deleted file mode 100644
index 00f3bcb..0000000
--- a/asterix-tools/src/main/java/org/apache/asterix/tools/external/data/GULongIDGenerator.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.tools.external.data;
-
-import java.nio.ByteBuffer;
-import java.util.concurrent.atomic.AtomicLong;
-
-/**
- * Hands out increasing {@code long} identifiers whose high-order bytes encode a seed and a
- * partition.
- * <p>
- * The base value is the big-endian long made of: the seed byte, the low 8 bits of the
- * partition, and six zero bytes. Each call to {@link #getNextULong()} returns the next value
- * above that base, so the first identifier returned is {@code base + 1}. The counter is an
- * {@link AtomicLong}, so concurrent callers never receive the same value.
- * <p>
- * Only the low 8 bits of the partition take part in the base value: partitions that differ by
- * a multiple of 256 share the same identifier range.
- */
-public class GULongIDGenerator {
-
-    private final int partition;
-    private final long baseValue;
-    private final AtomicLong nextValue;
-
-    /**
-     * @param partition
-     *            partition number; returned unchanged by {@link #getPartition()}, but only its
-     *            low 8 bits are encoded into the generated identifiers
-     * @param seed
-     *            byte placed in the most significant position of every identifier
-     */
-    public GULongIDGenerator(int partition, byte seed) {
-        this.partition = partition;
-        ByteBuffer buffer = ByteBuffer.allocate(8);
-        buffer.put(seed);
-        buffer.put((byte) partition);
-        buffer.putInt(0);
-        buffer.putShort((short) 0);
-        buffer.flip();
-        // Read the primitive directly; no boxing through the deprecated Long constructor.
-        this.baseValue = buffer.getLong();
-        this.nextValue = new AtomicLong(baseValue);
-    }
-
-    /**
-     * @return the next identifier; strictly greater than every value returned before by this
-     *         instance (until the counter wraps)
-     */
-    public long getNextULong() {
-        return nextValue.incrementAndGet();
-    }
-
-    /**
-     * @return the partition number exactly as passed to the constructor
-     */
-    public int getPartition() {
-        return partition;
-    }
-
-}


[14/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/HDFSSeekableLineReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/HDFSSeekableLineReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/HDFSSeekableLineReader.java
deleted file mode 100644
index 7916a16..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/HDFSSeekableLineReader.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.io.Text;
-
-/**
- * A line reader over an {@link FSDataInputStream} that also supports seeking.
- * <p>
- * Data is read from the stream in chunks into an internal buffer. {@link #seek(long)} moves
- * within that buffer when the target position is already buffered ahead of the current one and
- * falls back to seeking the underlying stream otherwise.
- */
-public class HDFSSeekableLineReader {
-    private static final int DEFAULT_BUFFER_SIZE = 32 * 1024;
-    private static final byte CR = '\r';
-    private static final byte LF = '\n';
-
-    public static final String KEY_BUFFER_SIZE = "io.file.buffer.size";
-
-    private final int bufferSize;
-    private final byte[] buffer;
-    private FSDataInputStream reader;
-
-    // the number of bytes of real data in the buffer (never negative)
-    private int bufferLength = 0;
-    // the current position in the buffer
-    private int bufferPosn = 0;
-    // file offset that corresponds to bufferPosn
-    private long currentFilePos = 0L;
-
-    /**
-     * Create a line reader that reads from the given stream using the
-     * default buffer-size (32k).
-     *
-     * @param in
-     *            The input stream
-     * @throws IOException
-     */
-    public HDFSSeekableLineReader(FSDataInputStream in) throws IOException {
-        this(in, DEFAULT_BUFFER_SIZE);
-    }
-
-    /**
-     * Create a line reader that reads from the given stream using the
-     * given buffer-size.
-     *
-     * @param in
-     *            The input stream
-     * @param bufferSize
-     *            Size of the read buffer
-     * @throws IOException
-     */
-    public HDFSSeekableLineReader(FSDataInputStream in, int bufferSize) throws IOException {
-        this.reader = in;
-        this.bufferSize = bufferSize;
-        this.buffer = new byte[this.bufferSize];
-        currentFilePos = in.getPos();
-    }
-
-    /**
-     * Create a line reader with the default buffer-size and no stream attached. A stream has to
-     * be supplied through {@link #resetReader(FSDataInputStream)} before reading or seeking.
-     *
-     * @throws IOException
-     */
-    public HDFSSeekableLineReader() throws IOException {
-        this.bufferSize = DEFAULT_BUFFER_SIZE;
-        this.buffer = new byte[this.bufferSize];
-    }
-
-    /**
-     * Create a line reader that reads from the given stream using the <code>io.file.buffer.size</code> specified in the given <code>Configuration</code>.
-     *
-     * @param in
-     *            input stream
-     * @param conf
-     *            configuration
-     * @throws IOException
-     */
-    public HDFSSeekableLineReader(FSDataInputStream in, Configuration conf) throws IOException {
-        this(in, conf.getInt(KEY_BUFFER_SIZE, DEFAULT_BUFFER_SIZE));
-    }
-
-    /**
-     * Read one line from the InputStream into the given Text. A line
-     * can be terminated by one of the following: '\n' (LF) , '\r' (CR),
-     * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated
-     * line.
-     *
-     * @param str
-     *            the object to store the given line (without newline)
-     * @param maxLineLength
-     *            the maximum number of bytes to store into str;
-     *            the rest of the line is silently discarded.
-     * @param maxBytesToConsume
-     *            the maximum number of bytes to consume
-     *            in this call. This is only a hint, because if the line crosses
-     *            this threshold, we allow it to happen. It can overshoot
-     *            potentially by as much as one buffer length.
-     * @return the number of bytes read including the (longest) newline
-     *         found.
-     * @throws IOException
-     *             if the underlying stream throws
-     */
-    public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
-        /* We're reading data from in, but the head of the stream may be
-         * already buffered in buffer, so we have several cases:
-         * 1. No newline characters are in the buffer, so we need to copy
-         *    everything and read another buffer from the stream.
-         * 2. An unambiguously terminated line is in buffer, so we just
-         *    copy to str.
-         * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
-         *    in CR.  In this case we copy everything up to CR to str, but
-         *    we also need to see what follows CR: if it's LF, then we
-         *    need consume LF as well, so next call to readLine will read
-         *    from after that.
-         * We use a flag prevCharCR to signal if previous character was CR
-         * and, if it happens to be at the end of the buffer, delay
-         * consuming it until we have a chance to look at the char that
-         * follows.
-         */
-        str.clear();
-        int txtLength = 0; //tracks str.getLength(), as an optimization
-        int newlineLength = 0; //length of terminating newline
-        boolean prevCharCR = false; //true if prev char was CR
-        long bytesConsumed = 0;
-        do {
-            int startPosn = bufferPosn; //starting from where we left off the last time
-            if (bufferPosn >= bufferLength) {
-                startPosn = bufferPosn = 0;
-                if (prevCharCR) {
-                    ++bytesConsumed; //account for CR from previous read
-                }
-                bufferLength = reader.read(buffer);
-                if (bufferLength <= 0) {
-                    // EOF: read() reports it as -1. Store 0 instead so that the buffer is
-                    // simply empty and the file position computed below is not off by one.
-                    bufferLength = 0;
-                    break;
-                }
-            }
-            for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
-                if (buffer[bufferPosn] == LF) {
-                    newlineLength = (prevCharCR) ? 2 : 1;
-                    ++bufferPosn; // at next invocation proceed from following byte
-                    break;
-                }
-                if (prevCharCR) { //CR + notLF, we are at notLF
-                    newlineLength = 1;
-                    break;
-                }
-                prevCharCR = (buffer[bufferPosn] == CR);
-            }
-            int readLength = bufferPosn - startPosn;
-            if (prevCharCR && newlineLength == 0) {
-                --readLength; //CR at the end of the buffer
-            }
-            bytesConsumed += readLength;
-            int appendLength = readLength - newlineLength;
-            if (appendLength > maxLineLength - txtLength) {
-                appendLength = maxLineLength - txtLength;
-            }
-            if (appendLength > 0) {
-                str.append(buffer, startPosn, appendLength);
-                txtLength += appendLength;
-            }
-        } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
-
-        if (bytesConsumed > (long) Integer.MAX_VALUE) {
-            throw new IOException("Too many bytes before newline: " + bytesConsumed);
-        }
-        // The stream is positioned at the end of the buffered data; step back over the bytes
-        // that are buffered but not consumed yet.
-        currentFilePos = reader.getPos() - bufferLength + bufferPosn;
-        return (int) bytesConsumed;
-    }
-
-    /**
-     * Read from the InputStream into the given Text.
-     *
-     * @param str
-     *            the object to store the given line
-     * @param maxLineLength
-     *            the maximum number of bytes to store into str.
-     * @return the number of bytes read including the newline
-     * @throws IOException
-     *             if the underlying stream throws
-     */
-    public int readLine(Text str, int maxLineLength) throws IOException {
-        return readLine(str, maxLineLength, Integer.MAX_VALUE);
-    }
-
-    /**
-     * Read from the InputStream into the given Text.
-     *
-     * @param str
-     *            the object to store the given line
-     * @return the number of bytes read including the newline
-     * @throws IOException
-     *             if the underlying stream throws
-     */
-    public int readLine(Text str) throws IOException {
-        return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
-    }
-
-    /**
-     * Position the reader so that the next read starts at the given file offset.
-     *
-     * @param desired
-     *            the target offset in the file
-     * @throws IOException
-     *             if the underlying stream throws
-     */
-    public void seek(long desired) throws IOException {
-        if (reader.getPos() <= desired || currentFilePos > desired) {
-            // desired position is ahead of stream or before the current position, seek to position
-            reader.seek(desired);
-            bufferLength = 0;
-            bufferPosn = 0;
-            currentFilePos = desired;
-        } else if (currentFilePos < desired) {
-            // desired position is in the buffer
-            int difference = (int) (desired - currentFilePos);
-            bufferPosn += difference;
-            currentFilePos = desired;
-        }
-        // desired == currentFilePos: already there, nothing to do
-    }
-
-    /**
-     * @return the underlying stream (may be {@code null} if none has been attached yet)
-     */
-    public FSDataInputStream getReader() {
-        return reader;
-    }
-
-    /**
-     * Attach a different stream and discard anything buffered from the previous one.
-     *
-     * @param reader
-     *            the new input stream
-     * @throws IOException
-     *             if the stream position cannot be obtained
-     */
-    public void resetReader(FSDataInputStream reader) throws IOException {
-        this.reader = reader;
-        bufferLength = 0;
-        bufferPosn = 0;
-        currentFilePos = reader.getPos();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/ILookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/ILookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/ILookupReader.java
deleted file mode 100644
index d48aaf7..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/ILookupReader.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-/**
- * A reader that fetches a single item given a file number and an offset.
- * NOTE(review): the exact meaning of both coordinates is defined by the implementations,
- * which are not visible here.
- */
-public interface ILookupReader {
-
-    /**
-     * Looks up one item.
-     *
-     * @param fileNumber
-     *            number of the file to read from
-     * @param recordOffset
-     *            offset of the record within that file
-     * @return the object that was read
-     * @throws Exception
-     *             if the lookup fails
-     */
-    Object read(int fileNumber, long recordOffset) throws Exception;
-
-    /**
-     * Closes this reader.
-     */
-    void close();
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileDataReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileDataReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileDataReader.java
deleted file mode 100644
index 50853d4..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileDataReader.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-
-/**
- * Reads records, one at a time, from the input splits scheduled to this node.
- * Used in two cases:
- * 1. building an index over a dataset
- * 2. performing full scan over a dataset that has built index (to provide consistent view) with RCFile format
- * <p>
- * Besides the records themselves, the reader tracks the reader position at which the current
- * record group was first observed; see {@link #getReaderPosition()}.
- */
-@SuppressWarnings("rawtypes")
-public class RCFileDataReader extends AbstractHDFSReader {
-
-    private RecordReader reader;
-    private Object key;
-    private Object value;
-    private int currentSplitIndex = 0;
-    private String fileName;
-    private long recordGroupOffset;
-    private long nextRecordGroupOffset;
-    private final boolean[] executed;
-    private final InputSplit[] inputSplits;
-    private final String[] readSchedule;
-    private final String nodeName;
-    private final JobConf conf;
-    private final List<ExternalFile> files;
-    private final FileSystem hadoopFS;
-
-    /**
-     * @param inputSplits
-     *            all splits of the dataset
-     * @param readSchedule
-     *            for each split, the name of the node that should read it
-     * @param nodeName
-     *            the name of the current node
-     * @param conf
-     *            job configuration used to obtain the input format and the file system
-     * @param executed
-     *            per-split "already taken" flags; used as the lock when claiming a split
-     * @param files
-     *            snapshot of the dataset files, indexed like {@code inputSplits}; when
-     *            {@code null} the modification-time check is skipped
-     * @throws IOException
-     *             if the file system cannot be obtained
-     */
-    public RCFileDataReader(InputSplit[] inputSplits, String[] readSchedule, String nodeName, JobConf conf,
-            boolean[] executed, List<ExternalFile> files) throws IOException {
-        this.executed = executed;
-        this.inputSplits = inputSplits;
-        this.readSchedule = readSchedule;
-        this.nodeName = nodeName;
-        this.conf = conf;
-        this.files = files;
-        hadoopFS = FileSystem.get(conf);
-    }
-
-    /**
-     * Releases the current split reader, if any, and opens a reader on the next split that is
-     * scheduled to this node and has not been claimed yet.
-     *
-     * @return {@code true} if a reader was opened, {@code false} if no split is left
-     */
-    private boolean moveToNext() throws IOException {
-        // Whoever calls this is done with the current split; release it before moving on.
-        closeCurrentReader();
-        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
-            /**
-             * read all the partitions scheduled to the current node
-             */
-            if (!readSchedule[currentSplitIndex].equals(nodeName)) {
-                continue;
-            }
-            /**
-             * pick an unread split to read synchronize among
-             * simultaneous partitions in the same machine
-             */
-            synchronized (executed) {
-                if (executed[currentSplitIndex]) {
-                    continue;
-                }
-                executed[currentSplitIndex] = true;
-            }
-
-            /**
-             * read the split
-             */
-            try {
-                if (files != null) {
-                    fileName = ((FileSplit) (inputSplits[currentSplitIndex])).getPath().toUri().getPath();
-                    FileStatus fileStatus = hadoopFS.getFileStatus(new Path(fileName));
-                    //skip if not the same file stored in the files snapshot
-                    if (fileStatus.getModificationTime() != files.get(currentSplitIndex).getLastModefiedTime()
-                            .getTime()) {
-                        continue;
-                    }
-                }
-                reader = getRecordReader(currentSplitIndex);
-                recordGroupOffset = -1;
-                nextRecordGroupOffset = reader.getPos();
-            } catch (Exception e) {
-                // Existing best-effort behaviour: a split that cannot be opened is skipped.
-                // Do not keep a half-initialised reader around.
-                closeCurrentReader();
-                continue;
-            }
-            key = reader.createKey();
-            value = reader.createValue();
-            return true;
-        }
-        return false;
-    }
-
-    /** Closes the current split reader, if there is one, and forgets it. */
-    private void closeCurrentReader() {
-        if (reader == null) {
-            return;
-        }
-        try {
-            reader.close();
-        } catch (IOException ignored) {
-            // The reader is being discarded and was only ever read from; a failure to close
-            // it must not prevent the remaining splits from being read.
-        } finally {
-            reader = null;
-        }
-    }
-
-    @Override
-    public int read(byte[] buffer, int offset, int len) throws IOException {
-        throw new NotImplementedException("Use readNext()");
-    }
-
-    @Override
-    public int read() throws IOException {
-        throw new NotImplementedException("Use readNext()");
-    }
-
-    /** Opens a record reader on the split with the given index. */
-    private RecordReader getRecordReader(int splitIndex) throws IOException {
-        try {
-            return conf.getInputFormat().getRecordReader(inputSplits[splitIndex], conf, getReporter());
-        } catch (Exception e) {
-            // moveToNext() drops this exception, so this trace is the only record of it.
-            e.printStackTrace();
-            throw e;
-        }
-    }
-
-    @Override
-    public boolean initialize() throws IOException {
-        return moveToNext();
-    }
-
-    /**
-     * @return the next record, or {@code null} when no reader is open or every split scheduled
-     *         to this node has been exhausted
-     */
-    @SuppressWarnings("unchecked")
-    @Override
-    public Object readNext() throws IOException {
-        if (reader == null) {
-            return null;
-        }
-        // Try the current split first, then keep advancing until a split yields a record.
-        do {
-            if (reader.next(key, value)) {
-                if (reader.getPos() != nextRecordGroupOffset) {
-                    // the reader position moved: remember where the current group started
-                    recordGroupOffset = nextRecordGroupOffset;
-                    nextRecordGroupOffset = reader.getPos();
-                }
-                return value;
-            }
-        } while (moveToNext());
-        return null;
-    }
-
-    @Override
-    public String getFileName() throws Exception {
-        return files.get(currentSplitIndex).getFileName();
-    }
-
-    @Override
-    public long getReaderPosition() throws Exception {
-        return recordGroupOffset;
-    }
-
-    @Override
-    public int getFileNumber() throws Exception {
-        return files.get(currentSplitIndex).getFileNumber();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileLookupReader.java
deleted file mode 100644
index f312228..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/RCFileLookupReader.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.RCFile.Reader;
-import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Writable;
-
-public class RCFileLookupReader {
-    private FileSystem fs;
-    private Configuration conf;
-    private int fileNumber = -1;
-    private int rowNumber;
-    private long recordGroupOffset;
-    private Reader reader;
-    boolean skipFile = false;
-    private LongWritable rcKey = new LongWritable();
-    private BytesRefArrayWritable rcValue = new BytesRefArrayWritable();
-    private ExternalFile currentFile = new ExternalFile(null, null, 0, null, null, 0L,
-            ExternalFilePendingOp.PENDING_NO_OP);
-    private ExternalFileIndexAccessor filesIndexAccessor;
-
-    public RCFileLookupReader(ExternalFileIndexAccessor filesIndexAccessor, Configuration conf) throws IOException {
-        fs = FileSystem.get(conf);
-        this.conf = conf;
-        this.filesIndexAccessor = filesIndexAccessor;
-    }
-
-    public Writable read(int fileNumber, long recordGroupOffset, int rowNumber) throws Exception {
-        if (fileNumber != this.fileNumber) {
-            filesIndexAccessor.searchForFile(fileNumber, currentFile);
-            try {
-                FileStatus fileStatus = fs.getFileStatus(new Path(currentFile.getFileName()));
-                if (fileStatus.getModificationTime() != currentFile.getLastModefiedTime().getTime()) {
-                    this.fileNumber = fileNumber;
-                    skipFile = true;
-                    return null;
-                } else {
-                    this.fileNumber = fileNumber;
-                    skipFile = false;
-                }
-            } catch (FileNotFoundException e) {
-                // Couldn't find file, skip it
-                this.fileNumber = fileNumber;
-                skipFile = true;
-                return null;
-            }
-            // Close old file and open new one
-            if (reader != null)
-                reader.close();
-            reader = new Reader(fs, new Path(currentFile.getFileName()), conf);
-            this.recordGroupOffset = -1;
-            this.rowNumber = -1;
-        } else if (skipFile) {
-            return null;
-        }
-        // Seek to the record group if needed
-        if (recordGroupOffset != this.recordGroupOffset) {
-            this.recordGroupOffset = recordGroupOffset;
-            if (reader.getPosition() != recordGroupOffset)
-                reader.seek(recordGroupOffset);
-            reader.resetBuffer();
-            this.rowNumber = -1;
-        }
-
-        // skip rows to the record row
-        while (this.rowNumber < rowNumber) {
-            reader.next(rcKey);
-            reader.getCurrentRow(rcValue);
-            this.rowNumber++;
-        }
-        return rcValue;
-    }
-
-    public void close() throws Exception {
-        if (reader != null)
-            reader.close();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupInputStream.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupInputStream.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupInputStream.java
deleted file mode 100644
index e787921..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupInputStream.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.util.ReflectionUtils;
-
-@SuppressWarnings("deprecation")
-public class SequenceFileLookupInputStream extends AbstractHDFSLookupInputStream {
-
-    private SequenceFile.Reader reader;
-    private Writable seqKey;
-    private Text seqValue = new Text();
-    private Configuration conf;
-
-    public SequenceFileLookupInputStream(ExternalFileIndexAccessor fileIndexAccessor, JobConf conf) throws IOException {
-        super(fileIndexAccessor, conf);
-        this.conf = conf;
-    }
-
-    @Override
-    protected void openFile(String fileName) throws IOException {
-        if (reader != null) {
-            reader.close();
-        }
-        reader = new SequenceFile.Reader(fs, new Path(fileName), conf);
-        seqKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
-    }
-
-    @Override
-    public void close() throws IOException {
-        if (reader != null) {
-            reader.close();
-        }
-        super.close();
-    }
-
-    @Override
-    protected boolean read(long recordOffset) {
-        try {
-            reader.seek(recordOffset);
-            reader.next(seqKey, seqValue);
-            pendingValue = seqValue.toString();
-            return true;
-        } catch (IOException e) {
-            // Same Question: seek and read failed afer openning file succeede, should we do something about it?
-            e.printStackTrace();
-            return false;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupReader.java
deleted file mode 100644
index 76b3660..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/SequenceFileLookupReader.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.Reader;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.util.ReflectionUtils;
-
-public class SequenceFileLookupReader implements ILookupReader {
-
-    private Reader reader;
-    private Writable key;
-    private Writable value;
-    private FileSystem fs;
-    private int fileNumber = -1;
-    private boolean skipFile = false;
-    private ExternalFile file = new ExternalFile(null, null, 0, null, null, 0L, ExternalFilePendingOp.PENDING_NO_OP);
-    private ExternalFileIndexAccessor filesIndexAccessor;
-    private Configuration conf;
-
-    public SequenceFileLookupReader(ExternalFileIndexAccessor filesIndexAccessor, Configuration conf)
-            throws IOException {
-        fs = FileSystem.get(conf);
-        this.filesIndexAccessor = filesIndexAccessor;
-        this.conf = conf;
-    }
-
-    @Override
-    public Writable read(int fileNumber, long recordOffset) throws Exception {
-        if (fileNumber != this.fileNumber) {
-            //get file name
-            this.fileNumber = fileNumber;
-            filesIndexAccessor.searchForFile(fileNumber, file);
-            try {
-                FileStatus fileStatus = fs.getFileStatus(new Path(file.getFileName()));
-                if (fileStatus.getModificationTime() != file.getLastModefiedTime().getTime()) {
-                    this.fileNumber = fileNumber;
-                    skipFile = true;
-                    return null;
-                } else {
-                    this.fileNumber = fileNumber;
-                    skipFile = false;
-                    openFile(file.getFileName());
-                }
-            } catch (FileNotFoundException e) {
-                // file was not found, do nothing and skip its tuples
-                this.fileNumber = fileNumber;
-                skipFile = true;
-                return null;
-            }
-        } else if (skipFile) {
-            return null;
-        }
-        reader.seek(recordOffset);
-        reader.next(key, value);
-        return value;
-    }
-
-    @SuppressWarnings("deprecation")
-    private void openFile(String FileName) throws IOException {
-        if (reader != null)
-            try {
-                reader.close();
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        reader = new SequenceFile.Reader(fs, new Path(FileName), conf);
-        key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
-        value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
-    }
-
-    @Override
-    public void close() {
-        if (reader != null)
-            try {
-                reader.close();
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupInputStream.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupInputStream.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupInputStream.java
deleted file mode 100644
index ea82c18..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupInputStream.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-
-public class TextFileLookupInputStream extends AbstractHDFSLookupInputStream {
-
-    private HDFSSeekableLineReader lineReader = new HDFSSeekableLineReader();
-    private Text value = new Text();
-
-    public TextFileLookupInputStream(ExternalFileIndexAccessor filesIndexAccessor, JobConf conf) throws IOException {
-        super(filesIndexAccessor, conf);
-    }
-
-    @Override
-    public void openFile(String FileName) throws IOException {
-        if (lineReader.getReader() != null) {
-            lineReader.getReader().close();
-        }
-        lineReader.resetReader(fs.open(new Path(FileName)));
-    }
-
-    @Override
-    public void close() {
-        if (lineReader.getReader() != null) {
-            try {
-                lineReader.getReader().close();
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-    }
-
-    @Override
-    protected boolean read(long recordOffset) {
-        try {
-            lineReader.seek(recordOffset);
-            lineReader.readLine(value);
-            pendingValue = value.toString();
-            return true;
-        } catch (IOException e) {
-            // file was opened and then when trying to seek and read, an error occurred <- should we throw an exception ???->
-            e.printStackTrace();
-            return false;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupReader.java
deleted file mode 100644
index 5864df2..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextFileLookupReader.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-
-public class TextFileLookupReader implements ILookupReader {
-    private FileSystem fs;
-    private int fileNumber = -1;
-    private boolean skipFile = false;
-    private ExternalFile file = new ExternalFile(null, null, 0, null, null, 0L, ExternalFilePendingOp.PENDING_NO_OP);
-    private ExternalFileIndexAccessor filesIndexAccessor;
-    private HDFSSeekableLineReader lineReader;
-    private Text value = new Text();
-
-    public TextFileLookupReader(ExternalFileIndexAccessor filesIndexAccessor, Configuration conf) throws IOException {
-        this.fs = FileSystem.get(conf);
-        this.filesIndexAccessor = filesIndexAccessor;
-        this.lineReader = new HDFSSeekableLineReader();
-    }
-
-    @Override
-    public String read(int fileNumber, long recordOffset) throws Exception {
-        if (fileNumber != this.fileNumber) {
-            this.fileNumber = fileNumber;
-            filesIndexAccessor.searchForFile(fileNumber, file);
-
-            try {
-                FileStatus fileStatus = fs.getFileStatus(new Path(file.getFileName()));
-                if (fileStatus.getModificationTime() != file.getLastModefiedTime().getTime()) {
-                    this.fileNumber = fileNumber;
-                    skipFile = true;
-                    return null;
-                } else {
-                    this.fileNumber = fileNumber;
-                    skipFile = false;
-                    openFile(file.getFileName());
-                }
-            } catch (FileNotFoundException e) {
-                // File is not there, skip it and do nothing
-                this.fileNumber = fileNumber;
-                skipFile = true;
-                return null;
-            }
-        } else if (skipFile) {
-            return null;
-        }
-        lineReader.seek(recordOffset);
-        lineReader.readLine(value);
-        return value.toString();
-    }
-
-    private void openFile(String FileName) throws IOException {
-        if (lineReader.getReader() != null) {
-            lineReader.getReader().close();
-        }
-        lineReader.resetReader(fs.open(new Path(FileName)));
-    }
-
-    @Override
-    public void close() {
-        if (lineReader.getReader() != null) {
-            try {
-                lineReader.getReader().close();
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualDataReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualDataReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualDataReader.java
deleted file mode 100644
index 5e4f013..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualDataReader.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-
-// Used in two cases:
-// 1. building an index over a dataset
-// 2. performing full scan over a dataset that has built index (to provide consistent view)
-
-@SuppressWarnings("rawtypes")
-public class TextualDataReader extends AbstractHDFSReader {
-
-    private RecordReader<Object, Text> reader;
-    private Object key;
-    private Text value;
-    private boolean hasMore = false;
-    private int EOL = "\n".getBytes()[0];
-    private Text pendingValue = null;
-    private int currentSplitIndex = 0;
-    private String fileName;
-    private long recordOffset;
-    private boolean executed[];
-    private InputSplit[] inputSplits;
-    private String[] readSchedule;
-    private String nodeName;
-    private JobConf conf;
-    private List<ExternalFile> files;
-    private FileSystem hadoopFS;
-
-    public TextualDataReader(InputSplit[] inputSplits, String[] readSchedule, String nodeName, JobConf conf,
-            boolean executed[], List<ExternalFile> files) throws IOException {
-        this.executed = executed;
-        this.inputSplits = inputSplits;
-        this.readSchedule = readSchedule;
-        this.nodeName = nodeName;
-        this.conf = conf;
-        this.files = files;
-        hadoopFS = FileSystem.get(conf);
-    }
-
-    @Override
-    public boolean initialize() throws Exception {
-        return moveToNext();
-    }
-
-    @Override
-    public Object readNext() throws Exception {
-        if (reader == null) {
-            return null;
-        }
-        recordOffset = reader.getPos();
-        if (reader.next(key, value)) {
-            return value;
-        }
-        while (moveToNext()) {
-            recordOffset = reader.getPos();
-            if (reader.next(key, value)) {
-                return value;
-            }
-        }
-        return null;
-    }
-
-    @Override
-    public int getFileNumber() throws Exception {
-        return files.get(currentSplitIndex).getFileNumber();
-    }
-
-    @Override
-    public String getFileName() throws Exception {
-        return files.get(currentSplitIndex).getFileName();
-    }
-
-    @Override
-    public long getReaderPosition() throws Exception {
-        return recordOffset;
-    }
-
-    @Override
-    public int read() throws IOException {
-        throw new NotImplementedException("Use read(byte[], int, int");
-    }
-
-    @SuppressWarnings("unchecked")
-    private boolean moveToNext() throws IOException {
-        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
-            /**
-             * read all the partitions scheduled to the current node
-             */
-            if (readSchedule[currentSplitIndex].equals(nodeName)) {
-                /**
-                 * pick an unread split to read synchronize among
-                 * simultaneous partitions in the same machine
-                 */
-                synchronized (executed) {
-                    if (executed[currentSplitIndex] == false) {
-                        executed[currentSplitIndex] = true;
-                    } else {
-                        continue;
-                    }
-                }
-
-                /**
-                 * read the split
-                 */
-                try {
-                    if (files != null) {
-                        fileName = ((FileSplit) (inputSplits[currentSplitIndex])).getPath().toUri().getPath();
-                        FileStatus fileStatus = hadoopFS.getFileStatus(new Path(fileName));
-                        // Skip if not the same file stored in the files snapshot
-                        if (fileStatus.getModificationTime() != files.get(currentSplitIndex).getLastModefiedTime()
-                                .getTime())
-                            continue;
-                    }
-                    // It is the same file
-                    reader = getRecordReader(currentSplitIndex);
-                } catch (Exception e) {
-                    // ignore exceptions <-- This might change later -->
-                    continue;
-                }
-                key = reader.createKey();
-                value = reader.createValue();
-                return true;
-            }
-        }
-        return false;
-    }
-
-    private RecordReader getRecordReader(int splitIndex) throws IOException {
-        RecordReader reader;
-        if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
-            SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
-            reader = format.getRecordReader(inputSplits[splitIndex], conf, getReporter());
-        } else {
-            TextInputFormat format = (TextInputFormat) conf.getInputFormat();
-            reader = format.getRecordReader(inputSplits[splitIndex], conf, getReporter());
-        }
-        return reader;
-    }
-
-    // Return one record at a time <to preserve the indexing information>
-    @Override
-    public int read(byte[] buffer, int offset, int len) throws IOException {
-        if (reader == null) {
-            if (!moveToNext()) {
-                // nothing to read
-                return -1;
-            }
-        }
-
-        int numBytes = 0;
-        if (pendingValue != null) {
-            int sizeOfNextTuple = pendingValue.getLength() + 1;
-            if (sizeOfNextTuple > len) {
-                return 0;
-            }
-            System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
-            buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
-            numBytes += pendingValue.getLength() + 1;
-            pendingValue = null;
-            return numBytes;
-        }
-        if (numBytes < len) {
-            //store the byte location
-            recordOffset = reader.getPos();
-            hasMore = reader.next(key, value);
-            if (!hasMore) {
-                while (moveToNext()) {
-                    //store the byte location
-                    recordOffset = reader.getPos();
-                    hasMore = reader.next(key, value);
-                    if (hasMore) {
-                        //return the value read
-                        int sizeOfNextTuple = value.getLength() + 1;
-                        if (numBytes + sizeOfNextTuple > len) {
-                            pendingValue = value;
-                            return 0;
-                        } else {
-                            System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
-                            buffer[offset + numBytes + value.getLength()] = (byte) EOL;
-                            numBytes += sizeOfNextTuple;
-                            return numBytes;
-                        }
-                    }
-                }
-                return -1;
-            } else {
-                //return the value read
-                int sizeOfNextTuple = value.getLength() + 1;
-                if (numBytes + sizeOfNextTuple > len) {
-                    pendingValue = value;
-                    return 0;
-                } else {
-                    System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
-                    buffer[offset + numBytes + value.getLength()] = (byte) EOL;
-                    numBytes += sizeOfNextTuple;
-                    return numBytes;
-                }
-            }
-        }
-        return numBytes;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualFullScanDataReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualFullScanDataReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualFullScanDataReader.java
deleted file mode 100644
index 9fe09a2..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/TextualFullScanDataReader.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.Counters.Counter;
-
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-
-public class TextualFullScanDataReader extends InputStream {
-
-    private RecordReader<Object, Text> reader;
-    private Object key;
-    private Text value;
-    private boolean hasMore = false;
-    private int EOL = "\n".getBytes()[0];
-    private Text pendingValue = null;
-    private int currentSplitIndex = 0;
-    private boolean executed[];
-    private InputSplit[] inputSplits;
-    private String[] readSchedule;
-    private String nodeName;
-    private JobConf conf;
-
-    public TextualFullScanDataReader(boolean executed[], InputSplit[] inputSplits, String[] readSchedule,
-            String nodeName, JobConf conf) {
-        this.executed = executed;
-        this.inputSplits = inputSplits;
-        this.readSchedule = readSchedule;
-        this.nodeName = nodeName;
-        this.conf = conf;
-    }
-
-    @Override
-    public int available() {
-        return 1;
-    }
-
-    @SuppressWarnings("unchecked")
-    private boolean moveToNext() throws IOException {
-        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
-            /**
-             * read all the partitions scheduled to the current node
-             */
-            if (readSchedule[currentSplitIndex].equals(nodeName)) {
-                /**
-                 * pick an unread split to read synchronize among
-                 * simultaneous partitions in the same machine
-                 */
-                synchronized (executed) {
-                    if (executed[currentSplitIndex] == false) {
-                        executed[currentSplitIndex] = true;
-                    } else {
-                        continue;
-                    }
-                }
-
-                /**
-                 * read the split
-                 */
-                reader = getRecordReader(currentSplitIndex);
-                key = reader.createKey();
-                value = (Text) reader.createValue();
-                return true;
-            }
-        }
-        return false;
-    }
-
-    @Override
-    public int read(byte[] buffer, int offset, int len) throws IOException {
-        if (reader == null) {
-            if (!moveToNext()) {
-                // nothing to read
-                return -1;
-            }
-        }
-
-        int numBytes = 0;
-        if (pendingValue != null) {
-            int sizeOfNextTuple = pendingValue.getLength() + 1;
-            if (sizeOfNextTuple > len) {
-                return 0;
-            }
-            System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
-            buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
-            numBytes += pendingValue.getLength() + 1;
-            pendingValue = null;
-        }
-
-        while (numBytes < len) {
-            hasMore = reader.next(key, value);
-            if (!hasMore) {
-                while (moveToNext()) {
-                    hasMore = reader.next(key, value);
-                    if (hasMore) {
-                        // move to the next non-empty split
-                        break;
-                    }
-                }
-            }
-            if (!hasMore) {
-                return (numBytes == 0) ? -1 : numBytes;
-            }
-            int sizeOfNextTuple = value.getLength() + 1;
-            if (numBytes + sizeOfNextTuple > len) {
-                // cannot add tuple to current buffer
-                // but the reader has moved pass the fetched tuple
-                // we need to store this for a subsequent read call.
-                // and return this then.
-                pendingValue = value;
-                break;
-            } else {
-                System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
-                buffer[offset + numBytes + value.getLength()] = (byte) EOL;
-                numBytes += sizeOfNextTuple;
-            }
-        }
-        return numBytes;
-    }
-
-    @Override
-    public int read() throws IOException {
-        throw new NotImplementedException("Use read(byte[], int, int");
-    }
-
-    @SuppressWarnings("rawtypes")
-    private RecordReader getRecordReader(int splitIndex) throws IOException {
-        if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
-            SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
-            RecordReader reader = format.getRecordReader((org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex],
-                    conf, getReporter());
-            return reader;
-        } else {
-            TextInputFormat format = (TextInputFormat) conf.getInputFormat();
-            RecordReader reader = format.getRecordReader((org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex],
-                    conf, getReporter());
-            return reader;
-        }
-    }
-
-    private Reporter getReporter() {
-        Reporter reporter = new Reporter() {
-
-            @Override
-            public Counter getCounter(Enum<?> arg0) {
-                return null;
-            }
-
-            @Override
-            public Counter getCounter(String arg0, String arg1) {
-                return null;
-            }
-
-            @Override
-            public InputSplit getInputSplit() throws UnsupportedOperationException {
-                return null;
-            }
-
-            @Override
-            public void incrCounter(Enum<?> arg0, long arg1) {
-            }
-
-            @Override
-            public void incrCounter(String arg0, String arg1, long arg2) {
-            }
-
-            @Override
-            public void setStatus(String arg0) {
-            }
-
-            @Override
-            public void progress() {
-            }
-
-            @Override
-            public float getProgress() {
-                return 0.0f;
-            }
-        };
-
-        return reporter;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java
deleted file mode 100644
index 89abf0f..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import java.util.List;
-
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable;
-import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
-
-// This is an operator that takes a single file index and an array of secondary indexes
-// it is intended to be used for 
-// 1. commit transaction operation
-// 2. abort transaction operation
-// 3. recover transaction operation
-public abstract class AbstractExternalDatasetIndexesOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-    private ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory;
-    private IndexInfoOperatorDescriptor fileIndexInfo;
-    private List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories;
-    private List<IndexInfoOperatorDescriptor> bTreeIndexesInfos;
-    private List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories;
-    private List<IndexInfoOperatorDescriptor> rTreeIndexesInfos;
-
-    public AbstractExternalDatasetIndexesOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
-            IndexInfoOperatorDescriptor fileIndexesInfo,
-            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
-            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
-        super(spec, 0, 0);
-        this.filesIndexDataflowHelperFactory = filesIndexDataflowHelperFactory;
-        this.fileIndexInfo = fileIndexesInfo;
-        this.bTreeIndexesDataflowHelperFactories = bTreeIndexesDataflowHelperFactories;
-        this.bTreeIndexesInfos = bTreeIndexesInfos;
-        this.rTreeIndexesDataflowHelperFactories = rTreeIndexesDataflowHelperFactories;
-        this.rTreeIndexesInfos = rTreeIndexesInfos;
-    }
-
-    // opening and closing the index is done inside these methods since we don't always need open indexes
-    protected abstract void performOpOnIndex(
-            IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
-            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception;
-
-    @Override
-    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
-            IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
-        return new AbstractOperatorNodePushable() {
-
-            @Override
-            public void initialize() throws HyracksDataException {
-                try {
-                    // only in partition of device id = 0, we perform the operation on the files index
-                    if(fileIndexInfo.getFileSplitProvider().getFileSplits()[partition].getIODeviceId() == 0){
-                        performOpOnIndex(filesIndexDataflowHelperFactory, ctx, fileIndexInfo, partition);
-                    }
-                    // perform operation on btrees
-                    for (int i = 0; i < bTreeIndexesDataflowHelperFactories.size(); i++) {
-                        performOpOnIndex(bTreeIndexesDataflowHelperFactories.get(i), ctx,
-                                bTreeIndexesInfos.get(i), partition);
-                    }
-                    // perform operation on rtrees
-                    for (int i = 0; i < rTreeIndexesDataflowHelperFactories.size(); i++) {
-                        performOpOnIndex(rTreeIndexesDataflowHelperFactories.get(i), ctx,
-                                rTreeIndexesInfos.get(i), partition);
-                    }
-                } catch (Exception e) {
-                    // This should never happen <unless there is a hardware failure or something serious>
-                    throw new HyracksDataException(e);
-                }
-            }
-
-            @Override
-            public void deinitialize() throws HyracksDataException {
-            }
-
-            @Override
-            public int getInputArity() {
-                return 0;
-            }
-
-            @Override
-            public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
-                    throws HyracksDataException {
-            }
-
-            @Override
-            public IFrameWriter getInputFrameWriter(int index) {
-                return null;
-            }
-
-        };
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java
deleted file mode 100644
index 6ff991b..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import java.util.List;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.common.impls.AbortRecoverLSMIndexFileManager;
-import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
-
-public class ExternalDatasetIndexesAbortOperatorDescriptor extends AbstractExternalDatasetIndexesOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    public ExternalDatasetIndexesAbortOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
-            IndexInfoOperatorDescriptor fileIndexesInfo,
-            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
-            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
-        super(spec, filesIndexDataflowHelperFactory, fileIndexesInfo, bTreeIndexesDataflowHelperFactories,
-                bTreeIndexesInfos, rTreeIndexesDataflowHelperFactories, rTreeIndexesInfos);
-    }
-
-    @Override
-    protected void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
-            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception {
-        FileReference file = IndexFileNameUtil.getIndexAbsoluteFileRef(fileIndexInfo, partition, ctx.getIOManager());
-        AbortRecoverLSMIndexFileManager fileManager = new AbortRecoverLSMIndexFileManager(file);
-        fileManager.deleteTransactionFiles();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java
deleted file mode 100644
index e89a8db..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import java.util.List;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.storage.am.common.api.IIndex;
-import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.common.api.ITwoPCIndex;
-import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
-
-public class ExternalDatasetIndexesCommitOperatorDescriptor extends AbstractExternalDatasetIndexesOperatorDescriptor {
-
-    public ExternalDatasetIndexesCommitOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
-            IndexInfoOperatorDescriptor fileIndexesInfo,
-            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
-            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
-        super(spec, filesIndexDataflowHelperFactory, fileIndexesInfo, bTreeIndexesDataflowHelperFactories,
-                bTreeIndexesInfos, rTreeIndexesDataflowHelperFactories, rTreeIndexesInfos);
-    }
-
-    private static final long serialVersionUID = 1L;
-
-    @Override
-    protected void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
-            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception {
-        FileReference resourecePath = IndexFileNameUtil.getIndexAbsoluteFileRef(fileIndexInfo, partition, ctx.getIOManager());
-        System.err.println("performing the operation on "+ resourecePath.getFile().getAbsolutePath());
-        // Get DataflowHelper
-        IIndexDataflowHelper indexHelper = indexDataflowHelperFactory.createIndexDataflowHelper(fileIndexInfo, ctx, partition);
-        // Get index
-        IIndex index = indexHelper.getIndexInstance();
-        // commit transaction
-        ((ITwoPCIndex) index).commitTransaction();
-        System.err.println("operation on "+ resourecePath.getFile().getAbsolutePath() + " Succeded");
-
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java
deleted file mode 100644
index 9bdfaa6..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import java.io.File;
-import java.util.List;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.io.FileReference;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
-import org.apache.hyracks.storage.am.lsm.common.impls.AbortRecoverLSMIndexFileManager;
-import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
-
-public class ExternalDatasetIndexesRecoverOperatorDescriptor extends AbstractExternalDatasetIndexesOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    public ExternalDatasetIndexesRecoverOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
-            IndexInfoOperatorDescriptor fileIndexesInfo,
-            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
-            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
-            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
-        super(spec, filesIndexDataflowHelperFactory, fileIndexesInfo, bTreeIndexesDataflowHelperFactories,
-                bTreeIndexesInfos, rTreeIndexesDataflowHelperFactories, rTreeIndexesInfos);
-    }
-
-    @Override
-    protected void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
-            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception {
-        FileReference file = IndexFileNameUtil.getIndexAbsoluteFileRef(fileIndexInfo, partition, ctx.getIOManager());
-        AbortRecoverLSMIndexFileManager fileManager = new AbortRecoverLSMIndexFileManager(file);
-        fileManager.recoverTransaction();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorDescriptor.java
deleted file mode 100644
index f56b3ae..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorDescriptor.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory;
-import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
-import org.apache.hyracks.storage.common.IStorageManagerInterface;
-import org.apache.hyracks.storage.common.file.NoOpLocalResourceFactoryProvider;
-
-public class ExternalIndexBulkModifyOperatorDescriptor extends AbstractTreeIndexOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-    private final int[] deletedFiles;
-    private final int[] fieldPermutation;
-    private final float fillFactor;
-    private final long numElementsHint;
-
-    public ExternalIndexBulkModifyOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
-            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
-            IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields,
-            IIndexDataflowHelperFactory dataflowHelperFactory,
-            IModificationOperationCallbackFactory modificationOpCallbackFactory, int[] deletedFiles,
-            int[] fieldPermutation, float fillFactor, long numElementsHint) {
-        super(spec, 1, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, null, false, false, null,
-                NoOpLocalResourceFactoryProvider.INSTANCE, NoOpOperationCallbackFactory.INSTANCE,
-                modificationOpCallbackFactory);
-        this.deletedFiles = deletedFiles;
-        this.fieldPermutation = fieldPermutation;
-        this.fillFactor = fillFactor;
-        this.numElementsHint = numElementsHint;
-    }
-
-    @Override
-    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
-            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
-        return new ExternalIndexBulkModifyOperatorNodePushable(this, ctx, partition, fieldPermutation, fillFactor,
-                numElementsHint, recordDescProvider, deletedFiles);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
deleted file mode 100644
index a9c9ac7..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.operators;
-
-import java.nio.ByteBuffer;
-
-import org.apache.asterix.external.indexing.FilesIndexDescription;
-import org.apache.asterix.om.base.AMutableInt32;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import org.apache.hyracks.storage.am.common.api.ITwoPCIndexBulkLoader;
-import org.apache.hyracks.storage.am.common.api.IndexException;
-import org.apache.hyracks.storage.am.common.dataflow.IndexBulkLoadOperatorNodePushable;
-import org.apache.hyracks.storage.am.lsm.common.api.ITwoPCIndex;
-
-public class ExternalIndexBulkModifyOperatorNodePushable extends IndexBulkLoadOperatorNodePushable {
-
-    private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
-    private final int[] deletedFiles;
-    private ArrayTupleBuilder buddyBTreeTupleBuilder = new ArrayTupleBuilder(
-            filesIndexDescription.FILE_BUDDY_BTREE_RECORD_DESCRIPTOR.getFieldCount());
-    private AMutableInt32 fileNumber = new AMutableInt32(0);
-    private ArrayTupleReference deleteTuple = new ArrayTupleReference();
-
-    public ExternalIndexBulkModifyOperatorNodePushable(ExternalIndexBulkModifyOperatorDescriptor opDesc,
-            IHyracksTaskContext ctx, int partition, int[] fieldPermutation, float fillFactor, long numElementsHint,
-            IRecordDescriptorProvider recordDescProvider, int[] deletedFiles) {
-        super(opDesc, ctx, partition, fieldPermutation, fillFactor, false, numElementsHint, false, recordDescProvider);
-        this.deletedFiles = deletedFiles;
-    }
-
-    // We override this method to do two things
-    // when creating the bulkLoader, it creates a transaction bulk loader
-    // It uses the bulkLoader to insert delete tuples for the deleted files
-    @Override
-    public void open() throws HyracksDataException {
-        RecordDescriptor recDesc = recDescProvider.getInputRecordDescriptor(opDesc.getActivityId(), 0);
-        accessor = new FrameTupleAccessor(recDesc);
-        indexHelper.open();
-        index = indexHelper.getIndexInstance();
-        try {
-            writer.open();
-            // Transactional BulkLoader
-            bulkLoader = ((ITwoPCIndex) index).createTransactionBulkLoader(fillFactor, verifyInput, deletedFiles.length,
-                    checkIfEmptyIndex);
-            // Delete files
-            for (int i = 0; i < deletedFiles.length; i++) {
-                fileNumber.setValue(deletedFiles[i]);
-                filesIndexDescription.getBuddyBTreeTupleFromFileNumber(deleteTuple, buddyBTreeTupleBuilder, fileNumber);
-                ((ITwoPCIndexBulkLoader) bulkLoader).delete(deleteTuple);
-            }
-        } catch (Throwable e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    @Override
-    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
-        accessor.reset(buffer);
-        int tupleCount = accessor.getTupleCount();
-        for (int i = 0; i < tupleCount; i++) {
-            tuple.reset(accessor, i);
-            try {
-                bulkLoader.add(tuple);
-            } catch (IndexException e) {
-                throw new HyracksDataException(e);
-            }
-        }
-    }
-
-    @Override
-    public void close() throws HyracksDataException {
-        if (index != null) {
-            try {
-                bulkLoader.end();
-            } catch (Throwable th) {
-                throw new HyracksDataException(th);
-            } finally {
-                try {
-                    indexHelper.close();
-                } finally {
-                    writer.close();
-                }
-            }
-        }
-    }
-
-    @Override
-    public void fail() throws HyracksDataException {
-        if (index != null) {
-            try {
-                ((ITwoPCIndexBulkLoader) bulkLoader).abort();
-            } finally {
-                writer.fail();
-            }
-        }
-    }
-}


[11/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunctionProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunctionProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunctionProvider.java
index d0d44e3..d10ff6d 100755
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunctionProvider.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunctionProvider.java
@@ -18,6 +18,9 @@
  */
 package org.apache.asterix.external.library;
 
+import org.apache.asterix.external.api.IExternalFunction;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.om.functions.IExternalFunctionInfo;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalFunction.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalFunction.java
deleted file mode 100755
index 8e4cdbf..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalFunction.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-public interface IExternalFunction {
-
-    public void initialize(IFunctionHelper functionHelper) throws Exception;
-
-    public void deinitialize();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalScalarFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalScalarFunction.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalScalarFunction.java
deleted file mode 100755
index 8e82b83..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IExternalScalarFunction.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-public interface IExternalScalarFunction extends IExternalFunction {
-
-    public void evaluate(IFunctionHelper functionHelper) throws Exception;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionFactory.java
deleted file mode 100755
index cffdec6..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionFactory.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-public interface IFunctionFactory {
-
-    public IExternalFunction getExternalFunction();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionHelper.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionHelper.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionHelper.java
deleted file mode 100755
index 06d8f4a..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IFunctionHelper.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.library.java.IJObject;
-import org.apache.asterix.external.library.java.JTypeTag;
-
-import java.io.IOException;
-
-public interface IFunctionHelper {
-
-    public IJObject getArgument(int index);
-
-    public IJObject getResultObject();
-
-    public void setResult(IJObject result) throws IOException, AsterixException;
-
-    public boolean isValidResult();
-
-    public IJObject getObject(JTypeTag jtypeTag);
-
-    public void reset();
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/IResultCollector.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IResultCollector.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/IResultCollector.java
deleted file mode 100755
index 3efdad6..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/IResultCollector.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-import java.io.DataOutput;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.om.base.AOrderedList;
-import org.apache.asterix.om.base.ARecord;
-import org.apache.asterix.om.base.IAObject;
-
-public interface IResultCollector {
-
-    public void writeIntResult(int result) throws AsterixException;
-
-    public void writeFloatResult(float result) throws AsterixException;
-
-    public void writeDoubleResult(double result) throws AsterixException;
-
-    public void writeStringResult(String result) throws AsterixException;
-
-    public void writeRecordResult(ARecord result) throws AsterixException;
-
-    public void writeListResult(AOrderedList list) throws AsterixException;
-
-    public IAObject getComplexTypeResultHolder();
-
-    public DataOutput getDataOutput();
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/JTypeObjectFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/JTypeObjectFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/JTypeObjectFactory.java
index 677ed76..cdd29ff 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/JTypeObjectFactory.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/JTypeObjectFactory.java
@@ -20,7 +20,7 @@ package org.apache.asterix.external.library;
 
 import java.util.List;
 
-import org.apache.asterix.external.library.java.IJObject;
+import org.apache.asterix.external.api.IJObject;
 import org.apache.asterix.external.library.java.JObjects.JBoolean;
 import org.apache.asterix.external.library.java.JObjects.JCircle;
 import org.apache.asterix.external.library.java.JObjects.JDate;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/JavaFunctionHelper.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/JavaFunctionHelper.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/JavaFunctionHelper.java
index dc80fda..13ea589 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/JavaFunctionHelper.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/JavaFunctionHelper.java
@@ -19,7 +19,8 @@
 package org.apache.asterix.external.library;
 
 import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.library.java.IJObject;
+import org.apache.asterix.external.api.IFunctionHelper;
+import org.apache.asterix.external.api.IJObject;
 import org.apache.asterix.external.library.java.JObjectPointableVisitor;
 import org.apache.asterix.external.library.java.JObjects.JNull;
 import org.apache.asterix.external.library.java.JTypeTag;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/ResultCollector.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/ResultCollector.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/ResultCollector.java
index 2671f13..192bd4e 100755
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/ResultCollector.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/ResultCollector.java
@@ -21,6 +21,7 @@ package org.apache.asterix.external.library;
 import java.io.DataOutput;
 
 import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IResultCollector;
 import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
 import org.apache.asterix.om.base.AMutableDouble;
 import org.apache.asterix.om.base.AMutableFloat;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/TypeInfo.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/TypeInfo.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/TypeInfo.java
index 062bfd7..453cf39 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/TypeInfo.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/TypeInfo.java
@@ -18,7 +18,7 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.java.IJObject;
+import org.apache.asterix.external.api.IJObject;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.IAType;
 import org.apache.asterix.om.util.container.IObjectPool;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJListAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJListAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJListAccessor.java
deleted file mode 100644
index a467721..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJListAccessor.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library.java;
-
-import org.apache.asterix.om.pointables.AListVisitablePointable;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.container.IObjectPool;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-public interface IJListAccessor {
-
-    IJObject access(AListVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool, IAType listType,
-            JObjectPointableVisitor pointableVisitor) throws HyracksDataException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObject.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObject.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObject.java
deleted file mode 100644
index ffeacd4..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObject.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library.java;
-
-import java.io.DataOutput;
-
-import org.apache.asterix.om.base.IAObject;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-public interface IJObject {
-
-    public ATypeTag getTypeTag();
-
-    public IAObject getIAObject();
-
-    public void serialize(DataOutput dataOutput, boolean writeTypeTag) throws HyracksDataException;
-
-    public void reset() throws AlgebricksException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObjectAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObjectAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObjectAccessor.java
deleted file mode 100644
index e05c8b0..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJObjectAccessor.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library.java;
-
-import org.apache.asterix.om.pointables.base.IVisitablePointable;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.container.IObjectPool;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-public interface IJObjectAccessor {
-    IJObject access(IVisitablePointable pointable, IObjectPool<IJObject, IAType> obj) throws HyracksDataException;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJRecordAccessor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJRecordAccessor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJRecordAccessor.java
deleted file mode 100644
index ab22531..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJRecordAccessor.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library.java;
-
-import org.apache.asterix.external.library.java.JObjects.JRecord;
-import org.apache.asterix.om.pointables.ARecordVisitablePointable;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.container.IObjectPool;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-public interface IJRecordAccessor {
-
-    public JRecord access(ARecordVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool, ARecordType recordType,
-            JObjectPointableVisitor pointableVisitor) throws HyracksDataException;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJType.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJType.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJType.java
deleted file mode 100644
index 6fab922..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/IJType.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library.java;
-
-import org.apache.asterix.om.base.IAObject;
-import org.apache.asterix.om.types.ATypeTag;
-
-public interface IJType {
-
-    public ATypeTag getTypeTag();
-
-    public IAObject getIAObject();
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
index 29ce728..1835739 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
@@ -43,6 +43,10 @@ import org.apache.asterix.dataflow.data.nontagged.serde.APointSerializerDeserial
 import org.apache.asterix.dataflow.data.nontagged.serde.APolygonSerializerDeserializer;
 import org.apache.asterix.dataflow.data.nontagged.serde.ARectangleSerializerDeserializer;
 import org.apache.asterix.dataflow.data.nontagged.serde.ATimeSerializerDeserializer;
+import org.apache.asterix.external.api.IJListAccessor;
+import org.apache.asterix.external.api.IJObject;
+import org.apache.asterix.external.api.IJObjectAccessor;
+import org.apache.asterix.external.api.IJRecordAccessor;
 import org.apache.asterix.external.library.TypeInfo;
 import org.apache.asterix.external.library.java.JObjects.JBoolean;
 import org.apache.asterix.external.library.java.JObjects.JByte;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectPointableVisitor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectPointableVisitor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectPointableVisitor.java
index e8bef5a..1a75115 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectPointableVisitor.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectPointableVisitor.java
@@ -22,6 +22,10 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IJListAccessor;
+import org.apache.asterix.external.api.IJObject;
+import org.apache.asterix.external.api.IJObjectAccessor;
+import org.apache.asterix.external.api.IJRecordAccessor;
 import org.apache.asterix.external.library.TypeInfo;
 import org.apache.asterix.external.library.java.JObjectAccessors.JListAccessor;
 import org.apache.asterix.external.library.java.JObjectAccessors.JRecordAccessor;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
index 146a247..93b4bf1 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
@@ -26,6 +26,7 @@ import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.dataflow.data.nontagged.serde.AInt32SerializerDeserializer;
 import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.dataflow.data.nontagged.serde.SerializerDeserializerUtil;
+import org.apache.asterix.external.api.IJObject;
 import org.apache.asterix.external.library.java.JObjects.ByteArrayAccessibleDataInputStream;
 import org.apache.asterix.external.library.java.JObjects.JBoolean;
 import org.apache.asterix.external.library.java.JObjects.JCircle;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
index e9a63b2..42b0742 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
@@ -53,6 +53,7 @@ import org.apache.asterix.dataflow.data.nontagged.serde.APolygonSerializerDeseri
 import org.apache.asterix.dataflow.data.nontagged.serde.ARectangleSerializerDeserializer;
 import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
 import org.apache.asterix.dataflow.data.nontagged.serde.ATimeSerializerDeserializer;
+import org.apache.asterix.external.api.IJObject;
 import org.apache.asterix.om.base.ABoolean;
 import org.apache.asterix.om.base.ADouble;
 import org.apache.asterix.om.base.AFloat;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java
new file mode 100644
index 0000000..371c603
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/AbstractExternalDatasetIndexesOperatorDescriptor.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.util.List;
+
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable;
+import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
+
+/**
+ * Operator over a single files index plus lists of secondary B-tree and R-tree indexes.
+ * It is intended to be used for the commit, abort and recover transaction operations:
+ * subclasses supply the per-index step by implementing {@code performOpOnIndex}.
+ */
+public abstract class AbstractExternalDatasetIndexesOperatorDescriptor
+        extends AbstractSingleActivityOperatorDescriptor {
+
+    private static final long serialVersionUID = 1L;
+    private ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory;
+    private IndexInfoOperatorDescriptor fileIndexInfo;
+    private List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories;
+    private List<IndexInfoOperatorDescriptor> bTreeIndexesInfos;
+    private List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories;
+    private List<IndexInfoOperatorDescriptor> rTreeIndexesInfos;
+    // Each factory list is read in step with its infos list: entry i of one is used with entry i of the other.
+    public AbstractExternalDatasetIndexesOperatorDescriptor(IOperatorDescriptorRegistry spec,
+            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
+            IndexInfoOperatorDescriptor fileIndexesInfo,
+            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
+            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
+        super(spec, 0, 0);
+        this.filesIndexDataflowHelperFactory = filesIndexDataflowHelperFactory;
+        this.fileIndexInfo = fileIndexesInfo;
+        this.bTreeIndexesDataflowHelperFactories = bTreeIndexesDataflowHelperFactories;
+        this.bTreeIndexesInfos = bTreeIndexesInfos;
+        this.rTreeIndexesDataflowHelperFactories = rTreeIndexesDataflowHelperFactories;
+        this.rTreeIndexesInfos = rTreeIndexesInfos;
+    }
+
+    // Implementations open and close the index themselves, since not every operation needs an open index.
+    protected abstract void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory,
+            IHyracksTaskContext ctx, IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception;
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
+        return new AbstractOperatorNodePushable() {
+
+            @Override
+            public void initialize() throws HyracksDataException {
+                try {
+                    // The files index is handled only by a partition whose file split is on IO device 0.
+                    if (fileIndexInfo.getFileSplitProvider().getFileSplits()[partition].getIODeviceId() == 0) {
+                        performOpOnIndex(filesIndexDataflowHelperFactory, ctx, fileIndexInfo, partition);
+                    }
+                    // Every partition applies the operation to each secondary B-tree index...
+                    for (int i = 0; i < bTreeIndexesDataflowHelperFactories.size(); i++) {
+                        performOpOnIndex(bTreeIndexesDataflowHelperFactories.get(i), ctx, bTreeIndexesInfos.get(i),
+                                partition);
+                    }
+                    // ...and then to each secondary R-tree index.
+                    for (int i = 0; i < rTreeIndexesDataflowHelperFactories.size(); i++) {
+                        performOpOnIndex(rTreeIndexesDataflowHelperFactories.get(i), ctx, rTreeIndexesInfos.get(i),
+                                partition);
+                    }
+                } catch (Exception e) {
+                    throw new HyracksDataException(e); // any failure is wrapped, keeping the cause
+                }
+            }
+
+            @Override
+            public void deinitialize() throws HyracksDataException {
+            } // no-op
+
+            @Override
+            public int getInputArity() {
+                return 0;
+            }
+
+            @Override
+            public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
+                    throws HyracksDataException {
+            } // no-op: the writer is ignored
+
+            @Override
+            public IFrameWriter getInputFrameWriter(int index) {
+                return null; // no inputs: getInputArity() is 0
+            }
+
+        };
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorDescriptor.java
new file mode 100644
index 0000000..26c9c7d
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorDescriptor.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
+import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.common.IStorageManagerInterface;
+
+public class ExternalBTreeSearchOperatorDescriptor extends BTreeSearchOperatorDescriptor {
+    // Search descriptor that builds ExternalBTreeSearchOperatorNodePushable as its runtime.
+    private static final long serialVersionUID = 1L;
+
+    public ExternalBTreeSearchOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
+            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
+            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
+            IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] lowKeyFields,
+            int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive,
+            IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput, boolean retainNull,
+            INullWriterFactory iNullWriterFactory, ISearchOperationCallbackFactory searchOpCallbackProvider) {
+        super(spec, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
+                comparatorFactories, bloomFilterKeyFields, lowKeyFields, highKeyFields, lowKeyInclusive,
+                highKeyInclusive, dataflowHelperFactory, retainInput, retainNull, iNullWriterFactory,
+                searchOpCallbackProvider, null, null); // NOTE(review): meaning of the two trailing nulls - confirm
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
+        return new ExternalBTreeSearchOperatorNodePushable(this, ctx, partition, recordDescProvider, lowKeyFields,
+                highKeyFields, lowKeyInclusive, highKeyInclusive); // nPartitions is not used here
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorNodePushable.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorNodePushable.java
new file mode 100644
index 0000000..0513f9c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalBTreeSearchOperatorNodePushable.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorNodePushable;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallback;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelper;
+import org.apache.hyracks.storage.am.lsm.btree.impls.ExternalBTreeWithBuddy;
+
+public class ExternalBTreeSearchOperatorNodePushable extends BTreeSearchOperatorNodePushable {
+    // Search runtime that reads an ExternalBTreeWithBuddy index at the dataflow helper's target version.
+    public ExternalBTreeSearchOperatorNodePushable(ExternalBTreeSearchOperatorDescriptor opDesc,
+            IHyracksTaskContext ctx, int partition, IRecordDescriptorProvider recordDescProvider, int[] lowKeyFields,
+            int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive) {
+        super(opDesc, ctx, partition, recordDescProvider, lowKeyFields, highKeyFields, lowKeyInclusive,
+                highKeyInclusive, null, null);
+    }
+
+    // Opens the writer and the index, then prepares the search state; any failure surfaces as HyracksDataException.
+    @Override
+    public void open() throws HyracksDataException {
+        writer.open();
+        ExternalBTreeWithBuddyDataflowHelper dataFlowHelper = (ExternalBTreeWithBuddyDataflowHelper) indexHelper;
+        accessor = new FrameTupleAccessor(inputRecDesc);
+        dataFlowHelper.open();
+        index = indexHelper.getIndexInstance();
+        if (retainNull) {
+            int fieldCount = getFieldCount();
+            nullTupleBuild = new ArrayTupleBuilder(fieldCount);
+            DataOutput out = nullTupleBuild.getDataOutput();
+            for (int i = 0; i < fieldCount; i++) {
+                try {
+                    nullWriter.writeNull(out);
+                } catch (IOException e) {
+                    throw new HyracksDataException(e); // do not carry on with a half-written null tuple
+                }
+                nullTupleBuild.addFieldEndOffset();
+            }
+        } else {
+            nullTupleBuild = null;
+        }
+        ExternalBTreeWithBuddy externalIndex = (ExternalBTreeWithBuddy) index;
+        try {
+            searchPred = createSearchPredicate();
+            tb = new ArrayTupleBuilder(recordDesc.getFieldCount());
+            dos = tb.getDataOutput();
+            appender = new FrameTupleAppender(new VSizeFrame(ctx));
+            ISearchOperationCallback searchCallback = opDesc.getSearchOpCallbackFactory()
+                    .createSearchOperationCallback(indexHelper.getResourceID(), ctx);
+            // This is the reason for the override: the accessor is created for the helper's target version.
+            indexAccessor = externalIndex.createAccessor(searchCallback, dataFlowHelper.getTargetVersion());
+            cursor = createCursor();
+            if (retainInput) {
+                frameTuple = new FrameTupleReference();
+            }
+        } catch (Throwable th) {
+            throw new HyracksDataException(th);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java
new file mode 100644
index 0000000..5255257
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesAbortOperatorDescriptor.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.util.List;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.common.impls.AbortRecoverLSMIndexFileManager;
+import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
+
+public class ExternalDatasetIndexesAbortOperatorDescriptor extends AbstractExternalDatasetIndexesOperatorDescriptor {
+    // Abort step: calls AbortRecoverLSMIndexFileManager.deleteTransactionFiles() for each index.
+    private static final long serialVersionUID = 1L;
+
+    public ExternalDatasetIndexesAbortOperatorDescriptor(IOperatorDescriptorRegistry spec,
+            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
+            IndexInfoOperatorDescriptor fileIndexesInfo,
+            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
+            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
+        super(spec, filesIndexDataflowHelperFactory, fileIndexesInfo, bTreeIndexesDataflowHelperFactories,
+                bTreeIndexesInfos, rTreeIndexesDataflowHelperFactories, rTreeIndexesInfos);
+    }
+
+    @Override
+    protected void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
+            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception {
+        FileReference file = IndexFileNameUtil.getIndexAbsoluteFileRef(fileIndexInfo, partition, ctx.getIOManager());
+        AbortRecoverLSMIndexFileManager fileManager = new AbortRecoverLSMIndexFileManager(file);
+        fileManager.deleteTransactionFiles(); // indexDataflowHelperFactory is unused: only the file reference is
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java
new file mode 100644
index 0000000..71ffef8
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesCommitOperatorDescriptor.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.util.List;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.storage.am.common.api.IIndex;
+import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.common.api.ITwoPCIndex;
+import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
+
+public class ExternalDatasetIndexesCommitOperatorDescriptor extends AbstractExternalDatasetIndexesOperatorDescriptor {
+    // Commit step: calls ITwoPCIndex.commitTransaction() on each index of the dataset.
+    private static final long serialVersionUID = 1L;
+    private static final java.util.logging.Logger LOGGER = java.util.logging.Logger
+            .getLogger(ExternalDatasetIndexesCommitOperatorDescriptor.class.getName());
+
+    public ExternalDatasetIndexesCommitOperatorDescriptor(IOperatorDescriptorRegistry spec,
+            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
+            IndexInfoOperatorDescriptor fileIndexesInfo,
+            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
+            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
+        super(spec, filesIndexDataflowHelperFactory, fileIndexesInfo, bTreeIndexesDataflowHelperFactories,
+                bTreeIndexesInfos, rTreeIndexesDataflowHelperFactories, rTreeIndexesInfos);
+    }
+
+    // Looks up the partition's index through the dataflow helper and calls commitTransaction() on it.
+    @Override
+    protected void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
+            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception {
+        FileReference file = IndexFileNameUtil.getIndexAbsoluteFileRef(fileIndexInfo, partition, ctx.getIOManager());
+        String path = file.getFile().getAbsolutePath();
+        LOGGER.info("Committing external index transaction on " + path);
+        IIndexDataflowHelper indexHelper = indexDataflowHelperFactory.createIndexDataflowHelper(fileIndexInfo, ctx,
+                partition);
+        IIndex index = indexHelper.getIndexInstance();
+        ((ITwoPCIndex) index).commitTransaction(); // ClassCastException if the index is not an ITwoPCIndex
+        LOGGER.info("Committed external index transaction on " + path);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java
new file mode 100644
index 0000000..59ad076
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalDatasetIndexesRecoverOperatorDescriptor.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory;
+import org.apache.hyracks.storage.am.lsm.common.impls.AbortRecoverLSMIndexFileManager;
+import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
+
+public class ExternalDatasetIndexesRecoverOperatorDescriptor extends AbstractExternalDatasetIndexesOperatorDescriptor {
+    // Recover step: calls AbortRecoverLSMIndexFileManager.recoverTransaction() for each index.
+    private static final long serialVersionUID = 1L;
+
+    public ExternalDatasetIndexesRecoverOperatorDescriptor(IOperatorDescriptorRegistry spec,
+            ExternalBTreeDataflowHelperFactory filesIndexDataflowHelperFactory,
+            IndexInfoOperatorDescriptor fileIndexesInfo,
+            List<ExternalBTreeWithBuddyDataflowHelperFactory> bTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> bTreeIndexesInfos,
+            List<ExternalRTreeDataflowHelperFactory> rTreeIndexesDataflowHelperFactories,
+            List<IndexInfoOperatorDescriptor> rTreeIndexesInfos) {
+        super(spec, filesIndexDataflowHelperFactory, fileIndexesInfo, bTreeIndexesDataflowHelperFactories,
+                bTreeIndexesInfos, rTreeIndexesDataflowHelperFactories, rTreeIndexesInfos);
+    }
+
+    @Override
+    protected void performOpOnIndex(IIndexDataflowHelperFactory indexDataflowHelperFactory, IHyracksTaskContext ctx,
+            IndexInfoOperatorDescriptor fileIndexInfo, int partition) throws Exception {
+        FileReference file = IndexFileNameUtil.getIndexAbsoluteFileRef(fileIndexInfo, partition, ctx.getIOManager());
+        AbortRecoverLSMIndexFileManager fileManager = new AbortRecoverLSMIndexFileManager(file);
+        fileManager.recoverTransaction(); // indexDataflowHelperFactory is unused: only the file reference is
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalFilesIndexOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalFilesIndexOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalFilesIndexOperatorDescriptor.java
new file mode 100644
index 0000000..0d51956
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalFilesIndexOperatorDescriptor.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.indexing.FileIndexTupleTranslator;
+import org.apache.asterix.external.indexing.FilesIndexDescription;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable;
+import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.storage.am.btree.impls.BTree;
+import org.apache.hyracks.storage.am.common.api.IIndex;
+import org.apache.hyracks.storage.am.common.api.IIndexBulkLoader;
+import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper;
+import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import org.apache.hyracks.storage.am.common.api.IndexException;
+import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
+import org.apache.hyracks.storage.am.lsm.btree.impls.ExternalBTree;
+import org.apache.hyracks.storage.am.lsm.btree.impls.ExternalBTree.LSMTwoPCBTreeBulkLoader;
+import org.apache.hyracks.storage.common.IStorageManagerInterface;
+import org.apache.hyracks.storage.common.file.ILocalResourceFactoryProvider;
+
+/**
+ * This operator is intended solely for the replicated files index of an external dataset.
+ * It either creates and bulk-loads the index when used for a new index,
+ * or bulk-modifies the index, creating a hidden transaction component which may later be committed or deleted by another operator.
+ *
+ * @author alamouda
+ */
+public class ExternalFilesIndexOperatorDescriptor extends AbstractTreeIndexOperatorDescriptor {
+
+    private static final long serialVersionUID = 1L;
+    // true: create the index and bulk-load it; false: bulk-modify it through a transaction bulk loader
+    private final boolean createNewIndex;
+    // the external files whose tuples are loaded into (or deleted from) the files index
+    private final List<ExternalFile> files;
+
+    /**
+     * @param files
+     *            the external files to load into, or apply as pending operations on, the files index
+     * @param createNewIndex
+     *            {@code true} to create and bulk-load a new index, {@code false} to bulk-modify an existing one
+     */
+    public ExternalFilesIndexOperatorDescriptor(IOperatorDescriptorRegistry spec,
+            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
+            IFileSplitProvider fileSplitProvider, IIndexDataflowHelperFactory dataflowHelperFactory,
+            ILocalResourceFactoryProvider localResourceFactoryProvider, List<ExternalFile> files, boolean createNewIndex) {
+        super(spec, 0, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider,
+                new FilesIndexDescription().EXTERNAL_FILE_INDEX_TYPE_TRAITS,
+                new FilesIndexDescription().FILES_INDEX_COMP_FACTORIES, FilesIndexDescription.BLOOM_FILTER_FIELDS,
+                dataflowHelperFactory, null, false, false, null, localResourceFactoryProvider,
+                NoOpOperationCallbackFactory.INSTANCE, NoOpOperationCallbackFactory.INSTANCE);
+        this.createNewIndex = createNewIndex;
+        this.files = files;
+    }
+
+    /**
+     * Creates a push runtime with no input that does all of its work in {@code initialize()}: depending on
+     * {@code createNewIndex} it either creates and bulk-loads the index or bulk-modifies it.
+     */
+    @Override
+    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+        final IIndexDataflowHelper indexHelper = getIndexDataflowHelperFactory().createIndexDataflowHelper(this, ctx,
+                partition);
+        return new AbstractOperatorNodePushable() {
+
+            @Override
+            public void initialize() throws HyracksDataException {
+                FileIndexTupleTranslator filesTupleTranslator = new FileIndexTupleTranslator();
+                if (createNewIndex) {
+                    bulkLoadNewIndex(filesTupleTranslator);
+                } else {
+                    bulkModifyExistingIndex(filesTupleTranslator);
+                }
+            }
+
+            // Creates the index, bulk-loads one tuple per file into it, and always closes the helper.
+            private void bulkLoadNewIndex(FileIndexTupleTranslator filesTupleTranslator)
+                    throws HyracksDataException {
+                // Create
+                indexHelper.create();
+                // Open and get
+                indexHelper.open();
+                try {
+                    IIndex index = indexHelper.getIndexInstance();
+                    // Create bulk loader
+                    IIndexBulkLoader bulkLoader = index.createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false,
+                            files.size(), false);
+                    // Load files
+                    for (ExternalFile file : files) {
+                        bulkLoader.add(filesTupleTranslator.getTupleFromFile(file));
+                    }
+                    bulkLoader.end();
+                } catch (IndexException | IOException | AsterixException e) {
+                    throw new HyracksDataException(e);
+                } finally {
+                    indexHelper.close();
+                }
+            }
+
+            // Applies the pending operation of each file through a transaction bulk loader.
+            // The loader is aborted whenever the load does not reach the end, including on unchecked
+            // exceptions, and the helper is always closed afterwards.
+            @SuppressWarnings("incomplete-switch")
+            private void bulkModifyExistingIndex(FileIndexTupleTranslator filesTupleTranslator)
+                    throws HyracksDataException {
+                // Open and get
+                indexHelper.open();
+                LSMTwoPCBTreeBulkLoader bulkLoader = null;
+                boolean completed = false;
+                try {
+                    IIndex index = indexHelper.getIndexInstance();
+                    bulkLoader = (LSMTwoPCBTreeBulkLoader) ((ExternalBTree) index).createTransactionBulkLoader(
+                            BTree.DEFAULT_FILL_FACTOR, false, files.size(), false);
+                    // Load files
+                    // The files must be ordered according to their numbers
+                    for (ExternalFile file : files) {
+                        // files with any other pending op are deliberately skipped (no default branch)
+                        switch (file.getPendingOp()) {
+                            case PENDING_ADD_OP:
+                            case PENDING_APPEND_OP:
+                                bulkLoader.add(filesTupleTranslator.getTupleFromFile(file));
+                                break;
+                            case PENDING_DROP_OP:
+                                bulkLoader.delete(filesTupleTranslator.getTupleFromFile(file));
+                                break;
+                        }
+                    }
+                    bulkLoader.end();
+                    completed = true;
+                } catch (IndexException | IOException | AsterixException e) {
+                    throw new HyracksDataException(e);
+                } finally {
+                    try {
+                        // abort on every failure path, not only on the checked exceptions caught above
+                        if (!completed && bulkLoader != null) {
+                            bulkLoader.abort();
+                        }
+                    } finally {
+                        indexHelper.close();
+                    }
+                }
+            }
+
+            @Override
+            public void deinitialize() throws HyracksDataException {
+                // nothing to release: initialize() closes the index helper itself
+            }
+
+            @Override
+            public int getInputArity() {
+                return 0;
+            }
+
+            @Override
+            public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
+                    throws HyracksDataException {
+                // intentionally empty: the given writer is ignored
+            }
+
+            @Override
+            public IFrameWriter getInputFrameWriter(int index) {
+                return null;
+            }
+
+        };
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorDescriptor.java
new file mode 100644
index 0000000..1476c14
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorDescriptor.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory;
+import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
+import org.apache.hyracks.storage.common.IStorageManagerInterface;
+import org.apache.hyracks.storage.common.file.NoOpLocalResourceFactoryProvider;
+
+/**
+ * Operator descriptor whose push runtime is an {@link ExternalIndexBulkModifyOperatorNodePushable}.
+ * It stores the deleted-file numbers, field permutation, fill factor and element-count hint supplied at
+ * construction and hands them to every runtime it creates.
+ */
+public class ExternalIndexBulkModifyOperatorDescriptor extends AbstractTreeIndexOperatorDescriptor {
+
+    private static final long serialVersionUID = 1L;
+    private final int[] deletedFiles;
+    private final int[] fieldPermutation;
+    private final float fillFactor;
+    private final long numElementsHint;
+
+    /**
+     * Forwards the index-related arguments to the superclass and keeps the bulk-modify specific ones
+     * ({@code deletedFiles}, {@code fieldPermutation}, {@code fillFactor}, {@code numElementsHint}) as fields.
+     * The arrays are stored by reference, not copied.
+     */
+    public ExternalIndexBulkModifyOperatorDescriptor(IOperatorDescriptorRegistry spec,
+            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
+            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
+            IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields,
+            IIndexDataflowHelperFactory dataflowHelperFactory,
+            IModificationOperationCallbackFactory modificationOpCallbackFactory, int[] deletedFiles,
+            int[] fieldPermutation, float fillFactor, long numElementsHint) {
+        super(spec, 1, 0, null, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
+                comparatorFactories, bloomFilterKeyFields, dataflowHelperFactory, null, false, false, null,
+                NoOpLocalResourceFactoryProvider.INSTANCE, NoOpOperationCallbackFactory.INSTANCE,
+                modificationOpCallbackFactory);
+        this.numElementsHint = numElementsHint;
+        this.fillFactor = fillFactor;
+        this.fieldPermutation = fieldPermutation;
+        this.deletedFiles = deletedFiles;
+    }
+
+    /**
+     * Builds the node pushable for one partition from this descriptor and the stored bulk-modify settings.
+     */
+    @Override
+    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+        IOperatorNodePushable bulkModifyRuntime = new ExternalIndexBulkModifyOperatorNodePushable(this, ctx,
+                partition, fieldPermutation, fillFactor, numElementsHint, recordDescProvider, deletedFiles);
+        return bulkModifyRuntime;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorNodePushable.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorNodePushable.java
new file mode 100644
index 0000000..5748a65
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalIndexBulkModifyOperatorNodePushable.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.nio.ByteBuffer;
+
+import org.apache.asterix.external.indexing.FilesIndexDescription;
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.storage.am.common.api.ITwoPCIndexBulkLoader;
+import org.apache.hyracks.storage.am.common.api.IndexException;
+import org.apache.hyracks.storage.am.common.dataflow.IndexBulkLoadOperatorNodePushable;
+import org.apache.hyracks.storage.am.lsm.common.api.ITwoPCIndex;
+
+public class ExternalIndexBulkModifyOperatorNodePushable extends IndexBulkLoadOperatorNodePushable {
+
+    private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
+    private final int[] deletedFiles;
+    private ArrayTupleBuilder buddyBTreeTupleBuilder = new ArrayTupleBuilder(
+            filesIndexDescription.FILE_BUDDY_BTREE_RECORD_DESCRIPTOR.getFieldCount());
+    private AMutableInt32 fileNumber = new AMutableInt32(0);
+    private ArrayTupleReference deleteTuple = new ArrayTupleReference();
+
+    public ExternalIndexBulkModifyOperatorNodePushable(ExternalIndexBulkModifyOperatorDescriptor opDesc,
+            IHyracksTaskContext ctx, int partition, int[] fieldPermutation, float fillFactor, long numElementsHint,
+            IRecordDescriptorProvider recordDescProvider, int[] deletedFiles) {
+        super(opDesc, ctx, partition, fieldPermutation, fillFactor, false, numElementsHint, false, recordDescProvider);
+        this.deletedFiles = deletedFiles;
+    }
+
+    // We override this method to do two things
+    // when creating the bulkLoader, it creates a transaction bulk loader
+    // It uses the bulkLoader to insert delete tuples for the deleted files
+    @Override
+    public void open() throws HyracksDataException {
+        RecordDescriptor recDesc = recDescProvider.getInputRecordDescriptor(opDesc.getActivityId(), 0);
+        accessor = new FrameTupleAccessor(recDesc);
+        indexHelper.open();
+        index = indexHelper.getIndexInstance();
+        try {
+            writer.open();
+            // Transactional BulkLoader
+            bulkLoader = ((ITwoPCIndex) index).createTransactionBulkLoader(fillFactor, verifyInput, deletedFiles.length,
+                    checkIfEmptyIndex);
+            // Delete files
+            for (int i = 0; i < deletedFiles.length; i++) {
+                fileNumber.setValue(deletedFiles[i]);
+                filesIndexDescription.getBuddyBTreeTupleFromFileNumber(deleteTuple, buddyBTreeTupleBuilder, fileNumber);
+                ((ITwoPCIndexBulkLoader) bulkLoader).delete(deleteTuple);
+            }
+        } catch (Throwable e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    @Override
+    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+        accessor.reset(buffer);
+        int tupleCount = accessor.getTupleCount();
+        for (int i = 0; i < tupleCount; i++) {
+            tuple.reset(accessor, i);
+            try {
+                bulkLoader.add(tuple);
+            } catch (IndexException e) {
+                throw new HyracksDataException(e);
+            }
+        }
+    }
+
+    @Override
+    public void close() throws HyracksDataException {
+        if (index != null) {
+            try {
+                bulkLoader.end();
+            } catch (Throwable th) {
+                throw new HyracksDataException(th);
+            } finally {
+                try {
+                    indexHelper.close();
+                } finally {
+                    writer.close();
+                }
+            }
+        }
+    }
+
+    @Override
+    public void fail() throws HyracksDataException {
+        if (index != null) {
+            try {
+                ((ITwoPCIndexBulkLoader) bulkLoader).abort();
+            } finally {
+                writer.fail();
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalLookupOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalLookupOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalLookupOperatorDescriptor.java
new file mode 100644
index 0000000..c8881a3
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalLookupOperatorDescriptor.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.nio.ByteBuffer;
+
+import org.apache.asterix.external.adapter.factory.LookupAdapterFactory;
+import org.apache.asterix.external.dataset.adapter.LookupAdapter;
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.FilesIndexDescription;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
+import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelper;
+import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeDataflowHelperFactory;
+import org.apache.hyracks.storage.common.IStorageManagerInterface;
+
+/*
+ * This operator is intended for using record ids to access data in external sources
+ */
+public class ExternalLookupOperatorDescriptor extends AbstractTreeIndexOperatorDescriptor {
+    private static final long serialVersionUID = 1L;
+    private final LookupAdapterFactory<?> adapterFactory;
+
+    public ExternalLookupOperatorDescriptor(IOperatorDescriptorRegistry spec, LookupAdapterFactory<?> adapterFactory,
+            RecordDescriptor outRecDesc, ExternalBTreeDataflowHelperFactory externalFilesIndexDataFlowHelperFactory,
+            boolean propagateInput, IIndexLifecycleManagerProvider lcManagerProvider,
+            IStorageManagerInterface storageManager, IFileSplitProvider fileSplitProvider, int datasetId,
+            double bloomFilterFalsePositiveRate, ISearchOperationCallbackFactory searchOpCallbackFactory,
+            boolean retainNull, INullWriterFactory iNullWriterFactory) {
+        super(spec, 1, 1, outRecDesc, storageManager, lcManagerProvider, fileSplitProvider,
+                new FilesIndexDescription().EXTERNAL_FILE_INDEX_TYPE_TRAITS,
+                new FilesIndexDescription().FILES_INDEX_COMP_FACTORIES, FilesIndexDescription.BLOOM_FILTER_FIELDS,
+                externalFilesIndexDataFlowHelperFactory, null, propagateInput, retainNull, iNullWriterFactory, null,
+                searchOpCallbackFactory, null);
+        this.adapterFactory = adapterFactory;
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
+                    throws HyracksDataException {
+        // Create a file index accessor to be used for files lookup operations
+        // Note that all file index accessors will use partition 0 since we only have 1 files index per NC 
+        final ExternalFileIndexAccessor snapshotAccessor = new ExternalFileIndexAccessor(
+                (ExternalBTreeDataflowHelper) dataflowHelperFactory.createIndexDataflowHelper(this, ctx, partition),
+                this);
+        return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+            // The adapter that uses the file index along with the coming tuples to access files in HDFS
+            private LookupAdapter<?> adapter;
+            private boolean indexOpen = false;
+
+            @Override
+            public void open() throws HyracksDataException {
+                try {
+                    adapter = adapterFactory.createAdapter(ctx, partition,
+                            recordDescProvider.getInputRecordDescriptor(getActivityId(), 0), snapshotAccessor, writer);
+                    //Open the file index accessor here
+                    snapshotAccessor.open();
+                    indexOpen = true;
+                    adapter.open();
+                } catch (Throwable th) {
+                    throw new HyracksDataException(th);
+                }
+            }
+
+            @Override
+            public void close() throws HyracksDataException {
+                HyracksDataException hde = null;
+                if (indexOpen) {
+                    try {
+                        snapshotAccessor.close();
+                    } catch (Throwable th) {
+                        hde = new HyracksDataException(th);
+                    }
+                    try {
+                        adapter.close();
+                    } catch (Throwable th) {
+                        if (hde == null) {
+                            hde = new HyracksDataException(th);
+                        } else {
+                            hde.addSuppressed(th);
+                        }
+                    }
+                }
+            }
+
+            @Override
+            public void fail() throws HyracksDataException {
+                try {
+                    adapter.fail();
+                } catch (Throwable th) {
+                    throw new HyracksDataException(th);
+                }
+            }
+
+            @Override
+            public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+                try {
+                    adapter.nextFrame(buffer);
+                } catch (Throwable th) {
+                    throw new HyracksDataException(th);
+                }
+            }
+        };
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorDescriptor.java
new file mode 100644
index 0000000..306f75a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorDescriptor.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
+import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
+import org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorDescriptor;
+import org.apache.hyracks.storage.common.IStorageManagerInterface;
+
+/**
+ * R-tree search descriptor that requires an {@link ExternalRTreeDataflowHelperFactory} and whose push
+ * runtime is an {@link ExternalRTreeSearchOperatorNodePushable}.
+ */
+public class ExternalRTreeSearchOperatorDescriptor extends RTreeSearchOperatorDescriptor {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Forwards all arguments to the superclass constructor, passing {@code null} for its last two parameters.
+     */
+    public ExternalRTreeSearchOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
+            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
+            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
+            IBinaryComparatorFactory[] comparatorFactories, int[] keyFields,
+            ExternalRTreeDataflowHelperFactory dataflowHelperFactory, boolean retainInput, boolean retainNull,
+            INullWriterFactory iNullWriterFactory, ISearchOperationCallbackFactory searchOpCallbackFactory) {
+        super(spec, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider,
+                typeTraits, comparatorFactories, keyFields,
+                dataflowHelperFactory,
+                retainInput, retainNull, iNullWriterFactory,
+                searchOpCallbackFactory, null, null);
+    }
+
+    /**
+     * Builds the external R-tree search runtime for one partition; {@code nPartitions} is not used.
+     */
+    @Override
+    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
+        IOperatorNodePushable searchRuntime = new ExternalRTreeSearchOperatorNodePushable(this, ctx, partition,
+                recordDescProvider, keyFields);
+        return searchRuntime;
+    }
+
+}



[07/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapter.java
new file mode 100644
index 0000000..db38c12
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapter.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.runtime;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.OutputStream;
+import java.net.Socket;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.hyracks.api.comm.IFrameWriter;
+
+/**
+ * Adapter that streams the bytes of a local file to a receiver listening on a port of the local host
+ * ({@code 127.0.0.1}).
+ * <p>
+ * The {@link IFrameWriter} handed to {@link #start(int, IFrameWriter)} is not used: data is written to the socket
+ * only.
+ */
+public class SocketClientAdapter implements IDataSourceAdapter {
+
+    private static final long serialVersionUID = 1L;
+
+    private static final Logger LOGGER = Logger.getLogger(SocketClientAdapter.class.getName());
+
+    private static final String LOCALHOST = "127.0.0.1";
+
+    // Pause, in milliseconds, between two connection attempts.
+    private static final long RECONNECT_PERIOD = 2000;
+
+    private final String localFile;
+
+    private final int port;
+
+    // Cleared by stop() and polled by the copy loop in start(); volatile so that a stop request issued from a
+    // different thread becomes visible to the loop.
+    private volatile boolean continueStreaming = true;
+
+    /**
+     * @param port
+     *            port on the local host to connect to
+     * @param localFile
+     *            path of the file whose content is streamed
+     */
+    public SocketClientAdapter(Integer port, String localFile) {
+        this.localFile = localFile;
+        this.port = port;
+    }
+
+    /**
+     * Connects to the receiver and copies the file to the socket in 1 KB chunks until the end of the file is
+     * reached or {@link #stop()} is called. The socket and the file are always closed before returning.
+     *
+     * @param partition
+     *            not used
+     * @param writer
+     *            not used
+     * @throws Exception
+     *             if the file cannot be opened or read, if writing to the socket fails, or if the thread is
+     *             interrupted while waiting for the receiver
+     */
+    @Override
+    public void start(int partition, IFrameWriter writer) throws Exception {
+        Socket socket = waitForReceiver();
+        // Nested try/finally blocks: the socket is released even when the file cannot be opened, and a failure
+        // while closing the file cannot prevent the socket from being closed.
+        try {
+            OutputStream os = socket.getOutputStream();
+            FileInputStream fin = new FileInputStream(new File(localFile));
+            try {
+                byte[] chunk = new byte[1024];
+                int read;
+                while (continueStreaming && (read = fin.read(chunk)) > 0) {
+                    os.write(chunk, 0, read);
+                }
+                if (LOGGER.isLoggable(Level.INFO)) {
+                    LOGGER.info("Finished streaming file " + localFile + " to port [" + port + "]");
+                }
+            } finally {
+                fin.close();
+            }
+        } finally {
+            socket.close();
+        }
+    }
+
+    /**
+     * Blocks until a connection to the receiver is established, retrying every {@link #RECONNECT_PERIOD}
+     * milliseconds after a failed attempt.
+     *
+     * @return the connected socket, never {@code null}
+     * @throws Exception
+     *             if the thread is interrupted while sleeping between two attempts
+     */
+    private Socket waitForReceiver() throws Exception {
+        Socket socket = null;
+        while (socket == null) {
+            try {
+                socket = new Socket(LOCALHOST, port);
+            } catch (Exception e) {
+                if (LOGGER.isLoggable(Level.WARNING)) {
+                    LOGGER.warning("Receiver not ready, would wait for " + (RECONNECT_PERIOD / 1000)
+                            + " seconds before reconnecting");
+                }
+                Thread.sleep(RECONNECT_PERIOD);
+            }
+        }
+        return socket;
+    }
+
+    /**
+     * Requests the copy loop in {@link #start(int, IFrameWriter)} to end after the chunk currently being written.
+     *
+     * @return always {@code true}
+     */
+    @Override
+    public boolean stop() throws Exception {
+        continueStreaming = false;
+        return true;
+    }
+
+    /**
+     * @return always {@code false}; the exception is not inspected
+     */
+    @Override
+    public boolean handleException(Throwable e) {
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapterFactory.java
new file mode 100644
index 0000000..a1e90a8
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/SocketClientAdapterFactory.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.runtime;
+
+import java.util.Map;
+
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.external.api.IAdapterFactory;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Factory for {@link SocketClientAdapter} instances.
+ * <p>
+ * The socket to connect to is taken from a {@link GenericSocketFeedAdapterFactory} configured with the same
+ * parameters; the file to stream is taken from the {@value #KEY_FILE_SPLITS} parameter. Partition {@code i} uses
+ * the {@code i}-th socket and the {@code i}-th file split.
+ */
+public class SocketClientAdapterFactory implements IAdapterFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    private ARecordType outputType;
+
+    private GenericSocketFeedAdapterFactory genericSocketAdapterFactory;
+
+    private String[] fileSplits;
+
+    public static final String KEY_FILE_SPLITS = "file_splits";
+
+    /**
+     * Reads the comma separated list of file paths and configures the underlying socket adapter factory.
+     *
+     * @throws IllegalArgumentException
+     *             if the {@value #KEY_FILE_SPLITS} parameter is missing
+     */
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
+        this.outputType = outputType;
+        String fileSplitsValue = configuration.get(KEY_FILE_SPLITS);
+        if (fileSplitsValue == null) {
+            throw new IllegalArgumentException(
+                    "File splits not specified. File split is specified as a comma separated list of paths");
+        }
+        // Drop the whitespace around every entry, not only around the whole list, so that "a, b" yields the
+        // paths "a" and "b" instead of "a" and " b".
+        fileSplits = fileSplitsValue.trim().split("\\s*,\\s*");
+        genericSocketAdapterFactory = new GenericSocketFeedAdapterFactory();
+        genericSocketAdapterFactory.configure(configuration, outputType);
+    }
+
+    @Override
+    public String getAlias() {
+        return ExternalDataConstants.ALIAS_SOCKET_CLIENT_ADAPTER;
+    }
+
+    /**
+     * @return the partition constraint of the underlying socket adapter factory
+     */
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        return genericSocketAdapterFactory.getPartitionConstraint();
+    }
+
+    /**
+     * Creates the adapter that streams the file split of the given partition to the port of the socket
+     * configured for that partition.
+     *
+     * @throws IllegalArgumentException
+     *             if no file split was configured for {@code partition}
+     */
+    @Override
+    public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
+        // Fail with an explicit message when fewer file splits than partitions were configured.
+        if (partition < 0 || partition >= fileSplits.length) {
+            throw new IllegalArgumentException("No file split configured for partition " + partition + "; "
+                    + fileSplits.length + " file split(s) were specified");
+        }
+        Pair<String, Integer> socket = genericSocketAdapterFactory.getSockets().get(partition);
+        return new SocketClientAdapter(socket.second, fileSplits[partition]);
+    }
+
+    @Override
+    public ARecordType getAdapterOutputType() {
+        return outputType;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/TweetGenerator.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/TweetGenerator.java b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/TweetGenerator.java
new file mode 100644
index 0000000..b5fd454
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/runtime/TweetGenerator.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.runtime;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.asterix.external.runtime.DataGenerator.InitializationInfo;
+import org.apache.asterix.external.runtime.DataGenerator.TweetMessage;
+import org.apache.asterix.external.runtime.DataGenerator.TweetMessageIterator;
+
+/**
+ * Produces tweets with a {@link DataGenerator} and pushes them, as newline terminated ADM strings, to the
+ * registered output streams.
+ * <p>
+ * Tweets are accumulated in a 32 KB buffer that is written to every subscriber when it cannot hold the next tweet
+ * and when the tweet iterator is exhausted. A subscriber whose stream fails on write is dropped. The subscriber
+ * list is guarded by an internal lock; the remaining state is not synchronized.
+ */
+public class TweetGenerator {
+
+    private static final Logger LOGGER = Logger.getLogger(TweetGenerator.class.getName());
+
+    public static final String KEY_DURATION = "duration";
+    public static final String KEY_TPS = "tps";
+    public static final String KEY_VERBOSE = "verbose";
+    public static final String KEY_FIELDS = "fields";
+    public static final int INFINITY = 0;
+
+    private static final int DEFAULT_DURATION = INFINITY;
+
+    private int duration;
+    private TweetMessageIterator tweetIterator = null;
+    private int partition;
+    // Number of tweets generated so far, flushed or not.
+    private long tweetCount = 0;
+    // Number of tweets currently sitting in outputBuffer.
+    private int frameTweetCount = 0;
+    // Number of tweets contained in the buffers flushed so far.
+    private int numFlushedTweets = 0;
+    private DataGenerator dataGenerator = null;
+    private ByteBuffer outputBuffer = ByteBuffer.allocate(32 * 1024);
+    private String[] fields;
+    private final List<OutputStream> subscribers;
+    private final Object lock = new Object();
+    private final List<OutputStream> subscribersForRemoval = new ArrayList<OutputStream>();
+
+    /**
+     * @param configuration
+     *            may contain {@value #KEY_DURATION} (an integer, defaults to {@link #INFINITY}) and
+     *            {@value #KEY_FIELDS} (a comma separated list handed to the ADM conversion of each tweet)
+     * @param partition
+     *            partition index, only used in log messages
+     */
+    public TweetGenerator(Map<String, String> configuration, int partition) throws Exception {
+        this.partition = partition;
+        String value = configuration.get(KEY_DURATION);
+        this.duration = value != null ? Integer.parseInt(value) : DEFAULT_DURATION;
+        dataGenerator = new DataGenerator(new InitializationInfo());
+        tweetIterator = dataGenerator.new TweetMessageIterator(duration);
+        String fieldList = configuration.get(KEY_FIELDS);
+        this.fields = fieldList != null ? fieldList.split(",") : null;
+        this.subscribers = new ArrayList<OutputStream>();
+    }
+
+    /**
+     * Appends one tweet to the output buffer, flushing the buffer first when the tweet does not fit.
+     */
+    private void writeTweetString(TweetMessage tweetMessage) throws IOException {
+        String tweet = tweetMessage.getAdmEquivalent(fields) + "\n";
+        // NOTE(review): every tweet is also echoed to stdout; this looks like leftover debugging output - confirm
+        // before removing.
+        System.out.println(tweet);
+        tweetCount++;
+        byte[] b = tweet.getBytes();
+        if (outputBuffer.position() + b.length > outputBuffer.limit()) {
+            // Not enough room left: send what has been buffered, then start a new frame with this tweet.
+            flush();
+        }
+        outputBuffer.put(b);
+        frameTweetCount++;
+    }
+
+    /**
+     * Writes the buffered bytes to every subscriber, drops the subscribers whose stream failed, and resets the
+     * buffer. The flushed-tweet counter is maintained here so that every flush is accounted for, including the
+     * final one issued by {@link #generateNextBatch(int)}.
+     */
+    private void flush() throws IOException {
+        outputBuffer.flip();
+        synchronized (lock) {
+            for (OutputStream os : subscribers) {
+                try {
+                    os.write(outputBuffer.array(), 0, outputBuffer.limit());
+                } catch (Exception e) {
+                    // Best effort: a subscriber that cannot be written to is removed instead of failing the
+                    // generator. Removal is deferred because the list is being iterated.
+                    subscribersForRemoval.add(os);
+                }
+            }
+            if (!subscribersForRemoval.isEmpty()) {
+                subscribers.removeAll(subscribersForRemoval);
+                subscribersForRemoval.clear();
+            }
+        }
+        numFlushedTweets += frameTweetCount;
+        frameTweetCount = 0;
+        // Position back to 0 and limit back to the capacity of the buffer.
+        outputBuffer.clear();
+    }
+
+    /**
+     * Generates {@code numTweets} tweets if the iterator reports more data; otherwise flushes what is still
+     * buffered.
+     *
+     * @return {@code true} if a batch was generated, {@code false} if the iterator was exhausted
+     */
+    public boolean generateNextBatch(int numTweets) throws Exception {
+        boolean moreData = tweetIterator.hasNext();
+        if (!moreData) {
+            if (outputBuffer.position() > 0) {
+                flush();
+            }
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("Reached end of batch. Tweet Count: [" + partition + "]" + tweetCount);
+            }
+            return false;
+        } else {
+            int count = 0;
+            while (count < numTweets) {
+                writeTweetString(tweetIterator.next());
+                count++;
+            }
+            return true;
+        }
+    }
+
+    /**
+     * @return the number of tweets contained in the buffers flushed so far
+     */
+    public int getNumFlushedTweets() {
+        return numFlushedTweets;
+    }
+
+    public void registerSubscriber(OutputStream os) {
+        synchronized (lock) {
+            subscribers.add(os);
+        }
+    }
+
+    public void deregisterSubscribers(OutputStream os) {
+        synchronized (lock) {
+            subscribers.remove(os);
+        }
+    }
+
+    /**
+     * Closes the stream of every subscriber. All streams are attempted even when one of them fails.
+     *
+     * @throws IOException
+     *             the first failure met while closing, rethrown once every stream has been attempted
+     */
+    public void close() throws IOException {
+        IOException firstFailure = null;
+        synchronized (lock) {
+            for (OutputStream os : subscribers) {
+                try {
+                    os.close();
+                } catch (IOException e) {
+                    if (firstFailure == null) {
+                        firstFailure = e;
+                    }
+                }
+            }
+        }
+        if (firstFailure != null) {
+            throw firstFailure;
+        }
+    }
+
+    /**
+     * @return {@code true} if at least one subscriber is registered
+     */
+    public boolean isSubscribed() {
+        // Read under the same lock that guards every modification of the list.
+        synchronized (lock) {
+            return !subscribers.isEmpty();
+        }
+    }
+
+    /**
+     * @return the number of tweets generated so far
+     */
+    public long getTweetCount() {
+        return tweetCount;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolver.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolver.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolver.java
deleted file mode 100644
index a897294..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolver.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.om.util.AsterixRuntimeUtil;
-
-/**
- * Resolves a value (DNS/IP Address) to the id of a Node Controller running at the location.
- */
-public class DNSResolver implements INodeResolver {
-
-    private static Random random = new Random();
-
-    @Override
-    public String resolveNode(String value) throws AsterixException {
-        try {
-            InetAddress ipAddress = InetAddress.getByName(value);
-            Set<String> nodeControllers = AsterixRuntimeUtil.getNodeControllersOnIP(ipAddress);
-            if (nodeControllers == null || nodeControllers.isEmpty()) {
-                throw new AsterixException(" No node controllers found at the address: " + value);
-            }
-            String chosenNCId = nodeControllers.toArray(new String[] {})[random.nextInt(nodeControllers.size())];
-            return chosenNCId;
-        }catch (UnknownHostException e){
-            throw new AsterixException("Unable to resolve hostname '"+ value + "' to an IP address");
-        } catch (AsterixException ae) {
-            throw ae;
-        } catch (Exception e) {
-            throw new AsterixException(e);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolverFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolverFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolverFactory.java
index 6862d7a..f8585bb 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolverFactory.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/DNSResolverFactory.java
@@ -18,12 +18,15 @@
  */
 package org.apache.asterix.external.util;
 
+import org.apache.asterix.external.api.INodeResolver;
+import org.apache.asterix.external.api.INodeResolverFactory;
+
 /**
- * Factory for creating instance of {@link DNSResolver}
+ * Factory for creating instance of {@link NodeResolver}
  */
 public class DNSResolverFactory implements INodeResolverFactory {
 
-    private static final INodeResolver INSTANCE = new DNSResolver();
+    private static final INodeResolver INSTANCE = new NodeResolver();
 
     @Override
     public INodeResolver createNodeResolver() {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/DataflowUtils.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/DataflowUtils.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/DataflowUtils.java
new file mode 100644
index 0000000..ea13f25
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/DataflowUtils.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.asterix.common.parse.ITupleForwarder.TupleForwardPolicy;
+import org.apache.asterix.external.dataflow.CounterTimerTupleForwarder;
+import org.apache.asterix.external.dataflow.FrameFullTupleForwarder;
+import org.apache.asterix.external.dataflow.RateControlledTupleForwarder;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+
+/**
+ * Static helpers shared by the external data dataflow code.
+ */
+public class DataflowUtils {
+    /**
+     * Appends the tuple held by {@code tb} to the frame of {@code appender}. When the tuple does not fit, the
+     * frame is flushed to {@code writer} and the append is attempted a second time.
+     *
+     * @throws HyracksDataException
+     *             if appending or flushing fails
+     * @throws IllegalStateException
+     *             if the tuple still cannot be appended after the flush
+     */
+    public static void addTupleToFrame(FrameTupleAppender appender, ArrayTupleBuilder tb, IFrameWriter writer)
+            throws HyracksDataException {
+        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+            appender.flush(writer, true);
+            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+                throw new IllegalStateException(
+                        "Tuple of " + tb.getSize() + " bytes could not be appended even after flushing the frame");
+            }
+        }
+    }
+
+    /**
+     * Creates the tuple forwarder selected by the {@link ITupleForwarder#FORWARD_POLICY} entry of the
+     * configuration. The value is matched against the {@link TupleForwardPolicy} names after trimming and
+     * upper-casing; {@link TupleForwardPolicy#FRAME_FULL} is used when the entry is absent.
+     *
+     * @throws AsterixException
+     *             if the configured value does not name a known policy
+     */
+    public static ITupleForwarder getTupleForwarder(Map<String, String> configuration) throws AsterixException {
+        String propValue = configuration.get(ITupleForwarder.FORWARD_POLICY);
+        ITupleForwarder.TupleForwardPolicy policyType;
+        if (propValue == null) {
+            policyType = TupleForwardPolicy.FRAME_FULL;
+        } else {
+            try {
+                // Locale.ROOT keeps the conversion independent of the default locale (e.g. the Turkish dotted I).
+                policyType = TupleForwardPolicy.valueOf(propValue.trim().toUpperCase(java.util.Locale.ROOT));
+            } catch (IllegalArgumentException e) {
+                // valueOf signals an unknown name with an unchecked exception; report it through the declared
+                // exception type instead.
+                throw new AsterixException("Unknown tuple forward policy: " + propValue, e);
+            }
+        }
+        switch (policyType) {
+            case FRAME_FULL:
+                return new FrameFullTupleForwarder();
+            case COUNTER_TIMER_EXPIRED:
+                return new CounterTimerTupleForwarder();
+            case RATE_CONTROLLED:
+                return new RateControlledTupleForwarder();
+            default:
+                // Reached only for a policy constant that has no forwarder mapped above.
+                throw new AsterixException("Unknown tuple forward policy");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
new file mode 100644
index 0000000..7f91a2b
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataCompatibilityUtils.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.api.IExternalDataSourceFactory;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.om.types.ARecordType;
+
+/**
+ * Checks that a data source and a parser can work together, and rewrites legacy adapter configurations into the
+ * reader/parser/stream parameters.
+ */
+public class ExternalDataCompatibilityUtils {
+
+    /**
+     * Verifies that the parser consumes what the data source produces: both must report the same data source
+     * type and, for record sources, the record class of the reader must be assignable to the one of the parser.
+     *
+     * @throws AsterixException
+     *             if either check fails
+     */
+    public static void validateCompatibility(IExternalDataSourceFactory dataSourceFactory,
+            IDataParserFactory dataParserFactory) throws AsterixException {
+        DataSourceType produced = dataSourceFactory.getDataSourceType();
+        DataSourceType expected = dataParserFactory.getDataSourceType();
+        if (produced != expected) {
+            throw new AsterixException("datasource-parser mismatch. datasource produces " + produced
+                    + " and parser expects " + expected);
+        }
+        if (produced != DataSourceType.RECORDS) {
+            return;
+        }
+        IRecordReaderFactory<?> readerFactory = (IRecordReaderFactory<?>) dataSourceFactory;
+        IRecordDataParserFactory<?> parserFactory = (IRecordDataParserFactory<?>) dataParserFactory;
+        Class<?> parserRecordClass = parserFactory.getRecordClass();
+        Class<?> readerRecordClass = readerFactory.getRecordClass();
+        if (!parserRecordClass.isAssignableFrom(readerRecordClass)) {
+            throw new AsterixException("datasource-parser mismatch. datasource produces records of type "
+                    + readerRecordClass + " and parser expects records of type " + parserRecordClass);
+        }
+    }
+
+    //TODO:Add remaining aliases
+    /**
+     * Adds to {@code configuration} the reader, reader stream, parser and stream entries derived from the adapter
+     * name and from the format, parser and file system entries already present. {@code itemType} is not used.
+     *
+     * @throws AsterixException
+     *             if the adapter is the HDFS or the local file system adapter and no format is configured
+     */
+    public static void addCompatabilityParameters(String adapterClassname, ARecordType itemType,
+            Map<String, String> configuration) throws AsterixException {
+        boolean hdfsAdapter = adapterClassname.equals(ExternalDataConstants.ALIAS_HDFS_ADAPTER)
+                || adapterClassname.equalsIgnoreCase(ExternalDataConstants.ADAPTER_HDFS_CLASSNAME);
+        if (hdfsAdapter) {
+            addHdfsParameters(configuration);
+        }
+        boolean localFsAdapter = adapterClassname.equals(ExternalDataConstants.ALIAS_LOCALFS_ADAPTER)
+                || adapterClassname.contains(ExternalDataConstants.ADAPTER_LOCALFS_CLASSNAME);
+        if (localFsAdapter) {
+            addLocalFsParameters(configuration);
+        }
+        String parser = configuration.get(ExternalDataConstants.KEY_PARSER);
+        if (parser != null && parser.equals(ExternalDataConstants.PARSER_HIVE)) {
+            configuration.put(ExternalDataConstants.KEY_PARSER, ExternalDataConstants.FORMAT_HIVE);
+        }
+        String fileSystem = configuration.get(ExternalDataConstants.KEY_FILESYSTEM);
+        if (fileSystem != null) {
+            configuration.put(ExternalDataConstants.KEY_STREAM, fileSystem);
+        }
+    }
+
+    // HDFS adapter: binary and hive formats use the HDFS record reader; any other format is used as the reader
+    // name on top of the HDFS stream.
+    private static void addHdfsParameters(Map<String, String> configuration) throws AsterixException {
+        String format = configuration.get(ExternalDataConstants.KEY_FORMAT);
+        if (format == null) {
+            throw new AsterixException("Unspecified format parameter for HDFS adapter");
+        }
+        boolean hdfsReaderFormat = format.equals(ExternalDataConstants.FORMAT_BINARY)
+                || format.equals(ExternalDataConstants.FORMAT_HIVE);
+        if (hdfsReaderFormat) {
+            configuration.put(ExternalDataConstants.KEY_READER, ExternalDataConstants.READER_HDFS);
+            return;
+        }
+        configuration.put(ExternalDataConstants.KEY_READER, format);
+        configuration.put(ExternalDataConstants.KEY_READER_STREAM, ExternalDataConstants.ALIAS_HDFS_ADAPTER);
+    }
+
+    // Local file system adapter: the format is used as the reader name on top of the local file system stream.
+    private static void addLocalFsParameters(Map<String, String> configuration) throws AsterixException {
+        String format = configuration.get(ExternalDataConstants.KEY_FORMAT);
+        if (format == null) {
+            throw new AsterixException("Unspecified format parameter for local file system adapter");
+        }
+        configuration.put(ExternalDataConstants.KEY_READER, format);
+        configuration.put(ExternalDataConstants.KEY_READER_STREAM, ExternalDataConstants.ALIAS_LOCALFS_ADAPTER);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
new file mode 100644
index 0000000..2050e6a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+
+public class ExternalDataConstants {
+    //TODO: Remove unused variables.
+    /**
+     * Keys
+     */
+    // used to specify the stream factory for an adapter that has a stream data source
+    public static final String KEY_STREAM = "stream";
+    // used to specify the dataverse of the adapter
+    public static final String KEY_DATAVERSE = "dataverse";
+    // used to specify the socket addresses when reading data from sockets
+    public static final String KEY_SOCKETS = "sockets";
+    // specify whether the socket address points to an NC or an IP
+    public static final String KEY_MODE = "address-type";
+    // specify the hdfs name node address when reading hdfs data
+    public static final String KEY_HDFS_URL = "hdfs";
+    // specify the path when reading from a file system
+    public static final String KEY_PATH = "path";
+    public static final String KEY_INPUT_FORMAT = "input-format";
+    public static final String KEY_FILESYSTEM = "fs";
+    public static final String KEY_HADOOP_FILESYSTEM_URI = "fs.defaultFS";
+    public static final String KEY_HADOOP_FILESYSTEM_CLASS = "fs.hdfs.impl";
+    public static final String KEY_HADOOP_INPUT_DIR = "mapred.input.dir";
+    public static final String KEY_HADOOP_INPUT_FORMAT = "mapred.input.format.class";
+    public static final String KEY_HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit";
+    public static final String KEY_HADOOP_SOCKET_PATH = "dfs.domain.socket.path";
+    public static final String KEY_HADOOP_BUFFER_SIZE = "io.file.buffer.size";
+    public static final String KEY_SOURCE_DATATYPE = "type-name";
+    public static final String KEY_DELIMITER = "delimiter";
+    public static final String KEY_PARSER_FACTORY = "tuple-parser";
+    public static final String KEY_DATA_PARSER = "parser";
+    public static final String KEY_HEADER = "header";
+    public static final String KEY_READER = "reader";
+    public static final String KEY_READER_STREAM = "reader-stream";
+    public static final String KEY_TYPE_NAME = "type-name";
+    public static final String KEY_RECORD_START = "record-start";
+    public static final String KEY_RECORD_END = "record-end";
+    public static final String KEY_EXPRESSION = "expression";
+    public static final String KEY_LOCAL_SOCKET_PATH = "local-socket-path";
+    public static final String KEY_FORMAT = "format";
+    public static final String KEY_QUOTE = "quote";
+    public static final String KEY_PARSER = "parser";
+    public static final String KEY_DATASET_RECORD = "dataset-record";
+    public static final String KEY_HIVE_SERDE = "hive-serde";
+    public static final String KEY_RSS_URL = "url";
+    public static final String KEY_INTERVAL = "interval";
+    public static final String KEY_PULL = "pull";
+    public static final String KEY_PUSH = "push";
+    /**
+     * HDFS class names
+     */
+    public static final String CLASS_NAME_TEXT_INPUT_FORMAT = TextInputFormat.class.getName();
+    public static final String CLASS_NAME_SEQUENCE_INPUT_FORMAT = SequenceFileInputFormat.class.getName();
+    public static final String CLASS_NAME_RC_INPUT_FORMAT = RCFileInputFormat.class.getName();
+    public static final String CLASS_NAME_HDFS_FILESYSTEM = DistributedFileSystem.class.getName();
+    /**
+     * input formats aliases
+     */
+    public static final String INPUT_FORMAT_TEXT = "text-input-format";
+    public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
+    public static final String INPUT_FORMAT_RC = "rc-input-format";
+    /**
+     * Builtin streams
+     */
+
+    /**
+     * Builtin record readers
+     */
+    public static final String READER_HDFS = "hdfs";
+    public static final String READER_ADM = "adm";
+    public static final String READER_SEMISTRUCTURED = "semi-structured";
+    public static final String READER_DELIMITED = "delimited-text";
+
+    public static final String CLUSTER_LOCATIONS = "cluster-locations";
+    public static final String SCHEDULER = "hdfs-scheduler";
+    public static final String PARSER_HIVE = "hive-parser";
+    public static final String HAS_HEADER = "has.header";
+    public static final String TIME_TRACKING = "time.tracking";
+    public static final String DEFAULT_QUOTE = "\"";
+    public static final String NODE_RESOLVER_FACTORY_PROPERTY = "node.Resolver";
+    public static final String DEFAULT_DELIMITER = ",";
+    public static final String EXTERNAL_LIBRARY_SEPARATOR = "#";
+    public static final String HDFS_INDEXING_ADAPTER = "hdfs-indexing-adapter";
+    /**
+     * supported builtin record formats
+     */
+    public static final String FORMAT_HIVE = "hive";
+    public static final String FORMAT_BINARY = "binary";
+    public static final String FORMAT_ADM = "adm";
+    public static final String FORMAT_JSON = "json";
+    public static final String FORMAT_DELIMITED_TEXT = "delimited-text";
+    public static final String FORMAT_TWEET = "tweet";
+    public static final String FORMAT_RSS = "rss";
+
+    /**
+     * input streams
+     */
+    public static final String STREAM_HDFS = "hdfs";
+    public static final String STREAM_LOCAL_FILESYSTEM = "localfs";
+    public static final String STREAM_SOCKET = "socket";
+
+    /**
+     * adapter aliases
+     */
+    public static final String ALIAS_GENERIC_ADAPTER = "adapter";
+    public static final String ALIAS_LOCALFS_ADAPTER = "localfs";
+    public static final String ALIAS_HDFS_ADAPTER = "hdfs";
+    public static final String ALIAS_SOCKET_ADAPTER = "socket_adapter";
+    public static final String ALIAS_TWITTER_FIREHOSE_ADAPTER = "twitter_firehose";
+    public static final String ALIAS_SOCKET_CLIENT_ADAPTER = "socket_client";
+    public static final String ALIAS_RSS_ADAPTER = "rss_feed";
+    public static final String ALIAS_FILE_FEED_ADAPTER = "file_feed";
+    public static final String ALIAS_TWITTER_PUSH_ADAPTER = "push_twitter";
+    public static final String ALIAS_TWITTER_PULL_ADAPTER = "pull_twitter";
+    public static final String ALIAS_TWITTER_AZURE_ADAPTER = "azure_twitter";
+    public static final String ALIAS_CNN_ADAPTER = "cnn_feed";
+
+    /**
+     * For backward compatibility
+     */
+    public static final String ADAPTER_LOCALFS_CLASSNAME = "org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter";
+    public static final String ADAPTER_HDFS_CLASSNAME = "org.apache.asterix.external.dataset.adapter.HDFSAdapter";
+
+    /**
+     * Constant characters
+     */
+    public static final char ESCAPE = '\\';
+    public static final char QUOTE = '"';
+    public static final char SPACE = ' ';
+    public static final char TAB = '\t';
+    public static final char LF = '\n';
+    public static final char CR = '\r';
+    public static final char DEFAULT_RECORD_START = '{';
+    public static final char DEFAULT_RECORD_END = '}';
+
+    /**
+     * Constant byte characters
+     */
+    public static final byte EOL = '\n';
+    public static final byte BYTE_CR = '\r';
+    /**
+     * Size default values
+     */
+    public static final int DEFAULT_BUFFER_SIZE = 4096;
+    public static final int DEFAULT_BUFFER_INCREMENT = 4096;
+
+    /**
+     * Expected parameter values
+     */
+    public static final String PARAMETER_OF_SIZE_ONE = "Value of size 1";
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataExceptionUtils.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataExceptionUtils.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataExceptionUtils.java
new file mode 100644
index 0000000..9dcaef4
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataExceptionUtils.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+public class ExternalDataExceptionUtils {
+    public static final String INCORRECT_PARAMETER = "Incorrect parameter.\n";
+    public static final String MISSING_PARAMETER = "Missing parameter.\n";
+    public static final String PARAMETER_NAME = "Parameter name: ";
+    public static final String EXPECTED_VALUE = "Expected value: ";
+    public static final String PASSED_VALUE = "Passed value: ";
+
+    public static String incorrectParameterMessage(String parameterName, String expectedValue, String passedValue) {
+        return INCORRECT_PARAMETER + PARAMETER_NAME + parameterName + ExternalDataConstants.LF + EXPECTED_VALUE
+                + expectedValue + ExternalDataConstants.LF + PASSED_VALUE + passedValue;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
new file mode 100644
index 0000000..7c1c1b5
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.feeds.FeedConstants;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.library.ExternalLibraryManager;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
+import org.apache.hyracks.dataflow.common.data.parsers.DoubleParserFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.FloatParserFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.LongParserFactory;
+import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
+
+public class ExternalDataUtils {
+
+    // Get a delimiter from the given configuration
+    public static char getDelimiter(Map<String, String> configuration) throws AsterixException {
+        String delimiterValue = configuration.get(ExternalDataConstants.KEY_DELIMITER);
+        if (delimiterValue == null) {
+            delimiterValue = ExternalDataConstants.DEFAULT_DELIMITER;
+        } else if (delimiterValue.length() != 1) {
+            throw new AsterixException(
+                    "'" + delimiterValue + "' is not a valid delimiter. The length of a delimiter should be 1.");
+        }
+        return delimiterValue.charAt(0);
+    }
+
+    // Get a quote from the given configuration when the delimiter is given
+    // Need to pass delimiter to check whether they share the same character
+    public static char getQuote(Map<String, String> configuration, char delimiter) throws AsterixException {
+        String quoteValue = configuration.get(ExternalDataConstants.KEY_QUOTE);
+        if (quoteValue == null) {
+            quoteValue = ExternalDataConstants.DEFAULT_QUOTE;
+        } else if (quoteValue.length() != 1) {
+            throw new AsterixException("'" + quoteValue + "' is not a valid quote. The length of a quote should be 1.");
+        }
+
+        // Since delimiter (char type value) can't be null,
+        // we only check whether delimiter and quote use the same character
+        if (quoteValue.charAt(0) == delimiter) {
+            throw new AsterixException(
+                    "Quote '" + quoteValue + "' cannot be used with the delimiter '" + delimiter + "'. ");
+        }
+
+        return quoteValue.charAt(0);
+    }
+
+    // Get the header flag
+    public static boolean getHasHeader(Map<String, String> configuration) {
+        return Boolean.parseBoolean(configuration.get(ExternalDataConstants.KEY_HEADER));
+    }
+
+    public static DataSourceType getDataSourceType(Map<String, String> configuration) throws AsterixException {
+        if (isDataSourceStreamProvider(configuration)) {
+            return DataSourceType.STREAM;
+        } else if (isDataSourceRecordReader(configuration)) {
+            return DataSourceType.RECORDS;
+        } else {
+            throw new AsterixException(
+                    "unable to determine whether input is a stream provider or a record reader. parameters: "
+                            + ExternalDataConstants.KEY_STREAM + " or " + ExternalDataConstants.KEY_READER
+                            + " must be specified");
+        }
+    }
+
+    public static boolean isExternal(String aString) {
+        return (aString.contains(ExternalDataConstants.EXTERNAL_LIBRARY_SEPARATOR) && aString.trim().length() > 1);
+    }
+
+    public static ClassLoader getClassLoader(String dataverse, String library) {
+        return ExternalLibraryManager.getLibraryClassLoader(dataverse, library);
+    }
+
+    public static String getLibraryName(String aString) {
+        return aString.trim().split(FeedConstants.NamingConstants.LIBRARY_NAME_SEPARATOR)[0];
+    }
+
+    public static String getExternalClassName(String aString) {
+        return aString.trim().split(FeedConstants.NamingConstants.LIBRARY_NAME_SEPARATOR)[1];
+    }
+
+    public static IInputStreamProviderFactory createExternalInputStreamFactory(String dataverse, String stream)
+            throws InstantiationException, IllegalAccessException, ClassNotFoundException {
+        String libraryName = getLibraryName(stream);
+        String className = getExternalClassName(stream);
+        ClassLoader classLoader = getClassLoader(dataverse, libraryName);
+        return ((IInputStreamProviderFactory) (classLoader.loadClass(className).newInstance()));
+    }
+
+    public static String getDataverse(Map<String, String> configuration) {
+        return configuration.get(ExternalDataConstants.KEY_DATAVERSE);
+    }
+
+    public static boolean isDataSourceStreamProvider(Map<String, String> configuration) {
+        return configuration.containsKey(ExternalDataConstants.KEY_STREAM);
+    }
+
+    private static boolean isDataSourceRecordReader(Map<String, String> configuration) {
+        return configuration.containsKey(ExternalDataConstants.KEY_READER);
+    }
+
+    public static String getRecordFormat(Map<String, String> configuration) {
+        String parserFormat = configuration.get(ExternalDataConstants.KEY_DATA_PARSER);
+        return parserFormat != null ? parserFormat : configuration.get(ExternalDataConstants.KEY_FORMAT);
+    }
+
+    private static Map<ATypeTag, IValueParserFactory> valueParserFactoryMap = initializeValueParserFactoryMap();
+
+    private static Map<ATypeTag, IValueParserFactory> initializeValueParserFactoryMap() {
+        Map<ATypeTag, IValueParserFactory> m = new HashMap<ATypeTag, IValueParserFactory>();
+        m.put(ATypeTag.INT32, IntegerParserFactory.INSTANCE);
+        m.put(ATypeTag.FLOAT, FloatParserFactory.INSTANCE);
+        m.put(ATypeTag.DOUBLE, DoubleParserFactory.INSTANCE);
+        m.put(ATypeTag.INT64, LongParserFactory.INSTANCE);
+        m.put(ATypeTag.STRING, UTF8StringParserFactory.INSTANCE);
+        return m;
+    }
+
+    // Get one value parser factory per field; an optional field (UNION of NULL and T) is parsed as T.
+    // The UNION check uses '||' so that any union other than exactly {NULL, T} is rejected.
+    public static IValueParserFactory[] getValueParserFactories(ARecordType recordType) {
+        IAType[] fieldTypes = recordType.getFieldTypes();
+        IValueParserFactory[] fieldParserFactories = new IValueParserFactory[fieldTypes.length];
+        for (int i = 0; i < fieldTypes.length; i++) {
+            ATypeTag tag = fieldTypes[i].getTypeTag();
+            if (tag == ATypeTag.UNION) {
+                List<IAType> unionTypes = ((AUnionType) fieldTypes[i]).getUnionList();
+                if (unionTypes.size() != 2 || unionTypes.get(0).getTypeTag() != ATypeTag.NULL) {
+                    throw new NotImplementedException("Non-optional UNION type is not supported.");
+                }
+                tag = unionTypes.get(1).getTypeTag();
+            }
+            if (tag == null) {
+                throw new NotImplementedException("Failed to get the type information for field " + i + ".");
+            }
+            IValueParserFactory vpf = valueParserFactoryMap.get(tag);
+            if (vpf == null) {
+                throw new NotImplementedException("No value parser factory for delimited fields of type " + tag);
+            }
+            fieldParserFactories[i] = vpf;
+        }
+        return fieldParserFactories;
+    }
+
+    public static String getRecordReaderStreamName(Map<String, String> configuration) {
+        return configuration.get(ExternalDataConstants.KEY_READER_STREAM);
+    }
+
+    public static boolean hasHeader(Map<String, String> configuration) {
+        String value = configuration.get(ExternalDataConstants.KEY_HEADER);
+        if (value != null) {
+            return Boolean.valueOf(value);
+        }
+        return false;
+    }
+
+    public static boolean isPull(Map<String, String> configuration) {
+        String pull = configuration.get(ExternalDataConstants.KEY_PULL);
+        if (pull == null) {
+            return false;
+        }
+        return Boolean.parseBoolean(pull);
+    }
+
+    public static boolean isPush(Map<String, String> configuration) {
+        String push = configuration.get(ExternalDataConstants.KEY_PUSH);
+        if (push == null) {
+            return false;
+        }
+        return Boolean.parseBoolean(push);
+    }
+
+    public static IRecordReaderFactory<?> createExternalRecordReaderFactory(String dataverse, String reader)
+            throws InstantiationException, IllegalAccessException, ClassNotFoundException {
+        String library = reader.substring(0, reader.indexOf(ExternalDataConstants.EXTERNAL_LIBRARY_SEPARATOR));
+        ClassLoader classLoader = ExternalLibraryManager.getLibraryClassLoader(dataverse, library);
+        return (IRecordReaderFactory<?>) classLoader
+                .loadClass(reader.substring(reader.indexOf(ExternalDataConstants.EXTERNAL_LIBRARY_SEPARATOR) + 1))
+                .newInstance();
+    }
+
+    public static IDataParserFactory createExternalParserFactory(String dataverse, String parserFactoryName)
+            throws InstantiationException, IllegalAccessException, ClassNotFoundException {
+        String library = parserFactoryName.substring(0,
+                parserFactoryName.indexOf(ExternalDataConstants.EXTERNAL_LIBRARY_SEPARATOR));
+        ClassLoader classLoader = ExternalLibraryManager.getLibraryClassLoader(dataverse, library);
+        return (IDataParserFactory) classLoader
+                .loadClass(parserFactoryName
+                        .substring(parserFactoryName.indexOf(ExternalDataConstants.EXTERNAL_LIBRARY_SEPARATOR) + 1))
+                .newInstance();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
new file mode 100644
index 0000000..de6737a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.indexing.IndexingScheduler;
+import org.apache.asterix.external.indexing.RecordId.RecordIdType;
+import org.apache.asterix.external.input.stream.HDFSInputStreamProvider;
+import org.apache.asterix.om.util.AsterixAppContextInfo;
+import org.apache.asterix.om.util.AsterixClusterProperties;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.ICCContext;
+import org.apache.hyracks.api.exceptions.HyracksException;
+import org.apache.hyracks.hdfs.scheduler.Scheduler;
+
+public class HDFSUtils {
+
+    /** Creates the HDFS scheduler from the cluster controller's client address and port. */
+    public static Scheduler initializeHDFSScheduler() {
+        ICCContext ccContext = AsterixAppContextInfo.getInstance().getCCApplicationContext().getCCContext();
+        try {
+            return new Scheduler(ccContext.getClusterControllerInfo().getClientNetAddress(),
+                    ccContext.getClusterControllerInfo().getClientNetPort());
+        } catch (HyracksException e) {
+            // chain the cause so the underlying failure is not lost
+            throw new IllegalStateException("Cannot obtain hdfs scheduler", e);
+        }
+    }
+
+    /** Creates the indexing HDFS scheduler from the cluster controller's client address and port. */
+    public static IndexingScheduler initializeIndexingHDFSScheduler() {
+        ICCContext ccContext = AsterixAppContextInfo.getInstance().getCCApplicationContext().getCCContext();
+        try {
+            return new IndexingScheduler(ccContext.getClusterControllerInfo().getClientNetAddress(),
+                    ccContext.getClusterControllerInfo().getClientNetPort());
+        } catch (HyracksException e) {
+            // chain the cause so the underlying failure is not lost
+            throw new IllegalStateException("Cannot obtain hdfs scheduler", e);
+        }
+    }
+
+    /**
+     * Instead of creating the splits using the input format, we do it manually.
+     * Returns file splits (one per HDFS file block) irrespective of the number of partitions; the splits only
+     * cover the intersection between the files currently in HDFS and the files recorded in AsterixDB:
+     * a pending-add file is split over its recorded size; a no-op file is split over its recorded size, or over
+     * the appended delta when another entry with the same name and a different size exists.
+     * Files missing from HDFS, with a different modification time, or with any other pending op are skipped.
+     * As a side effect, {@code files} is replaced by one entry per produced split, in split order.
+     * @return the produced splits
+     * @throws IOException if the file system cannot be obtained or queried
+     */
+    public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
+        // Create file system object
+        FileSystem fs = FileSystem.get(conf);
+        ArrayList<FileSplit> fileSplits = new ArrayList<FileSplit>();
+        ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<ExternalFile>();
+        // Create files splits
+        for (ExternalFile file : files) {
+            Path filePath = new Path(file.getFileName());
+            FileStatus fileStatus;
+            try {
+                fileStatus = fs.getFileStatus(filePath);
+            } catch (FileNotFoundException e) {
+                // file was deleted at some point, skip to next file
+                continue;
+            }
+            if (file.getPendingOp() == ExternalFilePendingOp.PENDING_ADD_OP
+                    && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
+                // Get its information from HDFS name node
+                BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
+                // Create a split per block
+                for (BlockLocation block : fileBlocks) {
+                    if (block.getOffset() < file.getSize()) {
+                        fileSplits
+                                .add(new FileSplit(filePath,
+                                        block.getOffset(), (block.getLength() + block.getOffset()) < file.getSize()
+                                                ? block.getLength() : (file.getSize() - block.getOffset()),
+                                block.getHosts()));
+                        orderedExternalFiles.add(file);
+                    }
+                }
+            } else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP
+                    && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
+                long oldSize = 0L;
+                long newSize = file.getSize();
+                for (int i = 0; i < files.size(); i++) {
+                    // compare names by value: '==' only matched the very same String instance
+                    if (file.getFileName().equals(files.get(i).getFileName())
+                            && files.get(i).getSize() != file.getSize()) {
+                        newSize = files.get(i).getSize();
+                        oldSize = file.getSize();
+                        break;
+                    }
+                }
+
+                // Get its information from HDFS name node
+                BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
+                // Create a split per block
+                for (BlockLocation block : fileBlocks) {
+                    if (block.getOffset() + block.getLength() > oldSize && block.getOffset() < newSize) {
+                        // Block intersects the delta -> create a split
+                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
+                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L
+                                : block.getOffset() + block.getLength() - newSize;
+                        long splitLength = block.getLength() - startCut - endCut;
+                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength,
+                                block.getHosts()));
+                        orderedExternalFiles.add(file);
+                    }
+                }
+            }
+        }
+        fs.close();
+        files.clear();
+        files.addAll(orderedExternalFiles);
+        return fileSplits.toArray(new FileSplit[fileSplits.size()]);
+    }
+
+    public static String getInputFormatClassName(Map<String, String> configuration) {
+        String inputFormatParameter = configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim();
+        switch (inputFormatParameter) {
+            case ExternalDataConstants.INPUT_FORMAT_TEXT:
+                return ExternalDataConstants.CLASS_NAME_TEXT_INPUT_FORMAT;
+            case ExternalDataConstants.INPUT_FORMAT_SEQUENCE:
+                return ExternalDataConstants.CLASS_NAME_SEQUENCE_INPUT_FORMAT;
+            case ExternalDataConstants.INPUT_FORMAT_RC:
+                return ExternalDataConstants.CLASS_NAME_RC_INPUT_FORMAT;
+            default:
+                return inputFormatParameter;
+        }
+    }
+
+    public static Class<?> getInputFormatClass(Map<String, String> configuration) throws ClassNotFoundException {
+        String inputFormatParameter = configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim();
+        switch (inputFormatParameter) {
+            case ExternalDataConstants.INPUT_FORMAT_TEXT:
+                return TextInputFormat.class;
+            case ExternalDataConstants.INPUT_FORMAT_SEQUENCE:
+                return SequenceFileInputFormat.class;
+            case ExternalDataConstants.INPUT_FORMAT_RC:
+                return RCFileInputFormat.class;
+            default:
+                return Class.forName(inputFormatParameter);
+        }
+    }
+
+    public static JobConf configureHDFSJobConf(Map<String, String> configuration) throws Exception {
+        JobConf conf = new JobConf();
+
+        String localShortCircuitSocketPath = configuration.get(ExternalDataConstants.KEY_LOCAL_SOCKET_PATH);
+        String formatClassName = HDFSUtils.getInputFormatClassName(configuration);
+        conf.set(ExternalDataConstants.KEY_HADOOP_FILESYSTEM_URI,
+                configuration.get(ExternalDataConstants.KEY_HDFS_URL).trim());
+        conf.set(ExternalDataConstants.KEY_HADOOP_FILESYSTEM_CLASS, ExternalDataConstants.CLASS_NAME_HDFS_FILESYSTEM);
+        conf.setClassLoader(HDFSInputStreamProvider.class.getClassLoader());
+        conf.set(ExternalDataConstants.KEY_HADOOP_INPUT_DIR, configuration.get(ExternalDataConstants.KEY_PATH).trim());
+        conf.set(ExternalDataConstants.KEY_HADOOP_INPUT_FORMAT, formatClassName);
+
+        // Enable local short circuit reads if user supplied the parameters
+        if (localShortCircuitSocketPath != null) {
+            conf.set(ExternalDataConstants.KEY_HADOOP_SHORT_CIRCUIT, "true");
+            conf.set(ExternalDataConstants.KEY_HADOOP_SOCKET_PATH, localShortCircuitSocketPath.trim());
+        }
+        return conf;
+    }
+
+    public static AlgebricksPartitionConstraint getPartitionConstraints(
+            AlgebricksPartitionConstraint clusterLocations) {
+        if (clusterLocations == null) {
+            ArrayList<String> locs = new ArrayList<String>();
+            Map<String, String[]> stores = AsterixAppContextInfo.getInstance().getMetadataProperties().getStores();
+            for (String i : stores.keySet()) {
+                int numIODevices = AsterixClusterProperties.INSTANCE.getNumberOfIODevices(i);
+                for (int k = 0; k < numIODevices; k++) {
+                    locs.add(i);
+                }
+            }
+            String[] cluster = new String[locs.size()];
+            cluster = locs.toArray(cluster);
+            clusterLocations = new AlgebricksAbsolutePartitionConstraint(cluster);
+        }
+        return clusterLocations;
+    }
+
+    public static RecordIdType getRecordIdType(Map<String, String> configuration) {
+        String inputFormatParameter = configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim();
+        switch (inputFormatParameter) {
+            case ExternalDataConstants.INPUT_FORMAT_TEXT:
+            case ExternalDataConstants.INPUT_FORMAT_SEQUENCE:
+                return RecordIdType.OFFSET;
+            case ExternalDataConstants.INPUT_FORMAT_RC:
+                return RecordIdType.RC;
+            default:
+                return null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolver.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolver.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolver.java
deleted file mode 100644
index 3a92b97..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolver.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-
-/**
- * A policy for resolving a name to a node controller id.
- */
-public interface INodeResolver {
-
-    /**
-     * Resolve a passed-in value to a node controller id.
-     * 
-     * @param value
-     *            string to be resolved
-     * @return resolved result (a node controller id)
-     * @throws AsterixException
-     */
-    public String resolveNode(String value) throws AsterixException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolverFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolverFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolverFactory.java
deleted file mode 100644
index b3c459b..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/util/INodeResolverFactory.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-/**
- * Factory for creating an instance of INodeResolver
- *
- * @see INodeResolver
- */
-public interface INodeResolverFactory {
-
-    /**
-     * Create an instance of {@link INodeResolver}
-     * 
-     * @return an instance of INodeResolver
-     */
-    public INodeResolver createNodeResolver();
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentitiyResolverFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentitiyResolverFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentitiyResolverFactory.java
index 776061f..582189a 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentitiyResolverFactory.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentitiyResolverFactory.java
@@ -18,6 +18,9 @@
  */
 package org.apache.asterix.external.util;
 
+import org.apache.asterix.external.api.INodeResolver;
+import org.apache.asterix.external.api.INodeResolverFactory;
+
 /**
  * Factory for creating an instance of @see {IdentityResolver}.
  * Identity resolver simply resolves a value to itself and is useful when value being resolved

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentityResolver.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentityResolver.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentityResolver.java
index 2b792b2..bda5f1e 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentityResolver.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/IdentityResolver.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.util;
 
+import org.apache.asterix.external.api.INodeResolver;
+
 /**
  * Identity resolver simply resolves a value to itself and is useful when value being resolved
  * is a node controller id.

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/util/NodeResolver.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/util/NodeResolver.java b/asterix-external-data/src/main/java/org/apache/asterix/external/util/NodeResolver.java
new file mode 100644
index 0000000..61764d7
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/util/NodeResolver.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.INodeResolver;
+import org.apache.asterix.om.util.AsterixRuntimeUtil;
+
+/**
+ * Resolves a value (DNS/IP Address) or a (Node Controller Id) to the id of a Node Controller running at the location.
+ * <p>
+ * The lookup tables are static and therefore shared by all instances; every read and write of them is performed
+ * while holding the monitor of {@code ncMap}.
+ */
+public class NodeResolver implements INodeResolver {
+    //TODO: change this call and replace by calling AsterixClusterProperties
+    private static final Random random = new Random();
+    // Address -> ids of the node controllers reported at that address. Guarded by the monitor of ncMap.
+    private static final Map<InetAddress, Set<String>> ncMap = new HashMap<InetAddress, Set<String>>();
+    // Union of all node controller ids found in ncMap. Guarded by the monitor of ncMap.
+    private static final Set<String> ncs = new HashSet<String>();
+
+    /**
+     * Resolves {@code value} to a node controller id.
+     * <p>
+     * If {@code value} can be resolved to an IP address, one of the node controllers registered at that address is
+     * chosen at random. Otherwise {@code value} is returned unchanged provided it is a known node controller id.
+     *
+     * @param value
+     *            a host name, an IP address or a node controller id
+     * @return the id of a node controller
+     * @throws AsterixException
+     *             if no node controller is registered at the resolved address, if {@code value} is neither
+     *             resolvable nor a known node controller id, or if refreshing the lookup tables fails
+     */
+    @Override
+    public String resolveNode(String value) throws AsterixException {
+        try {
+            // The name lookup is done before taking the lock so that it does not delay other callers.
+            InetAddress ipAddress = null;
+            UnknownHostException uhe = null;
+            try {
+                ipAddress = InetAddress.getByName(value);
+            } catch (UnknownHostException e) {
+                // Not fatal yet: the value may still be a node controller id.
+                uhe = e;
+            }
+            synchronized (ncMap) {
+                if (ncMap.isEmpty()) {
+                    updateNCs();
+                }
+                if (ipAddress == null) {
+                    return resolveNodeControllerId(value, uhe);
+                }
+                Set<String> nodeControllers = ncMap.get(ipAddress);
+                if (nodeControllers == null || nodeControllers.isEmpty()) {
+                    throw new AsterixException(" No node controllers found at the address: " + value);
+                }
+                String[] candidates = nodeControllers.toArray(new String[nodeControllers.size()]);
+                return candidates[random.nextInt(candidates.length)];
+            }
+        } catch (UnknownHostException e) {
+            // Keep the original exception as the cause so the failing lookup stays visible in the stack trace.
+            throw new AsterixException("Unable to resolve hostname '" + value + "' to an IP address", e);
+        } catch (AsterixException ae) {
+            throw ae;
+        } catch (Exception e) {
+            throw new AsterixException(e);
+        }
+    }
+
+    /**
+     * Treats {@code value} as a node controller id. Must be called while holding the monitor of {@code ncMap}.
+     *
+     * @param value
+     *            the candidate node controller id
+     * @param uhe
+     *            the failure of the preceding address lookup, attached as the cause if resolution fails
+     * @return {@code value} when it is a known node controller id
+     * @throws Exception
+     *             if the id is still unknown after one refresh, or if the refresh itself fails
+     */
+    private static String resolveNodeControllerId(String value, UnknownHostException uhe) throws Exception {
+        if (!ncs.contains(value)) {
+            // The cached ids may be stale, so reload them once before giving up.
+            updateNCs();
+            if (!ncs.contains(value)) {
+                throw new AsterixException("address passed: '" + value
+                        + "' couldn't be resolved to an ip address and is not an NC id. Existing NCs are "
+                        + ncs.toString(), uhe);
+            }
+        }
+        return value;
+    }
+
+    /**
+     * Reloads {@code ncMap} through {@link AsterixRuntimeUtil#getNodeControllerMap} and rebuilds {@code ncs} from
+     * the reloaded entries.
+     */
+    private static void updateNCs() throws Exception {
+        synchronized (ncMap) {
+            ncMap.clear();
+            AsterixRuntimeUtil.getNodeControllerMap(ncMap);
+            ncs.clear();
+            for (Entry<InetAddress, Set<String>> entry : ncMap.entrySet()) {
+                ncs.addAll(entry.getValue());
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/resources/adm.grammar
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/resources/adm.grammar b/asterix-external-data/src/main/resources/adm.grammar
new file mode 100644
index 0000000..1910436
--- /dev/null
+++ b/asterix-external-data/src/main/resources/adm.grammar
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# LEXER GENERATOR configuration file
+# ---------------------------------------
+# Place *first* the generic configuration
+# then list your grammar.
+
+PACKAGE:          org.apache.asterix.runtime.operators.file.adm
+LEXER_NAME:       AdmLexer
+
+TOKENS:
+
+# Constructor keyword tokens, one per type name.
+BOOLEAN_CONS   = string(boolean)
+INT8_CONS      = string(int8)
+INT16_CONS     = string(int16)
+INT32_CONS     = string(int32)
+# INT64_CONS is defined twice: both "int64" and "int" map to it.
+INT64_CONS     = string(int64)
+INT64_CONS     = string(int)
+FLOAT_CONS     = string(float)
+DOUBLE_CONS    = string(double)
+DATE_CONS      = string(date)
+DATETIME_CONS  = string(datetime)
+DURATION_CONS  = string(duration)
+STRING_CONS    = string(string)
+HEX_CONS       = string(hex)
+BASE64_CONS    = string(base64)
+POINT_CONS     = string(point)
+POINT3D_CONS   = string(point3d)
+LINE_CONS      = string(line)
+POLYGON_CONS   = string(polygon)
+RECTANGLE_CONS = string(rectangle)
+CIRCLE_CONS    = string(circle)
+TIME_CONS      = string(time)
+INTERVAL_TIME_CONS      = string(interval-time)
+INTERVAL_DATE_CONS      = string(interval-date)
+INTERVAL_DATETIME_CONS  = string(interval-datetime)
+YEAR_MONTH_DURATION_CONS = string(year-month-duration)
+DAY_TIME_DURATION_CONS   = string(day-time-duration)
+UUID_CONS      = string(uuid)
+
+# Keyword literals.
+NULL_LITERAL   = string(null)
+TRUE_LITERAL   = string(true)
+FALSE_LITERAL  = string(false)
+
+# Punctuation and delimiters.
+# NOTE(review): the comma is written escaped, presumably because ',' also separates rule elements
+# (see STRING_LITERAL below) - confirm against the lexer generator.
+CONSTRUCTOR_OPEN     = char(()
+CONSTRUCTOR_CLOSE    = char())
+START_RECORD         = char({)
+END_RECORD           = char(})
+COMMA                = char(\,)
+COLON                = char(:)
+START_ORDERED_LIST   = char([)
+END_ORDERED_LIST     = char(])
+START_UNORDERED_LIST = string({{)
+# END_UNORDERED_LIST  = }} is recognized as a double END_RECORD token
+
+STRING_LITERAL       = char("), anythingUntil(")
+
+# Integer literals: the sized variants are an INT_LITERAL followed by an i8/i16/i32/i64 suffix.
+INT_LITERAL          = signOrNothing(), digitSequence()
+INT8_LITERAL         = token(INT_LITERAL), string(i8)
+INT16_LITERAL        = token(INT_LITERAL), string(i16)
+INT32_LITERAL        = token(INT_LITERAL), string(i32)
+INT64_LITERAL        = token(INT_LITERAL), string(i64)
+
+# Exponent fragment shared by the DOUBLE_LITERAL rules below.
+# NOTE(review): the '@' prefix presumably marks a helper rule that is not emitted as a token of its own - confirm.
+@EXPONENT            = caseInsensitiveChar(e), signOrNothing(), digitSequence()
+
+# DOUBLE_LITERAL has four alternatives: ".digits", "digits.digits", "digits.digits" with an exponent,
+# and "digits" with an exponent; each may start with an optional sign.
+DOUBLE_LITERAL		 = signOrNothing(), char(.), digitSequence()
+DOUBLE_LITERAL		 = signOrNothing(), digitSequence(), char(.), digitSequence()
+DOUBLE_LITERAL		 = signOrNothing(), digitSequence(), char(.), digitSequence(), token(@EXPONENT)
+DOUBLE_LITERAL		 = signOrNothing(), digitSequence(), token(@EXPONENT)
+
+# A float literal is a DOUBLE_LITERAL followed by an 'f' in either case.
+FLOAT_LITERAL		 = token(DOUBLE_LITERAL), caseInsensitiveChar(f)

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
index 2c1f02a..db693a1 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.IExternalScalarFunction;
-import org.apache.asterix.external.library.IFunctionFactory;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class AddHashTagsFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
index bca508f..db717e6 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
@@ -23,6 +23,8 @@ import org.apache.asterix.external.library.java.JObjects.JPoint;
 import org.apache.asterix.external.library.java.JObjects.JRecord;
 import org.apache.asterix.external.library.java.JObjects.JString;
 import org.apache.asterix.external.library.java.JObjects.JUnorderedList;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JTypeTag;
 import org.apache.asterix.external.util.Datatypes;
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
index aec9e5d..a13da84 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class AddHashTagsInPlaceFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
index 2765225..399f0f9 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
@@ -21,6 +21,8 @@ package org.apache.asterix.external.library;
 import org.apache.asterix.external.library.java.JObjects.JRecord;
 import org.apache.asterix.external.library.java.JObjects.JString;
 import org.apache.asterix.external.library.java.JObjects.JUnorderedList;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JTypeTag;
 import org.apache.asterix.external.util.Datatypes;
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFactory.java
index dc0ab7a..9050462 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFactory.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.IExternalFunction;
-import org.apache.asterix.external.library.IFunctionFactory;
+import org.apache.asterix.external.api.IExternalFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class AllTypesFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFunction.java
index 12ce871..8f65bee 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/AllTypesFunction.java
@@ -35,6 +35,8 @@ import org.apache.asterix.external.library.java.JObjects.JRecord;
 import org.apache.asterix.external.library.java.JObjects.JString;
 import org.apache.asterix.external.library.java.JObjects.JTime;
 import org.apache.asterix.external.library.java.JObjects.JUnorderedList;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JTypeTag;
 
 public class AllTypesFunction implements IExternalScalarFunction {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFactory.java
index 7d1d3da..e15cb3d 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFactory.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.IExternalScalarFunction;
-import org.apache.asterix.external.library.IFunctionFactory;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class CapitalFinderFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFunction.java
index 21467af..969e109 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/CapitalFinderFunction.java
@@ -23,6 +23,8 @@ import java.util.Properties;
 
 import org.apache.asterix.external.library.java.JObjects.JRecord;
 import org.apache.asterix.external.library.java.JObjects.JString;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JTypeTag;
 
 public class CapitalFinderFunction implements IExternalScalarFunction {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFactory.java
index 21ad776..5d8126b 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFactory.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class EchoDelayFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFunction.java
index e564ca0..c115ac4 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/EchoDelayFunction.java
@@ -20,6 +20,8 @@ package org.apache.asterix.external.library;
 
 import java.util.Random;
 
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JObjects.JRecord;
 
 public class EchoDelayFunction implements IExternalScalarFunction {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFactory.java
index db3a5fa..5515ebd 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFactory.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.IExternalScalarFunction;
-import org.apache.asterix.external.library.IFunctionFactory;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class ParseTweetFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFunction.java
index caa0544..b9c736a 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/ParseTweetFunction.java
@@ -21,6 +21,8 @@ package org.apache.asterix.external.library;
 import org.apache.asterix.external.library.java.JObjects.JRecord;
 import org.apache.asterix.external.library.java.JObjects.JString;
 import org.apache.asterix.external.library.java.JObjects.JUnorderedList;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JTypeTag;
 
 public class ParseTweetFunction implements IExternalScalarFunction {



[05/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapterFactory.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapterFactory.java
deleted file mode 100644
index 2930662..0000000
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapterFactory.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.metadata.declared;
-
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-
-/**
- *
- * @author alamouda
- * This wrapper adapter factory is an adapter that is used when loading data into a dataset with a UUID primary key. The factory creates an adapter that
- * generates UUIDs and merge them into parsed records
- */
-public class PKGeneratingAdapterFactory implements IAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-    private final IAdapterFactory wrappedAdapterFactory;
-    private final RecordDescriptor inRecDesc;
-    private final RecordDescriptor outRecDesc;
-    private final ARecordType inRecType;
-    private final ARecordType outRecType;
-    private final int pkIndex;
-
-    public PKGeneratingAdapterFactory(IAdapterFactory wrappedAdapterFactory, RecordDescriptor inRecDesc,
-            RecordDescriptor outRecDesc, ARecordType inRecType, ARecordType outRecType, int pkIndex) {
-        this.wrappedAdapterFactory = wrappedAdapterFactory;
-        this.inRecDesc = inRecDesc;
-        this.outRecDesc = outRecDesc;
-        this.inRecType = inRecType;
-        this.outRecType = outRecType;
-        this.pkIndex = pkIndex;
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return wrappedAdapterFactory.getSupportedOperations();
-    }
-
-    @Override
-    public String getName() {
-        return "PKGeneratingAdapter[ " + wrappedAdapterFactory.getName() + " ]";
-    }
-
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return wrappedAdapterFactory.getPartitionConstraint();
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        IDatasourceAdapter wrappedAdapter = wrappedAdapterFactory.createAdapter(ctx, partition);
-        return new PKGeneratingAdapter(ctx, inRecDesc, outRecDesc, inRecType, outRecType, wrappedAdapter, pkIndex);
-    }
-
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        wrappedAdapterFactory.configure(configuration, outputType);        
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return wrappedAdapterFactory.getAdapterOutputType();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractDatasourceAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractDatasourceAdapter.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractDatasourceAdapter.java
index 7f6a567..d65468e 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractDatasourceAdapter.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractDatasourceAdapter.java
@@ -20,7 +20,7 @@ package org.apache.asterix.metadata.feeds;
 
 import java.util.Map;
 
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
 import org.apache.asterix.om.types.IAType;
 import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -29,7 +29,7 @@ import org.apache.hyracks.api.context.IHyracksTaskContext;
  * Represents the base class that is required to be extended by every
  * implementation of the IDatasourceAdapter interface.
  */
-public abstract class AbstractDatasourceAdapter implements IDatasourceAdapter {
+public abstract class AbstractDatasourceAdapter implements IDataSourceAdapter {
 
     private static final long serialVersionUID = 1L;
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractFeedDatasourceAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractFeedDatasourceAdapter.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractFeedDatasourceAdapter.java
index a87d2fb..c231ad9 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractFeedDatasourceAdapter.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AbstractFeedDatasourceAdapter.java
@@ -18,11 +18,11 @@
  */
 package org.apache.asterix.metadata.feeds;
 
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
 import org.apache.asterix.external.feeds.FeedPolicyEnforcer;
 
 
-public abstract class AbstractFeedDatasourceAdapter implements IDatasourceAdapter {
+public abstract class AbstractFeedDatasourceAdapter implements IDataSourceAdapter {
 
     private static final long serialVersionUID = 1L;
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterExecutor.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterExecutor.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterExecutor.java
index 604ef79..6c2f14c 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterExecutor.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterExecutor.java
@@ -24,7 +24,7 @@ import java.util.logging.Logger;
 import org.apache.asterix.common.feeds.DistributeFeedFrameWriter;
 import org.apache.asterix.common.feeds.api.IAdapterRuntimeManager;
 import org.apache.asterix.common.feeds.api.IAdapterRuntimeManager.State;
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
 
 public class AdapterExecutor implements Runnable {
 
@@ -32,11 +32,11 @@ public class AdapterExecutor implements Runnable {
 
     private final DistributeFeedFrameWriter writer;
 
-    private final IFeedAdapter adapter;
+    private final IDataSourceAdapter adapter;
 
     private final IAdapterRuntimeManager adapterManager;
 
-    public AdapterExecutor(int partition, DistributeFeedFrameWriter writer, IFeedAdapter adapter,
+    public AdapterExecutor(int partition, DistributeFeedFrameWriter writer, IDataSourceAdapter adapter,
             IAdapterRuntimeManager adapterManager) {
         this.writer = writer;
         this.adapter = adapter;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterRuntimeManager.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterRuntimeManager.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterRuntimeManager.java
index 0e8ce1a..aacb3da 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterRuntimeManager.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/AdapterRuntimeManager.java
@@ -27,7 +27,7 @@ import org.apache.asterix.common.feeds.DistributeFeedFrameWriter;
 import org.apache.asterix.common.feeds.FeedId;
 import org.apache.asterix.common.feeds.IngestionRuntime;
 import org.apache.asterix.common.feeds.api.IAdapterRuntimeManager;
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
 import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
 
 public class AdapterRuntimeManager implements IAdapterRuntimeManager {
@@ -36,7 +36,7 @@ public class AdapterRuntimeManager implements IAdapterRuntimeManager {
 
     private final FeedId feedId;
 
-    private final IFeedAdapter feedAdapter;
+    private final IDataSourceAdapter feedAdapter;
 
     private final IIntakeProgressTracker tracker;
 
@@ -50,7 +50,7 @@ public class AdapterRuntimeManager implements IAdapterRuntimeManager {
 
     private State state;
 
-    public AdapterRuntimeManager(FeedId feedId, IFeedAdapter feedAdapter, IIntakeProgressTracker tracker,
+    public AdapterRuntimeManager(FeedId feedId, IDataSourceAdapter feedAdapter, IIntakeProgressTracker tracker,
             DistributeFeedFrameWriter writer, int partition) {
         this.feedId = feedId;
         this.feedAdapter = feedAdapter;
@@ -91,7 +91,8 @@ public class AdapterRuntimeManager implements IAdapterRuntimeManager {
         return feedId + "[" + partition + "]";
     }
 
-    public IFeedAdapter getFeedAdapter() {
+    @Override
+    public IDataSourceAdapter getFeedAdapter() {
         return feedAdapter;
     }
 
@@ -99,10 +100,12 @@ public class AdapterRuntimeManager implements IAdapterRuntimeManager {
         return tracker;
     }
 
+    @Override
     public synchronized State getState() {
         return state;
     }
 
+    @Override
     public synchronized void setState(State state) {
         this.state = state;
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ExternalDataScanOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ExternalDataScanOperatorDescriptor.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ExternalDataScanOperatorDescriptor.java
index ba985bc..fee99d8 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ExternalDataScanOperatorDescriptor.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ExternalDataScanOperatorDescriptor.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.metadata.feeds;
 
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
 import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
@@ -55,7 +55,7 @@ public class ExternalDataScanOperatorDescriptor extends AbstractSingleActivityOp
 
             @Override
             public void initialize() throws HyracksDataException {
-                IDatasourceAdapter adapter = null;
+                IDataSourceAdapter adapter = null;
                 try {
                     writer.open();
                     adapter = adapterFactory.createAdapter(ctx, partition);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorDescriptor.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorDescriptor.java
index 486f45b..54c9af5 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorDescriptor.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorDescriptor.java
@@ -27,9 +27,9 @@ import org.apache.asterix.common.feeds.FeedPolicyAccessor;
 import org.apache.asterix.common.feeds.IngestionRuntime;
 import org.apache.asterix.common.feeds.SubscribableFeedRuntimeId;
 import org.apache.asterix.common.feeds.api.IFeedRuntime.FeedRuntimeType;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
-import org.apache.asterix.external.library.ExternalLibraryManager;
 import org.apache.asterix.common.feeds.api.IFeedSubscriptionManager;
+import org.apache.asterix.external.api.IAdapterFactory;
+import org.apache.asterix.external.library.ExternalLibraryManager;
 import org.apache.asterix.metadata.entities.PrimaryFeed;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -55,7 +55,7 @@ public class FeedIntakeOperatorDescriptor extends AbstractSingleActivityOperator
     private final FeedPolicyAccessor policyAccessor;
 
     /** The adaptor factory that is used to create an instance of the feed adaptor **/
-    private IFeedAdapterFactory adaptorFactory;
+    private IAdapterFactory adaptorFactory;
 
     /** The library that contains the adapter in use. **/
     private String adaptorLibraryName;
@@ -71,8 +71,8 @@ public class FeedIntakeOperatorDescriptor extends AbstractSingleActivityOperator
 
     private ARecordType adapterOutputType;
 
-    public FeedIntakeOperatorDescriptor(JobSpecification spec, PrimaryFeed primaryFeed,
-            IFeedAdapterFactory adapterFactory, ARecordType adapterOutputType, FeedPolicyAccessor policyAccessor) {
+    public FeedIntakeOperatorDescriptor(JobSpecification spec, PrimaryFeed primaryFeed, IAdapterFactory adapterFactory,
+            ARecordType adapterOutputType, FeedPolicyAccessor policyAccessor) {
         super(spec, 0, 1);
         this.feedId = new FeedId(primaryFeed.getDataverseName(), primaryFeed.getFeedName());
         this.adaptorFactory = adapterFactory;
@@ -113,16 +113,16 @@ public class FeedIntakeOperatorDescriptor extends AbstractSingleActivityOperator
                 policyAccessor);
     }
 
-    private IFeedAdapterFactory createExtenralAdapterFactory(IHyracksTaskContext ctx, int partition) throws Exception {
-        IFeedAdapterFactory adapterFactory = null;
+    private IAdapterFactory createExtenralAdapterFactory(IHyracksTaskContext ctx, int partition) throws Exception {
+        IAdapterFactory adapterFactory = null;
         ClassLoader classLoader = ExternalLibraryManager.getLibraryClassLoader(feedId.getDataverse(),
                 adaptorLibraryName);
         if (classLoader != null) {
-            adapterFactory = ((IFeedAdapterFactory) (classLoader.loadClass(adaptorFactoryClassName).newInstance()));
+            adapterFactory = ((IAdapterFactory) (classLoader.loadClass(adaptorFactoryClassName).newInstance()));
             adapterFactory.configure(adaptorConfiguration, adapterOutputType);
         } else {
-            String message = "Unable to create adapter as class loader not configured for library "
-                    + adaptorLibraryName + " in dataverse " + feedId.getDataverse();
+            String message = "Unable to create adapter as class loader not configured for library " + adaptorLibraryName
+                    + " in dataverse " + feedId.getDataverse();
             LOGGER.severe(message);
             throw new IllegalArgumentException(message);
         }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorNodePushable.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorNodePushable.java
index 404d37c..5085087 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorNodePushable.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedIntakeOperatorNodePushable.java
@@ -33,13 +33,13 @@ import org.apache.asterix.common.feeds.IngestionRuntime;
 import org.apache.asterix.common.feeds.SubscribableFeedRuntimeId;
 import org.apache.asterix.common.feeds.api.IAdapterRuntimeManager;
 import org.apache.asterix.common.feeds.api.IAdapterRuntimeManager.State;
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
 import org.apache.asterix.common.feeds.api.IFeedManager;
 import org.apache.asterix.common.feeds.api.IFeedRuntime.FeedRuntimeType;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
 import org.apache.asterix.common.feeds.api.IFeedSubscriptionManager;
 import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
 import org.apache.asterix.common.feeds.api.ISubscriberRuntime;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
@@ -59,14 +59,14 @@ public class FeedIntakeOperatorNodePushable extends AbstractUnaryOutputSourceOpe
     private final IFeedSubscriptionManager feedSubscriptionManager;
     private final IFeedManager feedManager;
     private final IHyracksTaskContext ctx;
-    private final IFeedAdapterFactory adapterFactory;
+    private final IAdapterFactory adapterFactory;
 
     private IngestionRuntime ingestionRuntime;
-    private IFeedAdapter adapter;
+    private IDataSourceAdapter adapter;
     private IIntakeProgressTracker tracker;
     private DistributeFeedFrameWriter feedFrameWriter;
 
-    public FeedIntakeOperatorNodePushable(IHyracksTaskContext ctx, FeedId feedId, IFeedAdapterFactory adapterFactory,
+    public FeedIntakeOperatorNodePushable(IHyracksTaskContext ctx, FeedId feedId, IAdapterFactory adapterFactory,
             int partition, IngestionRuntime ingestionRuntime, FeedPolicyAccessor policyAccessor) {
         this.ctx = ctx;
         this.feedId = feedId;
@@ -85,12 +85,13 @@ public class FeedIntakeOperatorNodePushable extends AbstractUnaryOutputSourceOpe
         try {
             if (ingestionRuntime == null) {
                 try {
-                    adapter = (IFeedAdapter) adapterFactory.createAdapter(ctx, partition);
-                    if (adapterFactory.isRecordTrackingEnabled()) {
-                        tracker = adapterFactory.createIntakeProgressTracker();
-                    }
+                    adapter = adapterFactory.createAdapter(ctx, partition);
+                    //TODO: Fix record tracking
+                    //                    if (adapterFactory.isRecordTrackingEnabled()) {
+                    //                        tracker = adapterFactory.createIntakeProgressTracker();
+                    //                    }
                 } catch (Exception e) {
-                    LOGGER.severe("Unable to create adapter : " + adapterFactory.getName() + "[" + partition + "]"
+                    LOGGER.severe("Unable to create adapter : " + adapterFactory.getAlias() + "[" + partition + "]"
                             + " Exception " + e);
                     throw new HyracksDataException(e);
                 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedUtil.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedUtil.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedUtil.java
index 72b7c15..5ed2876 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedUtil.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/FeedUtil.java
@@ -38,14 +38,14 @@ import org.apache.asterix.common.feeds.FeedPolicyAccessor;
 import org.apache.asterix.common.feeds.FeedRuntimeId;
 import org.apache.asterix.common.feeds.api.IFeedRuntime.FeedRuntimeType;
 import org.apache.asterix.common.functions.FunctionSignature;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.external.library.ExternalLibraryManager;
+import org.apache.asterix.external.provider.AdapterFactoryProvider;
+import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.metadata.MetadataException;
 import org.apache.asterix.metadata.MetadataManager;
 import org.apache.asterix.metadata.MetadataTransactionContext;
 import org.apache.asterix.metadata.bootstrap.MetadataConstants;
-import org.apache.asterix.metadata.declared.AqlMetadataProvider;
 import org.apache.asterix.metadata.entities.Dataset;
 import org.apache.asterix.metadata.entities.DatasourceAdapter;
 import org.apache.asterix.metadata.entities.DatasourceAdapter.AdapterType;
@@ -466,18 +466,21 @@ public class FeedUtil {
         return preProcessingRequired;
     }
 
-    public static Triple<IFeedAdapterFactory, ARecordType, AdapterType> getPrimaryFeedFactoryAndOutput(PrimaryFeed feed,
+    public static Triple<IAdapterFactory, ARecordType, AdapterType> getPrimaryFeedFactoryAndOutput(PrimaryFeed feed,
             FeedPolicyAccessor policyAccessor, MetadataTransactionContext mdTxnCtx) throws AlgebricksException {
 
         String adapterName = null;
         DatasourceAdapter adapterEntity = null;
         String adapterFactoryClassname = null;
-        IFeedAdapterFactory adapterFactory = null;
+        IAdapterFactory adapterFactory = null;
         ARecordType adapterOutputType = null;
-        Triple<IFeedAdapterFactory, ARecordType, AdapterType> feedProps = null;
+        Triple<IAdapterFactory, ARecordType, AdapterType> feedProps = null;
         AdapterType adapterType = null;
         try {
             adapterName = feed.getAdaptorName();
+            Map<String, String> configuration = feed.getAdaptorConfiguration();
+            configuration.putAll(policyAccessor.getFeedPolicy());
+            adapterOutputType = getOutputType(feed, configuration);
             adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, MetadataConstants.METADATA_DATAVERSE_NAME,
                     adapterName);
             if (adapterEntity == null) {
@@ -488,30 +491,24 @@ public class FeedUtil {
                 adapterFactoryClassname = adapterEntity.getClassname();
                 switch (adapterType) {
                     case INTERNAL:
-                        adapterFactory = (IFeedAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+                        adapterFactory = (IAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
                         break;
                     case EXTERNAL:
                         String[] anameComponents = adapterName.split("#");
                         String libraryName = anameComponents[0];
                         ClassLoader cl = ExternalLibraryManager.getLibraryClassLoader(feed.getDataverseName(),
                                 libraryName);
-                        adapterFactory = (IFeedAdapterFactory) cl.loadClass(adapterFactoryClassname).newInstance();
+                        adapterFactory = (IAdapterFactory) cl.loadClass(adapterFactoryClassname).newInstance();
                         break;
                 }
+                adapterFactory.configure(configuration, adapterOutputType);
             } else {
-                adapterFactoryClassname = AqlMetadataProvider.adapterFactoryMapping.get(adapterName);
-                if (adapterFactoryClassname == null) {
-                    adapterFactoryClassname = adapterName;
-                }
-                adapterFactory = (IFeedAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+                configuration.put(ExternalDataConstants.KEY_DATAVERSE, feed.getDataverseName());
+                adapterFactory = AdapterFactoryProvider.getAdapterFactory(adapterName, configuration,
+                        adapterOutputType);
                 adapterType = AdapterType.INTERNAL;
             }
-
-            Map<String, String> configuration = feed.getAdaptorConfiguration();
-            configuration.putAll(policyAccessor.getFeedPolicy());
-            adapterOutputType = getOutputType(feed, configuration);
-            adapterFactory.configure(configuration, adapterOutputType);
-            feedProps = new Triple<IFeedAdapterFactory, ARecordType, AdapterType>(adapterFactory, adapterOutputType,
+            feedProps = new Triple<IAdapterFactory, ARecordType, AdapterType>(adapterFactory, adapterOutputType,
                     adapterType);
         } catch (Exception e) {
             e.printStackTrace();
@@ -522,7 +519,7 @@ public class FeedUtil {
 
     private static ARecordType getOutputType(PrimaryFeed feed, Map<String, String> configuration) throws Exception {
         ARecordType outputType = null;
-        String fqOutputType = configuration.get(IAdapterFactory.KEY_TYPE_NAME);
+        String fqOutputType = configuration.get(ExternalDataConstants.KEY_TYPE_NAME);
 
         if (fqOutputType == null) {
             throw new IllegalArgumentException("No output type specified");
@@ -538,7 +535,8 @@ public class FeedUtil {
             dataverseName = dataverseAndType[0];
             datatypeName = dataverseAndType[1];
         } else
-            throw new IllegalArgumentException("Invalid value for the parameter " + IAdapterFactory.KEY_TYPE_NAME);
+            throw new IllegalArgumentException(
+                    "Invalid value for the parameter " + ExternalDataConstants.KEY_TYPE_NAME);
 
         MetadataTransactionContext ctx = null;
         MetadataManager.INSTANCE.acquireReadLatch();
@@ -569,7 +567,7 @@ public class FeedUtil {
         Feed primaryFeed = MetadataManager.INSTANCE.getFeed(mdTxnCtx, feed.getDataverseName(), primaryFeedName);
         FunctionSignature appliedFunction = primaryFeed.getAppliedFunction();
         if (appliedFunction == null) {
-            Triple<IFeedAdapterFactory, ARecordType, AdapterType> result = getPrimaryFeedFactoryAndOutput(
+            Triple<IAdapterFactory, ARecordType, AdapterType> result = getPrimaryFeedFactoryAndOutput(
                     (PrimaryFeed) primaryFeed, policyAccessor, mdTxnCtx);
             outputType = result.second.getTypeName();
         } else {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ITypedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ITypedAdapterFactory.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ITypedAdapterFactory.java
index 7a26560..f35c21f 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ITypedAdapterFactory.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/feeds/ITypedAdapterFactory.java
@@ -20,7 +20,7 @@ package org.apache.asterix.metadata.feeds;
 
 import java.util.Map;
 
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.om.types.ARecordType;
 
 public interface ITypedAdapterFactory extends IAdapterFactory {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-om/src/main/java/org/apache/asterix/formats/base/IDataFormat.java
----------------------------------------------------------------------
diff --git a/asterix-om/src/main/java/org/apache/asterix/formats/base/IDataFormat.java b/asterix-om/src/main/java/org/apache/asterix/formats/base/IDataFormat.java
index 8dcbb77..4872ede 100644
--- a/asterix-om/src/main/java/org/apache/asterix/formats/base/IDataFormat.java
+++ b/asterix-om/src/main/java/org/apache/asterix/formats/base/IDataFormat.java
@@ -20,7 +20,6 @@ package org.apache.asterix.formats.base;
 
 import java.util.List;
 
-import org.apache.asterix.common.parse.IParseFileSplitsDecl;
 import org.apache.asterix.om.functions.IFunctionDescriptor;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.IAType;
@@ -43,7 +42,6 @@ import org.apache.hyracks.algebricks.data.ITypeTraitProvider;
 import org.apache.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
 import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
 import org.apache.hyracks.api.dataflow.value.IPredicateEvaluatorFactoryProvider;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
 
 public interface IDataFormat {
     public void registerRuntimeFunctions() throws AlgebricksException;
@@ -74,12 +72,8 @@ public interface IDataFormat {
     public Triple<ICopyEvaluatorFactory, ScalarFunctionCallExpression, IAType> partitioningEvaluatorFactory(
             ARecordType recType, List<String> fldName) throws AlgebricksException;
 
-    public ICopyEvaluatorFactory getFieldAccessEvaluatorFactory(ARecordType recType, List<String> fldName, int recordColumn)
-            throws AlgebricksException;
-
-    public ITupleParserFactory createTupleParser(ARecordType recType, IParseFileSplitsDecl decl);
-
-    public ITupleParserFactory createTupleParser(ARecordType recType, boolean isDelimited, char delimiter, char quote, boolean hasHeader);
+    public ICopyEvaluatorFactory getFieldAccessEvaluatorFactory(ARecordType recType, List<String> fldName,
+            int recordColumn) throws AlgebricksException;
 
     public IFunctionDescriptor resolveFunction(ILogicalExpression expr, IVariableTypeEnvironment typeEnvironment)
             throws AlgebricksException;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixRuntimeUtil.java
----------------------------------------------------------------------
diff --git a/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixRuntimeUtil.java b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixRuntimeUtil.java
index dcc4891..51c3802 100644
--- a/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixRuntimeUtil.java
+++ b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixRuntimeUtil.java
@@ -52,4 +52,8 @@ public class AsterixRuntimeUtil {
         AsterixAppContextInfo.getInstance().getCCApplicationContext().getCCContext().getIPAddressNodeMap(map);
         return map;
     }
+
+    public static void getNodeControllerMap(Map<InetAddress, Set<String>> map) throws Exception {
+        AsterixAppContextInfo.getInstance().getCCApplicationContext().getCCContext().getIPAddressNodeMap(map);
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-runtime/pom.xml b/asterix-runtime/pom.xml
index 2713dd9..4f2d0b2 100644
--- a/asterix-runtime/pom.xml
+++ b/asterix-runtime/pom.xml
@@ -17,171 +17,63 @@
  ! under the License.
  !-->
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-	<parent>
-		<artifactId>asterix</artifactId>
-		<groupId>org.apache.asterix</groupId>
-		<version>0.8.8-SNAPSHOT</version>
-	</parent>
-	<artifactId>asterix-runtime</artifactId>
-
-  <licenses>
-    <license>
-      <name>Apache License, Version 2.0</name>
-      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-      <distribution>repo</distribution>
-      <comments>A business-friendly OSS license</comments>
-    </license>
-  </licenses>
-
-
-	<build>
-		<plugins>
-			<plugin>
-	        	<groupId>org.apache.asterix</groupId>
-	        	<artifactId>lexer-generator-maven-plugin</artifactId>
-	        	<version>0.8.8-SNAPSHOT</version>
-	        	<configuration>
-	        	  	<grammarFile>src/main/resources/adm.grammar</grammarFile>
-	        	  	<outputDir>${project.build.directory}/generated-sources/org/apache/asterix/runtime/operators/file/adm</outputDir>
-	        	</configuration>
-	        	<executions>
-		          	<execution>
-		          		<id>generate-lexer</id>
-            			<phase>generate-sources</phase>
-			            <goals>
-	              			<goal>generate-lexer</goal>
-	            		</goals>
-	          		</execution>
-	        	</executions>
-	      	</plugin>
-	      	 <plugin>
-			    <groupId>org.codehaus.mojo</groupId>
-			    <artifactId>build-helper-maven-plugin</artifactId>
-			    <version>1.9</version>
-			    <executions>
-			        <execution>
-			            <id>add-source</id>
-			            <phase>generate-sources</phase>
-			            <goals>
-			                <goal>add-source</goal>
-			            </goals>
-			            <configuration>
-			                <sources>
-			                    <source>${project.build.directory}/generated-sources/</source>
-			                </sources>
-			            </configuration>
-			        </execution>
-			    </executions>
- 			</plugin>
-	    </plugins>
-		<pluginManagement>
-			<plugins>
-				<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
-				<plugin>
-					<groupId>org.eclipse.m2e</groupId>
-					<artifactId>lifecycle-mapping</artifactId>
-					<version>1.0.0</version>
-					<configuration>
-						<lifecycleMappingMetadata>
-							<pluginExecutions>
-								<pluginExecution>
-									<pluginExecutionFilter>
-										<groupId>
-											org.apache.asterix
-										</groupId>
-										<artifactId>
-											lexer-generator-maven-plugin
-										</artifactId>
-										<versionRange>
-											[0.1,)
-										</versionRange>
-										<goals>
-											<goal>generate-lexer</goal>
-										</goals>
-									</pluginExecutionFilter>
-									<action>
-										<execute>
-											<runOnIncremental>false</runOnIncremental>
-										</execute>
-									</action>
-								</pluginExecution>
-								<pluginExecution>
-									<pluginExecutionFilter>
-										<groupId>
-											org.codehaus.mojo
-										</groupId>
-										<artifactId>
-											build-helper-maven-plugin
-										</artifactId>
-										<versionRange>
-											[1.7,)
-										</versionRange>
-										<goals>
-											<goal>add-source</goal>
-										</goals>
-									</pluginExecutionFilter>
-									<action>
-										<ignore />
-									</action>
-								</pluginExecution>
-							</pluginExecutions>
-						</lifecycleMappingMetadata>
-					</configuration>
-				</plugin>
-			</plugins>
-		</pluginManagement>
-	</build>
-
-	<dependencies>
-		<dependency>
-			<groupId>org.apache.asterix</groupId>
-			<artifactId>asterix-om</artifactId>
-			<version>0.8.8-SNAPSHOT</version>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.asterix</groupId>
-			<artifactId>asterix-fuzzyjoin</artifactId>
-			<version>0.8.8-SNAPSHOT</version>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hyracks</groupId>
-			<artifactId>hyracks-storage-am-btree</artifactId>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.asterix</groupId>
-			<artifactId>asterix-transactions</artifactId>
-			<version>0.8.8-SNAPSHOT</version>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.twitter4j</groupId>
-			<artifactId>twitter4j-core</artifactId>
-			<version>[4.0,)</version>
-		</dependency>
-		<dependency>
-			<groupId>org.twitter4j</groupId>
-			<artifactId>twitter4j-stream</artifactId>
-			<version>[4.0,)</version>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hadoop</groupId>
-			<artifactId>hadoop-client</artifactId>
-			<type>jar</type>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-		         <groupId>org.apache.hyracks</groupId>
-		         <artifactId>hyracks-api</artifactId>
-	        </dependency>
-	    <dependency>
-            <groupId>com.e-movimento.tinytools</groupId>
-            <artifactId>privilegedaccessor</artifactId>
-            <version>1.2.2</version>
-            <scope>test</scope>
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>asterix</artifactId>
+        <groupId>org.apache.asterix</groupId>
+        <version>0.8.8-SNAPSHOT</version>
+    </parent>
+    <artifactId>asterix-runtime</artifactId>
+    <licenses>
+        <license>
+            <name>Apache License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+            <comments>A business-friendly OSS license</comments>
+        </license>
+    </licenses>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.asterix</groupId>
+            <artifactId>asterix-om</artifactId>
+            <version>0.8.8-SNAPSHOT</version>
+            <scope>compile</scope>
         </dependency>
-	</dependencies>
-
-</project>
+        <dependency>
+            <groupId>org.apache.asterix</groupId>
+            <artifactId>asterix-fuzzyjoin</artifactId>
+            <version>0.8.8-SNAPSHOT</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-storage-am-btree</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.asterix</groupId>
+            <artifactId>asterix-transactions</artifactId>
+            <version>0.8.8-SNAPSHOT</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.twitter4j</groupId>
+            <artifactId>twitter4j-core</artifactId>
+            <version>[4.0,)</version>
+        </dependency>
+        <dependency>
+            <groupId>org.twitter4j</groupId>
+            <artifactId>twitter4j-stream</artifactId>
+            <version>[4.0,)</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-api</artifactId>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorDescriptor.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorDescriptor.java
deleted file mode 100644
index e6e7540..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorDescriptor.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.external;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
-import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
-import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import org.apache.hyracks.storage.common.IStorageManagerInterface;
-
-public class ExternalBTreeSearchOperatorDescriptor extends BTreeSearchOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    public ExternalBTreeSearchOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
-            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
-            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
-            IBinaryComparatorFactory[] comparatorFactories, int[] bloomFilterKeyFields, int[] lowKeyFields,
-            int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive,
-            IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput, boolean retainNull,
-            INullWriterFactory iNullWriterFactory, ISearchOperationCallbackFactory searchOpCallbackProvider) {
-        super(spec, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, bloomFilterKeyFields, lowKeyFields, highKeyFields, lowKeyInclusive,
-                highKeyInclusive, dataflowHelperFactory, retainInput, retainNull, iNullWriterFactory,
-                searchOpCallbackProvider, null, null);
-    }
-
-    @Override
-    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
-            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
-        return new ExternalBTreeSearchOperatorNodePushable(this, ctx, partition, recordDescProvider, lowKeyFields,
-                highKeyFields, lowKeyInclusive, highKeyInclusive);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorNodePushable.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorNodePushable.java
deleted file mode 100644
index 97b3d6a..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalBTreeSearchOperatorNodePushable.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.external;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-import org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorNodePushable;
-import org.apache.hyracks.storage.am.common.api.ISearchOperationCallback;
-import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelper;
-import org.apache.hyracks.storage.am.lsm.btree.impls.ExternalBTreeWithBuddy;
-
-public class ExternalBTreeSearchOperatorNodePushable extends BTreeSearchOperatorNodePushable {
-
-    public ExternalBTreeSearchOperatorNodePushable(ExternalBTreeSearchOperatorDescriptor opDesc,
-            IHyracksTaskContext ctx, int partition, IRecordDescriptorProvider recordDescProvider, int[] lowKeyFields,
-            int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive) {
-        super(opDesc, ctx, partition, recordDescProvider, lowKeyFields, highKeyFields, lowKeyInclusive,
-                highKeyInclusive, null, null);
-    }
-
-    // We override the open function to search a specific version of the index
-    @Override
-    public void open() throws HyracksDataException {
-        writer.open();
-        ExternalBTreeWithBuddyDataflowHelper dataFlowHelper = (ExternalBTreeWithBuddyDataflowHelper) indexHelper;
-        accessor = new FrameTupleAccessor(inputRecDesc);
-        dataFlowHelper.open();
-        index = indexHelper.getIndexInstance();
-        if (retainNull) {
-            int fieldCount = getFieldCount();
-            nullTupleBuild = new ArrayTupleBuilder(fieldCount);
-            DataOutput out = nullTupleBuild.getDataOutput();
-            for (int i = 0; i < fieldCount; i++) {
-                try {
-                    nullWriter.writeNull(out);
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-                nullTupleBuild.addFieldEndOffset();
-            }
-        } else {
-            nullTupleBuild = null;
-        }
-        ExternalBTreeWithBuddy externalIndex = (ExternalBTreeWithBuddy) index;
-        try {
-            searchPred = createSearchPredicate();
-            tb = new ArrayTupleBuilder(recordDesc.getFieldCount());
-            dos = tb.getDataOutput();
-            appender = new FrameTupleAppender(new VSizeFrame(ctx));
-            ISearchOperationCallback searchCallback = opDesc.getSearchOpCallbackFactory()
-                    .createSearchOperationCallback(indexHelper.getResourceID(), ctx);
-            // The next line is the reason we override this method
-            indexAccessor = externalIndex.createAccessor(searchCallback, dataFlowHelper.getTargetVersion());
-            cursor = createCursor();
-            if (retainInput) {
-                frameTuple = new FrameTupleReference();
-            }
-        } catch (Throwable th) {
-            throw new HyracksDataException(th);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorDescriptor.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorDescriptor.java
deleted file mode 100644
index d34c993..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorDescriptor.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.external;
-
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
-import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
-import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
-import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelperFactory;
-import org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorDescriptor;
-import org.apache.hyracks.storage.common.IStorageManagerInterface;
-
-public class ExternalRTreeSearchOperatorDescriptor extends RTreeSearchOperatorDescriptor {
-
-    private static final long serialVersionUID = 1L;
-
-    public ExternalRTreeSearchOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc,
-            IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lifecycleManagerProvider,
-            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
-            IBinaryComparatorFactory[] comparatorFactories, int[] keyFields,
-            ExternalRTreeDataflowHelperFactory dataflowHelperFactory, boolean retainInput, boolean retainNull,
-            INullWriterFactory iNullWriterFactory, ISearchOperationCallbackFactory searchOpCallbackFactory) {
-        super(spec, recDesc, storageManager, lifecycleManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, keyFields, dataflowHelperFactory, retainInput, retainNull, iNullWriterFactory,
-                searchOpCallbackFactory, null, null);
-    }
-
-    @Override
-    public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
-            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
-        return new ExternalRTreeSearchOperatorNodePushable(this, ctx, partition, recordDescProvider, keyFields);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorNodePushable.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorNodePushable.java
deleted file mode 100644
index 3fb5609..0000000
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/external/ExternalRTreeSearchOperatorNodePushable.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.runtime.external;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-import org.apache.hyracks.storage.am.common.api.ISearchOperationCallback;
-import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
-import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelper;
-import org.apache.hyracks.storage.am.lsm.rtree.impls.ExternalRTree;
-import org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorNodePushable;
-
-public class ExternalRTreeSearchOperatorNodePushable extends RTreeSearchOperatorNodePushable {
-
-    public ExternalRTreeSearchOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
-            int partition, IRecordDescriptorProvider recordDescProvider, int[] keyFields) {
-        super(opDesc, ctx, partition, recordDescProvider, keyFields, null, null);
-    }
-
-    // We override this method to specify the searched version of the index
-    @Override
-    public void open() throws HyracksDataException {
-        writer.open();
-        accessor = new FrameTupleAccessor(inputRecDesc);
-        indexHelper.open();
-        ExternalRTreeDataflowHelper rTreeDataflowHelper = (ExternalRTreeDataflowHelper) indexHelper;
-        index = indexHelper.getIndexInstance();
-        if (retainNull) {
-            int fieldCount = getFieldCount();
-            nullTupleBuild = new ArrayTupleBuilder(fieldCount);
-            DataOutput out = nullTupleBuild.getDataOutput();
-            for (int i = 0; i < fieldCount; i++) {
-                try {
-                    nullWriter.writeNull(out);
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-                nullTupleBuild.addFieldEndOffset();
-            }
-        } else {
-            nullTupleBuild = null;
-        }
-        ExternalRTree rTreeIndex = (ExternalRTree) index;
-        try {
-            searchPred = createSearchPredicate();
-            tb = new ArrayTupleBuilder(recordDesc.getFieldCount());
-            dos = tb.getDataOutput();
-            appender = new FrameTupleAppender(new VSizeFrame(ctx));
-            ISearchOperationCallback searchCallback = opDesc.getSearchOpCallbackFactory()
-                    .createSearchOperationCallback(indexHelper.getResourceID(), ctx);
-            // The next line is the reason we override this method
-            indexAccessor = rTreeIndex.createAccessor(searchCallback, rTreeDataflowHelper.getTargetVersion());
-            cursor = createCursor();
-            if (retainInput) {
-                frameTuple = new FrameTupleReference();
-            }
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-runtime/src/main/java/org/apache/asterix/runtime/formats/NonTaggedDataFormat.java
----------------------------------------------------------------------
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/formats/NonTaggedDataFormat.java b/asterix-runtime/src/main/java/org/apache/asterix/runtime/formats/NonTaggedDataFormat.java
index ef7a6c8..309f677 100644
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/formats/NonTaggedDataFormat.java
+++ b/asterix-runtime/src/main/java/org/apache/asterix/runtime/formats/NonTaggedDataFormat.java
@@ -18,11 +18,19 @@
  */
 package org.apache.asterix.runtime.formats;
 
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.asterix.common.config.GlobalConfig;
 import org.apache.asterix.common.exceptions.AsterixRuntimeException;
-import org.apache.asterix.common.parse.IParseFileSplitsDecl;
 import org.apache.asterix.dataflow.data.nontagged.AqlNullWriterFactory;
 import org.apache.asterix.formats.base.IDataFormat;
+import org.apache.asterix.formats.nontagged.AqlADMPrinterFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlBinaryBooleanInspectorImpl;
 import org.apache.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlBinaryHashFunctionFactoryProvider;
@@ -33,7 +41,6 @@ import org.apache.asterix.formats.nontagged.AqlCleanJSONPrinterFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlLosslessJSONPrinterFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlNormalizedKeyComputerFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlPredicateEvaluatorFactoryProvider;
-import org.apache.asterix.formats.nontagged.AqlADMPrinterFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
 import org.apache.asterix.formats.nontagged.AqlTypeTraitProvider;
 import org.apache.asterix.om.base.ABoolean;
@@ -166,7 +173,6 @@ import org.apache.asterix.runtime.evaluators.functions.AnyCollectionMemberDescri
 import org.apache.asterix.runtime.evaluators.functions.CastListDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.CastRecordDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.CodePointToStringDescriptor;
-import org.apache.asterix.runtime.evaluators.functions.StringContainsDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.CountHashedGramTokensDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.CountHashedWordTokensDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.CreateCircleDescriptor;
@@ -182,7 +188,6 @@ import org.apache.asterix.runtime.evaluators.functions.EditDistanceDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.EditDistanceListIsFilterable;
 import org.apache.asterix.runtime.evaluators.functions.EditDistanceStringIsFilterable;
 import org.apache.asterix.runtime.evaluators.functions.EmbedTypeDescriptor;
-import org.apache.asterix.runtime.evaluators.functions.StringEndsWithDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.FlowRecordDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.FuzzyEqDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.GetItemDescriptor;
@@ -193,7 +198,6 @@ import org.apache.asterix.runtime.evaluators.functions.InjectFailureDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.IsNullDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.IsSystemNullDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.LenDescriptor;
-import org.apache.asterix.runtime.evaluators.functions.StringLikeDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.NotDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.NotNullDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.NumericAbsDescriptor;
@@ -223,16 +227,19 @@ import org.apache.asterix.runtime.evaluators.functions.SpatialAreaDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.SpatialCellDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.SpatialDistanceDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.SpatialIntersectDescriptor;
-import org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringConcatDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringContainsDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringEndsWithDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringEqualDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringJoinDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringLengthDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringLikeDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringLowerCaseDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringMatchesDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringMatchesWithFlagDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringReplaceDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringReplaceWithFlagsDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringToCodePointDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringUpperCaseDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.Substring2Descriptor;
@@ -305,12 +312,12 @@ import org.apache.asterix.runtime.evaluators.functions.temporal.PrintTimeDescrip
 import org.apache.asterix.runtime.evaluators.functions.temporal.TimeFromDatetimeDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.temporal.TimeFromUnixTimeInMsDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.temporal.YearMonthDurationComparatorDecriptor;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
 import org.apache.asterix.runtime.runningaggregates.std.TidRunningAggregateDescriptor;
 import org.apache.asterix.runtime.unnestingfunctions.std.RangeDescriptor;
 import org.apache.asterix.runtime.unnestingfunctions.std.ScanCollectionDescriptor;
 import org.apache.asterix.runtime.unnestingfunctions.std.SubsetCollectionDescriptor;
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
 import org.apache.hyracks.algebricks.common.utils.Triple;
@@ -349,17 +356,6 @@ import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.LongParserFactory;
 import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
 
 public class NonTaggedDataFormat implements IDataFormat {
 
@@ -386,6 +382,7 @@ public class NonTaggedDataFormat implements IDataFormat {
     public NonTaggedDataFormat() {
     }
 
+    @Override
     public void registerRuntimeFunctions() throws AlgebricksException {
 
         if (registered) {
@@ -739,13 +736,13 @@ public class NonTaggedDataFormat implements IDataFormat {
                     fieldFound = true;
                     try {
                         AInt32 ai = new AInt32(i);
-                        AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(ai.getType()).serialize(
-                                ai, dos);
+                        AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(ai.getType()).serialize(ai,
+                                dos);
                     } catch (HyracksDataException e) {
                         throw new AlgebricksException(e);
                     }
-                    ICopyEvaluatorFactory fldIndexEvalFactory = new ConstantEvalFactory(Arrays.copyOf(
-                            abvs.getByteArray(), abvs.getLength()));
+                    ICopyEvaluatorFactory fldIndexEvalFactory = new ConstantEvalFactory(
+                            Arrays.copyOf(abvs.getByteArray(), abvs.getLength()));
 
                     evalFactory = new FieldAccessByIndexEvalFactory(recordEvalFactory, fldIndexEvalFactory, recType);
                     return evalFactory;
@@ -775,8 +772,8 @@ public class NonTaggedDataFormat implements IDataFormat {
             if (fldName.size() > 1) {
                 evalFactory = new FieldAccessNestedEvalFactory(recordEvalFactory, recType, fldName);
             } else {
-                evalFactory = FieldAccessByNameDescriptor.FACTORY.createFunctionDescriptor().createEvaluatorFactory(
-                        factories);
+                evalFactory = FieldAccessByNameDescriptor.FACTORY.createFunctionDescriptor()
+                        .createEvaluatorFactory(factories);
             }
             return evalFactory;
         } else
@@ -800,8 +797,8 @@ public class NonTaggedDataFormat implements IDataFormat {
         } catch (HyracksDataException e) {
             throw new AlgebricksException(e);
         }
-        ICopyEvaluatorFactory dimensionEvalFactory = new ConstantEvalFactory(Arrays.copyOf(abvs1.getByteArray(),
-                abvs1.getLength()));
+        ICopyEvaluatorFactory dimensionEvalFactory = new ConstantEvalFactory(
+                Arrays.copyOf(abvs1.getByteArray(), abvs1.getLength()));
 
         for (int i = 0; i < numOfFields; i++) {
             ArrayBackedValueStorage abvs2 = new ArrayBackedValueStorage();
@@ -812,8 +809,8 @@ public class NonTaggedDataFormat implements IDataFormat {
             } catch (HyracksDataException e) {
                 throw new AlgebricksException(e);
             }
-            ICopyEvaluatorFactory coordinateEvalFactory = new ConstantEvalFactory(Arrays.copyOf(abvs2.getByteArray(),
-                    abvs2.getLength()));
+            ICopyEvaluatorFactory coordinateEvalFactory = new ConstantEvalFactory(
+                    Arrays.copyOf(abvs2.getByteArray(), abvs2.getLength()));
 
             evalFactories[i] = new CreateMBREvalFactory(evalFactory, dimensionEvalFactory, coordinateEvalFactory);
         }
@@ -838,13 +835,13 @@ public class NonTaggedDataFormat implements IDataFormat {
                     DataOutput dos = abvs.getDataOutput();
                     try {
                         AInt32 ai = new AInt32(i);
-                        AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(ai.getType()).serialize(
-                                ai, dos);
+                        AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(ai.getType()).serialize(ai,
+                                dos);
                     } catch (HyracksDataException e) {
                         throw new AlgebricksException(e);
                     }
-                    ICopyEvaluatorFactory fldIndexEvalFactory = new ConstantEvalFactory(Arrays.copyOf(
-                            abvs.getByteArray(), abvs.getLength()));
+                    ICopyEvaluatorFactory fldIndexEvalFactory = new ConstantEvalFactory(
+                            Arrays.copyOf(abvs.getByteArray(), abvs.getLength()));
                     ICopyEvaluatorFactory evalFactory = new FieldAccessByIndexEvalFactory(recordEvalFactory,
                             fldIndexEvalFactory, recType);
                     IFunctionInfo finfoAccess = AsterixBuiltinFunctions
@@ -852,8 +849,8 @@ public class NonTaggedDataFormat implements IDataFormat {
 
                     ScalarFunctionCallExpression partitionFun = new ScalarFunctionCallExpression(finfoAccess,
                             new MutableObject<ILogicalExpression>(new VariableReferenceExpression(METADATA_DUMMY_VAR)),
-                            new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(
-                                    new AInt32(i)))));
+                            new MutableObject<ILogicalExpression>(
+                                    new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
                     return new Triple<ICopyEvaluatorFactory, ScalarFunctionCallExpression, IAType>(evalFactory,
                             partitionFun, recType.getFieldTypes()[i]);
                 }
@@ -903,12 +900,15 @@ public class NonTaggedDataFormat implements IDataFormat {
     }
 
     interface FunctionTypeInferer {
-        void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException;
+        void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                throws AlgebricksException;
     }
 
     void registerTypeInferers() {
         functionTypeInferers.put(AsterixBuiltinFunctions.LISTIFY, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) expr;
                 if (f.getArguments().size() == 0) {
                     ((ListifyAggregateDescriptor) fd).reset(new AOrderedListType(null, null));
@@ -926,7 +926,9 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.RECORD_MERGE, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) expr;
                 IAType outType = (IAType) context.getType(expr);
                 IAType type0 = (IAType) context.getType(f.getArguments().get(0).getValue());
@@ -935,7 +937,9 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.CAST_RECORD, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
                 ARecordType rt = (ARecordType) TypeComputerUtilities.getRequiredType(funcExpr);
                 IAType it = (IAType) context.getType(funcExpr.getArguments().get(0).getValue());
@@ -946,7 +950,9 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.CAST_LIST, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
                 AbstractCollectionType rt = (AbstractCollectionType) TypeComputerUtilities.getRequiredType(funcExpr);
                 IAType it = (IAType) context.getType(funcExpr.getArguments().get(0).getValue());
@@ -957,13 +963,18 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.FLOW_RECORD, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
-                ARecordType it = (ARecordType) TypeComputerUtilities.getInputType((AbstractFunctionCallExpression) expr);
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
+                ARecordType it = (ARecordType) TypeComputerUtilities
+                        .getInputType((AbstractFunctionCallExpression) expr);
                 ((FlowRecordDescriptor) fd).reset(it);
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.OPEN_RECORD_CONSTRUCTOR, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 ARecordType rt = (ARecordType) context.getType(expr);
                 ((OpenRecordConstructorDescriptor) fd).reset(rt,
                         computeOpenFields((AbstractFunctionCallExpression) expr, rt));
@@ -976,8 +987,8 @@ public class NonTaggedDataFormat implements IDataFormat {
                     Mutable<ILogicalExpression> argRef = expr.getArguments().get(2 * i);
                     ILogicalExpression arg = argRef.getValue();
                     if (arg.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
-                        String fn = ((AString) ((AsterixConstantValue) ((ConstantExpression) arg).getValue()).getObject())
-                                .getStringValue();
+                        String fn = ((AString) ((AsterixConstantValue) ((ConstantExpression) arg).getValue())
+                                .getObject()).getStringValue();
                         open[i] = true;
                         for (String s : recType.getFieldNames()) {
                             if (s.equals(fn)) {
@@ -993,22 +1004,30 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.CLOSED_RECORD_CONSTRUCTOR, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 ((ClosedRecordConstructorDescriptor) fd).reset((ARecordType) context.getType(expr));
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.ORDERED_LIST_CONSTRUCTOR, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 ((OrderedListConstructorDescriptor) fd).reset((AOrderedListType) context.getType(expr));
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.UNORDERED_LIST_CONSTRUCTOR, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 ((UnorderedListConstructorDescriptor) fd).reset((AUnorderedListType) context.getType(expr));
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.FIELD_ACCESS_BY_INDEX, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) expr;
                 IAType t = (IAType) context.getType(fce.getArguments().get(0).getValue());
                 switch (t.getTypeTag()) {
@@ -1036,11 +1055,13 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.FIELD_ACCESS_NESTED, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) expr;
                 IAType t = (IAType) context.getType(fce.getArguments().get(0).getValue());
-                AOrderedList fieldPath = (AOrderedList) (((AsterixConstantValue) ((ConstantExpression) fce.getArguments()
-                        .get(1).getValue()).getValue()).getObject());
+                AOrderedList fieldPath = (AOrderedList) (((AsterixConstantValue) ((ConstantExpression) fce
+                        .getArguments().get(1).getValue()).getValue()).getObject());
                 List<String> listFieldPath = new ArrayList<String>();
                 for (int i = 0; i < fieldPath.size(); i++) {
                     listFieldPath.add(((AString) fieldPath.getItem(i)).getStringValue());
@@ -1059,7 +1080,9 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.GET_RECORD_FIELDS, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) expr;
                 IAType t = (IAType) context.getType(fce.getArguments().get(0).getValue());
                 if (t.getTypeTag().equals(ATypeTag.RECORD)) {
@@ -1071,7 +1094,9 @@ public class NonTaggedDataFormat implements IDataFormat {
             }
         });
         functionTypeInferers.put(AsterixBuiltinFunctions.GET_RECORD_FIELD_VALUE, new FunctionTypeInferer() {
-            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context) throws AlgebricksException {
+            @Override
+            public void infer(ILogicalExpression expr, IFunctionDescriptor fd, IVariableTypeEnvironment context)
+                    throws AlgebricksException {
                 AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) expr;
                 IAType t = (IAType) context.getType(fce.getArguments().get(0).getValue());
                 if (t.getTypeTag().equals(ATypeTag.RECORD)) {
@@ -1134,35 +1159,6 @@ public class NonTaggedDataFormat implements IDataFormat {
     }
 
     @Override
-    public ITupleParserFactory createTupleParser(ARecordType recType, IParseFileSplitsDecl decl) {
-        return createTupleParser(recType, decl.isDelimitedFileFormat(), decl.getDelimChar(), decl.getQuote(),
-                decl.getHasHeader());
-    }
-
-    @Override
-    public ITupleParserFactory createTupleParser(ARecordType recType, boolean delimitedFormat, char delimiter,
-            char quote, boolean hasHeader) {
-        Map<String, String> conf = new HashMap<String, String>();
-        AsterixTupleParserFactory.InputDataFormat inputFormat = null;
-        if (delimitedFormat) {
-            conf.put(AsterixTupleParserFactory.KEY_FORMAT, AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT);
-            conf.put(AsterixTupleParserFactory.KEY_DELIMITER, "" + delimiter);
-            inputFormat = InputDataFormat.DELIMITED;
-        } else {
-            conf.put(AsterixTupleParserFactory.KEY_FORMAT, AsterixTupleParserFactory.FORMAT_ADM);
-            inputFormat = InputDataFormat.ADM;
-        }
-
-        if (hasHeader) {
-            conf.put(AsterixTupleParserFactory.HAS_HEADER,
-                    hasHeader ? Boolean.TRUE.toString() : Boolean.FALSE.toString());
-        }
-        conf.put(AsterixTupleParserFactory.KEY_QUOTE, "" + quote);
-        return new AsterixTupleParserFactory(conf, recType, inputFormat);
-    }
-
-
-    @Override
     public INullWriterFactory getNullWriterFactory() {
         return AqlNullWriterFactory.INSTANCE;
     }


[10/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorNodePushable.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorNodePushable.java
new file mode 100644
index 0000000..75cc1bf
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalRTreeSearchOperatorNodePushable.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallback;
+import org.apache.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
+import org.apache.hyracks.storage.am.lsm.rtree.dataflow.ExternalRTreeDataflowHelper;
+import org.apache.hyracks.storage.am.lsm.rtree.impls.ExternalRTree;
+import org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorNodePushable;
+
+/** R-tree search runtime whose open() creates its index accessor for the dataflow helper's target version. */
+public class ExternalRTreeSearchOperatorNodePushable extends RTreeSearchOperatorNodePushable {
+    public ExternalRTreeSearchOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
+            int partition, IRecordDescriptorProvider recordDescProvider, int[] keyFields) {
+        super(opDesc, ctx, partition, recordDescProvider, keyFields, null, null);
+    }
+
+    // Overridden to specify the searched version of the index; any set-up failure surfaces as HyracksDataException
+    @Override
+    public void open() throws HyracksDataException {
+        writer.open();
+        accessor = new FrameTupleAccessor(inputRecDesc);
+        indexHelper.open();
+        ExternalRTreeDataflowHelper rTreeDataflowHelper = (ExternalRTreeDataflowHelper) indexHelper;
+        index = indexHelper.getIndexInstance();
+        if (retainNull) {
+            int fieldCount = getFieldCount();
+            nullTupleBuild = new ArrayTupleBuilder(fieldCount);
+            DataOutput out = nullTupleBuild.getDataOutput();
+            for (int i = 0; i < fieldCount; i++) {
+                try {
+                    nullWriter.writeNull(out);
+                } catch (IOException e) {
+                    // Fail instead of printing and continuing: the field end offset below would otherwise be
+                    throw new HyracksDataException(e); // recorded for a field that may lack its null value.
+                }
+                nullTupleBuild.addFieldEndOffset();
+            }
+        } else {
+            nullTupleBuild = null;
+        }
+        ExternalRTree rTreeIndex = (ExternalRTree) index;
+        try {
+            searchPred = createSearchPredicate();
+            tb = new ArrayTupleBuilder(recordDesc.getFieldCount());
+            dos = tb.getDataOutput();
+            appender = new FrameTupleAppender(new VSizeFrame(ctx));
+            ISearchOperationCallback searchCallback = opDesc.getSearchOpCallbackFactory()
+                    .createSearchOperationCallback(indexHelper.getResourceID(), ctx);
+            // The next line is the reason we override this method: the accessor is bound to the target version
+            indexAccessor = rTreeIndex.createAccessor(searchCallback, rTreeDataflowHelper.getTargetVersion());
+            cursor = createCursor();
+            if (retainInput) {
+                frameTuple = new FrameTupleReference();
+            }
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/operators/IndexInfoOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/operators/IndexInfoOperatorDescriptor.java b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/IndexInfoOperatorDescriptor.java
new file mode 100644
index 0000000..99c555a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/operators/IndexInfoOperatorDescriptor.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.operators;
+
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.ActivityId;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
+import org.apache.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
+import org.apache.hyracks.storage.am.common.api.ITupleFilterFactory;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
+import org.apache.hyracks.storage.common.IStorageManagerInterface;
+import org.apache.hyracks.storage.common.file.ILocalResourceFactoryProvider;
+
+/**
+ * This is a hack used to obtain multiple index instances in a single operator; it is not actually used as an
+ * operator. Only the three values passed to the constructor are exposed; every other accessor returns null or false.
+ */
+public class IndexInfoOperatorDescriptor implements IIndexOperatorDescriptor {
+    private static final long serialVersionUID = 1L;
+    private final IFileSplitProvider fileSplitProvider;
+    private final IStorageManagerInterface storageManager;
+    private final IIndexLifecycleManagerProvider lifecycleManagerProvider;
+
+    public IndexInfoOperatorDescriptor(IFileSplitProvider fileSplitProvider, IStorageManagerInterface storageManager,
+            IIndexLifecycleManagerProvider lifecycleManagerProvider) {
+        this.fileSplitProvider = fileSplitProvider;
+        this.storageManager = storageManager;
+        this.lifecycleManagerProvider = lifecycleManagerProvider;
+    }
+
+    @Override
+    public IFileSplitProvider getFileSplitProvider() {
+        return fileSplitProvider;
+    }
+
+    @Override
+    public IStorageManagerInterface getStorageManager() {
+        return storageManager;
+    }
+
+    @Override
+    public IIndexLifecycleManagerProvider getLifecycleManagerProvider() {
+        return lifecycleManagerProvider;
+    }
+
+    // Everything below is a stub that returns null or false, because this class is not used as an operator.
+    @Override
+    public ActivityId getActivityId() {
+        return null;
+    }
+
+    @Override
+    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
+            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+        return null;
+    }
+
+    @Override
+    public RecordDescriptor getRecordDescriptor() {
+        return null;
+    }
+
+    @Override
+    public IIndexDataflowHelperFactory getIndexDataflowHelperFactory() {
+        return null;
+    }
+
+    @Override
+    public boolean getRetainInput() {
+        return false;
+    }
+
+    @Override
+    public boolean getRetainNull() {
+        return false;
+    }
+
+    @Override
+    public ISearchOperationCallbackFactory getSearchOpCallbackFactory() {
+        return null;
+    }
+
+    @Override
+    public IModificationOperationCallbackFactory getModificationOpCallbackFactory() {
+        return null;
+    }
+
+    @Override
+    public ITupleFilterFactory getTupleFilterFactory() {
+        return null;
+    }
+
+    @Override
+    public ILocalResourceFactoryProvider getLocalResourceFactoryProvider() {
+        return null;
+    }
+
+    @Override
+    public INullWriterFactory getNullWriterFactory() {
+        return null;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
new file mode 100644
index 0000000..860d35f
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
@@ -0,0 +1,1148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.BitSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.builders.AbvsBuilderFactory;
+import org.apache.asterix.builders.IARecordBuilder;
+import org.apache.asterix.builders.IAsterixListBuilder;
+import org.apache.asterix.builders.ListBuilderFactory;
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.builders.RecordBuilderFactory;
+import org.apache.asterix.builders.UnorderedListBuilder;
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.dataflow.data.nontagged.serde.APolygonSerializerDeserializer;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.om.base.ABoolean;
+import org.apache.asterix.om.base.ANull;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
+import org.apache.asterix.om.types.AUnorderedListType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
+import org.apache.asterix.om.types.hierachy.ITypeConvertComputer;
+import org.apache.asterix.om.util.NonTaggedFormatUtil;
+import org.apache.asterix.om.util.container.IObjectPool;
+import org.apache.asterix.om.util.container.ListObjectPool;
+import org.apache.asterix.runtime.operators.file.adm.AdmLexer;
+import org.apache.asterix.runtime.operators.file.adm.AdmLexerException;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IMutableValueStorage;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+
+/**
+ * Parser for ADM formatted data.
+ */
+public class ADMDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
+
+    private AdmLexer admLexer;
+    private ARecordType recordType;
+    private boolean datasetRec;
+    private boolean isStreamParser = true;
+
+    private int nullableFieldId = 0;
+    private ArrayBackedValueStorage castBuffer = new ArrayBackedValueStorage();
+
+    private IObjectPool<IARecordBuilder, ATypeTag> recordBuilderPool = new ListObjectPool<IARecordBuilder, ATypeTag>(
+            new RecordBuilderFactory());
+    private IObjectPool<IAsterixListBuilder, ATypeTag> listBuilderPool = new ListObjectPool<IAsterixListBuilder, ATypeTag>(
+            new ListBuilderFactory());
+    private IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool = new ListObjectPool<IMutableValueStorage, ATypeTag>(
+            new AbvsBuilderFactory());
+
+    private String mismatchErrorMessage = "Mismatch Type, expecting a value of type ";
+    private String mismatchErrorMessage2 = " got a value of type ";
+    private Map<String, String> configuration;
+
+    /** Parse failure that can carry the file name, line and column to be reported in its message. */
+    static class ParseException extends AsterixException {
+        private static final long serialVersionUID = 1L;
+        private String filename;
+        private int line = -1;
+        private int column = -1;
+
+        public ParseException(String message) {
+            super(message);
+        }
+
+        public ParseException(Throwable cause) {
+            super(cause);
+        }
+
+        public ParseException(String message, Throwable cause) {
+            super(message, cause);
+        }
+
+        public ParseException(Throwable cause, String filename, int line, int column) {
+            super(cause);
+            setLocation(filename, line, column);
+        }
+
+        public void setLocation(String filename, int line, int column) {
+            this.filename = filename;
+            this.line = line;
+            this.column = column;
+        }
+
+        @Override
+        public String getMessage() {
+            // Shape: "Parse error[ in file F][ at (L, C) | in line L]: <superclass message>"
+            final StringBuilder text = new StringBuilder("Parse error");
+            if (filename != null) {
+                text.append(" in file ").append(filename);
+            }
+            if (line >= 0 && column >= 0) {
+                text.append(" at (").append(line).append(", ").append(column).append(")");
+            } else if (line >= 0) {
+                text.append(" in line ").append(line);
+            }
+            return text.append(": ").append(super.getMessage()).toString();
+        }
+    }
+
+    public ADMDataParser() {
+        this(null); // no file name: ParseException messages then omit the " in file ..." part
+    }
+
+    public ADMDataParser(String filename) {
+        this.filename = filename; // handed to ParseException by both parse methods when reporting an error
+    }
+
+    @Override
+    public boolean parse(DataOutput out) throws AsterixException {
+        try {
+            resetPools(); // defined outside this view; presumably recycles the pooled builders - TODO confirm
+            return parseAdmInstance(recordType, datasetRec, out); // false once the lexer reports end of input
+        } catch (IOException e) {
+            throw new ParseException(e, filename, admLexer.getLine(), admLexer.getColumn()); // adds the position
+        } catch (AdmLexerException e) {
+            throw new AsterixException(e); // note: unlike the other two paths, no file/line/column is attached
+        } catch (ParseException e) {
+            e.setLocation(filename, admLexer.getLine(), admLexer.getColumn()); // record the lexer's line/column
+            throw e;
+        }
+    }
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        final boolean streamProvider = ExternalDataUtils.isDataSourceStreamProvider(configuration);
+        return streamProvider ? DataSourceType.STREAM : DataSourceType.RECORDS; // decided from the configuration
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType recordType) throws IOException {
+        this.configuration = configuration;
+        this.recordType = recordType;
+        // A missing dataset-record setting defaults to true; otherwise the configured boolean is used.
+        final String datasetRecordSetting = configuration.get(ExternalDataConstants.KEY_DATASET_RECORD);
+        this.datasetRec = datasetRecordSetting == null || Boolean.parseBoolean(datasetRecordSetting);
+        this.isStreamParser = ExternalDataUtils.isDataSourceStreamProvider(configuration);
+        // Record parsers get a lexer with no reader here; parse(IRawRecord, DataOutput) feeds it a buffer per
+        // record. Stream parsers receive their lexer later, in setInputStream(InputStream).
+        if (isStreamParser) {
+            return;
+        }
+        this.admLexer = new AdmLexer();
+    }
+
+    @Override
+    public void parse(IRawRecord<? extends char[]> record, DataOutput out) throws Exception {
+        try {
+            resetPools(); // defined outside this view; presumably recycles the pooled builders - TODO confirm
+            admLexer.setBuffer(record.get()); // configure() creates this lexer only for non-stream sources
+            parseAdmInstance(recordType, datasetRec, out); // the returned end-of-input flag is ignored here
+        } catch (IOException e) {
+            throw new ParseException(e, filename, admLexer.getLine(), admLexer.getColumn()); // adds the position
+        } catch (AdmLexerException e) {
+            throw new AsterixException(e); // note: unlike the other two paths, no file/line/column is attached
+        } catch (ParseException e) {
+            e.setLocation(filename, admLexer.getLine(), admLexer.getColumn()); // record the lexer's line/column
+            throw e;
+        }
+    }
+
+    @Override
+    public Class<? extends char[]> getRecordClass() {
+        return char[].class; // matches the IRawRecord<? extends char[]> accepted by parse(IRawRecord, DataOutput)
+    }
+
+    @Override
+    public void setInputStream(InputStream in) throws Exception { // decode as UTF-8, never the platform default
+        admLexer = new AdmLexer(new java.io.InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8));
+    }
+
+    protected boolean parseAdmInstance(IAType objectType, boolean datasetRec, DataOutput out)
+            throws AsterixException, IOException, AdmLexerException {
+        // Returns false at end of input; otherwise hands the first token on to write one instance to out.
+        final int firstToken = admLexer.next();
+        if (firstToken == AdmLexer.TOKEN_EOF) {
+            return false;
+        }
+        admFromLexerStream(firstToken, objectType, out, datasetRec);
+        return true;
+    }
+
+    private void admFromLexerStream(int token, IAType objectType, DataOutput out, Boolean datasetRec) // Serializes to out the single ADM value that starts at the already-read token.
+            throws AsterixException, IOException, AdmLexerException {
+        // objectType is the declared type for the value (null means none was declared); datasetRec is only forwarded to parseRecord.
+        switch (token) {
+            case AdmLexer.TOKEN_NULL_LITERAL: { // a bare null is accepted only when checkType admits NULL for objectType
+                if (checkType(ATypeTag.NULL, objectType)) {
+                    nullSerde.serialize(ANull.NULL, out);
+                } else {
+                    throw new ParseException("This field can not be null");
+                }
+                break;
+            }
+            case AdmLexer.TOKEN_TRUE_LITERAL: {
+                if (checkType(ATypeTag.BOOLEAN, objectType)) {
+                    booleanSerde.serialize(ABoolean.TRUE, out);
+                } else {
+                    throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
+                }
+                break;
+            }
+            case AdmLexer.TOKEN_BOOLEAN_CONS: { // *_CONS tokens: parseConstructor reads CONSTRUCTOR_OPEN, a string literal and CONSTRUCTOR_CLOSE
+                parseConstructor(ATypeTag.BOOLEAN, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_FALSE_LITERAL: {
+                if (checkType(ATypeTag.BOOLEAN, objectType)) {
+                    booleanSerde.serialize(ABoolean.FALSE, out);
+                } else {
+                    throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
+                }
+                break;
+            }
+            case AdmLexer.TOKEN_DOUBLE_LITERAL: { // floating-point literals are parsed directly as the resolved target type
+                parseToNumericTarget(ATypeTag.DOUBLE, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_DOUBLE_CONS: {
+                parseConstructor(ATypeTag.DOUBLE, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_FLOAT_LITERAL: {
+                parseToNumericTarget(ATypeTag.FLOAT, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_FLOAT_CONS: {
+                parseConstructor(ATypeTag.FLOAT, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT8_LITERAL: { // integer literals are parsed as their own type, then promoted/demoted to the target type
+                parseAndCastNumeric(ATypeTag.INT8, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT8_CONS: {
+                parseConstructor(ATypeTag.INT8, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT16_LITERAL: {
+                parseAndCastNumeric(ATypeTag.INT16, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT16_CONS: {
+                parseConstructor(ATypeTag.INT16, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT_LITERAL: {
+                // For an INT value without any suffix, we return it as INT64 type value since it is the default integer type.
+                parseAndCastNumeric(ATypeTag.INT64, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT32_LITERAL: {
+                parseAndCastNumeric(ATypeTag.INT32, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT32_CONS: {
+                parseConstructor(ATypeTag.INT32, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT64_LITERAL: {
+                parseAndCastNumeric(ATypeTag.INT64, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INT64_CONS: {
+                parseConstructor(ATypeTag.INT64, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_STRING_LITERAL: {
+                if (checkType(ATypeTag.STRING, objectType)) {
+                    final String tokenImage = admLexer.getLastTokenImage().substring(1, // drops the first and last character of the token image
+                            admLexer.getLastTokenImage().length() - 1);
+                    aString.setValue(admLexer.containsEscapes() ? replaceEscapes(tokenImage) : tokenImage);
+                    stringSerde.serialize(aString, out);
+                } else if (checkType(ATypeTag.UUID, objectType)) {
+                    // Dealing with UUID type that is represented by a string
+                    final String tokenImage = admLexer.getLastTokenImage().substring(1,
+                            admLexer.getLastTokenImage().length() - 1);
+                    aUUID.fromStringToAMuatbleUUID(tokenImage);
+                    uuidSerde.serialize(aUUID, out);
+                } else {
+                    throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
+                }
+                break;
+            }
+            case AdmLexer.TOKEN_STRING_CONS: {
+                parseConstructor(ATypeTag.STRING, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_HEX_CONS: // both binary constructors share one path; parseToBinaryTarget picks the decoder from token
+            case AdmLexer.TOKEN_BASE64_CONS: {
+                if (checkType(ATypeTag.BINARY, objectType)) {
+                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
+                            parseToBinaryTarget(token, admLexer.getLastTokenImage(), out);
+                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+                                break; // success: the whole constructor was consumed
+                            }
+                        }
+                    }
+                }
+                throw new ParseException(mismatchErrorMessage + objectType.getTypeName()); // any failure above lands here; NOTE(review): objectType can be null here (checkType accepts null) - would NPE, confirm
+            }
+            case AdmLexer.TOKEN_DATE_CONS: {
+                parseConstructor(ATypeTag.DATE, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_TIME_CONS: {
+                parseConstructor(ATypeTag.TIME, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_DATETIME_CONS: {
+                parseConstructor(ATypeTag.DATETIME, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_INTERVAL_DATE_CONS: { // the three interval constructors differ only in the parse helper and error text
+                if (checkType(ATypeTag.INTERVAL, objectType)) {
+                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
+                            parseDateInterval(admLexer.getLastTokenImage(), out);
+                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+                                break;
+                            }
+                        }
+                    }
+                }
+                throw new ParseException("Wrong interval data parsing for date interval."); // reached on a type mismatch or malformed constructor
+            }
+            case AdmLexer.TOKEN_INTERVAL_TIME_CONS: {
+                if (checkType(ATypeTag.INTERVAL, objectType)) {
+                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
+                            parseTimeInterval(admLexer.getLastTokenImage(), out);
+                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+                                break;
+                            }
+                        }
+                    }
+                }
+                throw new ParseException("Wrong interval data parsing for time interval.");
+            }
+            case AdmLexer.TOKEN_INTERVAL_DATETIME_CONS: {
+                if (checkType(ATypeTag.INTERVAL, objectType)) {
+                    if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+                        if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
+                            parseDateTimeInterval(admLexer.getLastTokenImage(), out);
+                            if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+                                break;
+                            }
+                        }
+                    }
+                }
+                throw new ParseException("Wrong interval data parsing for datetime interval.");
+            }
+            case AdmLexer.TOKEN_DURATION_CONS: {
+                parseConstructor(ATypeTag.DURATION, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_YEAR_MONTH_DURATION_CONS: {
+                parseConstructor(ATypeTag.YEARMONTHDURATION, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_DAY_TIME_DURATION_CONS: {
+                parseConstructor(ATypeTag.DAYTIMEDURATION, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_POINT_CONS: {
+                parseConstructor(ATypeTag.POINT, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_POINT3D_CONS: {
+                parseConstructor(ATypeTag.POINT3D, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_CIRCLE_CONS: {
+                parseConstructor(ATypeTag.CIRCLE, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_RECTANGLE_CONS: {
+                parseConstructor(ATypeTag.RECTANGLE, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_LINE_CONS: {
+                parseConstructor(ATypeTag.LINE, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_POLYGON_CONS: {
+                parseConstructor(ATypeTag.POLYGON, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_START_UNORDERED_LIST: {
+                if (checkType(ATypeTag.UNORDEREDLIST, objectType)) {
+                    objectType = getComplexType(objectType, ATypeTag.UNORDEREDLIST); // narrow to the list type itself (it may be a union member); stays null if no type was declared
+                    parseUnorderedList((AUnorderedListType) objectType, out);
+                } else {
+                    throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
+                }
+                break;
+            }
+
+            case AdmLexer.TOKEN_START_ORDERED_LIST: {
+                if (checkType(ATypeTag.ORDEREDLIST, objectType)) {
+                    objectType = getComplexType(objectType, ATypeTag.ORDEREDLIST);
+                    parseOrderedList((AOrderedListType) objectType, out);
+                } else {
+                    throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
+                }
+                break;
+            }
+            case AdmLexer.TOKEN_START_RECORD: {
+                if (checkType(ATypeTag.RECORD, objectType)) {
+                    objectType = getComplexType(objectType, ATypeTag.RECORD);
+                    parseRecord((ARecordType) objectType, out, datasetRec);
+                } else {
+                    throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
+                }
+                break;
+            }
+            case AdmLexer.TOKEN_UUID_CONS: {
+                parseConstructor(ATypeTag.UUID, objectType, out);
+                break;
+            }
+            case AdmLexer.TOKEN_EOF: { // nothing is written to out for EOF
+                break;
+            }
+            default: {
+                throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token) + ".");
+            }
+        }
+
+    }
+
+    private String replaceEscapes(String tokenImage) throws ParseException {
+        char[] chars = tokenImage.toCharArray();
+        int len = chars.length;
+        int readpos = 0;
+        int writepos = 0;
+        int movemarker = 0;
+        while (readpos < len) {
+            if (chars[readpos] == '\\') {
+                moveChars(chars, movemarker, readpos, readpos - writepos);
+                switch (chars[readpos + 1]) {
+                    case '\\':
+                    case '\"':
+                    case '/':
+                        chars[writepos] = chars[readpos + 1];
+                        break;
+                    case 'b':
+                        chars[writepos] = '\b';
+                        break;
+                    case 'f':
+                        chars[writepos] = '\f';
+                        break;
+                    case 'n':
+                        chars[writepos] = '\n';
+                        break;
+                    case 'r':
+                        chars[writepos] = '\r';
+                        break;
+                    case 't':
+                        chars[writepos] = '\t';
+                        break;
+                    case 'u':
+                        chars[writepos] = (char) Integer.parseInt(new String(chars, readpos + 2, 4), 16);
+                        readpos += 4;
+                        break;
+                    default:
+                        throw new ParseException("Illegal escape '\\" + chars[readpos + 1] + "'");
+                }
+                ++readpos;
+                movemarker = readpos + 1;
+            }
+            ++writepos;
+            ++readpos;
+        }
+        moveChars(chars, movemarker, len, readpos - writepos);
+        return new String(chars, 0, len - (readpos - writepos));
+    }
+
+    /**
+     * Copies each {@code chars[i]} for {@code i} in {@code [start, end)} to {@code chars[i - offset]},
+     * visiting the indexes in ascending order. Nothing is done when {@code offset} is zero.
+     *
+     * @param chars the array that is modified in place
+     * @param start index of the first character to copy (inclusive)
+     * @param end index after the last character to copy (exclusive)
+     * @param offset distance by which each character is moved towards the front
+     */
+    private static void moveChars(final char[] chars, final int start, final int end, final int offset) {
+        if (offset != 0) {
+            int src = start;
+            while (src < end) {
+                chars[src - offset] = chars[src];
+                src++;
+            }
+        }
+    }
+
+    /**
+     * Finds the type carrying the given tag inside a declared type.
+     *
+     * @param aObjectType the declared type; may be null
+     * @param tag the tag to look for
+     * @return {@code aObjectType} itself when its tag is {@code tag}; otherwise, for a union, the first
+     *         member whose tag is {@code tag}; null when {@code aObjectType} is null or nothing matches
+     */
+    private IAType getComplexType(IAType aObjectType, ATypeTag tag) {
+        if (aObjectType == null) {
+            return null;
+        }
+        if (aObjectType.getTypeTag() == tag) {
+            return aObjectType;
+        }
+        if (aObjectType.getTypeTag() != ATypeTag.UNION) {
+            return null;
+        }
+        for (IAType member : ((AUnionType) aObjectType).getUnionList()) {
+            if (member.getTypeTag() == tag) {
+                return member;
+            }
+        }
+        return null;
+    }
+
+    private ATypeTag getTargetTypeTag(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException {
+        if (aObjectType == null) {
+            return expectedTypeTag;
+        }
+        if (aObjectType.getTypeTag() != ATypeTag.UNION) {
+            final ATypeTag typeTag = aObjectType.getTypeTag();
+            if (ATypeHierarchy.canPromote(expectedTypeTag, typeTag)
+                    || ATypeHierarchy.canDemote(expectedTypeTag, typeTag)) {
+                return typeTag;
+            } else {
+                return null;
+            }
+            //            return ATypeHierarchy.canPromote(expectedTypeTag, typeTag) ? typeTag : null;
+        } else { // union
+            List<IAType> unionList = ((AUnionType) aObjectType).getUnionList();
+            for (IAType t : unionList) {
+                final ATypeTag typeTag = t.getTypeTag();
+                if (ATypeHierarchy.canPromote(expectedTypeTag, typeTag)
+                        || ATypeHierarchy.canDemote(expectedTypeTag, typeTag)) {
+                    return typeTag;
+                }
+            }
+        }
+        return null;
+    }
+
+    private boolean checkType(ATypeTag expectedTypeTag, IAType aObjectType) throws IOException {
+        return getTargetTypeTag(expectedTypeTag, aObjectType) != null;
+    }
+
+    private void parseRecord(ARecordType recType, DataOutput out, Boolean datasetRec) // Reads record fields up to TOKEN_END_RECORD and writes the built record to out.
+            throws IOException, AsterixException, AdmLexerException {
+        // recType may be null; every field is then written as an open field (serialized name + value).
+        ArrayBackedValueStorage fieldValueBuffer = getTempBuffer();
+        ArrayBackedValueStorage fieldNameBuffer = getTempBuffer();
+        IARecordBuilder recBuilder = getRecordBuilder();
+
+        BitSet nulls = null; // despite the name, a set bit marks a declared field that WAS present in the input
+        if (datasetRec) { // NOTE(review): both branches of this test perform exactly the same setup
+
+            if (recType != null) {
+                nulls = new BitSet(recType.getFieldNames().length);
+                recBuilder.reset(recType);
+            } else {
+                recBuilder.reset(null);
+            }
+
+        } else if (recType != null) {
+            nulls = new BitSet(recType.getFieldNames().length);
+            recBuilder.reset(recType);
+        } else {
+            recBuilder.reset(null);
+        }
+
+        recBuilder.init();
+        int token;
+        boolean inRecord = true;
+        boolean expectingRecordField = false; // true right after a comma, until the next field name is read
+        boolean first = true; // true only while handling the first token after the record start
+
+        Boolean openRecordField = false; // whether the current field is added by name rather than by declared field id
+        int fieldId = 0;
+        IAType fieldType = null;
+        do {
+            token = admLexer.next();
+            switch (token) {
+                case AdmLexer.TOKEN_END_RECORD: {
+                    if (expectingRecordField) {
+                        throw new ParseException("Found END_RECORD while expecting a record field.");
+                    }
+                    inRecord = false;
+                    break;
+                }
+                case AdmLexer.TOKEN_STRING_LITERAL: {
+                    // we've read the name of the field
+                    // now read the content
+                    fieldNameBuffer.reset();
+                    fieldValueBuffer.reset();
+                    expectingRecordField = false;
+
+                    if (recType != null) {
+                        String fldName = admLexer.getLastTokenImage().substring(1, // drops the first and last character of the token image
+                                admLexer.getLastTokenImage().length() - 1);
+                        fieldId = recBuilder.getFieldId(fldName); // a negative id is treated below as "not a declared field"
+                        if (fieldId < 0 && !recType.isOpen()) {
+                            throw new ParseException("This record is closed, you can not add extra fields !!");
+                        } else if (fieldId < 0 && recType.isOpen()) {
+                            aStringFieldName.setValue(admLexer.getLastTokenImage().substring(1,
+                                    admLexer.getLastTokenImage().length() - 1));
+                            stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
+                            openRecordField = true;
+                            fieldType = null;
+                        } else {
+                            // a closed field
+                            nulls.set(fieldId);
+                            fieldType = recType.getFieldTypes()[fieldId];
+                            openRecordField = false;
+                        }
+                    } else {
+                        aStringFieldName.setValue(
+                                admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1));
+                        stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
+                        openRecordField = true;
+                        fieldType = null;
+                    }
+
+                    token = admLexer.next(); // the field name must be followed by a colon
+                    if (token != AdmLexer.TOKEN_COLON) {
+                        throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
+                                + " while expecting \":\".");
+                    }
+
+                    token = admLexer.next();
+                    this.admFromLexerStream(token, fieldType, fieldValueBuffer.getDataOutput(), false); // field values are always parsed with datasetRec = false
+                    if (openRecordField) {
+                        if (fieldValueBuffer.getByteArray()[0] != ATypeTag.NULL.serialize()) { // an open field is dropped when its serialized value starts with the NULL tag
+                            recBuilder.addField(fieldNameBuffer, fieldValueBuffer);
+                        }
+                    } else if (NonTaggedFormatUtil.isOptional(recType)) { // NOTE(review): tests recType rather than fieldType - confirm this is intended
+                        if (fieldValueBuffer.getByteArray()[0] != ATypeTag.NULL.serialize()) {
+                            recBuilder.addField(fieldId, fieldValueBuffer);
+                        }
+                    } else {
+                        recBuilder.addField(fieldId, fieldValueBuffer);
+                    }
+
+                    break;
+                }
+                case AdmLexer.TOKEN_COMMA: {
+                    if (first) {
+                        throw new ParseException("Found COMMA before any record field.");
+                    }
+                    if (expectingRecordField) {
+                        throw new ParseException("Found COMMA while expecting a record field.");
+                    }
+                    expectingRecordField = true;
+                    break;
+                }
+                default: {
+                    throw new ParseException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
+                            + " while parsing record fields.");
+                }
+            }
+            first = false;
+        } while (inRecord);
+
+        if (recType != null) {
+            nullableFieldId = checkNullConstraints(recType, nulls); // nullableFieldId is not declared in this method (presumably an instance field); -1 means no violation
+            if (nullableFieldId != -1) {
+                throw new ParseException("Field: " + recType.getFieldNames()[nullableFieldId] + " can not be null");
+            }
+        }
+        recBuilder.write(out, true);
+    }
+
+    private int checkNullConstraints(ARecordType recType, BitSet nulls) {
+        boolean isNull = false;
+        for (int i = 0; i < recType.getFieldTypes().length; i++) {
+            if (nulls.get(i) == false) {
+                IAType type = recType.getFieldTypes()[i];
+                if (type.getTypeTag() != ATypeTag.NULL && type.getTypeTag() != ATypeTag.UNION) {
+                    return i;
+                }
+
+                if (type.getTypeTag() == ATypeTag.UNION) { // union
+                    List<IAType> unionList = ((AUnionType) type).getUnionList();
+                    for (int j = 0; j < unionList.size(); j++) {
+                        if (unionList.get(j).getTypeTag() == ATypeTag.NULL) {
+                            isNull = true;
+                            break;
+                        }
+                    }
+                    if (!isNull) {
+                        return i;
+                    }
+                }
+            }
+        }
+        return -1;
+    }
+
+    private void parseOrderedList(AOrderedListType oltype, DataOutput out)
+            throws IOException, AsterixException, AdmLexerException {
+        ArrayBackedValueStorage itemBuffer = getTempBuffer();
+        OrderedListBuilder orderedListBuilder = (OrderedListBuilder) getOrderedListBuilder();
+
+        IAType itemType = null;
+        if (oltype != null) {
+            itemType = oltype.getItemType();
+        }
+        orderedListBuilder.reset(oltype);
+
+        int token;
+        boolean inList = true;
+        boolean expectingListItem = false;
+        boolean first = true;
+        do {
+            token = admLexer.next();
+            if (token == AdmLexer.TOKEN_END_ORDERED_LIST) {
+                if (expectingListItem) {
+                    throw new ParseException("Found END_COLLECTION while expecting a list item.");
+                }
+                inList = false;
+            } else if (token == AdmLexer.TOKEN_COMMA) {
+                if (first) {
+                    throw new ParseException("Found COMMA before any list item.");
+                }
+                if (expectingListItem) {
+                    throw new ParseException("Found COMMA while expecting a list item.");
+                }
+                expectingListItem = true;
+            } else {
+                expectingListItem = false;
+                itemBuffer.reset();
+
+                admFromLexerStream(token, itemType, itemBuffer.getDataOutput(), false);
+                orderedListBuilder.addItem(itemBuffer);
+            }
+            first = false;
+        } while (inList);
+        orderedListBuilder.write(out, true);
+    }
+
+    private void parseUnorderedList(AUnorderedListType uoltype, DataOutput out)
+            throws IOException, AsterixException, AdmLexerException {
+        ArrayBackedValueStorage itemBuffer = getTempBuffer();
+        UnorderedListBuilder unorderedListBuilder = (UnorderedListBuilder) getUnorderedListBuilder();
+
+        IAType itemType = null;
+
+        if (uoltype != null) {
+            itemType = uoltype.getItemType();
+        }
+        unorderedListBuilder.reset(uoltype);
+
+        int token;
+        boolean inList = true;
+        boolean expectingListItem = false;
+        boolean first = true;
+        do {
+            token = admLexer.next();
+            if (token == AdmLexer.TOKEN_END_RECORD) {
+                if (admLexer.next() == AdmLexer.TOKEN_END_RECORD) {
+                    if (expectingListItem) {
+                        throw new ParseException("Found END_COLLECTION while expecting a list item.");
+                    } else {
+                        inList = false;
+                    }
+                } else {
+                    throw new ParseException("Found END_RECORD while expecting a list item.");
+                }
+            } else if (token == AdmLexer.TOKEN_COMMA) {
+                if (first) {
+                    throw new ParseException("Found COMMA before any list item.");
+                }
+                if (expectingListItem) {
+                    throw new ParseException("Found COMMA while expecting a list item.");
+                }
+                expectingListItem = true;
+            } else {
+                expectingListItem = false;
+                itemBuffer.reset();
+                admFromLexerStream(token, itemType, itemBuffer.getDataOutput(), false);
+                unorderedListBuilder.addItem(itemBuffer);
+            }
+            first = false;
+        } while (inList);
+        unorderedListBuilder.write(out, true);
+    }
+
+    private IARecordBuilder getRecordBuilder() {
+        return recordBuilderPool.allocate(ATypeTag.RECORD);
+    }
+
+    private IAsterixListBuilder getOrderedListBuilder() {
+        return listBuilderPool.allocate(ATypeTag.ORDEREDLIST);
+    }
+
+    private IAsterixListBuilder getUnorderedListBuilder() {
+        return listBuilderPool.allocate(ATypeTag.UNORDEREDLIST);
+    }
+
+    private ArrayBackedValueStorage getTempBuffer() {
+        return (ArrayBackedValueStorage) abvsBuilderPool.allocate(ATypeTag.BINARY);
+    }
+
+    /**
+     * Decodes the content of a binary constructor's string literal and writes it to {@code out}.
+     * The first and last character of {@code tokenImage} are excluded from decoding.
+     *
+     * @param lexerToken TOKEN_HEX_CONS selects hex decoding, TOKEN_BASE64_CONS selects base64 decoding;
+     *            nothing is written for any other token
+     * @param tokenImage the string literal token image
+     * @param out where the decoded value is written
+     */
+    private void parseToBinaryTarget(int lexerToken, String tokenImage, DataOutput out)
+            throws ParseException, HyracksDataException {
+        if (lexerToken == AdmLexer.TOKEN_HEX_CONS) {
+            parseHexBinaryString(tokenImage.toCharArray(), 1, tokenImage.length() - 2, out);
+        } else if (lexerToken == AdmLexer.TOKEN_BASE64_CONS) {
+            parseBase64BinaryString(tokenImage.toCharArray(), 1, tokenImage.length() - 2, out);
+        }
+    }
+
+    private void parseToNumericTarget(ATypeTag typeTag, IAType objectType, DataOutput out)
+            throws AsterixException, IOException {
+        final ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
+        if (targetTypeTag == null || !parseValue(admLexer.getLastTokenImage(), targetTypeTag, out)) {
+            throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
+        }
+    }
+
+    /**
+     * Parses the last token image as a value of {@code typeTag} and writes it to {@code out}, converting
+     * it to the target type resolved for {@code objectType} when the two differ.
+     * <p>
+     * When a conversion is needed the value is first serialized into {@code castBuffer}; the converter
+     * then reads it from there (skipping the leading type tag byte) and writes the result to {@code out}.
+     *
+     * @param typeTag the tag implied by the literal that was read
+     * @param objectType the declared type; may be null
+     * @param out where the serialized value is written
+     * @throws ParseException if no target type can be resolved or {@code typeTag} has no value parser
+     * @throws AsterixException if no converter exists for the required promotion or demotion
+     */
+    private void parseAndCastNumeric(ATypeTag typeTag, IAType objectType, DataOutput out)
+            throws AsterixException, IOException {
+        final ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
+        final boolean needsCast = targetTypeTag != typeTag;
+
+        final DataOutput parseSink;
+        if (needsCast) {
+            castBuffer.reset();
+            parseSink = castBuffer.getDataOutput();
+        } else {
+            parseSink = out;
+        }
+
+        if (targetTypeTag == null || !parseValue(admLexer.getLastTokenImage(), typeTag, parseSink)) {
+            throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
+        }
+        if (!needsCast) {
+            return;
+        }
+
+        // pick the converter for whichever direction the type hierarchy allows
+        final ITypeConvertComputer converter;
+        if (ATypeHierarchy.canPromote(typeTag, targetTypeTag)) {
+            converter = ATypeHierarchy.getTypePromoteComputer(typeTag, targetTypeTag);
+        } else if (ATypeHierarchy.canDemote(typeTag, targetTypeTag)) {
+            converter = ATypeHierarchy.getTypeDemoteComputer(typeTag, targetTypeTag);
+        } else {
+            return;
+        }
+        if (converter == null) {
+            throw new AsterixException("Can't cast the " + typeTag + " type to the " + targetTypeTag + " type.");
+        }
+        // the type tag byte at the start of the buffer is skipped
+        converter.convertType(castBuffer.getByteArray(), castBuffer.getStartOffset() + 1,
+                castBuffer.getLength() - 1, out);
+    }
+
+    private void parseConstructor(ATypeTag typeTag, IAType objectType, DataOutput out)
+            throws AsterixException, AdmLexerException, IOException {
+        final ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
+        if (targetTypeTag != null) {
+            DataOutput dataOutput = out;
+            if (targetTypeTag != typeTag) {
+                castBuffer.reset();
+                dataOutput = castBuffer.getDataOutput();
+            }
+            int token = admLexer.next();
+            if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+                token = admLexer.next();
+                if (token == AdmLexer.TOKEN_STRING_LITERAL) {
+                    final String unquoted = admLexer.getLastTokenImage().substring(1,
+                            admLexer.getLastTokenImage().length() - 1);
+                    if (!parseValue(unquoted, typeTag, dataOutput)) {
+                        throw new ParseException("Missing deserializer method for constructor: "
+                                + AdmLexer.tokenKindToString(token) + ".");
+                    }
+                    token = admLexer.next();
+                    if (token == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+                        if (targetTypeTag != typeTag) {
+                            ITypeConvertComputer promoteComputer = ATypeHierarchy.getTypePromoteComputer(typeTag,
+                                    targetTypeTag);
+                            // the availability if the promote computer should be consistent with the availability of a target type
+                            assert promoteComputer != null;
+                            // do the promotion; note that the type tag field should be skipped
+                            promoteComputer.convertType(castBuffer.getByteArray(), castBuffer.getStartOffset() + 1,
+                                    castBuffer.getLength() - 1, out);
+                        }
+                        return;
+                    }
+                }
+            }
+        }
+        throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + ". Got " + typeTag + " instead.");
+    }
+
+    /**
+     * Parses {@code unquoted} as a value of the given type and writes its serialized form to {@code out}.
+     *
+     * @param unquoted the textual value
+     * @param typeTag the type to parse the text as
+     * @param out where the serialized value is written
+     * @return true when {@code typeTag} is one of the types handled here; false otherwise, in which case
+     *         nothing is written
+     */
+    private boolean parseValue(final String unquoted, ATypeTag typeTag, DataOutput out)
+            throws AsterixException, HyracksDataException, IOException {
+        switch (typeTag) {
+            case BOOLEAN:
+                parseBoolean(unquoted, out);
+                break;
+            case INT8:
+                parseInt8(unquoted, out);
+                break;
+            case INT16:
+                parseInt16(unquoted, out);
+                break;
+            case INT32:
+                parseInt32(unquoted, out);
+                break;
+            case INT64:
+                parseInt64(unquoted, out);
+                break;
+            case FLOAT:
+                aFloat.setValue(Float.parseFloat(unquoted));
+                floatSerde.serialize(aFloat, out);
+                break;
+            case DOUBLE:
+                aDouble.setValue(Double.parseDouble(unquoted));
+                doubleSerde.serialize(aDouble, out);
+                break;
+            case STRING:
+                aString.setValue(unquoted);
+                stringSerde.serialize(aString, out);
+                break;
+            case TIME:
+                parseTime(unquoted, out);
+                break;
+            case DATE:
+                parseDate(unquoted, out);
+                break;
+            case DATETIME:
+                parseDateTime(unquoted, out);
+                break;
+            case DURATION:
+                parseDuration(unquoted, out);
+                break;
+            case DAYTIMEDURATION:
+                parseDateTimeDuration(unquoted, out);
+                break;
+            case YEARMONTHDURATION:
+                parseYearMonthDuration(unquoted, out);
+                break;
+            case POINT:
+                parsePoint(unquoted, out);
+                break;
+            case POINT3D:
+                parse3DPoint(unquoted, out);
+                break;
+            case CIRCLE:
+                parseCircle(unquoted, out);
+                break;
+            case RECTANGLE:
+                parseRectangle(unquoted, out);
+                break;
+            case LINE:
+                parseLine(unquoted, out);
+                break;
+            case POLYGON:
+                APolygonSerializerDeserializer.parse(unquoted, out);
+                break;
+            case UUID:
+                aUUID.fromStringToAMuatbleUUID(unquoted);
+                uuidSerde.serialize(aUUID, out);
+                break;
+            default:
+                // no parser for this type
+                return false;
+        }
+        return true;
+    }
+
+    private void parseBoolean(String bool, DataOutput out) throws AsterixException, HyracksDataException {
+        String errorMessage = "This can not be an instance of boolean";
+        if (bool.equals("true")) {
+            booleanSerde.serialize(ABoolean.TRUE, out);
+        } else if (bool.equals("false")) {
+            booleanSerde.serialize(ABoolean.FALSE, out);
+        } else {
+            throw new ParseException(errorMessage);
+        }
+    }
+
+    private void parseInt8(String int8, DataOutput out) throws AsterixException, HyracksDataException {
+        String errorMessage = "This can not be an instance of int8";
+        boolean positive = true;
+        byte value = 0;
+        int offset = 0;
+
+        if (int8.charAt(offset) == '+') {
+            offset++;
+        } else if (int8.charAt(offset) == '-') {
+            offset++;
+            positive = false;
+        }
+        for (; offset < int8.length(); offset++) {
+            if (int8.charAt(offset) >= '0' && int8.charAt(offset) <= '9') {
+                value = (byte) (value * 10 + int8.charAt(offset) - '0');
+            } else if (int8.charAt(offset) == 'i' && int8.charAt(offset + 1) == '8' && offset + 2 == int8.length()) {
+                break;
+            } else {
+                throw new ParseException(errorMessage);
+            }
+        }
+        if (value < 0) {
+            throw new ParseException(errorMessage);
+        }
+        if (value > 0 && !positive) {
+            value *= -1;
+        }
+        aInt8.setValue(value);
+        int8Serde.serialize(aInt8, out);
+    }
+
+    private void parseInt16(String int16, DataOutput out) throws AsterixException, HyracksDataException {
+        String errorMessage = "This can not be an instance of int16";
+        boolean positive = true;
+        short value = 0;
+        int offset = 0;
+
+        if (int16.charAt(offset) == '+') {
+            offset++;
+        } else if (int16.charAt(offset) == '-') {
+            offset++;
+            positive = false;
+        }
+        for (; offset < int16.length(); offset++) {
+            if (int16.charAt(offset) >= '0' && int16.charAt(offset) <= '9') {
+                value = (short) (value * 10 + int16.charAt(offset) - '0');
+            } else if (int16.charAt(offset) == 'i' && int16.charAt(offset + 1) == '1' && int16.charAt(offset + 2) == '6'
+                    && offset + 3 == int16.length()) {
+                break;
+            } else {
+                throw new ParseException(errorMessage);
+            }
+        }
+        if (value < 0) {
+            throw new ParseException(errorMessage);
+        }
+        if (value > 0 && !positive) {
+            value *= -1;
+        }
+        aInt16.setValue(value);
+        int16Serde.serialize(aInt16, out);
+    }
+
+    private void parseInt32(String int32, DataOutput out) throws AsterixException, HyracksDataException {
+        String errorMessage = "This can not be an instance of int32";
+        boolean positive = true;
+        int value = 0;
+        int offset = 0;
+
+        if (int32.charAt(offset) == '+') {
+            offset++;
+        } else if (int32.charAt(offset) == '-') {
+            offset++;
+            positive = false;
+        }
+        for (; offset < int32.length(); offset++) {
+            if (int32.charAt(offset) >= '0' && int32.charAt(offset) <= '9') {
+                value = (value * 10 + int32.charAt(offset) - '0');
+            } else if (int32.charAt(offset) == 'i' && int32.charAt(offset + 1) == '3' && int32.charAt(offset + 2) == '2'
+                    && offset + 3 == int32.length()) {
+                break;
+            } else {
+                throw new ParseException(errorMessage);
+            }
+        }
+        if (value < 0) {
+            throw new ParseException(errorMessage);
+        }
+        if (value > 0 && !positive) {
+            value *= -1;
+        }
+
+        aInt32.setValue(value);
+        int32Serde.serialize(aInt32, out);
+    }
+
+    private void parseInt64(String int64, DataOutput out) throws AsterixException, HyracksDataException {
+        String errorMessage = "This can not be an instance of int64";
+        boolean positive = true;
+        long value = 0;
+        int offset = 0;
+
+        if (int64.charAt(offset) == '+') {
+            offset++;
+        } else if (int64.charAt(offset) == '-') {
+            offset++;
+            positive = false;
+        }
+        for (; offset < int64.length(); offset++) {
+            if (int64.charAt(offset) >= '0' && int64.charAt(offset) <= '9') {
+                value = (value * 10 + int64.charAt(offset) - '0');
+            } else if (int64.charAt(offset) == 'i' && int64.charAt(offset + 1) == '6' && int64.charAt(offset + 2) == '4'
+                    && offset + 3 == int64.length()) {
+                break;
+            } else {
+                throw new ParseException(errorMessage);
+            }
+        }
+        if (value < 0) {
+            throw new ParseException(errorMessage);
+        }
+        if (value > 0 && !positive) {
+            value *= -1;
+        }
+
+        aInt64.setValue(value);
+        int64Serde.serialize(aInt64, out);
+    }
+
+    /**
+     * Resets {@code listBuilderPool}, {@code recordBuilderPool} and {@code abvsBuilderPool} by
+     * calling {@code reset()} on each of them.
+     * Meant to run before parsing a top-level record so that the elements in those pools can be
+     * re-used.
+     */
+    private void resetPools() {
+        listBuilderPool.reset();
+        recordBuilderPool.reset();
+        abvsBuilderPool.reset();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
new file mode 100644
index 0000000..f5f2793
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
@@ -0,0 +1,522 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.DataOutput;
+
+import org.apache.asterix.external.api.IDataParser;
+import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import org.apache.asterix.om.base.ABinary;
+import org.apache.asterix.om.base.ABoolean;
+import org.apache.asterix.om.base.ACircle;
+import org.apache.asterix.om.base.ADate;
+import org.apache.asterix.om.base.ADateTime;
+import org.apache.asterix.om.base.ADayTimeDuration;
+import org.apache.asterix.om.base.ADouble;
+import org.apache.asterix.om.base.ADuration;
+import org.apache.asterix.om.base.AFloat;
+import org.apache.asterix.om.base.AInt16;
+import org.apache.asterix.om.base.AInt32;
+import org.apache.asterix.om.base.AInt64;
+import org.apache.asterix.om.base.AInt8;
+import org.apache.asterix.om.base.AInterval;
+import org.apache.asterix.om.base.ALine;
+import org.apache.asterix.om.base.AMutableBinary;
+import org.apache.asterix.om.base.AMutableCircle;
+import org.apache.asterix.om.base.AMutableDate;
+import org.apache.asterix.om.base.AMutableDateTime;
+import org.apache.asterix.om.base.AMutableDayTimeDuration;
+import org.apache.asterix.om.base.AMutableDouble;
+import org.apache.asterix.om.base.AMutableDuration;
+import org.apache.asterix.om.base.AMutableFloat;
+import org.apache.asterix.om.base.AMutableInt16;
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.asterix.om.base.AMutableInt64;
+import org.apache.asterix.om.base.AMutableInt8;
+import org.apache.asterix.om.base.AMutableInterval;
+import org.apache.asterix.om.base.AMutableLine;
+import org.apache.asterix.om.base.AMutablePoint;
+import org.apache.asterix.om.base.AMutablePoint3D;
+import org.apache.asterix.om.base.AMutableRectangle;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.base.AMutableTime;
+import org.apache.asterix.om.base.AMutableUUID;
+import org.apache.asterix.om.base.AMutableYearMonthDuration;
+import org.apache.asterix.om.base.ANull;
+import org.apache.asterix.om.base.APoint;
+import org.apache.asterix.om.base.APoint3D;
+import org.apache.asterix.om.base.ARectangle;
+import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.base.ATime;
+import org.apache.asterix.om.base.AUUID;
+import org.apache.asterix.om.base.AYearMonthDuration;
+import org.apache.asterix.om.base.temporal.ADateParserFactory;
+import org.apache.asterix.om.base.temporal.ADurationParserFactory;
+import org.apache.asterix.om.base.temporal.ADurationParserFactory.ADurationParseOption;
+import org.apache.asterix.om.base.temporal.ATimeParserFactory;
+import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.bytes.Base64Parser;
+import org.apache.hyracks.util.bytes.HexParser;
+
+/**
+ * Base class for data parsers. Includes the common set of definitions for
+ * serializers/deserializers for built-in ADM types.
+ */
+public abstract class AbstractDataParser implements IDataParser {
+
+    protected AMutableInt8 aInt8 = new AMutableInt8((byte) 0);
+    protected AMutableInt16 aInt16 = new AMutableInt16((short) 0);
+    protected AMutableInt32 aInt32 = new AMutableInt32(0);
+    protected AMutableInt64 aInt64 = new AMutableInt64(0);
+    protected AMutableDouble aDouble = new AMutableDouble(0);
+    protected AMutableFloat aFloat = new AMutableFloat(0);
+    protected AMutableString aString = new AMutableString("");
+    protected AMutableBinary aBinary = new AMutableBinary(null, 0, 0);
+    protected AMutableString aStringFieldName = new AMutableString("");
+    protected AMutableUUID aUUID = new AMutableUUID(0, 0);
+    // For temporal and spatial data types
+    protected AMutableTime aTime = new AMutableTime(0);
+    protected AMutableDateTime aDateTime = new AMutableDateTime(0L);
+    protected AMutableDuration aDuration = new AMutableDuration(0, 0);
+    protected AMutableDayTimeDuration aDayTimeDuration = new AMutableDayTimeDuration(0);
+    protected AMutableYearMonthDuration aYearMonthDuration = new AMutableYearMonthDuration(0);
+    protected AMutablePoint aPoint = new AMutablePoint(0, 0);
+    protected AMutablePoint3D aPoint3D = new AMutablePoint3D(0, 0, 0);
+    protected AMutableCircle aCircle = new AMutableCircle(null, 0);
+    protected AMutableRectangle aRectangle = new AMutableRectangle(null, null);
+    protected AMutablePoint aPoint2 = new AMutablePoint(0, 0);
+    protected AMutableLine aLine = new AMutableLine(null, null);
+    protected AMutableDate aDate = new AMutableDate(0);
+    protected final AMutableInterval aInterval = new AMutableInterval(0L, 0L, (byte) 0);
+
+    // Serializers
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<ADouble> doubleSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ADOUBLE);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AString> stringSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ASTRING);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<ABinary> binarySerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ABINARY);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AFloat> floatSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AFLOAT);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AInt8> int8Serde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT8);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AInt16> int16Serde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT16);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AInt32> int32Serde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT32);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AInt64> int64Serde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT64);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<ABoolean> booleanSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ABOOLEAN);
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<ANull> nullSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ANULL);
+
+    protected final HexParser hexParser = new HexParser();
+    protected final Base64Parser base64Parser = new Base64Parser();
+
+    // For UUID, we assume that the data to parse holds the canonical string representation of a UUID
+    // (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx).
+    // Thus, we need to call AMutableUUID.fromStringToAMuatbleUUID() to convert it to the internal
+    // representation (two long values).
+    @SuppressWarnings("unchecked")
+    protected ISerializerDeserializer<AUUID> uuidSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AUUID);
+
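+    // NOTE(review): to avoid race conditions, the serdes for temporal and spatial data types were meant
+    // to be one per parser, yet the serdes below are declared static and are therefore shared by all
+    // parser instances. Confirm that they are safe to share, or turn them into instance fields.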
+    @SuppressWarnings("unchecked")
+    protected static final ISerializerDeserializer<ATime> timeSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ATIME);
+    @SuppressWarnings("unchecked")
+    protected static final ISerializerDeserializer<ADate> dateSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ADATE);
+    @SuppressWarnings("unchecked")
+    protected static final ISerializerDeserializer<ADateTime> datetimeSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ADATETIME);
+    @SuppressWarnings("unchecked")
+    protected static final ISerializerDeserializer<ADuration> durationSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ADURATION);
+    @SuppressWarnings("unchecked")
+    protected static final ISerializerDeserializer<ADayTimeDuration> dayTimeDurationSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ADAYTIMEDURATION);
+    @SuppressWarnings("unchecked")
+    protected static final ISerializerDeserializer<AYearMonthDuration> yearMonthDurationSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AYEARMONTHDURATION);
+    @SuppressWarnings("unchecked")
+    protected final static ISerializerDeserializer<APoint> pointSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.APOINT);
+    @SuppressWarnings("unchecked")
+    protected final static ISerializerDeserializer<APoint3D> point3DSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.APOINT3D);
+    @SuppressWarnings("unchecked")
+    protected final static ISerializerDeserializer<ACircle> circleSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ACIRCLE);
+    @SuppressWarnings("unchecked")
+    protected final static ISerializerDeserializer<ARectangle> rectangleSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ARECTANGLE);
+    @SuppressWarnings("unchecked")
+    protected final static ISerializerDeserializer<ALine> lineSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.ALINE);
+    @SuppressWarnings("unchecked")
+    private static final ISerializerDeserializer<AInterval> intervalSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINTERVAL);
+
+    protected String filename;
+
+    /**
+     * Stores the given name in the {@code filename} field.
+     * Declared without an access modifier, so it is only callable from within this package.
+     *
+     * @param filename
+     *            the file name to remember
+     */
+    void setFilename(String filename) {
+        this.filename = filename;
+    }
+
+    protected void parseTime(String time, DataOutput out) throws HyracksDataException {
+        int chrononTimeInMs;
+        try {
+            chrononTimeInMs = ATimeParserFactory.parseTimePart(time, 0, time.length());
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+        aTime.setValue(chrononTimeInMs);
+        timeSerde.serialize(aTime, out);
+    }
+
+    protected void parseDate(String date, DataOutput out) throws HyracksDataException {
+        long chrononTimeInMs = 0;
+        try {
+            chrononTimeInMs = ADateParserFactory.parseDatePart(date, 0, date.length());
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+        short temp = 0;
+        if (chrononTimeInMs < 0 && chrononTimeInMs % GregorianCalendarSystem.CHRONON_OF_DAY != 0) {
+            temp = 1;
+        }
+        aDate.setValue((int) (chrononTimeInMs / GregorianCalendarSystem.CHRONON_OF_DAY) - temp);
+        dateSerde.serialize(aDate, out);
+    }
+
+    protected void parseDateTime(String datetime, DataOutput out) throws HyracksDataException {
+        long chrononTimeInMs = 0;
+        try {
+            // +1 if it is negative (-)
+            short timeOffset = (short) ((datetime.charAt(0) == '-') ? 1 : 0);
+
+            timeOffset += 8;
+
+            if (datetime.charAt(timeOffset) != 'T') {
+                timeOffset += 2;
+                if (datetime.charAt(timeOffset) != 'T') {
+                    throw new AlgebricksException("This can not be an instance of datetime: missing T");
+                }
+            }
+            chrononTimeInMs = ADateParserFactory.parseDatePart(datetime, 0, timeOffset);
+            chrononTimeInMs += ATimeParserFactory.parseTimePart(datetime, timeOffset + 1,
+                    datetime.length() - timeOffset - 1);
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+        aDateTime.setValue(chrononTimeInMs);
+        datetimeSerde.serialize(aDateTime, out);
+    }
+
+    protected void parseDuration(String duration, DataOutput out) throws HyracksDataException {
+        try {
+            ADurationParserFactory.parseDuration(duration, 0, duration.length(), aDuration, ADurationParseOption.All);
+            durationSerde.serialize(aDuration, out);
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    protected void parseDateTimeDuration(String durationString, DataOutput out) throws HyracksDataException {
+        try {
+            ADurationParserFactory.parseDuration(durationString, 0, durationString.length(), aDayTimeDuration,
+                    ADurationParseOption.All);
+            dayTimeDurationSerde.serialize(aDayTimeDuration, out);
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    protected void parseYearMonthDuration(String durationString, DataOutput out) throws HyracksDataException {
+        try {
+            ADurationParserFactory.parseDuration(durationString, 0, durationString.length(), aYearMonthDuration,
+                    ADurationParseOption.All);
+            yearMonthDurationSerde.serialize(aYearMonthDuration, out);
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    protected void parsePoint(String point, DataOutput out) throws HyracksDataException {
+        try {
+            aPoint.setValue(Double.parseDouble(point.substring(0, point.indexOf(','))),
+                    Double.parseDouble(point.substring(point.indexOf(',') + 1, point.length())));
+            pointSerde.serialize(aPoint, out);
+        } catch (HyracksDataException e) {
+            throw new HyracksDataException(point + " can not be an instance of point");
+        }
+    }
+
+    /**
+     * Parses a 3D point written as {@code "x,y,z"} and writes the serialized point to {@code out}.
+     *
+     * @param point3d
+     *            the text to parse; the coordinates are split at the first two commas
+     * @param out
+     *            the output the serialized point is written to
+     * @throws HyracksDataException
+     *             if a comma is missing, a coordinate is not a valid double, or serialization
+     *             fails; the original failure is attached as the cause
+     */
+    protected void parse3DPoint(String point3d, DataOutput out) throws HyracksDataException {
+        try {
+            int firstCommaIndex = point3d.indexOf(',');
+            int secondCommaIndex = point3d.indexOf(',', firstCommaIndex + 1);
+            aPoint3D.setValue(Double.parseDouble(point3d.substring(0, firstCommaIndex)),
+                    Double.parseDouble(point3d.substring(firstCommaIndex + 1, secondCommaIndex)),
+                    Double.parseDouble(point3d.substring(secondCommaIndex + 1)));
+            point3DSerde.serialize(aPoint3D, out);
+        } catch (NumberFormatException | IndexOutOfBoundsException | HyracksDataException e) {
+            // Malformed text surfaces as NumberFormatException or, when a comma is missing, as an
+            // index error; report those with the same message instead of letting them escape.
+            throw new HyracksDataException(point3d + " can not be an instance of point3d", e);
+        }
+    }
+
+    /**
+     * Parses a circle written as {@code "x,y radius"} (center and radius separated by a single
+     * space) and writes the serialized circle to {@code out}.
+     *
+     * @param circle
+     *            the text to parse
+     * @param out
+     *            the output the serialized circle is written to
+     * @throws HyracksDataException
+     *             if the center or the radius is missing or is not a valid double, or serialization
+     *             fails; the original failure is attached as the cause
+     */
+    protected void parseCircle(String circle, DataOutput out) throws HyracksDataException {
+        try {
+            String[] parts = circle.split(" ");
+            String[] center = parts[0].split(",");
+            aPoint.setValue(Double.parseDouble(center[0]), Double.parseDouble(center[1]));
+            aCircle.setValue(aPoint, Double.parseDouble(parts[1]));
+            circleSerde.serialize(aCircle, out);
+        } catch (NumberFormatException | IndexOutOfBoundsException | HyracksDataException e) {
+            // Malformed text surfaces as NumberFormatException or, when a component is missing, as
+            // an index error; report those with the same message instead of letting them escape.
+            throw new HyracksDataException(circle + " can not be an instance of circle", e);
+        }
+    }
+
+    /**
+     * Parses a rectangle written as {@code "x1,y1 x2,y2"} (two points separated by a single space)
+     * and writes the serialized rectangle to {@code out}. The two points may be given in either
+     * order; they are stored as (bottom left, top right).
+     *
+     * @param rectangle
+     *            the text to parse
+     * @param out
+     *            the output the serialized rectangle is written to
+     * @throws HyracksDataException
+     *             if the text does not hold exactly two points, a coordinate is missing or is not a
+     *             valid double, or serialization fails; the original failure is attached as the
+     *             cause
+     * @throws IllegalArgumentException
+     *             if neither point is strictly below and to the left of the other
+     */
+    protected void parseRectangle(String rectangle, DataOutput out) throws HyracksDataException {
+        try {
+            String[] points = rectangle.split(" ");
+            if (points.length != 2) {
+                throw new HyracksDataException("rectangle consists of only 2 points.");
+            }
+            String[] first = points[0].split(",");
+            String[] second = points[1].split(",");
+            aPoint.setValue(Double.parseDouble(first[0]), Double.parseDouble(first[1]));
+            aPoint2.setValue(Double.parseDouble(second[0]), Double.parseDouble(second[1]));
+            if (aPoint.getX() > aPoint2.getX() && aPoint.getY() > aPoint2.getY()) {
+                aRectangle.setValue(aPoint2, aPoint);
+            } else if (aPoint.getX() < aPoint2.getX() && aPoint.getY() < aPoint2.getY()) {
+                aRectangle.setValue(aPoint, aPoint2);
+            } else {
+                // Not caught below (it is not a NumberFormatException): propagates unchanged.
+                throw new IllegalArgumentException(
+                        "Rectangle arugment must be either (bottom left point, top right point) or (top right point, bottom left point)");
+            }
+            rectangleSerde.serialize(aRectangle, out);
+        } catch (NumberFormatException | IndexOutOfBoundsException | HyracksDataException e) {
+            // Malformed text surfaces as NumberFormatException or, when a coordinate is missing, as
+            // an index error; report those with the same message instead of letting them escape.
+            throw new HyracksDataException(rectangle + " can not be an instance of rectangle", e);
+        }
+    }
+
+    /**
+     * Parses a line written as {@code "x1,y1 x2,y2"} (two points separated by a single space) and
+     * writes the serialized line to {@code out}.
+     *
+     * @param line
+     *            the text to parse
+     * @param out
+     *            the output the serialized line is written to
+     * @throws HyracksDataException
+     *             if the text does not hold exactly two points, a coordinate is missing or is not a
+     *             valid double, or serialization fails; the original failure is attached as the
+     *             cause
+     */
+    protected void parseLine(String line, DataOutput out) throws HyracksDataException {
+        try {
+            String[] points = line.split(" ");
+            if (points.length != 2) {
+                throw new HyracksDataException("line consists of only 2 points.");
+            }
+            String[] first = points[0].split(",");
+            String[] second = points[1].split(",");
+            aPoint.setValue(Double.parseDouble(first[0]), Double.parseDouble(first[1]));
+            aPoint2.setValue(Double.parseDouble(second[0]), Double.parseDouble(second[1]));
+            aLine.setValue(aPoint, aPoint2);
+            lineSerde.serialize(aLine, out);
+        } catch (NumberFormatException | IndexOutOfBoundsException | HyracksDataException e) {
+            // Malformed text surfaces as NumberFormatException or, when a coordinate is missing, as
+            // an index error; report those with the same message instead of letting them escape.
+            throw new HyracksDataException(line + " can not be an instance of line", e);
+        }
+    }
+
+    /**
+     * Converts a hex string into binary data and writes the serialized binary value to {@code out}.
+     * The conversion is delegated to {@code hexParser}, whose resulting byte array and length are
+     * then wrapped in the reusable {@code aBinary} holder.
+     *
+     * @param input
+     *            the characters holding the hex string
+     * @param start
+     *            passed to the hex parser together with {@code length}; presumably the index of the
+     *            first hex character in {@code input}
+     * @param length
+     *            presumably the number of characters to decode
+     * @param out
+     *            the output the serialized binary value is written to
+     */
+    protected void parseHexBinaryString(char[] input, int start, int length, DataOutput out)
+            throws HyracksDataException {
+        hexParser.generateByteArrayFromHexString(input, start, length);
+        aBinary.setValue(hexParser.getByteArray(), 0, hexParser.getLength());
+        binarySerde.serialize(aBinary, out);
+    }
+
+    /**
+     * Converts a base64 string into binary data and writes the serialized binary value to
+     * {@code out}. The conversion is delegated to {@code base64Parser}, whose resulting byte array
+     * and length are then wrapped in the reusable {@code aBinary} holder.
+     *
+     * @param input
+     *            the characters holding the base64 string
+     * @param start
+     *            passed to the base64 parser together with {@code length}; presumably the index of
+     *            the first base64 character in {@code input}
+     * @param length
+     *            presumably the number of characters to decode
+     * @param out
+     *            the output the serialized binary value is written to
+     */
+    protected void parseBase64BinaryString(char[] input, int start, int length, DataOutput out)
+            throws HyracksDataException {
+        base64Parser.generatePureByteArrayFromBase64String(input, start, length);
+        aBinary.setValue(base64Parser.getByteArray(), 0, base64Parser.getLength());
+        binarySerde.serialize(aBinary, out);
+    }
+
+    /**
+     * Parses an interval written as two datetime values separated by a comma and writes the
+     * serialized interval (tagged as a DATETIME interval) to {@code out}.
+     * <p>
+     * Each datetime value must contain a {@code 'T'} separating its date part from its time part.
+     * Surrounding double quotes and spaces around each part are skipped.
+     *
+     * @param interval
+     *            the text to parse, in the form {@code start,end}
+     * @param out
+     *            the output the serialized interval is written to
+     * @throws HyracksDataException
+     *             wrapping any failure to locate the comma or a {@code 'T'} separator, to parse
+     *             either datetime, or to build the interval; also thrown if serialization fails
+     */
+    protected void parseDateTimeInterval(String interval, DataOutput out) throws HyracksDataException {
+        long chrononTimeInMsStart = 0;
+        long chrononTimeInMsEnd = 0;
+        try {
+            // the starting point for parsing (so for the accessor)
+            int startOffset = 0;
+            int endOffset, timeSeperatorOffsetInDatetimeString;
+
+            // Get the index for the comma
+            int commaIndex = interval.indexOf(',');
+            if (commaIndex < 1) {
+                throw new AlgebricksException("comma is missing for a string of interval");
+            }
+
+            endOffset = commaIndex - 1;
+            timeSeperatorOffsetInDatetimeString = interval.indexOf('T');
+
+            // The first 'T' must lie before the comma. Otherwise the start value has no 'T' of its
+            // own and the separator found belongs to the end datetime.
+            if (timeSeperatorOffsetInDatetimeString < 0 || timeSeperatorOffsetInDatetimeString > commaIndex) {
+                throw new AlgebricksException(
+                        "This can not be an instance of interval: missing T for a datetime value.");
+            }
+
+            chrononTimeInMsStart = parseDatePart(interval, startOffset, timeSeperatorOffsetInDatetimeString - 1);
+
+            chrononTimeInMsStart += parseTimePart(interval, timeSeperatorOffsetInDatetimeString + 1, endOffset);
+
+            // Interval End
+            startOffset = commaIndex + 1;
+            endOffset = interval.length() - 1;
+
+            // Searching from startOffset guarantees this 'T' belongs to the end datetime.
+            timeSeperatorOffsetInDatetimeString = interval.indexOf('T', startOffset);
+
+            if (timeSeperatorOffsetInDatetimeString < 0) {
+                throw new AlgebricksException(
+                        "This can not be an instance of interval: missing T for a datetime value.");
+            }
+
+            chrononTimeInMsEnd = parseDatePart(interval, startOffset, timeSeperatorOffsetInDatetimeString - 1);
+
+            chrononTimeInMsEnd += parseTimePart(interval, timeSeperatorOffsetInDatetimeString + 1, endOffset);
+        } catch (Exception e) {
+            // Deliberately broad: also covers index errors raised while trimming the parts.
+            throw new HyracksDataException(e);
+        }
+
+        try {
+            aInterval.setValue(chrononTimeInMsStart, chrononTimeInMsEnd, ATypeTag.DATETIME.serialize());
+        } catch (AlgebricksException e) {
+            throw new HyracksDataException(e);
+        }
+
+        intervalSerde.serialize(aInterval, out);
+    }
+
+    protected void parseTimeInterval(String interval, DataOutput out) throws HyracksDataException {
+        long chrononTimeInMsStart = 0;
+        long chrononTimeInMsEnd = 0;
+        try {
+            int startOffset = 0;
+            int endOffset;
+
+            // Get the index for the comma
+            int commaIndex = interval.indexOf(',');
+            if (commaIndex < 0) {
+                throw new AlgebricksException("comma is missing for a string of interval");
+            }
+
+            endOffset = commaIndex - 1;
+            // Interval Start
+            chrononTimeInMsStart = parseTimePart(interval, startOffset, endOffset);
+
+            if (chrononTimeInMsStart < 0) {
+                chrononTimeInMsStart += GregorianCalendarSystem.CHRONON_OF_DAY;
+            }
+
+            // Interval End
+            startOffset = commaIndex + 1;
+            endOffset = interval.length() - 1;
+
+            chrononTimeInMsEnd = parseTimePart(interval, startOffset, endOffset);
+            if (chrononTimeInMsEnd < 0) {
+                chrononTimeInMsEnd += GregorianCalendarSystem.CHRONON_OF_DAY;
+            }
+
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+
+        try {
+            aInterval.setValue(chrononTimeInMsStart, chrononTimeInMsEnd, ATypeTag.TIME.serialize());
+        } catch (AlgebricksException e) {
+            throw new HyracksDataException(e);
+        }
+        intervalSerde.serialize(aInterval, out);
+    }
+
+    protected void parseDateInterval(String interval, DataOutput out) throws HyracksDataException {
+        long chrononTimeInMsStart = 0;
+        long chrononTimeInMsEnd = 0;
+        try {
+            // the starting point for parsing (so for the accessor)
+            int startOffset = 0;
+            int endOffset;
+
+            // Get the index for the comma
+            int commaIndex = interval.indexOf(',');
+            if (commaIndex < 1) {
+                throw new AlgebricksException("comma is missing for a string of interval");
+            }
+
+            endOffset = commaIndex - 1;
+            chrononTimeInMsStart = parseDatePart(interval, startOffset, endOffset);
+
+            // Interval End
+            startOffset = commaIndex + 1;
+            endOffset = interval.length() - 1;
+
+            chrononTimeInMsEnd = parseDatePart(interval, startOffset, endOffset);
+
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+
+        try {
+            aInterval.setValue((chrononTimeInMsStart / GregorianCalendarSystem.CHRONON_OF_DAY),
+                    (chrononTimeInMsEnd / GregorianCalendarSystem.CHRONON_OF_DAY), ATypeTag.DATE.serialize());
+        } catch (AlgebricksException e) {
+            throw new HyracksDataException(e);
+        }
+        intervalSerde.serialize(aInterval, out);
+    }
+
+    private long parseDatePart(String interval, int startOffset, int endOffset)
+            throws AlgebricksException, HyracksDataException {
+
+        while (interval.charAt(endOffset) == '"' || interval.charAt(endOffset) == ' ') {
+            endOffset--;
+        }
+
+        while (interval.charAt(startOffset) == '"' || interval.charAt(startOffset) == ' ') {
+            startOffset++;
+        }
+
+        return ADateParserFactory.parseDatePart(interval, startOffset, endOffset - startOffset + 1);
+    }
+
+    private int parseTimePart(String interval, int startOffset, int endOffset)
+            throws AlgebricksException, HyracksDataException {
+
+        while (interval.charAt(endOffset) == '"' || interval.charAt(endOffset) == ' ') {
+            endOffset--;
+        }
+
+        while (interval.charAt(startOffset) == '"' || interval.charAt(startOffset) == ' ') {
+            startOffset++;
+        }
+
+        return ATimeParserFactory.parseTimePart(interval, startOffset, endOffset - startOffset + 1);
+    }
+}


[12/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SemiStructuredRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SemiStructuredRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SemiStructuredRecordReader.java
new file mode 100644
index 0000000..9864805
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SemiStructuredRecordReader.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataExceptionUtils;
+
+public class SemiStructuredRecordReader extends AbstractStreamRecordReader {
+    // Extracts records delimited by balanced recordStart/recordEnd characters from a character stream.
+    private int depth; // nesting level of recordStart/recordEnd seen outside strings
+    private boolean prevCharEscape; // previous in-string char was an ESCAPE that was not itself escaped
+    private boolean inString; // currently between two QUOTE characters
+    private char recordStart;
+    private char recordEnd;
+    private int recordNumber = 0; // number of complete records found so far
+
+    public int getRecordNumber() {
+        return recordNumber;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        super.configure(configuration);
+        String recStartString = configuration.get(ExternalDataConstants.KEY_RECORD_START);
+        String recEndString = configuration.get(ExternalDataConstants.KEY_RECORD_END);
+        if (recStartString != null) { // optional override; must be exactly one character
+            if (recStartString.length() != 1) {
+                throw new AsterixException(
+                        ExternalDataExceptionUtils.incorrectParameterMessage(ExternalDataConstants.KEY_RECORD_START,
+                                ExternalDataConstants.PARAMETER_OF_SIZE_ONE, recStartString));
+            }
+            recordStart = recStartString.charAt(0);
+        } else {
+            recordStart = ExternalDataConstants.DEFAULT_RECORD_START;
+        }
+        if (recEndString != null) { // optional override; must be exactly one character
+            if (recEndString.length() != 1) {
+                throw new AsterixException(
+                        ExternalDataExceptionUtils.incorrectParameterMessage(ExternalDataConstants.KEY_RECORD_END,
+                                ExternalDataConstants.PARAMETER_OF_SIZE_ONE, recEndString));
+            }
+            recordEnd = recEndString.charAt(0);
+        } else {
+            recordEnd = ExternalDataConstants.DEFAULT_RECORD_END;
+        }
+    }
+
+    @Override
+    public boolean hasNext() throws Exception { // reads the next record into 'record'; false at EOF
+        record.reset();
+        boolean hasStarted = false;
+        boolean hasFinished = false;
+        prevCharEscape = false;
+        inString = false;
+        depth = 0;
+        do {
+            int startPosn = bufferPosn; //starting from where we left off the last time
+            if (bufferPosn >= bufferLength) {
+                startPosn = bufferPosn = 0;
+                bufferLength = reader.read(inputBuffer);
+                if (bufferLength <= 0) {
+                    return false; // EOF; a record still being accumulated is not reported
+                }
+            }
+            if (!hasStarted) {
+                for (; bufferPosn < bufferLength; ++bufferPosn) { //search for record begin
+                    if (inputBuffer[bufferPosn] == recordStart) {
+                        startPosn = bufferPosn;
+                        hasStarted = true;
+                        depth = 1;
+                        ++bufferPosn; // at next invocation proceed from following byte
+                        break;
+                    } else if (inputBuffer[bufferPosn] != ExternalDataConstants.SPACE
+                            && inputBuffer[bufferPosn] != ExternalDataConstants.TAB
+                            && inputBuffer[bufferPosn] != ExternalDataConstants.LF
+                            && inputBuffer[bufferPosn] != ExternalDataConstants.CR) {
+                        // corrupted file. clear the buffer and stop reading
+                        reader.skipError();
+                        bufferPosn = bufferLength = 0;
+                        throw new IOException("Malformed input stream");
+                    }
+                }
+            }
+            if (hasStarted) {
+                for (; bufferPosn < bufferLength; ++bufferPosn) { //search for record end
+                    if (inString) {
+                        // we are in a string, we only care about the string end
+                        if (inputBuffer[bufferPosn] == ExternalDataConstants.QUOTE && !prevCharEscape) {
+                            inString = false;
+                        }
+                        if (prevCharEscape) { // an escape only affects the single char that follows it
+                            prevCharEscape = false;
+                        } else {
+                            prevCharEscape = inputBuffer[bufferPosn] == ExternalDataConstants.ESCAPE;
+                        }
+                    } else {
+                        if (inputBuffer[bufferPosn] == ExternalDataConstants.QUOTE) {
+                            inString = true;
+                        } else if (inputBuffer[bufferPosn] == recordStart) {
+                            depth += 1;
+                        } else if (inputBuffer[bufferPosn] == recordEnd) {
+                            depth -= 1;
+                            if (depth == 0) {
+                                hasFinished = true;
+                                ++bufferPosn; // at next invocation proceed from following byte
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            // append what was consumed; NOTE: whitespace skipped in a buffer with no record start is appended too
+            int appendLength = bufferPosn - startPosn;
+            if (appendLength > 0) {
+                record.append(inputBuffer, startPosn, appendLength);
+            }
+        } while (!hasFinished);
+        record.endRecord();
+        recordNumber++;
+        return true;
+    }
+
+    @Override
+    public boolean stop() {
+        return false;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SequenceLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SequenceLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SequenceLookupReader.java
new file mode 100644
index 0000000..c294ccb
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/SequenceLookupReader.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.RecordId;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.log4j.Logger;
+
+public class SequenceLookupReader extends AbstractCharRecordLookupReader {
+
+    private static final Logger LOGGER = Logger.getLogger(SequenceLookupReader.class.getName());
+    private Reader reader; // reader over the currently open sequence file; null until openFile()
+    private Writable key; // key holder instantiated from the file's key class in openFile()
+
+    public SequenceLookupReader(ExternalFileIndexAccessor snapshotAccessor, FileSystem fs, Configuration conf) {
+        super(snapshotAccessor, fs, conf);
+    }
+
+    // Positions the reader at the record's offset and loads the key/value pair found there.
+    @Override
+    protected void readRecord(RecordId rid) throws IOException {
+        reader.seek(rid.getOffset());
+        reader.next(key, value);
+    }
+
+    // Closes the open file, if any; a failure while closing is logged, not propagated.
+    @Override
+    protected void closeFile() {
+        if (reader != null) {
+            try {
+                reader.close();
+            } catch (Exception e) {
+                LOGGER.warn("Error closing HDFS file ", e);
+            }
+        }
+    }
+
+    @SuppressWarnings("deprecation")
+    @Override
+    protected void openFile() throws IllegalArgumentException, IOException {
+        reader = new Reader(fs, new Path(file.getFileName()), conf);
+        key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+        value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TextLookupReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TextLookupReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TextLookupReader.java
new file mode 100644
index 0000000..b276bfa
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TextLookupReader.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.indexing.RecordId;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Logger;
+
+public class TextLookupReader extends AbstractCharRecordLookupReader {
+
+    public TextLookupReader(ExternalFileIndexAccessor snapshotAccessor, FileSystem fs, Configuration conf) {
+        super(snapshotAccessor, fs, conf);
+    }
+
+    private static final Logger LOGGER = Logger.getLogger(TextLookupReader.class.getName());
+    private HDFSTextLineReader reader;
+
+    @Override
+    protected void readRecord(RecordId rid) throws IOException {
+        reader.seek(rid.getOffset());
+        reader.readLine(value);
+    }
+
+    @Override
+    protected void closeFile() {
+        if (reader == null) {
+            return;
+        }
+        try {
+            reader.close();
+        } catch (Exception e) {
+            LOGGER.warn("Error closing HDFS file ", e);
+        }
+    }
+
+    @Override
+    protected void openFile() throws IllegalArgumentException, IOException {
+        if (reader == null) {
+            reader = new HDFSTextLineReader();
+        }
+        reader.resetReader(fs.open(new Path(file.getFileName())));;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPullRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPullRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPullRecordReader.java
new file mode 100644
index 0000000..34d8122
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPullRecordReader.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.GenericRecord;
+import org.apache.asterix.external.util.TwitterUtil;
+import org.apache.asterix.external.util.TwitterUtil.SearchAPIConstants;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+import twitter4j.Query;
+import twitter4j.QueryResult;
+import twitter4j.Status;
+import twitter4j.Twitter;
+import twitter4j.TwitterException;
+
+public class TwitterPullRecordReader implements IRecordReader<Status> {
+
+    private String keywords;
+    private Query query;
+    private Twitter twitter;
+    private int requestInterval = 5; // seconds; default kept when no interval is configured
+    private QueryResult result;
+    private int nextTweetIndex = 0; // index into result.getTweets() of the next tweet to return
+    private long lastTweetIdReceived = 0; // highest tweet id seen; passed as sinceId to the next search
+    private GenericRecord<Status> record; // single holder object reused for every returned tweet
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        twitter = TwitterUtil.getTwitterService(configuration);
+        keywords = configuration.get(SearchAPIConstants.QUERY);
+        // The interval is optional: parsing a missing (null) value would throw, so keep the default.
+        String interval = configuration.get(SearchAPIConstants.INTERVAL);
+        if (interval != null) {
+            requestInterval = Integer.parseInt(interval);
+        }
+        query = new Query(keywords);
+        query.setCount(100);
+        record = new GenericRecord<Status>();
+    }
+
+    @Override
+    public boolean hasNext() throws Exception {
+        return true; // always reports more data; next() may still return null
+    }
+
+    @Override
+    public IRawRecord<Status> next() throws IOException, InterruptedException {
+        // Run a new search only once the current batch is exhausted, after sleeping requestInterval seconds.
+        if (result == null || nextTweetIndex >= result.getTweets().size()) {
+            Thread.sleep(1000L * requestInterval); // long arithmetic avoids int overflow
+            query.setSinceId(lastTweetIdReceived);
+            try {
+                result = twitter.search(query);
+            } catch (TwitterException e) {
+                throw new HyracksDataException(e);
+            }
+            nextTweetIndex = 0;
+        }
+        if (result == null || result.getTweets().isEmpty()) {
+            return null; // the search produced no tweets
+        }
+        List<Status> tw = result.getTweets();
+        Status tweet = tw.get(nextTweetIndex++);
+        if (lastTweetIdReceived < tweet.getId()) {
+            lastTweetIdReceived = tweet.getId();
+        }
+        record.set(tweet);
+        return record;
+    }
+
+    @Override
+    public Class<Status> getRecordClass() throws IOException {
+        return Status.class;
+    }
+
+    @Override
+    public boolean stop() {
+        return false;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPushRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPushRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPushRecordReader.java
new file mode 100644
index 0000000..e7c141d
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/TwitterPushRecordReader.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.GenericRecord;
+import org.apache.asterix.external.util.TwitterUtil;
+
+import twitter4j.FilterQuery;
+import twitter4j.StallWarning;
+import twitter4j.Status;
+import twitter4j.StatusDeletionNotice;
+import twitter4j.StatusListener;
+import twitter4j.TwitterStream;
+
+public class TwitterPushRecordReader implements IRecordReader<Status> {
+    private LinkedBlockingQueue<Status> inputQ; // tweets added by the stream listener, drained by next()
+    private TwitterStream twitterStream;
+    private GenericRecord<Status> record; // single holder object reused for every returned tweet
+
+    @Override
+    public void close() throws IOException {
+        if (twitterStream != null) { // configure() may never have run, or may have failed early
+            twitterStream.clearListeners();
+            twitterStream.cleanUp();
+        }
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        record = new GenericRecord<Status>();
+        inputQ = new LinkedBlockingQueue<Status>();
+        twitterStream = TwitterUtil.getTwitterStream(configuration);
+        twitterStream.addListener(new TweetListener(inputQ));
+        FilterQuery query = TwitterUtil.getFilterQuery(configuration);
+        if (query != null) {
+            twitterStream.filter(query);
+        } else {
+            twitterStream.sample();
+        }
+    }
+
+    @Override
+    public boolean hasNext() throws Exception {
+        return true; // always reports more data; next() may still return null
+    }
+
+    @Override
+    public IRawRecord<Status> next() throws IOException, InterruptedException {
+        Status tweet = inputQ.poll(); // non-blocking; null when no tweet is queued
+        if (tweet == null) {
+            return null;
+        }
+        record.set(tweet);
+        return record;
+    }
+
+    @Override
+    public Class<? extends Status> getRecordClass() throws IOException {
+        return Status.class;
+    }
+
+    @Override
+    public boolean stop() {
+        return false;
+    }
+
+    private static final class TweetListener implements StatusListener { // uses no outer-instance state
+        private final LinkedBlockingQueue<Status> inputQ;
+
+        public TweetListener(LinkedBlockingQueue<Status> inputQ) {
+            this.inputQ = inputQ;
+        }
+
+        @Override
+        public void onStatus(Status tweet) {
+            inputQ.add(tweet);
+        }
+
+        @Override
+        public void onException(Exception arg0) {
+            // stream errors are currently ignored
+        }
+
+        @Override
+        public void onDeletionNotice(StatusDeletionNotice arg0) {
+        }
+
+        @Override
+        public void onScrubGeo(long arg0, long arg1) {
+        }
+
+        @Override
+        public void onStallWarning(StallWarning arg0) {
+        }
+
+        @Override
+        public void onTrackLimitationNotice(int arg0) {
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/HDFSLookupReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/HDFSLookupReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/HDFSLookupReaderFactory.java
new file mode 100644
index 0000000..e9fad25
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/HDFSLookupReaderFactory.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.factory;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.ILookupReaderFactory;
+import org.apache.asterix.external.api.ILookupRecordReader;
+import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
+import org.apache.asterix.external.input.record.reader.RCLookupReader;
+import org.apache.asterix.external.input.record.reader.SequenceLookupReader;
+import org.apache.asterix.external.input.record.reader.TextLookupReader;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.hdfs.dataflow.ConfFactory;
+
+public class HDFSLookupReaderFactory<T> implements ILookupReaderFactory<T> {
+
+    protected static final long serialVersionUID = 1L;
+    protected transient AlgebricksPartitionConstraint clusterLocations; // set by getPartitionConstraint()
+    protected ConfFactory confFactory; // wraps the JobConf built in configure()
+    protected Map<String, String> configuration;
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        clusterLocations = HDFSUtils.getPartitionConstraints(clusterLocations);
+        return clusterLocations;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+        JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
+        confFactory = new ConfFactory(conf);
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return false;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public ILookupRecordReader<? extends T> createRecordReader(IHyracksTaskContext ctx, int partition,
+            ExternalFileIndexAccessor snapshotAccessor) throws Exception {
+        String configuredFormat = configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT);
+        if (configuredFormat == null) { // fail descriptively instead of with an NPE on trim()
+            throw new AsterixException("Missing parameter: " + ExternalDataConstants.KEY_INPUT_FORMAT);
+        }
+        String inputFormatParameter = configuredFormat.trim();
+        JobConf conf = confFactory.getConf();
+        FileSystem fs = FileSystem.get(conf);
+        switch (inputFormatParameter) {
+            case ExternalDataConstants.INPUT_FORMAT_TEXT:
+                return (ILookupRecordReader<? extends T>) new TextLookupReader(snapshotAccessor, fs, conf);
+            case ExternalDataConstants.INPUT_FORMAT_SEQUENCE:
+                return (ILookupRecordReader<? extends T>) new SequenceLookupReader(snapshotAccessor, fs, conf);
+            case ExternalDataConstants.INPUT_FORMAT_RC:
+                return (ILookupRecordReader<? extends T>) new RCLookupReader(snapshotAccessor, fs, conf);
+            default:
+                throw new AsterixException("Unrecognised input format: " + inputFormatParameter);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/LineRecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/LineRecordReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/LineRecordReaderFactory.java
new file mode 100644
index 0000000..05d419d
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/LineRecordReaderFactory.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.factory;
+
+import java.util.Map;
+
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.reader.AbstractStreamRecordReaderFactory;
+import org.apache.asterix.external.input.record.reader.LineRecordReader;
+import org.apache.asterix.external.input.record.reader.QuotedLineRecordReader;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+public class LineRecordReaderFactory extends AbstractStreamRecordReaderFactory<char[]> {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates a QuotedLineRecordReader when the quote parameter is present, a LineRecordReader otherwise.
+     */
+    @Override
+    public IRecordReader<? extends char[]> createRecordReader(IHyracksTaskContext ctx, int partition) throws Exception {
+        final boolean quoted = configuration.get(ExternalDataConstants.KEY_QUOTE) != null;
+        final LineRecordReader lineReader = quoted ? new QuotedLineRecordReader() : new LineRecordReader();
+        return configureReader(lineReader, ctx, partition);
+    }
+
+    /** Records are exposed as character arrays. */
+    @Override
+    public Class<? extends char[]> getRecordClass() {
+        return char[].class;
+    }
+
+    /** Intentionally empty: this factory performs no stream-reader-factory configuration. */
+    @Override
+    protected void configureStreamReaderFactory(Map<String, String> configuration) throws Exception {
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/RSSRecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/RSSRecordReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/RSSRecordReaderFactory.java
new file mode 100644
index 0000000..a672f2f
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/RSSRecordReaderFactory.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.factory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.input.record.reader.RSSRecordReader;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+import com.sun.syndication.feed.synd.SyndEntryImpl;
+
+public class RSSRecordReaderFactory implements IRecordReaderFactory<SyndEntryImpl> {
+
+    private static final long serialVersionUID = 1L;
+    private Map<String, String> configuration;
+    private List<String> urls = new ArrayList<String>(); // one entry per configured feed URL
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    /** Requests as many partitions as there are configured feed URLs. */
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        final int feedCount = urls.size();
+        return new AlgebricksCountPartitionConstraint(feedCount);
+    }
+
+    /**
+     * Stores the configuration and splits the comma-separated RSS URL parameter into the URL list.
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+        final String urlList = configuration.get(ExternalDataConstants.KEY_RSS_URL);
+        if (urlList == null) {
+            throw new IllegalArgumentException("no RSS URL provided");
+        }
+        urls.clear();
+        for (String rssURL : urlList.split(",")) {
+            urls.add(rssURL);
+        }
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return false;
+    }
+
+    /** Creates and configures the reader for the URL stored at the given partition index. */
+    @Override
+    public IRecordReader<? extends SyndEntryImpl> createRecordReader(IHyracksTaskContext ctx, int partition)
+            throws Exception {
+        final RSSRecordReader rssReader = new RSSRecordReader(urls.get(partition));
+        rssReader.configure(configuration);
+        return rssReader;
+    }
+
+    @Override
+    public Class<? extends SyndEntryImpl> getRecordClass() {
+        return SyndEntryImpl.class;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/SemiStructuredRecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/SemiStructuredRecordReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/SemiStructuredRecordReaderFactory.java
new file mode 100644
index 0000000..91b439c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/SemiStructuredRecordReaderFactory.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.factory;
+
+import java.util.Map;
+
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.reader.AbstractStreamRecordReaderFactory;
+import org.apache.asterix.external.input.record.reader.SemiStructuredRecordReader;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Stream-based record reader factory that creates {@link SemiStructuredRecordReader}
+ * instances and declares {@code char[]} as its record type.
+ */
+public class SemiStructuredRecordReaderFactory extends AbstractStreamRecordReaderFactory<char[]> {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates a fresh reader and passes it to the inherited {@code configureReader} together
+     * with the task context and partition number.
+     */
+    @Override
+    public IRecordReader<? extends char[]> createRecordReader(IHyracksTaskContext ctx, int partition) throws Exception {
+        return configureReader(new SemiStructuredRecordReader(), ctx, partition);
+    }
+
+    /**
+     * @return {@code char[].class}
+     */
+    @Override
+    public Class<? extends char[]> getRecordClass() {
+        return char[].class;
+    }
+
+    /**
+     * Intentionally empty: this factory performs no factory-specific configuration.
+     */
+    @Override
+    protected void configureStreamReaderFactory(Map<String, String> configuration) throws Exception {
+        // nothing to configure
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/TwitterRecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/TwitterRecordReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/TwitterRecordReaderFactory.java
new file mode 100644
index 0000000..72aaa37
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/factory/TwitterRecordReaderFactory.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.factory;
+
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.input.record.reader.TwitterPullRecordReader;
+import org.apache.asterix.external.input.record.reader.TwitterPushRecordReader;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.external.util.TwitterUtil;
+import org.apache.asterix.external.util.TwitterUtil.AuthenticationConstants;
+import org.apache.asterix.external.util.TwitterUtil.SearchAPIConstants;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+import twitter4j.Status;
+
+public class TwitterRecordReaderFactory implements IRecordReaderFactory<Status> {
+
+    private static final long serialVersionUID = 1L;
+    private static final Logger LOGGER = Logger.getLogger(TwitterRecordReaderFactory.class.getName());
+
+    private static final String DEFAULT_INTERVAL = "10"; // 10 seconds
+    private static final int INTAKE_CARDINALITY = 1; // degree of parallelism at intake stage
+
+    private Map<String, String> configuration;
+    private boolean pull;
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        return new AlgebricksCountPartitionConstraint(INTAKE_CARDINALITY);
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+        TwitterUtil.initializeConfigurationWithAuthInfo(configuration);
+        if (!validateConfiguration(configuration)) {
+            StringBuilder builder = new StringBuilder();
+            builder.append("One or more parameters are missing from adapter configuration\n");
+            builder.append(AuthenticationConstants.OAUTH_CONSUMER_KEY + "\n");
+            builder.append(AuthenticationConstants.OAUTH_CONSUMER_SECRET + "\n");
+            builder.append(AuthenticationConstants.OAUTH_ACCESS_TOKEN + "\n");
+            builder.append(AuthenticationConstants.OAUTH_ACCESS_TOKEN_SECRET + "\n");
+            throw new Exception(builder.toString());
+        }
+        if (ExternalDataUtils.isPull(configuration)) {
+            pull = true;
+            if (configuration.get(SearchAPIConstants.QUERY) == null) {
+                throw new AsterixException(
+                        "parameter " + SearchAPIConstants.QUERY + " not specified as part of adaptor configuration");
+            }
+            String interval = configuration.get(SearchAPIConstants.INTERVAL);
+            if (interval != null) {
+                try {
+                    Integer.parseInt(interval);
+                } catch (NumberFormatException nfe) {
+                    throw new IllegalArgumentException(
+                            "parameter " + SearchAPIConstants.INTERVAL + " is defined incorrectly, expecting a number");
+                }
+            } else {
+                configuration.put(SearchAPIConstants.INTERVAL, DEFAULT_INTERVAL);
+                if (LOGGER.isLoggable(Level.WARNING)) {
+                    LOGGER.warning(" Parameter " + SearchAPIConstants.INTERVAL + " not defined, using default ("
+                            + DEFAULT_INTERVAL + ")");
+                }
+            }
+        } else if (ExternalDataUtils.isPush(configuration)) {
+            pull = false;
+        } else {
+            throw new AsterixException("One of boolean parameters " + ExternalDataConstants.KEY_PULL + " and "
+                    + ExternalDataConstants.KEY_PUSH + "must be specified as part of adaptor configuration");
+        }
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return false;
+    }
+
+    @Override
+    public IRecordReader<? extends Status> createRecordReader(IHyracksTaskContext ctx, int partition) throws Exception {
+        IRecordReader<Status> reader;
+        if (pull) {
+            reader = new TwitterPullRecordReader();
+        } else {
+            reader = new TwitterPushRecordReader();
+        }
+        reader.configure(configuration);
+        return reader;
+    }
+
+    @Override
+    public Class<? extends Status> getRecordClass() {
+        return Status.class;
+    }
+
+    private boolean validateConfiguration(Map<String, String> configuration) {
+        String consumerKey = configuration.get(AuthenticationConstants.OAUTH_CONSUMER_KEY);
+        String consumerSecret = configuration.get(AuthenticationConstants.OAUTH_CONSUMER_SECRET);
+        String accessToken = configuration.get(AuthenticationConstants.OAUTH_ACCESS_TOKEN);
+        String tokenSecret = configuration.get(AuthenticationConstants.OAUTH_ACCESS_TOKEN_SECRET);
+        if (consumerKey == null || consumerSecret == null || accessToken == null || tokenSecret == null) {
+            return false;
+        }
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStream.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStream.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStream.java
new file mode 100644
index 0000000..73f6195
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStream.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.InputStream;
+
+/**
+ * An {@link InputStream} extended with two control hooks, {@link #skipError()} and
+ * {@link #stop()}, that concrete streams must implement.
+ */
+public abstract class AInputStream extends InputStream {
+    /**
+     * Hook for moving past an error in the stream.
+     * NOTE(review): the meaning of the returned flag is not stated here; presumably
+     * {@code true} means reading can continue - confirm against the callers.
+     */
+    public abstract boolean skipError() throws Exception;
+
+    /**
+     * Hook for stopping the stream.
+     * NOTE(review): the meaning of the returned flag is not stated here; presumably it
+     * reports whether the stream was stopped - confirm against the callers.
+     */
+    public abstract boolean stop() throws Exception;
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStreamReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStreamReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStreamReader.java
new file mode 100644
index 0000000..e573f74
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AInputStreamReader.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.InputStreamReader;
+
+/**
+ * Character reader over an {@link AInputStream} that also exposes the stream's
+ * {@code skipError} hook. No charset is passed to {@link InputStreamReader}, so bytes are
+ * decoded with the platform default charset.
+ */
+public class AInputStreamReader extends InputStreamReader {
+    // the wrapped stream, kept so that skipError() can be forwarded to it
+    private final AInputStream source;
+
+    public AInputStreamReader(AInputStream in) {
+        super(in);
+        source = in;
+    }
+
+    /**
+     * Forwards to {@link AInputStream#skipError()} of the wrapped stream.
+     *
+     * @return whatever the wrapped stream returns
+     */
+    public boolean skipError() throws Exception {
+        final boolean result = source.skipError();
+        return result;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/BasicInputStream.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/BasicInputStream.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/BasicInputStream.java
new file mode 100644
index 0000000..aa7a3d8
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/BasicInputStream.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * {@link AInputStream} that forwards every {@link InputStream} operation to a wrapped stream
+ * and returns {@code false} from both control hooks.
+ */
+public class BasicInputStream extends AInputStream {
+    // the stream all operations are forwarded to
+    private final InputStream delegate;
+
+    public BasicInputStream(InputStream in) {
+        delegate = in;
+    }
+
+    // ---- plain forwarding of the InputStream operations ----
+
+    @Override
+    public int read() throws IOException {
+        return delegate.read();
+    }
+
+    @Override
+    public int read(byte[] b) throws IOException {
+        return delegate.read(b);
+    }
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        return delegate.read(b, off, len);
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        return delegate.skip(n);
+    }
+
+    @Override
+    public int available() throws IOException {
+        return delegate.available();
+    }
+
+    @Override
+    public void close() throws IOException {
+        delegate.close();
+    }
+
+    @Override
+    public synchronized void mark(int readlimit) {
+        delegate.mark(readlimit);
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        delegate.reset();
+    }
+
+    @Override
+    public boolean markSupported() {
+        return delegate.markSupported();
+    }
+
+    // ---- control hooks ----
+
+    /**
+     * @return always {@code false}
+     */
+    @Override
+    public boolean skipError() {
+        return false;
+    }
+
+    /**
+     * @return always {@code false}
+     */
+    @Override
+    public boolean stop() throws Exception {
+        return false;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/HDFSInputStreamProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/HDFSInputStreamProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/HDFSInputStreamProvider.java
new file mode 100644
index 0000000..b3ad1c3
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/HDFSInputStreamProvider.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.input.record.reader.HDFSRecordReader;
+import org.apache.asterix.external.provider.ExternalIndexerProvider;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Exposes the {@link Text} records of an {@link HDFSRecordReader} as one byte stream in which
+ * every record is followed by a line terminator.
+ *
+ * @param <K> key type of the underlying record reader
+ */
+public class HDFSInputStreamProvider<K> extends HDFSRecordReader<K, Text> implements IInputStreamProvider {
+
+    /**
+     * Initializes the underlying record reader, installs an empty {@link Text} as the current
+     * value and applies {@code configuration}. When {@code snapshot} is not {@code null} it is
+     * installed together with an indexer obtained from {@link ExternalIndexerProvider}, and the
+     * indexer is reset against this reader if the current split index is inside the snapshot.
+     */
+    public HDFSInputStreamProvider(boolean read[], InputSplit[] inputSplits, String[] readSchedule, String nodeName,
+            JobConf conf, Map<String, String> configuration, List<ExternalFile> snapshot) throws Exception {
+        super(read, inputSplits, readSchedule, nodeName, conf);
+        value = new Text();
+        configure(configuration);
+        if (snapshot != null) {
+            setSnapshot(snapshot);
+            setIndexer(ExternalIndexerProvider.getIndexer(configuration));
+            if (currentSplitIndex < snapshot.size()) {
+                indexer.reset(this);
+            }
+        }
+    }
+
+    /**
+     * @return a new stream view over the records of this reader
+     */
+    @Override
+    public AInputStream getInputStream() throws Exception {
+        return new HDFSInputStream();
+    }
+
+    /**
+     * Stream view over the enclosing reader's current {@code value}.
+     * NOTE(review): relies on the enclosing reader's {@code hasNext()} loading the next record
+     * into {@code value} - confirm against HDFSRecordReader.
+     */
+    private class HDFSInputStream extends AInputStream {
+        // Cursor into the current record:
+        //   pos <  length : data bytes remain
+        //   pos == length : all data delivered, the line terminator is still pending
+        //   pos >  length : record fully delivered, the next record has to be loaded
+        // With an empty initial value the cursor starts in the "fully delivered" state so that
+        // no terminator is emitted before the first record.
+        int pos = value.getLength() > 0 ? 0 : 1;
+
+        /**
+         * Returns the next byte of the stream as a value in {@code 0..255}, the line
+         * terminator after the last byte of each record, or {@code -1} when no further
+         * record is available.
+         */
+        @Override
+        public int read() throws IOException {
+            if (pos > value.getLength() && !readMore()) {
+                return -1;
+            }
+            if (pos == value.getLength()) {
+                // also covers an empty record that was just loaded
+                pos++;
+                return ExternalDataConstants.EOL;
+            }
+            // mask to an unsigned value: a raw 0xFF byte would otherwise be returned as -1 (EOF)
+            return value.getBytes()[pos++] & 0xff;
+        }
+
+        /**
+         * Copies as much of the current record as fits into {@code buffer}; once the last data
+         * byte fits with room to spare, the line terminator is appended and the record is
+         * marked as fully delivered.
+         *
+         * @return number of bytes written to {@code buffer}
+         */
+        private int readRecord(byte[] buffer, int offset, int len) {
+            int remaining = value.getLength() - pos;
+            if (remaining >= len) {
+                // copy partial record; the terminator (and any leftover data) stays pending
+                System.arraycopy(value.getBytes(), pos, buffer, offset, len);
+                pos += len;
+                return len;
+            }
+            System.arraycopy(value.getBytes(), pos, buffer, offset, remaining);
+            buffer[offset + remaining] = ExternalDataConstants.LF;
+            // step past the terminator so it is neither lost nor emitted twice
+            pos += remaining + 1;
+            return remaining + 1;
+        }
+
+        /**
+         * Fills {@code buffer} with bytes of at most one record per call.
+         *
+         * @return number of bytes read, {@code 0} when {@code len} is zero, or {@code -1} when
+         *         no further record is available
+         */
+        @Override
+        public int read(byte[] buffer, int offset, int len) throws IOException {
+            if (len == 0) {
+                return 0;
+            }
+            if (pos > value.getLength() && !readMore()) {
+                return -1;
+            }
+            return readRecord(buffer, offset, len);
+        }
+
+        /**
+         * Advances the enclosing reader to its next record.
+         *
+         * @return {@code false} when the reader has no further record; the cursor is then left
+         *         untouched so an exhausted stream does not replay its last record
+         * @throws IOException wrapping any exception raised by the reader
+         */
+        private boolean readMore() throws IOException {
+            try {
+                if (!HDFSInputStreamProvider.this.hasNext()) {
+                    return false;
+                }
+                pos = 0;
+                return true;
+            } catch (Exception e) {
+                throw new IOException(e);
+            }
+        }
+
+        /**
+         * @return always {@code true}
+         */
+        @Override
+        public boolean skipError() throws Exception {
+            return true;
+        }
+
+        /**
+         * @return always {@code false}
+         */
+        @Override
+        public boolean stop() throws Exception {
+            return false;
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStreamProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStreamProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStreamProvider.java
new file mode 100644
index 0000000..b511617
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/LocalFSInputStreamProvider.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.dataflow.std.file.FileSplit;
+
+/**
+ * Provides an input stream over the local file of one file split, selected by partition
+ * number.
+ */
+public class LocalFSInputStreamProvider implements IInputStreamProvider {
+
+    private final FileSplit[] fileSplits;
+    private final int partition;
+
+    /**
+     * @param fileSplits all file splits of the data source
+     * @param ctx task context; not used by this class
+     * @param configuration adapter configuration; not used by this class
+     * @param partition index of the split this provider reads
+     */
+    public LocalFSInputStreamProvider(FileSplit[] fileSplits, IHyracksTaskContext ctx,
+            Map<String, String> configuration, int partition) {
+        this.fileSplits = fileSplits;
+        this.partition = partition;
+    }
+
+    /**
+     * Opens the local file of this provider's split.
+     *
+     * @return a {@link BasicInputStream} wrapping a {@link FileInputStream} on that file
+     * @throws IOException wrapping the {@link FileNotFoundException} if the file cannot be opened
+     */
+    @Override
+    public AInputStream getInputStream() throws Exception {
+        final File localFile = fileSplits[partition].getLocalFile().getFile();
+        try {
+            final InputStream fileStream = new FileInputStream(localFile);
+            return new BasicInputStream(fileStream);
+        } catch (FileNotFoundException notFound) {
+            throw new IOException(notFound);
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStream.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStream.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStream.java
new file mode 100644
index 0000000..2253a73
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStream.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.ServerSocket;
+import java.net.Socket;
+
+/**
+ * {@link AInputStream} fed by client connections accepted on a {@link ServerSocket}. When the
+ * current connection reaches end of stream, the read methods close it and block until the next
+ * connection is accepted, so they do not return {@code -1}.
+ */
+public class SocketInputStream extends AInputStream {
+    private ServerSocket server;
+    private Socket socket;
+    private InputStream connectionStream;
+
+    /**
+     * Blocks until the first client connects to {@code server}.
+     *
+     * @throws IOException if accepting the connection or opening its stream fails
+     */
+    public SocketInputStream(ServerSocket server) throws IOException {
+        this.server = server;
+        socket = server.accept();
+        connectionStream = socket.getInputStream();
+    }
+
+    /**
+     * Reads one byte, switching to a newly accepted connection whenever the current one ends.
+     */
+    @Override
+    public int read() throws IOException {
+        int read = connectionStream.read();
+        while (read < 0) {
+            accept();
+            read = connectionStream.read();
+        }
+        return read;
+    }
+
+    /**
+     * Drops the current connection and waits for a new one.
+     *
+     * @return always {@code true}
+     */
+    @Override
+    public boolean skipError() throws Exception {
+        accept();
+        return true;
+    }
+
+    /**
+     * Same as {@code read(b, 0, b.length)}.
+     */
+    @Override
+    public int read(byte b[]) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Reads up to {@code len} bytes, switching to a newly accepted connection whenever the
+     * current one ends.
+     */
+    @Override
+    public int read(byte b[], int off, int len) throws IOException {
+        int read = connectionStream.read(b, off, len);
+        while (read < 0) {
+            accept();
+            read = connectionStream.read(b, off, len);
+        }
+        return read;
+    }
+
+    /**
+     * Skipping is not supported.
+     *
+     * @return always {@code 0}
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        return 0;
+    }
+
+    /**
+     * @return always {@code 1}
+     */
+    @Override
+    public int available() throws IOException {
+        return 1;
+    }
+
+    /**
+     * Closes the connection stream, the connection socket and the server socket. Each resource
+     * is closed even if closing an earlier one throws.
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            connectionStream.close();
+        } finally {
+            try {
+                socket.close();
+            } finally {
+                server.close();
+            }
+        }
+    }
+
+    /**
+     * Closes the current connection and blocks until the server socket accepts a new one.
+     */
+    private void accept() throws IOException {
+        try {
+            connectionStream.close();
+        } finally {
+            // release the old socket even if closing its stream failed
+            socket.close();
+        }
+        socket = server.accept();
+        connectionStream = socket.getInputStream();
+    }
+
+    /**
+     * @return always {@code false}
+     */
+    @Override
+    public boolean stop() throws Exception {
+        return false;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStreamProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStreamProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStreamProvider.java
new file mode 100644
index 0000000..1f920e9
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/SocketInputStreamProvider.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.net.ServerSocket;
+
+import org.apache.asterix.external.api.IInputStreamProvider;
+
+/**
+ * Provides {@link SocketInputStream} instances bound to a given {@link ServerSocket}.
+ */
+public class SocketInputStreamProvider implements IInputStreamProvider {
+    // server socket handed to every stream created by this provider
+    private final ServerSocket serverSocket;
+
+    public SocketInputStreamProvider(ServerSocket server) {
+        serverSocket = server;
+    }
+
+    /**
+     * @return a new {@link SocketInputStream} on the server socket; its constructor waits for
+     *         a client connection
+     */
+    @Override
+    public AInputStream getInputStream() throws Exception {
+        final AInputStream stream = new SocketInputStream(serverSocket);
+        return stream;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/TwitterFirehoseInputStreamProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/TwitterFirehoseInputStreamProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/TwitterFirehoseInputStreamProvider.java
new file mode 100644
index 0000000..d32a94f
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/TwitterFirehoseInputStreamProvider.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PipedInputStream;
+import java.io.PipedOutputStream;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.runtime.TweetGenerator;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Stream provider that simulates a twitter firehose. A background {@code DataProvider}
+ * task drives a {@link TweetGenerator} that has the write end of a pipe registered as
+ * its subscriber; the read end of that pipe is consumed by callers through the
+ * {@link AInputStream} returned by {@link #getInputStream()}.
+ */
+public class TwitterFirehoseInputStreamProvider implements IInputStreamProvider {
+
+    private static final Logger LOGGER = Logger.getLogger(TwitterFirehoseInputStreamProvider.class.getName());
+
+    private final ExecutorService executorService;
+
+    private final PipedOutputStream outputStream;
+
+    private final PipedInputStream inputStream;
+
+    private final TwitterServer twitterServer;
+
+    /**
+     * @param configuration
+     *            adapter configuration; read here for the {@code mode} key and passed on to the
+     *            {@link TweetGenerator}
+     * @param ctx
+     *            task context (not used by this provider)
+     * @param partition
+     *            partition number passed on to the {@link TweetGenerator}
+     * @throws Exception
+     *             if the pipe or the tweet generator cannot be set up
+     */
+    public TwitterFirehoseInputStreamProvider(Map<String, String> configuration, IHyracksTaskContext ctx, int partition)
+            throws Exception {
+        executorService = Executors.newCachedThreadPool();
+        outputStream = new PipedOutputStream();
+        inputStream = new PipedInputStream(outputStream);
+        twitterServer = new TwitterServer(configuration, partition, outputStream, executorService, inputStream);
+    }
+
+    /**
+     * Starts the background producer (if it is not running yet) and returns the stream
+     * that reads what it produces.
+     */
+    @Override
+    public AInputStream getInputStream() throws Exception {
+        twitterServer.start();
+        return twitterServer;
+    }
+
+    /**
+     * Read side of the pipe. Reads are delegated to the piped input stream; the producer
+     * task is submitted to the executor at most once.
+     */
+    private static class TwitterServer extends AInputStream {
+        private final DataProvider dataProvider;
+        private final ExecutorService executorService;
+        private final InputStream in;
+        private boolean started;
+
+        public TwitterServer(Map<String, String> configuration, int partition, OutputStream os,
+                ExecutorService executorService, InputStream in) throws Exception {
+            dataProvider = new DataProvider(configuration, partition, os);
+            this.executorService = executorService;
+            this.in = in;
+            this.started = false;
+        }
+
+        /**
+         * Asks the producer to stop pushing data.
+         *
+         * @return always {@code true}
+         */
+        @Override
+        public boolean stop() throws IOException {
+            dataProvider.stop();
+            return true;
+        }
+
+        /**
+         * Submits the producer task to the executor. Calling this more than once is a no-op:
+         * previously the flag was only set by the read methods, so a start() followed by the
+         * first read() submitted the same producer twice and two threads wrote into one pipe.
+         */
+        public synchronized void start() {
+            if (started) {
+                return;
+            }
+            executorService.execute(dataProvider);
+            // Only mark as started once the task was actually accepted by the executor.
+            started = true;
+        }
+
+        @Override
+        public boolean skipError() throws Exception {
+            return false;
+        }
+
+        @Override
+        public int read() throws IOException {
+            // Unsynchronized fast-path check; start() re-checks the flag under its lock.
+            if (!started) {
+                start();
+            }
+            return in.read();
+        }
+
+        @Override
+        public int read(byte b[], int off, int len) throws IOException {
+            // Unsynchronized fast-path check; start() re-checks the flag under its lock.
+            if (!started) {
+                start();
+            }
+            return in.read(b, off, len);
+        }
+    }
+
+    /**
+     * Producer task: repeatedly asks the {@link TweetGenerator} for the next batch until the
+     * generator reports no more data or {@link #stop()} is called, then closes the output
+     * stream.
+     */
+    private static class DataProvider implements Runnable {
+
+        public static final String KEY_MODE = "mode";
+
+        private final TweetGenerator tweetGenerator;
+        // Written by stop() from a different thread than the one executing run().
+        private volatile boolean continuePush = true;
+        private int batchSize;
+        private final Mode mode;
+        private final OutputStream os;
+
+        public static enum Mode {
+            AGGRESSIVE,
+            CONTROLLED
+        }
+
+        /**
+         * @throws IllegalArgumentException
+         *             if the mode is unknown, or if the mode is CONTROLLED and no (valid) TPS value
+         *             is configured
+         */
+        public DataProvider(Map<String, String> configuration, int partition, OutputStream os) throws Exception {
+            this.tweetGenerator = new TweetGenerator(configuration, partition);
+            this.tweetGenerator.registerSubscriber(os);
+            this.os = os;
+            String modeValue = configuration.get(KEY_MODE);
+            // Locale.ROOT keeps the enum lookup independent of the JVM default locale.
+            mode = modeValue != null ? Mode.valueOf(modeValue.toUpperCase(java.util.Locale.ROOT))
+                    : Mode.AGGRESSIVE;
+            switch (mode) {
+                case CONTROLLED:
+                    String tpsValue = configuration.get(TweetGenerator.KEY_TPS);
+                    if (tpsValue == null) {
+                        throw new IllegalArgumentException("TPS value not configured. use tps=<value>");
+                    }
+                    batchSize = Integer.parseInt(tpsValue);
+                    break;
+                case AGGRESSIVE:
+                    batchSize = 5000;
+                    break;
+            }
+        }
+
+        @Override
+        public void run() {
+            boolean moreData = true;
+            try {
+                while (moreData && continuePush) {
+                    try {
+                        moreData = pushNextBatch();
+                    } catch (InterruptedException e) {
+                        // Restore the interrupt status and stop producing instead of retrying.
+                        Thread.currentThread().interrupt();
+                        break;
+                    } catch (Exception e) {
+                        // Best effort, as before: log and try the next batch.
+                        // NOTE(review): a failure that never clears keeps this loop spinning until
+                        // stop() is called - confirm whether some failures should end the feed.
+                        if (LOGGER.isLoggable(Level.WARNING)) {
+                            LOGGER.log(Level.WARNING, "Exception in adaptor " + e.getMessage(), e);
+                        }
+                    }
+                }
+            } finally {
+                closeOutput();
+            }
+        }
+
+        /**
+         * Generates one batch. In CONTROLLED mode the call is padded with a sleep so that one
+         * batch takes at least one second.
+         *
+         * @return the value reported by the generator for the batch
+         */
+        private boolean pushNextBatch() throws Exception {
+            if (mode == Mode.CONTROLLED) {
+                long startBatch = System.currentTimeMillis();
+                boolean moreData = tweetGenerator.generateNextBatch(batchSize);
+                long elapsed = System.currentTimeMillis() - startBatch;
+                if (elapsed < 1000) {
+                    Thread.sleep(1000 - elapsed);
+                }
+                return moreData;
+            }
+            return tweetGenerator.generateNextBatch(batchSize);
+        }
+
+        /** Closes the output stream exactly once; a failure is logged, not retried. */
+        private void closeOutput() {
+            try {
+                os.close();
+            } catch (IOException e) {
+                if (LOGGER.isLoggable(Level.WARNING)) {
+                    LOGGER.log(Level.WARNING, "Exception in adaptor " + e.getMessage(), e);
+                }
+            }
+        }
+
+        /** Requests that the producer loop exits before generating its next batch. */
+        public void stop() {
+            continuePush = false;
+        }
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/LocalFSInputStreamProviderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/LocalFSInputStreamProviderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/LocalFSInputStreamProviderFactory.java
new file mode 100644
index 0000000..14c712a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/LocalFSInputStreamProviderFactory.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream.factory;
+
+import java.io.File;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.api.INodeResolver;
+import org.apache.asterix.external.api.INodeResolverFactory;
+import org.apache.asterix.external.input.stream.LocalFSInputStreamProvider;
+import org.apache.asterix.external.util.DNSResolverFactory;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.io.FileReference;
+import org.apache.hyracks.dataflow.std.file.FileSplit;
+
+public class LocalFSInputStreamProviderFactory implements IInputStreamProviderFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    protected static final INodeResolver DEFAULT_NODE_RESOLVER = new DNSResolverFactory().createNodeResolver();
+    protected static final Logger LOGGER = Logger.getLogger(LocalFSInputStreamProviderFactory.class.getName());
+    protected static INodeResolver nodeResolver;
+    protected Map<String, String> configuration;
+    protected FileSplit[] fileSplits;
+
+    @Override
+    public IInputStreamProvider createInputStreamProvider(IHyracksTaskContext ctx, int partition) throws Exception {
+        return new LocalFSInputStreamProvider(fileSplits, ctx, configuration, partition);
+    }
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.STREAM;
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return false;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+        String[] splits = configuration.get(ExternalDataConstants.KEY_PATH).split(",");
+        configureFileSplits(splits);
+    }
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        return configurePartitionConstraint();
+    }
+
+    private void configureFileSplits(String[] splits) throws AsterixException {
+        if (fileSplits == null) {
+            fileSplits = new FileSplit[splits.length];
+            String nodeName;
+            String nodeLocalPath;
+            int count = 0;
+            String trimmedValue;
+            for (String splitPath : splits) {
+                trimmedValue = splitPath.trim();
+                if (!trimmedValue.contains("://")) {
+                    throw new AsterixException(
+                            "Invalid path: " + splitPath + "\nUsage- path=\"Host://Absolute File Path\"");
+                }
+                nodeName = trimmedValue.split(":")[0];
+                nodeLocalPath = trimmedValue.split("://")[1];
+                FileSplit fileSplit = new FileSplit(nodeName, new FileReference(new File(nodeLocalPath)));
+                fileSplits[count++] = fileSplit;
+            }
+        }
+    }
+
+    private AlgebricksPartitionConstraint configurePartitionConstraint() throws AsterixException {
+        String[] locs = new String[fileSplits.length];
+        String location;
+        for (int i = 0; i < fileSplits.length; i++) {
+            location = getNodeResolver().resolveNode(fileSplits[i].getNodeName());
+            locs[i] = location;
+        }
+        return new AlgebricksAbsolutePartitionConstraint(locs);
+    }
+
+    protected INodeResolver getNodeResolver() {
+        if (nodeResolver == null) {
+            synchronized (DEFAULT_NODE_RESOLVER) {
+                if (nodeResolver == null) {
+                    nodeResolver = initializeNodeResolver();
+                }
+            }
+        }
+        return nodeResolver;
+    }
+
+    private static INodeResolver initializeNodeResolver() {
+        INodeResolver nodeResolver = null;
+        String configuredNodeResolverFactory = System.getProperty(ExternalDataConstants.NODE_RESOLVER_FACTORY_PROPERTY);
+        if (configuredNodeResolverFactory != null) {
+            try {
+                nodeResolver = ((INodeResolverFactory) (Class.forName(configuredNodeResolverFactory).newInstance()))
+                        .createNodeResolver();
+
+            } catch (Exception e) {
+                if (LOGGER.isLoggable(Level.WARNING)) {
+                    LOGGER.log(Level.WARNING, "Unable to create node resolver from the configured classname "
+                            + configuredNodeResolverFactory + "\n" + e.getMessage());
+                }
+                nodeResolver = DEFAULT_NODE_RESOLVER;
+            }
+        } else {
+            nodeResolver = DEFAULT_NODE_RESOLVER;
+        }
+        return nodeResolver;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/SocketInputStreamProviderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/SocketInputStreamProviderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/SocketInputStreamProviderFactory.java
new file mode 100644
index 0000000..37afa53
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/SocketInputStreamProviderFactory.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream.factory;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.ServerSocket;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.input.stream.SocketInputStreamProvider;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.om.util.AsterixRuntimeUtil;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+public class SocketInputStreamProviderFactory implements IInputStreamProviderFactory {
+
+    private static final long serialVersionUID = 1L;
+    private List<Pair<String, Integer>> sockets;
+    private Mode mode = Mode.IP;
+
+    public static enum Mode {
+        NC,
+        IP
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        sockets = new ArrayList<Pair<String, Integer>>();
+        String modeValue = configuration.get(ExternalDataConstants.KEY_MODE);
+        if (modeValue != null) {
+            mode = Mode.valueOf(modeValue.trim().toUpperCase());
+        }
+        String socketsValue = configuration.get(ExternalDataConstants.KEY_SOCKETS);
+        if (socketsValue == null) {
+            throw new IllegalArgumentException("\'sockets\' parameter not specified as part of adapter configuration");
+        }
+        Map<InetAddress, Set<String>> ncMap = AsterixRuntimeUtil.getNodeControllerMap();
+        List<String> ncs = AsterixRuntimeUtil.getAllNodeControllers();
+        String[] socketsArray = socketsValue.split(",");
+        Random random = new Random();
+        for (String socket : socketsArray) {
+            String[] socketTokens = socket.split(":");
+            String host = socketTokens[0].trim();
+            int port = Integer.parseInt(socketTokens[1].trim());
+            Pair<String, Integer> p = null;
+            switch (mode) {
+                case IP:
+                    Set<String> ncsOnIp = ncMap.get(InetAddress.getByName(host));
+                    if (ncsOnIp == null || ncsOnIp.isEmpty()) {
+                        throw new IllegalArgumentException("Invalid host " + host
+                                + " as it is not part of the AsterixDB cluster. Valid choices are "
+                                + StringUtils.join(ncMap.keySet(), ", "));
+                    }
+                    String[] ncArray = ncsOnIp.toArray(new String[] {});
+                    String nc = ncArray[random.nextInt(ncArray.length)];
+                    p = new Pair<String, Integer>(nc, port);
+                    break;
+
+                case NC:
+                    p = new Pair<String, Integer>(host, port);
+                    if (!ncs.contains(host)) {
+                        throw new IllegalArgumentException(
+                                "Invalid NC " + host + " as it is not part of the AsterixDB cluster. Valid choices are "
+                                        + StringUtils.join(ncs, ", "));
+
+                    }
+                    break;
+            }
+            sockets.add(p);
+        }
+    }
+
+    @Override
+    public synchronized IInputStreamProvider createInputStreamProvider(IHyracksTaskContext ctx, int partition)
+            throws IOException, AsterixException {
+        Pair<String, Integer> socket = sockets.get(partition);
+        ServerSocket server = new ServerSocket(socket.second);
+        return new SocketInputStreamProvider(server);
+    }
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() {
+        List<String> locations = new ArrayList<String>();
+        for (Pair<String, Integer> socket : sockets) {
+            locations.add(socket.first);
+        }
+        return new AlgebricksAbsolutePartitionConstraint(locations.toArray(new String[] {}));
+    }
+
+    public List<Pair<String, Integer>> getSockets() {
+        return sockets;
+    }
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.STREAM;
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return false;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/TwitterFirehoseStreamProviderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/TwitterFirehoseStreamProviderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/TwitterFirehoseStreamProviderFactory.java
new file mode 100644
index 0000000..b86c294
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/factory/TwitterFirehoseStreamProviderFactory.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.stream.factory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.om.util.AsterixClusterProperties;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Factory class for creating @see{TwitterFirehoseFeedAdapter}. The adapter
+ * simulates a twitter firehose with tweets being "pushed" into Asterix at a
+ * configurable rate measured in terms of TPS (tweets/second). The stream of
+ * tweets lasts for a configurable duration (measured in seconds).
+ */
+public class TwitterFirehoseStreamProviderFactory implements IInputStreamProviderFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Degree of parallelism for feed ingestion activity. Defaults to 1. This
+     * determines the count constraint for the ingestion operator.
+     **/
+    private static final String KEY_INGESTION_CARDINALITY = "ingestion-cardinality";
+
+    /**
+     * The absolute locations where ingestion operator instances will be placed.
+     **/
+    private static final String KEY_INGESTION_LOCATIONS = "ingestion-location";
+
+    private Map<String, String> configuration;
+
+    @Override
+    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+        String ingestionCardinalityParam = configuration.get(KEY_INGESTION_CARDINALITY);
+        String ingestionLocationParam = configuration.get(KEY_INGESTION_LOCATIONS);
+        String[] locations = null;
+        if (ingestionLocationParam != null) {
+            locations = ingestionLocationParam.split(",");
+        }
+        int count = locations != null ? locations.length : 1;
+        if (ingestionCardinalityParam != null) {
+            count = Integer.parseInt(ingestionCardinalityParam);
+        }
+
+        List<String> chosenLocations = new ArrayList<String>();
+        String[] availableLocations = locations != null ? locations
+                : AsterixClusterProperties.INSTANCE.getParticipantNodes().toArray(new String[] {});
+        for (int i = 0, k = 0; i < count; i++, k = (k + 1) % availableLocations.length) {
+            chosenLocations.add(availableLocations[k]);
+        }
+        return new AlgebricksAbsolutePartitionConstraint(chosenLocations.toArray(new String[] {}));
+    }
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.STREAM;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+    }
+
+    @Override
+    public boolean isIndexible() {
+        return false;
+    }
+
+    @Override
+    public IInputStreamProvider createInputStreamProvider(IHyracksTaskContext ctx, int partition) throws Exception {
+        return null;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunction.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunction.java
index 14f831b..e9c15cb 100755
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunction.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalFunction.java
@@ -21,6 +21,9 @@ package org.apache.asterix.external.library;
 import java.io.IOException;
 
 import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
 import org.apache.asterix.om.functions.IExternalFunctionInfo;
 import org.apache.asterix.om.types.ATypeTag;



[15/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
deleted file mode 100644
index d7fa4f2..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.apache.hadoop.io.Writable;
-
-import org.apache.asterix.builders.IARecordBuilder;
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.builders.RecordBuilder;
-import org.apache.asterix.builders.UnorderedListBuilder;
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.AUnorderedListType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.NonTaggedFormatUtil;
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.util.string.UTF8StringWriter;
-
-@SuppressWarnings("deprecation")
-public class HiveObjectParser implements IAsterixHDFSRecordParser {
-
-    private static final String KEY_HIVE_SERDE = "hive-serde";
-    private ARecordType aRecord;
-    private SerDe hiveSerde;
-    private StructObjectInspector oi;
-    private IARecordBuilder recBuilder;
-    private ArrayBackedValueStorage fieldValueBuffer;
-    private ArrayBackedValueStorage listItemBuffer;
-    private byte[] fieldTypeTags;
-    private IAType[] fieldTypes;
-    private OrderedListBuilder orderedListBuilder;
-    private UnorderedListBuilder unorderedListBuilder;
-    private boolean initialized = false;
-    private List<StructField> fieldRefs;
-    private UTF8StringWriter utf8Writer = new UTF8StringWriter();
-
-    @SuppressWarnings({ "unchecked" })
-    @Override
-    public void initialize(ARecordType record, Map<String, String> arguments, Configuration hadoopConfig)
-            throws Exception {
-        if (!initialized) {
-            this.aRecord = record;
-            int n = record.getFieldNames().length;
-            fieldTypes = record.getFieldTypes();
-
-            //create the hive table schema.
-            Properties tbl = new Properties();
-            tbl.put(Constants.LIST_COLUMNS, getCommaDelimitedColNames(record));
-            tbl.put(Constants.LIST_COLUMN_TYPES, getColTypes(record));
-            String hiveSerdeClassName = (String) arguments.get(KEY_HIVE_SERDE);
-            if (hiveSerdeClassName == null) {
-                throw new IllegalArgumentException("no hive serde provided for hive deserialized records");
-            }
-            hiveSerde = (SerDe) Class.forName(hiveSerdeClassName).newInstance();
-            hiveSerde.initialize(hadoopConfig, tbl);
-            oi = (StructObjectInspector) hiveSerde.getObjectInspector();
-
-            fieldValueBuffer = new ArrayBackedValueStorage();
-            recBuilder = new RecordBuilder();
-            recBuilder.reset(record);
-            recBuilder.init();
-            fieldTypeTags = new byte[n];
-            for (int i = 0; i < n; i++) {
-                ATypeTag tag = record.getFieldTypes()[i].getTypeTag();
-                fieldTypeTags[i] = tag.serialize();
-            }
-            fieldRefs = (List<StructField>) oi.getAllStructFieldRefs();
-            initialized = true;
-        }
-    }
-
-    private Object getColTypes(ARecordType record) throws Exception {
-        int n = record.getFieldTypes().length;
-        if (n < 1) {
-            throw new HyracksDataException("Failed to get columns of record");
-        }
-        ATypeTag tag = null;
-
-        //First Column
-        if (record.getFieldTypes()[0].getTypeTag() == ATypeTag.UNION) {
-            if (NonTaggedFormatUtil.isOptional(record.getFieldTypes()[0])) {
-                throw new NotImplementedException("Non-optional UNION type is not supported.");
-            }
-            tag = ((AUnionType) record.getFieldTypes()[0]).getNullableType().getTypeTag();
-        } else {
-            tag = record.getFieldTypes()[0].getTypeTag();
-        }
-        if (tag == null) {
-            throw new NotImplementedException("Failed to get the type information for field " + 0 + ".");
-        }
-        String cols = getHiveTypeString(tag);
-
-        for (int i = 1; i < n; i++) {
-            tag = null;
-            if (record.getFieldTypes()[i].getTypeTag() == ATypeTag.UNION) {
-                if (NonTaggedFormatUtil.isOptional(record.getFieldTypes()[i])) {
-                    throw new NotImplementedException("Non-optional UNION type is not supported.");
-                }
-                tag = ((AUnionType) record.getFieldTypes()[i]).getNullableType().getTypeTag();
-            } else {
-                tag = record.getFieldTypes()[i].getTypeTag();
-            }
-            if (tag == null) {
-                throw new NotImplementedException("Failed to get the type information for field " + i + ".");
-            }
-            cols = cols + "," + getHiveTypeString(tag);
-        }
-        return cols;
-    }
-
-    private String getCommaDelimitedColNames(ARecordType record) throws Exception {
-        if (record.getFieldNames().length < 1) {
-            throw new HyracksDataException("Can't deserialize hive records with no closed columns");
-        }
-
-        String cols = record.getFieldNames()[0];
-        for (int i = 1; i < record.getFieldNames().length; i++) {
-            cols = cols + "," + record.getFieldNames()[i];
-        }
-        return cols;
-    }
-
-    private String getHiveTypeString(ATypeTag tag) throws Exception {
-        switch (tag) {
-            case BOOLEAN:
-                return Constants.BOOLEAN_TYPE_NAME;
-            case DATE:
-                return Constants.DATE_TYPE_NAME;
-            case DATETIME:
-                return Constants.DATETIME_TYPE_NAME;
-            case DOUBLE:
-                return Constants.DOUBLE_TYPE_NAME;
-            case FLOAT:
-                return Constants.FLOAT_TYPE_NAME;
-            case INT16:
-                return Constants.SMALLINT_TYPE_NAME;
-            case INT32:
-                return Constants.INT_TYPE_NAME;
-            case INT64:
-                return Constants.BIGINT_TYPE_NAME;
-            case INT8:
-                return Constants.TINYINT_TYPE_NAME;
-            case ORDEREDLIST:
-                return Constants.LIST_TYPE_NAME;
-            case STRING:
-                return Constants.STRING_TYPE_NAME;
-            case TIME:
-                return Constants.DATETIME_TYPE_NAME;
-            case UNORDEREDLIST:
-                return Constants.LIST_TYPE_NAME;
-            default:
-                throw new HyracksDataException("Can't get hive type for field of type " + tag);
-        }
-    }
-
-    @Override
-    public void parse(Object object, DataOutput output) throws Exception {
-        if (object == null) {
-            throw new HyracksDataException("Hive parser can't parse null objects");
-        }
-        Object hiveObject = hiveSerde.deserialize((Writable) object);
-        int n = aRecord.getFieldNames().length;
-        List<Object> attributesValues = oi.getStructFieldsDataAsList(hiveObject);
-        recBuilder.reset(aRecord);
-        recBuilder.init();
-        for (int i = 0; i < n; i++) {
-            fieldValueBuffer.reset();
-            fieldValueBuffer.getDataOutput().writeByte(fieldTypeTags[i]);
-            ObjectInspector foi = fieldRefs.get(i).getFieldObjectInspector();
-            //get field type
-            switch (fieldTypes[i].getTypeTag()) {
-                case BOOLEAN:
-                    parseBoolean(attributesValues.get(i), (BooleanObjectInspector) foi,
-                            fieldValueBuffer.getDataOutput());
-                    break;
-                case TIME:
-                    parseTime(attributesValues.get(i), (TimestampObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case DATE:
-                    parseDate(attributesValues.get(i), (TimestampObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case DATETIME:
-                    parseDateTime(attributesValues.get(i), (TimestampObjectInspector) foi,
-                            fieldValueBuffer.getDataOutput());
-                    break;
-                case DOUBLE:
-                    parseDouble(attributesValues.get(i), (DoubleObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case FLOAT:
-                    parseFloat(attributesValues.get(i), (FloatObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case INT8:
-                    parseInt8(attributesValues.get(i), (ByteObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case INT16:
-                    parseInt16(attributesValues.get(i), (ShortObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case INT32:
-                    parseInt32(attributesValues.get(i), (IntObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case INT64:
-                    parseInt64(attributesValues.get(i), (LongObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case STRING:
-                    parseString(attributesValues.get(i), (StringObjectInspector) foi, fieldValueBuffer.getDataOutput());
-                    break;
-                case ORDEREDLIST:
-                    parseOrderedList((AOrderedListType) fieldTypes[i], attributesValues.get(i),
-                            (ListObjectInspector) foi);
-                    break;
-                case UNORDEREDLIST:
-                    parseUnorderedList((AUnorderedListType) fieldTypes[i], attributesValues.get(i),
-                            (ListObjectInspector) foi);
-                    break;
-                default:
-                    throw new HyracksDataException("Can't get hive type for field of type "
-                            + fieldTypes[i].getTypeTag());
-            }
-            recBuilder.addField(i, fieldValueBuffer);
-        }
-        recBuilder.write(output, true);
-    }
-
-    private void parseInt64(Object obj, LongObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeLong(foi.get(obj));
-    }
-
-    private void parseInt32(Object obj, IntObjectInspector foi, DataOutput dataOutput) throws IOException {
-        if (obj == null) {
-            throw new HyracksDataException("can't parse null field");
-        }
-        dataOutput.writeInt(foi.get(obj));
-    }
-
-    private void parseInt16(Object obj, ShortObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeShort(foi.get(obj));
-    }
-
-    private void parseFloat(Object obj, FloatObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeFloat(foi.get(obj));
-    }
-
-    private void parseDouble(Object obj, DoubleObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeDouble(foi.get(obj));
-    }
-
-    private void parseDateTime(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeLong(foi.getPrimitiveJavaObject(obj).getTime());
-    }
-
-    private void parseDate(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
-        long chrononTimeInMs = foi.getPrimitiveJavaObject(obj).getTime();
-        short temp = 0;
-        if (chrononTimeInMs < 0 && chrononTimeInMs % GregorianCalendarSystem.CHRONON_OF_DAY != 0) {
-            temp = 1;
-        }
-        dataOutput.writeInt((int) (chrononTimeInMs / GregorianCalendarSystem.CHRONON_OF_DAY) - temp);
-    }
-
-    private void parseBoolean(Object obj, BooleanObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeBoolean(foi.get(obj));
-    }
-
-    private void parseInt8(Object obj, ByteObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeByte(foi.get(obj));
-    }
-
-    private void parseString(Object obj, StringObjectInspector foi, DataOutput dataOutput) throws IOException {
-        utf8Writer.writeUTF8(foi.getPrimitiveJavaObject(obj), dataOutput);
-    }
-
-    private void parseTime(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeInt((int) (foi.getPrimitiveJavaObject(obj).getTime() % 86400000));
-    }
-
-    private void parseOrderedList(AOrderedListType aOrderedListType, Object obj, ListObjectInspector foi)
-            throws IOException {
-        OrderedListBuilder orderedListBuilder = getOrderedListBuilder();
-        IAType itemType = null;
-        if (aOrderedListType != null)
-            itemType = aOrderedListType.getItemType();
-        orderedListBuilder.reset(aOrderedListType);
-
-        int n = foi.getListLength(obj);
-        for (int i = 0; i < n; i++) {
-            Object element = foi.getListElement(obj, i);
-            ObjectInspector eoi = foi.getListElementObjectInspector();
-            if (element == null) {
-                throw new HyracksDataException("can't parse hive list with null values");
-            }
-
-            parseHiveListItem(element, eoi, listItemBuffer, itemType);
-            orderedListBuilder.addItem(listItemBuffer);
-        }
-        orderedListBuilder.write(fieldValueBuffer.getDataOutput(), true);
-    }
-
-    private void parseUnorderedList(AUnorderedListType uoltype, Object obj, ListObjectInspector oi) throws IOException,
-            AsterixException {
-        UnorderedListBuilder unorderedListBuilder = getUnorderedListBuilder();
-        IAType itemType = null;
-        if (uoltype != null)
-            itemType = uoltype.getItemType();
-        byte tagByte = itemType.getTypeTag().serialize();
-        unorderedListBuilder.reset(uoltype);
-
-        int n = oi.getListLength(obj);
-        for (int i = 0; i < n; i++) {
-            Object element = oi.getListElement(obj, i);
-            ObjectInspector eoi = oi.getListElementObjectInspector();
-            if (element == null) {
-                throw new HyracksDataException("can't parse hive list with null values");
-            }
-            listItemBuffer.reset();
-            listItemBuffer.getDataOutput().writeByte(tagByte);
-            parseHiveListItem(element, eoi, listItemBuffer, itemType);
-            unorderedListBuilder.addItem(listItemBuffer);
-        }
-        unorderedListBuilder.write(fieldValueBuffer.getDataOutput(), true);
-    }
-
-    private void parseHiveListItem(Object obj, ObjectInspector eoi, ArrayBackedValueStorage fieldValueBuffer,
-            IAType itemType) throws IOException {
-        //get field type
-        switch (itemType.getTypeTag()) {
-            case BOOLEAN:
-                parseBoolean(obj, (BooleanObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case TIME:
-                parseTime(obj, (TimestampObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case DATE:
-                parseDate(obj, (TimestampObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case DATETIME:
-                parseDateTime(obj, (TimestampObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case DOUBLE:
-                parseDouble(obj, (DoubleObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case FLOAT:
-                parseFloat(obj, (FloatObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case INT8:
-                parseInt8(obj, (ByteObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case INT16:
-                parseInt16(obj, (ShortObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case INT32:
-                parseInt32(obj, (IntObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case INT64:
-                parseInt64(obj, (LongObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            case STRING:
-                parseString(obj, (StringObjectInspector) eoi, fieldValueBuffer.getDataOutput());
-                break;
-            default:
-                throw new HyracksDataException("doesn't support hive data with list of non-primitive types");
-        }
-    }
-
-    private OrderedListBuilder getOrderedListBuilder() {
-        if (orderedListBuilder != null)
-            return orderedListBuilder;
-        else {
-            orderedListBuilder = new OrderedListBuilder();
-            return orderedListBuilder;
-        }
-    }
-
-    private UnorderedListBuilder getUnorderedListBuilder() {
-        if (unorderedListBuilder != null)
-            return unorderedListBuilder;
-        else {
-            unorderedListBuilder = new UnorderedListBuilder();
-            return unorderedListBuilder;
-        }
-    }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IAsterixHDFSRecordParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IAsterixHDFSRecordParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IAsterixHDFSRecordParser.java
deleted file mode 100644
index ff5bc27..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IAsterixHDFSRecordParser.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.DataOutput;
-import java.util.Map;
-
-import org.apache.hadoop.conf.Configuration;
-
-import org.apache.asterix.om.types.ARecordType;
-
-/**
- * This interface is provided for users to implements in order to support their own
- * it should be included sometimes in the future in the external library
- * input parsing
- * @author alamouda
- *
- */
-public interface IAsterixHDFSRecordParser {
-
-    /**
-     * This method is called once upon creating the serde before starting to parse objects
-     * @param record
-     *  The description of the expected dataset record.
-     * @param arguments
-     *  The arguments passed when creating the external dataset
-     */
-    public void initialize(ARecordType record, Map<String, String> arguments, Configuration hadoopConfig) throws Exception;
-    
-    /**
-     * This function takes an object, parse it and then serialize it into an adm record in the output buffer
-     * @param object
-     *  the serialized I/O object
-     * @param output
-     *  output buffer where deserialized object need to be serialized
-     */
-    public void parse(Object object, DataOutput output) throws Exception;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParser.java
deleted file mode 100644
index c8fc6c2..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParser.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-/**
- * This interface is to be implemented by parsers used in a pipelined hyracks job where input is not ready all at once
- */
-public interface IControlledTupleParser {
-    /**
-     * This function should flush the tuples setting in the frame writer buffer
-     * and free all resources
-     */
-    public void close(IFrameWriter writer) throws Exception;
-
-    /**
-     * This function is called when there are more data ready for parsing in the input stream
-     * @param writer
-     *          a frame writer that is used to push outgoig frames 
-     * @param frameBuffer 
-     *          a frame buffer containing the incoming tuples, used for propagating fields.
-     */
-    public void parseNext(IFrameWriter writer, ByteBuffer frameBuffer) throws HyracksDataException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParserFactory.java
deleted file mode 100644
index 52d5123..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IControlledTupleParserFactory.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-public interface IControlledTupleParserFactory {
-    public IControlledTupleParser createTupleParser();
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IndexingScheduler.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IndexingScheduler.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IndexingScheduler.java
deleted file mode 100644
index 2a51380..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/IndexingScheduler.java
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.PriorityQueue;
-import java.util.Random;
-import java.util.logging.Logger;
-
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hyracks.api.client.HyracksConnection;
-import org.apache.hyracks.api.client.IHyracksClientConnection;
-import org.apache.hyracks.api.client.NodeControllerInfo;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.api.exceptions.HyracksException;
-import org.apache.hyracks.hdfs.scheduler.Scheduler;
-
-public class IndexingScheduler {
-    /** Logger named after this class so its messages are not attributed to the Hyracks HDFS {@code Scheduler}. */
-    private static final Logger LOGGER = Logger.getLogger(IndexingScheduler.class.getName());
-
-    /** the names of all known NCs; the position of a name is its index in the workload arrays */
-    private String[] NCs;
-
-    /** a map from an IP address to the names of the NCs running at that address */
-    private Map<String, List<String>> ipToNcMapping = new HashMap<String, List<String>>();
-
-    /** a map from the NC name to its position in {@link #NCs} */
-    private Map<String, Integer> ncNameToIndex = new HashMap<String, Integer>();
-
-    /** a map from NC name to the NodeControllerInfo */
-    private Map<String, NodeControllerInfo> ncNameToNcInfos;
-
-    /**
-     * Creates a scheduler by opening a Hyracks client connection and loading
-     * the node controllers it reports.
-     *
-     * @param ipAddress
-     *            the address handed to {@link HyracksConnection}
-     * @param port
-     *            the port handed to {@link HyracksConnection}
-     * @throws HyracksException
-     *             if the connection cannot be created or the node controller
-     *             information cannot be loaded
-     */
-    public IndexingScheduler(String ipAddress, int port) throws HyracksException {
-        try {
-            IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
-            this.ncNameToNcInfos = hcc.getNodeControllerInfos();
-            loadIPAddressToNCMap(ncNameToNcInfos);
-        } catch (HyracksException e) {
-            // already the declared exception type; do not wrap it a second time
-            throw e;
-        } catch (Exception e) {
-            throw new HyracksException(e);
-        }
-    }
-
-    /**
-     * Computes, for every input split, the name of the node controller that
-     * should read it. Splits are first placed on node controllers that host
-     * their data; the splits that are still unplaced afterwards are handed to
-     * the non-local scheduling pass. Locality is more important than fairness.
-     *
-     * @param splits
-     *            the splits to place; may be {@code null}
-     * @return one node controller name per split, in split order; an empty
-     *         array when {@code splits} is {@code null}
-     * @throws HyracksException
-     *             if an {@link IOException} occurs while scheduling
-     */
-    public String[] getLocationConstraints(InputSplit[] splits) throws HyracksException {
-        if (splits == null) {
-            // nothing to schedule
-            return new String[] {};
-        }
-        // a freshly allocated int[] is already zero-filled
-        int[] workloads = new int[NCs.length];
-        String[] locations = new String[splits.length];
-        Map<String, IntWritable> locationToNumOfSplits = new HashMap<String, IntWritable>();
-        // a single node controller may receive at most every split
-        int upperBoundSlots = splits.length;
-
-        try {
-            Random random = new Random(System.currentTimeMillis());
-            // a freshly allocated boolean[] is already all false
-            boolean[] scheduled = new boolean[splits.length];
-
-            // count, per location, how many splits it hosts
-            buildPopularityMap(splits, locationToNumOfSplits);
-            HashMap<String, Integer> assignmentCounts = new HashMap<String, Integer>();
-            for (String host : locationToNumOfSplits.keySet()) {
-                assignmentCounts.put(host, 0);
-            }
-
-            // first pass: data-local placement
-            scheduleLocalSlots(splits, workloads, locations, upperBoundSlots, random, scheduled,
-                    locationToNumOfSplits, assignmentCounts);
-
-            int dataLocalCount = 0;
-            for (boolean placed : scheduled) {
-                if (placed) {
-                    dataLocalCount++;
-                }
-            }
-            LOGGER.info("Data local rate: "
-                    + (scheduled.length == 0 ? 0.0 : ((float) dataLocalCount / (float) (scheduled.length))));
-
-            // second pass: re-key the counters by NC name and seed them with the local placements
-            assignmentCounts.clear();
-            for (String ncName : NCs) {
-                assignmentCounts.put(ncName, 0);
-            }
-            for (int i = 0; i < scheduled.length; i++) {
-                if (!scheduled[i]) {
-                    continue;
-                }
-                String assignedNc = locations[i];
-                assignmentCounts.put(assignedNc, assignmentCounts.get(assignedNc) + 1);
-            }
-
-            scheduleNonLocalSlots(splits, workloads, locations, upperBoundSlots, scheduled, assignmentCounts);
-            return locations;
-        } catch (IOException ioe) {
-            throw new HyracksException(ioe);
-        }
-    }
-
-    /**
-     * Assigns every split that is still unscheduled to the node controller
-     * that currently has the fewest assignments.
-     *
-     * @param splits
-     *            The HDFS file splits.
-     * @param workloads
-     *            The number of splits already assigned to each machine,
-     *            indexed by NC index; updated in place.
-     * @param locations
-     *            The result schedule; filled in for the splits placed here.
-     * @param slotLimit
-     *            The maximum slots of each machine (not consulted by this
-     *            pass).
-     * @param scheduled
-     *            Indicate which slot is scheduled; updated in place.
-     * @param locationToNumOfAssignement
-     *            The number of assignments per NC name, used to order the
-     *            candidates; updated in place.
-     */
-    private void scheduleNonLocalSlots(InputSplit[] splits, final int[] workloads, String[] locations, int slotLimit,
-            boolean[] scheduled, final HashMap<String, Integer> locationToNumOfAssignement)
-                    throws IOException, UnknownHostException {
-        if (NCs.length == 0) {
-            // no machine to schedule on; PriorityQueue also rejects an initial capacity below 1
-            return;
-        }
-
-        PriorityQueue<String> scheduleCandidates = new PriorityQueue<String>(NCs.length, new Comparator<String>() {
-            @Override
-            public int compare(String s1, String s2) {
-                return locationToNumOfAssignement.get(s1).compareTo(locationToNumOfAssignement.get(s2));
-            }
-
-        });
-
-        for (String nc : NCs) {
-            scheduleCandidates.add(nc);
-        }
-        /**
-         * schedule non-local file reads
-         */
-        for (int i = 0; i < splits.length; i++) {
-            if (scheduled[i]) {
-                continue;
-            }
-            /** no data-local NC was found for this split: take the least-loaded NC */
-            String selectedNcName = scheduleCandidates.poll();
-            if (selectedNcName == null) {
-                // poll() reports an empty queue with null instead of throwing
-                break;
-            }
-            int ncIndex = ncNameToIndex.get(selectedNcName);
-            workloads[ncIndex]++;
-            scheduled[i] = true;
-            locations[i] = selectedNcName;
-            // update the count while the NC is outside the queue, then re-insert it at its new rank
-            locationToNumOfAssignement.put(selectedNcName, workloads[ncIndex]);
-            scheduleCandidates.add(selectedNcName);
-        }
-    }
-
-    /**
-     * Schedule data-local slots to each machine. For every unscheduled split
-     * its locations are tried in order of fewest assignments first, ties
-     * broken by fewest hosted splits; the split goes to the first location
-     * that resolves to a known node controller with free capacity.
-     *
-     * @param splits
-     *            The HDFS file splits.
-     * @param workloads
-     *            The number of splits already assigned to each machine,
-     *            indexed by NC index; updated in place.
-     * @param locations
-     *            The result schedule; filled in for the splits placed here.
-     * @param slots
-     *            The maximum slots of each machine.
-     * @param random
-     *            The random generator, used to pick among the NCs that share
-     *            one IP address.
-     * @param scheduled
-     *            Indicate which slot is scheduled; updated in place.
-     * @param locationToNumSplits
-     *            The number of splits hosted by each location.
-     * @param locationToNumOfAssignement
-     *            The number of splits assigned so far per location; updated
-     *            in place.
-     * @throws IOException
-     * @throws UnknownHostException
-     */
-    private void scheduleLocalSlots(InputSplit[] splits, int[] workloads, String[] locations, int slots, Random random,
-            boolean[] scheduled, final Map<String, IntWritable> locationToNumSplits,
-            final HashMap<String, Integer> locationToNumOfAssignement) throws IOException, UnknownHostException {
-        /** candidates with fewer assignments come first; ties go to the less popular location */
-        PriorityQueue<String> scheduleCandidates = new PriorityQueue<String>(3, new Comparator<String>() {
-            @Override
-            public int compare(String s1, String s2) {
-                int assignmentDifference = locationToNumOfAssignement.get(s1)
-                        .compareTo(locationToNumOfAssignement.get(s2));
-                if (assignmentDifference != 0) {
-                    return assignmentDifference;
-                }
-                return locationToNumSplits.get(s1).compareTo(locationToNumSplits.get(s2));
-            }
-
-        });
-
-        for (int i = 0; i < splits.length; i++) {
-            if (scheduled[i]) {
-                continue;
-            }
-            /**
-             * get the locations of this split
-             */
-            String[] locs = splits[i].getLocations();
-            if (locs.length == 0) {
-                continue;
-            }
-            scheduleCandidates.clear();
-            for (String loc : locs) {
-                scheduleCandidates.add(loc);
-            }
-
-            /**
-             * drain with poll(): iterating a PriorityQueue does not visit its
-             * elements in comparator order
-             */
-            while (!scheduled[i] && !scheduleCandidates.isEmpty()) {
-                String candidate = scheduleCandidates.poll();
-                /**
-                 * iterate over all the IP addresses of the name
-                 */
-                for (InetAddress ip : InetAddress.getAllByName(candidate)) {
-                    List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
-                    if (dataLocations == null) {
-                        // no node controller runs at this address
-                        continue;
-                    }
-                    String nc = dataLocations.get(random.nextInt(dataLocations.size()));
-                    int pos = ncNameToIndex.get(nc);
-                    /**
-                     * skip the node if it is already full
-                     */
-                    if (workloads[pos] < slots) {
-                        locations[i] = nc;
-                        workloads[pos]++;
-                        scheduled[i] = true;
-                        locationToNumOfAssignement.put(candidate, locationToNumOfAssignement.get(candidate) + 1);
-                        break;
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * Counts, for every location, how many of the given splits list it.
-     *
-     * @param splits
-     *            the splits to scan
-     * @param locationToNumOfSplits
-     *            receives one counter per location; existing counters are
-     *            incremented
-     * @throws IOException
-     *             if a split cannot report its locations
-     */
-    private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits)
-            throws IOException {
-        for (int s = 0; s < splits.length; s++) {
-            for (String host : splits[s].getLocations()) {
-                IntWritable counter = locationToNumOfSplits.get(host);
-                if (counter != null) {
-                    counter.set(counter.get() + 1);
-                } else {
-                    // first sighting of this location
-                    locationToNumOfSplits.put(host, new IntWritable(1));
-                }
-            }
-        }
-    }
-
-    /**
-     * Rebuilds the NC name array, the IP-address-to-NC map and the
-     * NC-name-to-index map from the given node controller information.
-     *
-     * @param ncNameToNcInfos
-     *            the node controller information keyed by NC name
-     * @throws HyracksException
-     *             wrapping any exception raised while building the maps
-     */
-    private void loadIPAddressToNCMap(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
-        try {
-            NCs = new String[ncNameToNcInfos.size()];
-            ipToNcMapping.clear();
-            ncNameToIndex.clear();
-
-            // first pass: group the NC names by IP address and record them in iteration order
-            int next = 0;
-            for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
-                byte[] rawAddress = entry.getValue().getNetworkAddress().lookupIpAddress();
-                String ipAddr = InetAddress.getByAddress(rawAddress).getHostAddress();
-                List<String> ncsAtAddress = ipToNcMapping.get(ipAddr);
-                if (ncsAtAddress == null) {
-                    ncsAtAddress = new ArrayList<String>();
-                    ipToNcMapping.put(ipAddr, ncsAtAddress);
-                }
-                String ncName = entry.getKey();
-                ncsAtAddress.add(ncName);
-                NCs[next++] = ncName;
-            }
-
-            // second pass: remember the array position of every NC name
-            for (int idx = 0; idx < NCs.length; idx++) {
-                ncNameToIndex.put(NCs[idx], idx);
-            }
-        } catch (Exception e) {
-            throw new HyracksException(e);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileControlledTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileControlledTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileControlledTupleParser.java
deleted file mode 100644
index c8e9c65..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileControlledTupleParser.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.asterix.external.indexing.IndexingConstants;
-import org.apache.asterix.external.indexing.input.RCFileLookupReader;
-import org.apache.asterix.om.base.AInt32;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.INullWriter;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
-import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-
-public class RCFileControlledTupleParser implements IControlledTupleParser {
-
-    // builder for the output tuple: the propagated input fields (if any) followed by the parsed record
-    private ArrayTupleBuilder tb;
-    // data output of tb
-    private transient DataOutput dos;
-    // collects output tuples and flushes them to the writer
-    private final FrameTupleAppender appender;
-    // whether fields of the input tuple are copied to the output tuple
-    private boolean propagateInput;
-    // indexes of the input fields that are copied when propagateInput is set
-    private int[] propagatedFields;
-    // reference to the input tuple currently being propagated
-    private FrameTupleReference frameTuple;
-    // converts a record read from the external source into the output field
-    private IAsterixHDFSRecordParser parser;
-    // looks up a record by file number, record offset and row number
-    private RCFileLookupReader reader;
-    // positions of the record-id fields in the input tuple, indexed by the IndexingConstants field indexes
-    private int[] ridFields;
-    // descriptor of the input tuples; supplies the serdes used to read the record-id fields
-    private RecordDescriptor inRecDesc;
-    // accessor over the incoming frame
-    private FrameTupleAccessor tupleAccessor;
-    // stream positioned on the field of the frame that is about to be deserialized
-    private ByteBufferInputStream bbis;
-    // data input view over bbis
-    private DataInputStream dis;
-    // whether a tuple with a null record is emitted when the input record id is null
-    private boolean retainNull;
-    // serialized type tag of NULL, compared against the first byte of the file number field
-    protected byte nullByte;
-    // holds the serialized null value when retainNull is set, otherwise null
-    protected ArrayTupleBuilder nullTupleBuild;
-
-    /**
-     * Creates a parser that looks up records through the given reader and
-     * writes them, optionally preceded by propagated input fields, to output
-     * frames.
-     *
-     * @param ctx
-     *            the task context used to allocate the output frame
-     * @param parser
-     *            converts a looked-up record into the output field
-     * @param reader
-     *            reads a record given its file number, offset and row number
-     * @param propagateInput
-     *            whether input fields are copied to the output tuple
-     * @param propagatedFields
-     *            the input fields to copy when {@code propagateInput} is set
-     * @param inRecDesc
-     *            the descriptor of the input tuples
-     * @param ridFields
-     *            the positions of the record-id fields in the input tuple
-     * @param retainNull
-     *            whether a null record is emitted for a null record id
-     * @param iNullWriterFactory
-     *            creates the writer for the null value; only used when
-     *            {@code retainNull} is set
-     * @throws HyracksDataException
-     *             if the output frame cannot be allocated or the null value
-     *             cannot be serialized
-     */
-    public RCFileControlledTupleParser(IHyracksTaskContext ctx, IAsterixHDFSRecordParser parser,
-            RCFileLookupReader reader, boolean propagateInput, int[] propagatedFields, RecordDescriptor inRecDesc,
-            int[] ridFields, boolean retainNull, INullWriterFactory iNullWriterFactory) throws HyracksDataException {
-        appender = new FrameTupleAppender(new VSizeFrame(ctx));
-        this.parser = parser;
-        this.reader = reader;
-        this.propagateInput = propagateInput;
-        this.propagatedFields = propagatedFields;
-        this.retainNull = retainNull;
-        this.inRecDesc = inRecDesc;
-        this.ridFields = ridFields;
-        this.tupleAccessor = new FrameTupleAccessor(inRecDesc);
-        if (propagateInput) {
-            // one field per propagated input field plus one for the record
-            tb = new ArrayTupleBuilder(propagatedFields.length + 1);
-        } else {
-            tb = new ArrayTupleBuilder(1);
-        }
-        frameTuple = new FrameTupleReference();
-        dos = tb.getDataOutput();
-        bbis = new ByteBufferInputStream();
-        dis = new DataInputStream(bbis);
-        nullByte = ATypeTag.NULL.serialize();
-        if (retainNull) {
-            INullWriter nullWriter = iNullWriterFactory.createNullWriter();
-            nullTupleBuild = new ArrayTupleBuilder(1);
-            DataOutput out = nullTupleBuild.getDataOutput();
-            try {
-                nullWriter.writeNull(out);
-            } catch (IOException e) {
-                // an unusable null tuple would corrupt every outer-join result, so fail here
-                throw new HyracksDataException(e);
-            }
-        } else {
-            nullTupleBuild = null;
-        }
-    }
-
-    /**
-     * Closes the lookup reader and then flushes the tuples that are still
-     * buffered in the appender to the writer.
-     *
-     * @param writer
-     *            the writer that receives the remaining tuples
-     * @throws Exception
-     *             an {@link IOException} is rethrown wrapped in a
-     *             {@link HyracksDataException}; anything else propagates as is
-     */
-    @Override
-    public void close(IFrameWriter writer) throws Exception {
-        try {
-            reader.close();
-            appender.flush(writer, true);
-        } catch (IOException cause) {
-            throw new HyracksDataException(cause);
-        }
-    }
-
-    /**
-     * Processes one input frame: for every tuple the record id (file number,
-     * record offset, row number) is read, the record is looked up through the
-     * reader, parsed and appended to the output. A tuple whose file number
-     * field starts with the null type tag is not looked up; it yields a tuple
-     * with a null record when both input propagation and null retention are
-     * enabled, and nothing otherwise.
-     *
-     * @param writer
-     *            the writer that receives full output frames
-     * @param frameBuffer
-     *            the input frame
-     * @throws HyracksDataException
-     *             wrapping any failure; the reader is closed before the
-     *             exception is thrown
-     */
-    @Override
-    public void parseNext(IFrameWriter writer, ByteBuffer frameBuffer) throws HyracksDataException {
-        try {
-            tupleAccessor.reset(frameBuffer);
-            int tupleCount = tupleAccessor.getTupleCount();
-            int fieldSlotsLength = tupleAccessor.getFieldSlotsLength();
-            // Loop over tuples
-            for (int tupleIndex = 0; tupleIndex < tupleCount; tupleIndex++) {
-                int tupleStartOffset = tupleAccessor.getTupleStartOffset(tupleIndex) + fieldSlotsLength;
-                int fileNumberStartOffset = tupleAccessor.getFieldStartOffset(tupleIndex,
-                        ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]);
-                Object object;
-                // Check if null <- for outer join ->
-                if (frameBuffer.get(tupleStartOffset + fileNumberStartOffset) == nullByte) {
-                    object = null;
-                } else {
-                    // Get file number
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + fileNumberStartOffset);
-                    int fileNumber = ((AInt32) inRecDesc
-                            .getFields()[ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]].deserialize(dis))
-                                    .getIntegerValue();
-                    // Get record group offset
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + tupleAccessor.getFieldStartOffset(tupleIndex,
-                            ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]));
-                    long recordOffset = ((AInt64) inRecDesc
-                            .getFields()[ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]].deserialize(dis))
-                                    .getLongValue();
-                    // Get row number
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + tupleAccessor.getFieldStartOffset(tupleIndex,
-                            ridFields[IndexingConstants.ROW_NUMBER_FIELD_INDEX]));
-                    int rowNumber = ((AInt32) inRecDesc.getFields()[ridFields[IndexingConstants.ROW_NUMBER_FIELD_INDEX]]
-                            .deserialize(dis)).getIntegerValue();
-
-                    // Read record from external source
-                    object = reader.read(fileNumber, recordOffset, rowNumber);
-                }
-                if (object != null) {
-                    tb.reset();
-                    if (propagateInput) {
-                        writePropagatedFields(tupleIndex);
-                    }
-                    // parse record
-                    parser.parse(object, tb.getDataOutput());
-                    tb.addFieldEndOffset();
-                    addTupleToFrame(writer);
-                } else if (propagateInput && retainNull) {
-                    tb.reset();
-                    writePropagatedFields(tupleIndex);
-                    // write only the bytes produced by the null writer; getByteArray() is presumably the
-                    // builder's backing buffer and may be longer than its content (TODO confirm)
-                    dos.write(nullTupleBuild.getByteArray(), 0, nullTupleBuild.getSize());
-                    tb.addFieldEndOffset();
-                    addTupleToFrame(writer);
-                }
-            }
-        } catch (Exception e) {
-            // Something went wrong, try to close the reader and then throw an exception <-this should never happen->
-            try {
-                reader.close();
-            } catch (Exception e1) {
-                e.addSuppressed(e1);
-            }
-            throw new HyracksDataException(e);
-        }
-    }
-
-    /** Copies the propagated fields of the given input tuple into the tuple under construction. */
-    private void writePropagatedFields(int tupleIndex) throws IOException {
-        frameTuple.reset(tupleAccessor, tupleIndex);
-        for (int i = 0; i < propagatedFields.length; i++) {
-            dos.write(frameTuple.getFieldData(propagatedFields[i]), frameTuple.getFieldStart(propagatedFields[i]),
-                    frameTuple.getFieldLength(propagatedFields[i]));
-            tb.addFieldEndOffset();
-        }
-    }
-
-    /**
-     * Appends the tuple under construction to the output frame. When the
-     * frame has no room left it is flushed to the writer and the append is
-     * retried once.
-     *
-     * @param writer
-     *            the writer that receives the flushed frame
-     * @throws HyracksDataException
-     *             if flushing fails
-     * @throws IllegalStateException
-     *             if the tuple does not fit even after the flush
-     */
-    protected void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        boolean appended = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-        if (appended) {
-            return;
-        }
-        // frame is full: hand it to the writer and retry
-        appender.flush(writer, true);
-        appended = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
-        if (!appended) {
-            throw new IllegalStateException();
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileIndexingTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileIndexingTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileIndexingTupleParser.java
deleted file mode 100644
index eaa3381..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/RCFileIndexingTupleParser.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import org.apache.asterix.external.indexing.input.AbstractHDFSReader;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public class RCFileIndexingTupleParser extends AbstractIndexingTupleParser{
-
-    private Integer rowNumber = -1;
-    private Integer lastFileNumber = -1;
-    private long lastByteLocation = -1;
-
-    public RCFileIndexingTupleParser(IHyracksCommonContext ctx, ARecordType recType, IAsterixHDFSRecordParser
-            deserializer)
-            throws HyracksDataException {
-        super(ctx, recType, deserializer);
-        tb = new ArrayTupleBuilder(4);
-        dos = tb.getDataOutput();
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    protected void appendIndexingData(ArrayTupleBuilder tb,
-            AbstractHDFSReader inReader) throws Exception {
-        aMutableInt.setValue(inReader.getFileNumber());
-        aMutableLong.setValue(inReader.getReaderPosition());
-        //add file number
-        tb.addField(intSerde, aMutableInt);
-        //add record offset
-        tb.addField(longSerde, aMutableLong);
-        //add row number
-        if(aMutableInt.getIntegerValue().equals(lastFileNumber) && aMutableLong.getLongValue() == lastByteLocation){
-            rowNumber++;
-        }else{
-            lastFileNumber = aMutableInt.getIntegerValue();
-            lastByteLocation = aMutableLong.getLongValue();
-            rowNumber = 0;
-        }
-        aMutableInt.setValue(rowNumber);
-        tb.addField(intSerde, aMutableInt);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/SeqOrTxtControlledTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/SeqOrTxtControlledTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/SeqOrTxtControlledTupleParser.java
deleted file mode 100644
index 23ddd8a..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/SeqOrTxtControlledTupleParser.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.asterix.external.indexing.IndexingConstants;
-import org.apache.asterix.external.indexing.input.ILookupReader;
-import org.apache.asterix.om.base.AInt32;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.INullWriter;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
-import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-
-public class SeqOrTxtControlledTupleParser implements IControlledTupleParser {
-
-    private ArrayTupleBuilder tb;
-    private transient DataOutput dos;
-    private final FrameTupleAppender appender;
-    private boolean propagateInput;
-    private int[] propagatedFields;
-    private FrameTupleReference frameTuple;
-    private IAsterixHDFSRecordParser parser;
-    private ILookupReader reader;
-    private int[] ridFields;
-    private RecordDescriptor inRecDesc;
-    private FrameTupleAccessor tupleAccessor;
-    private ByteBufferInputStream bbis;
-    private DataInputStream dis;
-    private boolean retainNull;
-    protected byte nullByte;
-    protected ArrayTupleBuilder nullTupleBuild;
-
-    public SeqOrTxtControlledTupleParser(IHyracksTaskContext ctx, IAsterixHDFSRecordParser parser, ILookupReader reader,
-            boolean propagateInput, int[] propagatedFields, RecordDescriptor inRecDesc, int[] ridFields,
-            boolean retainNull, INullWriterFactory iNullWriterFactory) throws HyracksDataException {
-        appender = new FrameTupleAppender(new VSizeFrame(ctx));
-        this.parser = parser;
-        this.reader = reader;
-        this.propagateInput = propagateInput;
-        this.ridFields = ridFields;
-        this.retainNull = retainNull;
-        if (propagateInput) {
-            tb = new ArrayTupleBuilder(propagatedFields.length + 1);
-            frameTuple = new FrameTupleReference();
-            this.propagatedFields = propagatedFields;
-        } else {
-            tb = new ArrayTupleBuilder(1);
-        }
-        dos = tb.getDataOutput();
-        this.tupleAccessor = new FrameTupleAccessor(inRecDesc);
-        bbis = new ByteBufferInputStream();
-        dis = new DataInputStream(bbis);
-        nullByte = ATypeTag.NULL.serialize();
-        if (retainNull) {
-            INullWriter nullWriter = iNullWriterFactory.createNullWriter();
-            nullTupleBuild = new ArrayTupleBuilder(1);
-            DataOutput out = nullTupleBuild.getDataOutput();
-            try {
-                nullWriter.writeNull(out);
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        } else {
-            nullTupleBuild = null;
-        }
-    }
-
-    @Override
-    public void close(IFrameWriter writer) throws Exception {
-        try {
-            reader.close();
-            appender.flush(writer, true);
-        } catch (IOException ioe) {
-            throw new HyracksDataException(ioe);
-        }
-    }
-
-    @Override
-    public void parseNext(IFrameWriter writer, ByteBuffer frameBuffer) throws HyracksDataException {
-        try {
-            int tupleCount = 0;
-            int tupleIndex = 0;
-            Object record;
-            tupleAccessor.reset(frameBuffer);
-            tupleCount = tupleAccessor.getTupleCount();
-            int fieldSlotsLength = tupleAccessor.getFieldSlotsLength();
-            // Loop over incoming tuples
-            while (tupleIndex < tupleCount) {
-                int tupleStartOffset = tupleAccessor.getTupleStartOffset(tupleIndex) + fieldSlotsLength;
-                int fileNumberStartOffset = tupleAccessor.getFieldStartOffset(tupleIndex,
-                        ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]);
-                // Check if null <- for outer join ->
-                if (frameBuffer.get(tupleStartOffset + fileNumberStartOffset) == nullByte) {
-                    record = null;
-                } else {
-                    // Get file number
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + fileNumberStartOffset);
-                    int fileNumber = ((AInt32) inRecDesc
-                            .getFields()[ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]].deserialize(dis))
-                                    .getIntegerValue();
-                    // Get record offset
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + tupleAccessor.getFieldStartOffset(tupleIndex,
-                            ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]));
-                    long recordOffset = ((AInt64) inRecDesc
-                            .getFields()[ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]].deserialize(dis))
-                                    .getLongValue();
-                    // Read the record
-                    record = reader.read(fileNumber, recordOffset);
-                }
-                if (record != null) {
-                    tb.reset();
-                    if (propagateInput) {
-                        frameTuple.reset(tupleAccessor, tupleIndex);
-                        for (int i = 0; i < propagatedFields.length; i++) {
-                            dos.write(frameTuple.getFieldData(propagatedFields[i]),
-                                    frameTuple.getFieldStart(propagatedFields[i]),
-                                    frameTuple.getFieldLength(propagatedFields[i]));
-                            tb.addFieldEndOffset();
-                        }
-                    }
-                    // parse it
-                    parser.parse(record, tb.getDataOutput());
-                    tb.addFieldEndOffset();
-                    addTupleToFrame(writer);
-                } else if (propagateInput && retainNull) {
-                    tb.reset();
-                    frameTuple.reset(tupleAccessor, tupleIndex);
-                    for (int i = 0; i < propagatedFields.length; i++) {
-                        dos.write(frameTuple.getFieldData(propagatedFields[i]),
-                                frameTuple.getFieldStart(propagatedFields[i]),
-                                frameTuple.getFieldLength(propagatedFields[i]));
-                        tb.addFieldEndOffset();
-                    }
-                    dos.write(nullTupleBuild.getByteArray());
-                    tb.addFieldEndOffset();
-                    addTupleToFrame(writer);
-                }
-                tupleIndex++;
-            }
-        } catch (Exception e) {
-            e.printStackTrace();
-            try {
-                reader.close();
-            } catch (Exception e2) {
-                e.addSuppressed(e2);
-            }
-            throw new HyracksDataException(e);
-        }
-    }
-
-    private void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-            appender.flush(writer, true);
-            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-                throw new IllegalStateException();
-            }
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/TextOrSeqIndexingTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/TextOrSeqIndexingTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/TextOrSeqIndexingTupleParser.java
deleted file mode 100644
index d44b3f3..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/TextOrSeqIndexingTupleParser.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import org.apache.asterix.external.indexing.input.AbstractHDFSReader;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public class TextOrSeqIndexingTupleParser extends AbstractIndexingTupleParser{
-    public TextOrSeqIndexingTupleParser(IHyracksCommonContext ctx,
-            ARecordType recType, IAsterixHDFSRecordParser deserializer)
-            throws HyracksDataException {
-        super(ctx, recType, deserializer);
-        tb = new ArrayTupleBuilder(3);
-        dos = tb.getDataOutput();
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    protected void appendIndexingData(ArrayTupleBuilder tb,
-            AbstractHDFSReader inReader) throws Exception {
-        aMutableInt.setValue(inReader.getFileNumber());
-        aMutableLong.setValue(inReader.getReaderPosition());
-
-        tb.addField(intSerde, aMutableInt);
-        tb.addField(longSerde, aMutableLong);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSLookupInputStream.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSLookupInputStream.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSLookupInputStream.java
deleted file mode 100644
index 563a46d..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSLookupInputStream.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
-
-/*
- * This class is used for seek and read of external data of format adm or delimited text in sequence of text input format
- */
-public abstract class AbstractHDFSLookupInputStream extends InputStream {
-
-    protected String pendingValue = null;
-    protected FileSystem fs;
-    protected int fileNumber = -1;
-    protected int EOL = "\n".getBytes()[0];
-    protected boolean skipFile = false;
-    protected ExternalFile file = new ExternalFile(null, null, 0, null, null, 0, ExternalFilePendingOp.PENDING_NO_OP);
-    protected ExternalFileIndexAccessor filesIndexAccessor;
-
-    public AbstractHDFSLookupInputStream(ExternalFileIndexAccessor filesIndexAccessor, JobConf conf)
-            throws IOException {
-        this.filesIndexAccessor = filesIndexAccessor;
-        fs = FileSystem.get(conf);
-    }
-
-    @Override
-    public int read(byte[] buffer, int offset, int len) throws IOException {
-        if (pendingValue != null) {
-            int size = pendingValue.length() + 1;
-            if (size > len) {
-                return 0;
-            }
-            System.arraycopy(pendingValue.getBytes(), 0, buffer, offset, pendingValue.length());
-            buffer[offset + pendingValue.length()] = (byte) EOL;
-            pendingValue = null;
-            return size;
-        }
-        return -1;
-    }
-
-    public boolean fetchRecord(int fileNumber, long recordOffset) throws Exception {
-        if (fileNumber != this.fileNumber) {
-            // New file number
-            this.fileNumber = fileNumber;
-            filesIndexAccessor.searchForFile(fileNumber, file);
-
-            try {
-                FileStatus fileStatus = fs.getFileStatus(new Path(file.getFileName()));
-                if (fileStatus.getModificationTime() != file.getLastModefiedTime().getTime()) {
-                    this.fileNumber = fileNumber;
-                    skipFile = true;
-                    return false;
-                } else {
-                    this.fileNumber = fileNumber;
-                    skipFile = false;
-                    openFile(file.getFileName());
-                }
-            } catch (FileNotFoundException e) {
-                // We ignore File not found exceptions <- it means file was deleted and so we don't care about it anymore ->
-                this.fileNumber = fileNumber;
-                skipFile = true;
-                return false;
-            }
-        } else if (skipFile) {
-            return false;
-        }
-        return read(recordOffset);
-    }
-
-    @Override
-    public int read() throws IOException {
-        return 0;
-    }
-
-    protected abstract boolean read(long byteLocation);
-
-    protected abstract void openFile(String fileName) throws IOException;
-
-    @Override
-    public void close() throws IOException {
-        super.close();
-    }
-
-    public ExternalFileIndexAccessor getExternalFileIndexAccessor() {
-        return filesIndexAccessor;
-    }
-
-    public void setExternalFileIndexAccessor(ExternalFileIndexAccessor filesIndexAccessor) {
-        this.filesIndexAccessor = filesIndexAccessor;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSReader.java
deleted file mode 100644
index 65bfcf3..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/AbstractHDFSReader.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.InputStream;
-
-import org.apache.hadoop.mapred.Counters.Counter;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.Reporter;
-
-/***
- * an abstract class to be used for reading hdfs based datasets one record at a time <- used for indexing->
- */
-public abstract class AbstractHDFSReader extends InputStream {
-
-    /***
-     * This function should be called once to do initial setup before starting to read records
-     *
-     * @return true if ready for reading
-     */
-    abstract public boolean initialize() throws Exception;
-
-    /***
-     * @return the next object read or null if reached end of stream
-     */
-    abstract public Object readNext() throws Exception;
-
-    /**
-     * @return the file name of the current filesplit being read
-     * @throws Exception
-     *             in case of end of records is reached
-     */
-    abstract public String getFileName() throws Exception;
-
-    /**
-     * @return return the reader position of last record read
-     * @throws Exception
-     *             in case of end of records is reached
-     */
-    abstract public long getReaderPosition() throws Exception;
-
-    /**
-     * @return the file number of the file being read
-     * @throws Exception
-     */
-    abstract public int getFileNumber() throws Exception;
-
-    protected Reporter getReporter() {
-        Reporter reporter = new Reporter() {
-
-            @Override
-            public Counter getCounter(Enum<?> arg0) {
-                return null;
-            }
-
-            @Override
-            public Counter getCounter(String arg0, String arg1) {
-                return null;
-            }
-
-            @Override
-            public InputSplit getInputSplit() throws UnsupportedOperationException {
-                return null;
-            }
-
-            @Override
-            public void incrCounter(Enum<?> arg0, long arg1) {
-            }
-
-            @Override
-            public void incrCounter(String arg0, String arg1, long arg2) {
-            }
-
-            @Override
-            public void setStatus(String arg0) {
-            }
-
-            @Override
-            public void progress() {
-            }
-
-            @Override
-            public float getProgress() {
-                return 0.0f;
-            }
-        };
-
-        return reporter;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericFileAwareRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericFileAwareRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericFileAwareRecordReader.java
deleted file mode 100644
index ba36407..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericFileAwareRecordReader.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-
-/**
- * This is a generic reader used for indexing external dataset or for performing full scan for external dataset with
- * a stored snapshot
- *
- * @author alamouda
- */
-
-public class GenericFileAwareRecordReader extends GenericRecordReader {
-
-    private List<ExternalFile> files;
-    private FileSystem hadoopFS;
-    private long recordOffset = 0L;
-
-    public GenericFileAwareRecordReader(InputSplit[] inputSplits, String[] readSchedule, String nodeName, JobConf conf,
-            boolean[] executed, List<ExternalFile> files) throws IOException {
-        super(inputSplits, readSchedule, nodeName, conf, executed);
-        this.files = files;
-        hadoopFS = FileSystem.get(conf);
-    }
-
-    private boolean moveToNext() throws IOException {
-        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
-            /**
-             * read all the partitions scheduled to the current node
-             */
-            if (readSchedule[currentSplitIndex].equals(nodeName)) {
-                /**
-                 * pick an unread split to read synchronize among
-                 * simultaneous partitions in the same machine
-                 */
-                synchronized (executed) {
-                    if (executed[currentSplitIndex] == false) {
-                        executed[currentSplitIndex] = true;
-                    } else {
-                        continue;
-                    }
-                }
-
-                /**
-                 * read the split
-                 */
-                try {
-                    String fileName = ((FileSplit) (inputSplits[currentSplitIndex])).getPath().toUri().getPath();
-                    FileStatus fileStatus = hadoopFS.getFileStatus(new Path(fileName));
-                    //skip if not the same file stored in the files snapshot
-                    if (fileStatus.getModificationTime() != files.get(currentSplitIndex).getLastModefiedTime()
-                            .getTime())
-                        continue;
-                    reader = getRecordReader(currentSplitIndex);
-                } catch (Exception e) {
-                    continue;
-                }
-                key = reader.createKey();
-                value = reader.createValue();
-                return true;
-            }
-        }
-        return false;
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public Object readNext() throws IOException {
-
-        if (reader == null) {
-            return null;
-        }
-        recordOffset = reader.getPos();
-        if (reader.next(key, value)) {
-            return value;
-        }
-        while (moveToNext()) {
-            recordOffset = reader.getPos();
-            if (reader.next(key, value)) {
-                return value;
-            }
-        }
-        return null;
-    }
-
-    @Override
-    public String getFileName() throws Exception {
-        return files.get(currentSplitIndex).getFileName();
-    }
-
-    @Override
-    public long getReaderPosition() throws Exception {
-        return recordOffset;
-    }
-
-    @Override
-    public int getFileNumber() throws Exception {
-        return files.get(currentSplitIndex).getFileNumber();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericRecordReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericRecordReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericRecordReader.java
deleted file mode 100644
index ab050a7..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/input/GenericRecordReader.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.input;
-
-import java.io.IOException;
-
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
-
-/**
- * This class can be used by any input format to perform full scan operations
- */
-
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class GenericRecordReader extends AbstractHDFSReader {
-
-    protected RecordReader reader;
-    protected Object key;
-    protected Object value;
-    protected int currentSplitIndex = 0;
-    protected boolean executed[];
-    protected InputSplit[] inputSplits;
-    protected String[] readSchedule;
-    protected String nodeName;
-    protected JobConf conf;
-
-    public GenericRecordReader(InputSplit[] inputSplits, String[] readSchedule, String nodeName, JobConf conf,
-            boolean executed[]) {
-        this.inputSplits = inputSplits;
-        this.readSchedule = readSchedule;
-        this.nodeName = nodeName;
-        this.conf = conf;
-        this.executed = executed;
-    }
-
-    @Override
-    public boolean initialize() throws IOException {
-        return moveToNext();
-    }
-
-    private boolean moveToNext() throws IOException {
-        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
-            /**
-             * read all the partitions scheduled to the current node
-             */
-            if (readSchedule[currentSplitIndex].equals(nodeName)) {
-                /**
-                 * pick an unread split to read synchronize among
-                 * simultaneous partitions in the same machine
-                 */
-                synchronized (executed) {
-                    if (executed[currentSplitIndex] == false) {
-                        executed[currentSplitIndex] = true;
-                    } else {
-                        continue;
-                    }
-                }
-
-                /**
-                 * read the split
-                 */
-                reader = getRecordReader(currentSplitIndex);
-                key = reader.createKey();
-                value = reader.createValue();
-                return true;
-            }
-        }
-        return false;
-    }
-
-    protected RecordReader getRecordReader(int slitIndex) throws IOException {
-        RecordReader reader = conf.getInputFormat().getRecordReader(inputSplits[slitIndex], conf, getReporter());
-        return reader;
-    }
-
-    @Override
-    public Object readNext() throws IOException {
-        if (reader == null) {
-            return null;
-        }
-        if (reader.next(key, value)) {
-            return value;
-        }
-        while (moveToNext()) {
-            if (reader.next(key, value)) {
-                return value;
-            }
-        }
-        return null;
-    }
-
-    @Override
-    public String getFileName() throws Exception {
-        return null;
-    }
-
-    @Override
-    public long getReaderPosition() throws Exception {
-        return reader.getPos();
-    }
-
-    @Override
-    public int getFileNumber() throws Exception {
-        throw new NotImplementedException("This reader doesn't support this function");
-    }
-
-    @Override
-    public int read(byte[] buffer, int offset, int len) throws IOException {
-        throw new NotImplementedException("Use readNext()");
-    }
-
-    @Override
-    public int read() throws IOException {
-        throw new NotImplementedException("Use readNext()");
-    }
-
-}


[06/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFactory.java
index 2ccc91c..5202093 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFactory.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class SumFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFunction.java
index d2c9e1b..d81f01b 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/SumFunction.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.IExternalScalarFunction;
-import org.apache.asterix.external.library.IFunctionHelper;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JObjects.JInt;
 
 public class SumFunction implements IExternalScalarFunction {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFactory.java
index 0d738da..f74ed38 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFactory.java
@@ -18,8 +18,8 @@
  */
 package org.apache.asterix.external.library;
 
-import org.apache.asterix.external.library.IExternalFunction;
-import org.apache.asterix.external.library.IFunctionFactory;
+import org.apache.asterix.external.api.IExternalFunction;
+import org.apache.asterix.external.api.IFunctionFactory;
 
 public class UpperCaseFactory implements IFunctionFactory {
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFunction.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFunction.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFunction.java
index 56121b0..70bd3e1 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFunction.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/UpperCaseFunction.java
@@ -23,6 +23,8 @@ import java.util.Random;
 import org.apache.asterix.external.library.java.JObjects.JInt;
 import org.apache.asterix.external.library.java.JObjects.JRecord;
 import org.apache.asterix.external.library.java.JObjects.JString;
+import org.apache.asterix.external.api.IExternalScalarFunction;
+import org.apache.asterix.external.api.IFunctionHelper;
 import org.apache.asterix.external.library.java.JTypeTag;
 
 /**

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapter.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapter.java
index 39f8271..df0fb94 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapter.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapter.java
@@ -27,14 +27,13 @@ import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
 import org.apache.asterix.external.dataset.adapter.StreamBasedAdapter;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.hyracks.api.comm.IFrameWriter;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
 
-public class TestTypedAdapter extends StreamBasedAdapter implements IFeedAdapter {
+public class TestTypedAdapter extends StreamBasedAdapter {
 
     private static final long serialVersionUID = 1L;
 
@@ -126,17 +125,13 @@ public class TestTypedAdapter extends StreamBasedAdapter implements IFeedAdapter
     }
 
     @Override
-    public DataExchangeMode getDataExchangeMode() {
-        return DataExchangeMode.PUSH;
-    }
-
-    @Override
-    public void stop() throws Exception {
+    public boolean stop() throws Exception {
         generator.stop();
+        return true;
     }
 
     @Override
-    public boolean handleException(Exception e) {
+    public boolean handleException(Throwable e) {
         return false;
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapterFactory.java b/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapterFactory.java
index c177a58..6b08f3a 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapterFactory.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/external/library/adapter/TestTypedAdapterFactory.java
@@ -18,26 +18,30 @@
  */
 package org.apache.asterix.external.library.adapter;
 
+import java.io.InputStream;
 import java.util.Map;
 
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.common.feeds.api.IIntakeProgressTracker;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory.SupportedOperation;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.asterix.external.api.IAdapterFactory;
+import org.apache.asterix.external.parser.ADMDataParser;
+import org.apache.asterix.external.util.DataflowUtils;
 import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory.InputDataFormat;
 import org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint;
 import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksCommonContext;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.std.file.ITupleParser;
 import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
 
-public class TestTypedAdapterFactory implements IFeedAdapterFactory {
+public class TestTypedAdapterFactory implements IAdapterFactory {
 
     private static final long serialVersionUID = 1L;
 
-    public static final String NAME = "test_typed_adapter";
-
     private ARecordType outputType;
 
     public static final String KEY_NUM_OUTPUT_RECORDS = "num_output_records";
@@ -45,13 +49,8 @@ public class TestTypedAdapterFactory implements IFeedAdapterFactory {
     private Map<String, String> configuration;
 
     @Override
-    public SupportedOperation getSupportedOperations() {
-        return SupportedOperation.READ;
-    }
-
-    @Override
-    public String getName() {
-        return NAME;
+    public String getAlias() {
+        return "test_typed";
     }
 
     @Override
@@ -60,9 +59,47 @@ public class TestTypedAdapterFactory implements IFeedAdapterFactory {
     }
 
     @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        ITupleParserFactory tupleParserFactory = new AsterixTupleParserFactory(configuration, outputType,
-                InputDataFormat.ADM);
+    public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
+        ITupleParserFactory tupleParserFactory = new ITupleParserFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public ITupleParser createTupleParser(final IHyracksCommonContext ctx) throws HyracksDataException {
+                ADMDataParser parser;
+                ITupleForwarder forwarder;
+                ArrayTupleBuilder tb;
+                try {
+                    parser = new ADMDataParser();
+                    forwarder = DataflowUtils.getTupleForwarder(configuration);
+                    forwarder.configure(configuration);
+                    tb = new ArrayTupleBuilder(1);
+                } catch (AsterixException e) {
+                    throw new HyracksDataException(e);
+                }
+                return new ITupleParser() {
+
+                    @Override
+                    public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
+                        try {
+                            parser.configure(configuration, outputType);
+                            parser.setInputStream(in);
+                            forwarder.initialize(ctx, writer);
+                            while (true) {
+                                tb.reset();
+                                if (!parser.parse(tb.getDataOutput())) {
+                                    break;
+                                }
+                                tb.addFieldEndOffset();
+                                forwarder.addTuple(tb);
+                            }
+                            forwarder.close();
+                        } catch (Exception e) {
+                            throw new HyracksDataException(e);
+                        }
+                    }
+                };
+            }
+        };
         return new TestTypedAdapter(tupleParserFactory, outputType, ctx, configuration, partition);
     }
 
@@ -77,14 +114,4 @@ public class TestTypedAdapterFactory implements IFeedAdapterFactory {
         this.outputType = outputType;
     }
 
-    @Override
-    public boolean isRecordTrackingEnabled() {
-        return false;
-    }
-
-    @Override
-    public IIntakeProgressTracker createIntakeProgressTracker() {
-        return null;
-    }
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java b/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
new file mode 100644
index 0000000..698e414
--- /dev/null
+++ b/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime.operator.file;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.asterix.external.parser.ADMDataParser;
+import org.apache.asterix.om.base.AMutableInterval;
+import org.junit.Assert;
+import org.junit.Test;
+
+import junit.extensions.PA;
+
+public class ADMDataParserTest {
+
+    @Test
+    public void test() {
+        String[] dateIntervals = { "-9537-08-04, 9656-06-03", "-9537-04-04, 9656-06-04", "-9537-10-04, 9626-09-05" };
+        AMutableInterval[] parsedDateIntervals = new AMutableInterval[] {
+                new AMutableInterval(-4202630, 2807408, (byte) 17), new AMutableInterval(-4202752, 2807409, (byte) 17),
+                new AMutableInterval(-4202569, 2796544, (byte) 17), };
+
+        String[] timeIntervals = { "12:04:45.689Z, 12:41:59.002Z", "12:10:45.169Z, 15:37:48.736Z",
+                "04:16:42.321Z, 12:22:56.816Z" };
+        AMutableInterval[] parsedTimeIntervals = new AMutableInterval[] {
+                new AMutableInterval(43485689, 45719002, (byte) 18),
+                new AMutableInterval(43845169, 56268736, (byte) 18),
+                new AMutableInterval(15402321, 44576816, (byte) 18), };
+
+        String[] dateTimeIntervals = { "-2640-10-11T17:32:15.675Z, 4104-02-01T05:59:11.902Z",
+                "0534-12-08T08:20:31.487Z, 6778-02-16T22:40:21.653Z",
+                "2129-12-12T13:18:35.758Z, 8647-07-01T13:10:19.691Z" };
+        AMutableInterval[] parsedDateTimeIntervals = new AMutableInterval[] {
+                new AMutableInterval(-145452954464325L, 67345192751902L, (byte) 16),
+                new AMutableInterval(-45286270768513L, 151729886421653L, (byte) 16),
+                new AMutableInterval(5047449515758L, 210721439419691L, (byte) 16) };
+
+        Thread[] threads = new Thread[16];
+        AtomicInteger errorCount = new AtomicInteger(0);
+        for (int i = 0; i < threads.length; ++i) {
+            threads[i] = new Thread(new Runnable() {
+                ADMDataParser parser = new ADMDataParser();
+                ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                DataOutput dos = new DataOutputStream(bos);
+
+                @Override
+                public void run() {
+                    try {
+                        int round = 0;
+                        while (round++ < 10000) {
+                            // Tests parseDateInterval.
+                            for (int index = 0; index < dateIntervals.length; ++index) {
+                                PA.invokeMethod(parser, "parseDateInterval(java.lang.String, java.io.DataOutput)",
+                                        dateIntervals[index], dos);
+                                AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
+                                Assert.assertTrue(aInterval.equals(parsedDateIntervals[index]));
+                            }
+
+                            // Tests parseTimeInterval.
+                            for (int index = 0; index < timeIntervals.length; ++index) {
+                                PA.invokeMethod(parser, "parseTimeInterval(java.lang.String, java.io.DataOutput)",
+                                        timeIntervals[index], dos);
+                                AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
+                                Assert.assertTrue(aInterval.equals(parsedTimeIntervals[index]));
+                            }
+
+                            // Tests parseDateTimeInterval.
+                            for (int index = 0; index < dateTimeIntervals.length; ++index) {
+                                PA.invokeMethod(parser, "parseDateTimeInterval(java.lang.String, java.io.DataOutput)",
+                                        dateTimeIntervals[index], dos);
+                                AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
+                                Assert.assertTrue(aInterval.equals(parsedDateTimeIntervals[index]));
+                            }
+                        }
+                    } catch (Exception e) {
+                        errorCount.incrementAndGet();
+                        e.printStackTrace();
+                    }
+                }
+            });
+            // Kicks off test threads.
+            threads[i].start();
+        }
+
+        // Joins all the threads.
+        try {
+            for (int i = 0; i < threads.length; ++i) {
+                threads[i].join();
+            }
+        } catch (InterruptedException e) {
+            throw new IllegalStateException(e);
+        }
+        // Asserts no failure.
+        Assert.assertTrue(errorCount.get() == 0);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-installer/src/test/java/org/apache/asterix/installer/test/AbstractExecutionIT.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/AbstractExecutionIT.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/AbstractExecutionIT.java
index 0613498..42827b4 100644
--- a/asterix-installer/src/test/java/org/apache/asterix/installer/test/AbstractExecutionIT.java
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/AbstractExecutionIT.java
@@ -20,7 +20,7 @@ import java.util.Collection;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
+import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.IdentitiyResolverFactory;
 import org.apache.asterix.test.aql.TestExecutor;
 import org.apache.asterix.test.runtime.HDFSCluster;
@@ -43,8 +43,8 @@ public abstract class AbstractExecutionIT {
     protected static final Logger LOGGER = Logger.getLogger(AbstractExecutionIT.class.getName());
 
     protected static final String PATH_ACTUAL = "ittest" + File.separator;
-    protected static final String PATH_BASE = StringUtils.join(new String[] { "..", "asterix-app", "src", "test",
-            "resources", "runtimets" }, File.separator);
+    protected static final String PATH_BASE = StringUtils
+            .join(new String[] { "..", "asterix-app", "src", "test", "resources", "runtimets" }, File.separator);
 
     protected static final String HDFS_BASE = "../asterix-app/";
 
@@ -63,21 +63,21 @@ public abstract class AbstractExecutionIT {
 
         //This is nasty but there is no very nice way to set a system property on each NC that I can figure.
         //The main issue is that we need the NC resolver to be the IdentityResolver and not the DNSResolver.
-        FileUtils.copyFile(
-                new File(StringUtils.join(new String[] { "src", "test", "resources", "integrationts", "asterix-configuration.xml" }, File.separator)),
+        FileUtils
+                .copyFile(
+                        new File(StringUtils.join(new String[] { "src", "test", "resources", "integrationts",
+                                "asterix-configuration.xml" }, File.separator)),
                 new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/conf/asterix-configuration.xml"));
 
         AsterixLifecycleIT.setUp();
 
-
         FileUtils.copyDirectoryStructure(
                 new File(StringUtils.join(new String[] { "..", "asterix-app", "data" }, File.separator)),
                 new File(AsterixInstallerIntegrationUtil.getManagixHome() + "/clusters/local/working_dir/data"));
 
-
         // Set the node resolver to be the identity resolver that expects node names
         // to be node controller ids; a valid assumption in test environment.
-        System.setProperty(FileSystemBasedAdapter.NODE_RESOLVER_FACTORY_PROPERTY,
+        System.setProperty(ExternalDataConstants.NODE_RESOLVER_FACTORY_PROPERTY,
                 IdentitiyResolverFactory.class.getName());
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixExternalLibraryIT.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixExternalLibraryIT.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixExternalLibraryIT.java
index 438bb05..1da01c3 100644
--- a/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixExternalLibraryIT.java
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixExternalLibraryIT.java
@@ -43,16 +43,21 @@ public class AsterixExternalLibraryIT {
 
     @BeforeClass
     public static void setUp() throws Exception {
-        AsterixInstallerIntegrationUtil.init();
-        File asterixInstallerProjectDir = new File(System.getProperty("user.dir"));
-        String asterixExternalLibraryPath = asterixInstallerProjectDir.getParentFile().getAbsolutePath()
-                + File.separator + LIBRARY_PATH;
-        LOGGER.info("Installing library :" + LIBRARY_NAME + " located at " + asterixExternalLibraryPath
-                + " in dataverse " + LIBRARY_DATAVERSE);
-        AsterixInstallerIntegrationUtil.installLibrary(LIBRARY_NAME, LIBRARY_DATAVERSE, asterixExternalLibraryPath);
-        AsterixInstallerIntegrationUtil.transformIntoRequiredState(State.ACTIVE);
-        TestCaseContext.Builder b = new TestCaseContext.Builder();
-        testCaseCollection = b.build(new File(PATH_BASE));
+        try {
+            AsterixInstallerIntegrationUtil.init();
+            File asterixInstallerProjectDir = new File(System.getProperty("user.dir"));
+            String asterixExternalLibraryPath = asterixInstallerProjectDir.getParentFile().getAbsolutePath()
+                    + File.separator + LIBRARY_PATH;
+            LOGGER.info("Installing library :" + LIBRARY_NAME + " located at " + asterixExternalLibraryPath
+                    + " in dataverse " + LIBRARY_DATAVERSE);
+            AsterixInstallerIntegrationUtil.installLibrary(LIBRARY_NAME, LIBRARY_DATAVERSE, asterixExternalLibraryPath);
+            AsterixInstallerIntegrationUtil.transformIntoRequiredState(State.ACTIVE);
+            TestCaseContext.Builder b = new TestCaseContext.Builder();
+            testCaseCollection = b.build(new File(PATH_BASE));
+        } catch (Throwable th) {
+            th.printStackTrace();
+            throw th;
+        }
     }
 
     @AfterClass

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixInstallerIntegrationUtil.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixInstallerIntegrationUtil.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixInstallerIntegrationUtil.java
index c000d55..34a8733 100644
--- a/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixInstallerIntegrationUtil.java
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/AsterixInstallerIntegrationUtil.java
@@ -93,6 +93,7 @@ public class AsterixInstallerIntegrationUtil {
         String command = "shutdown";
         cmdHandler.processCommand(command.split(" "));
 
+        //TODO: This must be fixed; an arbitrary 2s wait is not a reliable way to make sure the process has completed successfully.
         Thread.sleep(2000);
 
         // start zookeeper

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-installer/src/test/java/org/apache/asterix/installer/test/ClusterExecutionIT.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ClusterExecutionIT.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ClusterExecutionIT.java
index 93e9f6d..cf69e1a 100644
--- a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ClusterExecutionIT.java
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ClusterExecutionIT.java
@@ -18,10 +18,12 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.logging.Level;
-import java.util.logging.Logger;
 
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.IdentitiyResolverFactory;
 import org.apache.asterix.test.aql.TestExecutor;
 import org.apache.asterix.test.runtime.HDFSCluster;
+import org.apache.asterix.testframework.context.TestCaseContext;
 import org.apache.commons.lang3.StringUtils;
 import org.codehaus.plexus.util.FileUtils;
 import org.junit.AfterClass;
@@ -31,20 +33,16 @@ import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameters;
 
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
-import org.apache.asterix.external.util.IdentitiyResolverFactory;
-import org.apache.asterix.testframework.context.TestCaseContext;
-
 /**
  * Runs the runtime test cases under 'asterix-app/src/test/resources/runtimets'.
  */
 @RunWith(Parameterized.class)
-public class ClusterExecutionIT extends AbstractExecutionIT{
+public class ClusterExecutionIT extends AbstractExecutionIT {
 
     private static final String CLUSTER_CC_ADDRESS = "10.10.0.2";
     private static final int CLUSTER_CC_API_PORT = 19002;
 
-    private final static TestExecutor testExecutor = new TestExecutor(CLUSTER_CC_ADDRESS,CLUSTER_CC_API_PORT);
+    private final static TestExecutor testExecutor = new TestExecutor(CLUSTER_CC_ADDRESS, CLUSTER_CC_API_PORT);
 
     @BeforeClass
     public static void setUp() throws Exception {
@@ -60,13 +58,14 @@ public class ClusterExecutionIT extends AbstractExecutionIT{
         AsterixClusterLifeCycleIT.setUp();
 
         FileUtils.copyDirectoryStructure(
-                new File(StringUtils.join(new String[] { "..", "asterix-app", "data" }, File.separator)), new File(
-                StringUtils.join(new String[] { "src", "test", "resources", "clusterts", "managix-working", "data" },
+                new File(StringUtils.join(new String[] { "..", "asterix-app", "data" }, File.separator)),
+                new File(StringUtils.join(
+                        new String[] { "src", "test", "resources", "clusterts", "managix-working", "data" },
                         File.separator)));
 
         // Set the node resolver to be the identity resolver that expects node names
         // to be node controller ids; a valid assumption in test environment.
-        System.setProperty(FileSystemBasedAdapter.NODE_RESOLVER_FACTORY_PROPERTY,
+        System.setProperty(ExternalDataConstants.NODE_RESOLVER_FACTORY_PROPERTY,
                 IdentitiyResolverFactory.class.getName());
     }
 
@@ -100,6 +99,7 @@ public class ClusterExecutionIT extends AbstractExecutionIT{
         this.tcCtx = tcCtx;
     }
 
+    @Override
     @Test
     public void test() throws Exception {
         testExecutor.executeTest(PATH_ACTUAL, tcCtx, null, false);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixExecutionIT.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixExecutionIT.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixExecutionIT.java
index 17184c7..492f173 100644
--- a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixExecutionIT.java
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixExecutionIT.java
@@ -14,25 +14,9 @@
  */
 package org.apache.asterix.installer.test;
 
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
-import org.apache.asterix.external.util.IdentitiyResolverFactory;
-import org.apache.asterix.test.aql.TestExecutor;
 import org.apache.asterix.testframework.context.TestCaseContext;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.asterix.test.runtime.HDFSCluster;
-import org.codehaus.plexus.util.FileUtils;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
 
 /**
  * Runs the runtime test cases under 'asterix-app/src/test/resources/runtimets'.
@@ -40,7 +24,6 @@ import org.junit.runners.Parameterized.Parameters;
 @RunWith(Parameterized.class)
 public class ManagixExecutionIT extends AbstractExecutionIT {
 
-
     private TestCaseContext tcCtx;
 
     public ManagixExecutionIT(TestCaseContext tcCtx) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixSqlppExecutionIT.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixSqlppExecutionIT.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixSqlppExecutionIT.java
index 2e66afd..b9c2072 100644
--- a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixSqlppExecutionIT.java
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ManagixSqlppExecutionIT.java
@@ -17,19 +17,8 @@ package org.apache.asterix.installer.test;
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 
-import org.apache.asterix.external.dataset.adapter.FileSystemBasedAdapter;
-import org.apache.asterix.external.util.IdentitiyResolverFactory;
-import org.apache.asterix.test.aql.TestExecutor;
-import org.apache.asterix.test.runtime.HDFSCluster;
 import org.apache.asterix.testframework.context.TestCaseContext;
-import org.apache.commons.lang3.StringUtils;
-import org.codehaus.plexus.util.FileUtils;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameters;
@@ -38,7 +27,7 @@ import org.junit.runners.Parameterized.Parameters;
  * Runs the runtime test cases under 'asterix-app/src/test/resources/runtimets'.
  */
 @RunWith(Parameterized.class)
-public class ManagixSqlppExecutionIT extends ManagixExecutionIT{
+public class ManagixSqlppExecutionIT extends ManagixExecutionIT {
 
     @Parameters
     public static Collection<Object[]> tests() throws Exception {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-lang-aql/src/main/java/org/apache/asterix/lang/aql/statement/SubscribeFeedStatement.java
----------------------------------------------------------------------
diff --git a/asterix-lang-aql/src/main/java/org/apache/asterix/lang/aql/statement/SubscribeFeedStatement.java b/asterix-lang-aql/src/main/java/org/apache/asterix/lang/aql/statement/SubscribeFeedStatement.java
index a62abaa..71c762a 100644
--- a/asterix-lang-aql/src/main/java/org/apache/asterix/lang/aql/statement/SubscribeFeedStatement.java
+++ b/asterix-lang-aql/src/main/java/org/apache/asterix/lang/aql/statement/SubscribeFeedStatement.java
@@ -29,7 +29,7 @@ import org.apache.asterix.common.feeds.FeedConnectionRequest;
 import org.apache.asterix.common.feeds.FeedId;
 import org.apache.asterix.common.feeds.FeedPolicyAccessor;
 import org.apache.asterix.common.functions.FunctionSignature;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.lang.aql.parser.AQLParserFactory;
 import org.apache.asterix.lang.common.base.IParser;
 import org.apache.asterix.lang.common.base.IParserFactory;
@@ -188,7 +188,7 @@ public class SubscribeFeedStatement implements Statement {
         try {
             switch (feed.getFeedType()) {
                 case PRIMARY:
-                    Triple<IFeedAdapterFactory, ARecordType, AdapterType> factoryOutput = null;
+                    Triple<IAdapterFactory, ARecordType, AdapterType> factoryOutput = null;
 
                     factoryOutput = FeedUtil.getPrimaryFeedFactoryAndOutput((PrimaryFeed) feed, policyAccessor,
                             mdTxnCtx);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
----------------------------------------------------------------------
diff --git a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
index 3e21653..a113864 100644
--- a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
+++ b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
@@ -72,11 +72,34 @@ public class [LEXER_NAME] {
 // ================================================================================
 //  Public interface
 // ================================================================================
-    
+
     public [LEXER_NAME](java.io.Reader stream) throws IOException{
         reInit(stream);
     }
 
+    public [LEXER_NAME]() throws IOException{
+        reInit();
+    }
+
+    public void setBuffer(char[] buffer){
+        this.buffer = buffer;
+        tokenBegin = bufpos = 0;
+        containsEscapes = false;
+        line++;
+        tokenBegin = -1;
+    }
+
+    public void reInit(){
+        bufsize        = Integer.MAX_VALUE;
+        endOf_UNUSED_Buffer = bufsize;
+        endOf_USED_Buffer = bufsize;
+        line           = 0;
+        prevCharIsCR   = false;
+        prevCharIsLF   = false;
+        tokenBegin     = -1;
+        maxUnusedBufferSize = bufsize;
+    }
+
     public void reInit(java.io.Reader stream) throws IOException{
         done();
         inputStream    = stream;
@@ -239,5 +262,5 @@ public class [LEXER_NAME] {
       bufsize += maxUnusedBufferSize;
       endOf_UNUSED_Buffer = bufsize;
       tokenBegin = 0;
-    }    
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
index 5317fc2..a73a236 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
@@ -39,9 +39,13 @@ import org.apache.asterix.common.config.DatasetConfig.IndexType;
 import org.apache.asterix.common.config.GlobalConfig;
 import org.apache.asterix.common.config.IAsterixPropertiesProvider;
 import org.apache.asterix.common.context.BaseOperationTracker;
+import org.apache.asterix.common.context.CorrelatedPrefixMergePolicyFactory;
 import org.apache.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactory;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
+import org.apache.asterix.external.adapter.factory.GenericAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.runtime.GenericSocketFeedAdapterFactory;
+import org.apache.asterix.external.runtime.SocketClientAdapterFactory;
 import org.apache.asterix.metadata.IDatasetDetails;
 import org.apache.asterix.metadata.MetadataException;
 import org.apache.asterix.metadata.MetadataManager;
@@ -76,12 +80,14 @@ import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
 import org.apache.hyracks.api.io.FileReference;
 import org.apache.hyracks.api.io.IIOManager;
-import org.apache.hyracks.storage.am.common.util.IndexFileNameUtil;
 import org.apache.hyracks.storage.am.lsm.btree.impls.LSMBTree;
 import org.apache.hyracks.storage.am.lsm.btree.util.LSMBTreeUtils;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
 import org.apache.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
+import org.apache.hyracks.storage.am.lsm.common.impls.ConstantMergePolicyFactory;
+import org.apache.hyracks.storage.am.lsm.common.impls.NoMergePolicyFactory;
+import org.apache.hyracks.storage.am.lsm.common.impls.PrefixMergePolicyFactory;
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.file.IFileMapProvider;
 import org.apache.hyracks.storage.common.file.ILocalResourceFactory;
@@ -316,19 +322,8 @@ public class MetadataBootstrap {
     }
 
     private static void insertInitialAdapters(MetadataTransactionContext mdTxnCtx) throws Exception {
-        String[] builtInAdapterClassNames = new String[] {
-                "org.apache.asterix.external.adapter.factory.PullBasedAzureTwitterAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.NCFileSystemAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.HDFSAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.HiveAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.PullBasedTwitterAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.PushBasedTwitterAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.RSSFeedAdapterFactory",
-                "org.apache.asterix.external.adapter.factory.CNNFeedAdapterFactory",
-                "org.apache.asterix.tools.external.data.RateControlledFileSystemBasedAdapterFactory",
-                "org.apache.asterix.tools.external.data.TwitterFirehoseFeedAdapterFactory",
-                "org.apache.asterix.tools.external.data.GenericSocketFeedAdapterFactory",
-                "org.apache.asterix.tools.external.data.SocketClientAdapterFactory" };
+        String[] builtInAdapterClassNames = new String[] { GenericAdapterFactory.class.getName(),
+                GenericSocketFeedAdapterFactory.class.getName(), SocketClientAdapterFactory.class.getName() };
         DatasourceAdapter adapter;
         for (String adapterClassName : builtInAdapterClassNames) {
             adapter = getAdapter(adapterClassName);
@@ -349,11 +344,9 @@ public class MetadataBootstrap {
     }
 
     private static void insertInitialCompactionPolicies(MetadataTransactionContext mdTxnCtx) throws Exception {
-        String[] builtInCompactionPolicyClassNames = new String[] {
-                "org.apache.hyracks.storage.am.lsm.common.impls.ConstantMergePolicyFactory",
-                "org.apache.hyracks.storage.am.lsm.common.impls.PrefixMergePolicyFactory",
-                "org.apache.hyracks.storage.am.lsm.common.impls.NoMergePolicyFactory",
-                "org.apache.asterix.common.context.CorrelatedPrefixMergePolicyFactory" };
+        String[] builtInCompactionPolicyClassNames = new String[] { ConstantMergePolicyFactory.class.getName(),
+                PrefixMergePolicyFactory.class.getName(), NoMergePolicyFactory.class.getName(),
+                CorrelatedPrefixMergePolicyFactory.class.getName() };
         CompactionPolicy compactionPolicy;
         for (String policyClassName : builtInCompactionPolicyClassNames) {
             compactionPolicy = getCompactionPolicyEntity(policyClassName);
@@ -362,7 +355,7 @@ public class MetadataBootstrap {
     }
 
     private static DatasourceAdapter getAdapter(String adapterFactoryClassName) throws Exception {
-        String adapterName = ((IAdapterFactory) (Class.forName(adapterFactoryClassName).newInstance())).getName();
+        String adapterName = ((IAdapterFactory) (Class.forName(adapterFactoryClassName).newInstance())).getAlias();
         return new DatasourceAdapter(new AdapterIdentifier(MetadataConstants.METADATA_DATAVERSE_NAME, adapterName),
                 adapterFactoryClassName, DatasourceAdapter.AdapterType.INTERNAL);
     }
@@ -378,8 +371,7 @@ public class MetadataBootstrap {
         ClusterPartition metadataPartition = propertiesProvider.getMetadataProperties().getMetadataPartition();
         int metadataDeviceId = metadataPartition.getIODeviceNum();
         String metadataPartitionPath = SplitsAndConstraintsUtil.prepareStoragePartitionPath(
-                AsterixClusterProperties.INSTANCE.getStorageDirectoryName(),
-                metadataPartition.getPartitionId());
+                AsterixClusterProperties.INSTANCE.getStorageDirectoryName(), metadataPartition.getPartitionId());
         String resourceName = metadataPartitionPath + File.separator + index.getFileNameRelativePath();
         FileReference file = ioManager.getAbsoluteFileRef(metadataDeviceId, resourceName);
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
index 745f436..c9157df 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
@@ -23,7 +23,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -52,17 +51,17 @@ import org.apache.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactor
 import org.apache.asterix.common.ioopcallbacks.LSMBTreeWithBuddyIOOperationCallbackFactory;
 import org.apache.asterix.common.ioopcallbacks.LSMInvertedIndexIOOperationCallbackFactory;
 import org.apache.asterix.common.ioopcallbacks.LSMRTreeIOOperationCallbackFactory;
-import org.apache.asterix.common.parse.IParseFileSplitsDecl;
 import org.apache.asterix.common.transactions.IRecoveryManager.ResourceType;
 import org.apache.asterix.common.transactions.JobId;
 import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlPrimitiveValueProviderFactory;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory.SupportedOperation;
-import org.apache.asterix.external.adapter.factory.IFeedAdapterFactory;
+import org.apache.asterix.external.adapter.factory.LookupAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
 import org.apache.asterix.external.indexing.ExternalFile;
 import org.apache.asterix.external.indexing.IndexingConstants;
-import org.apache.asterix.external.indexing.dataflow.HDFSLookupAdapterFactory;
-import org.apache.asterix.external.indexing.operators.ExternalLoopkupOperatorDiscriptor;
+import org.apache.asterix.external.operators.ExternalBTreeSearchOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalLookupOperatorDescriptor;
+import org.apache.asterix.external.operators.ExternalRTreeSearchOperatorDescriptor;
+import org.apache.asterix.external.provider.AdapterFactoryProvider;
 import org.apache.asterix.formats.base.IDataFormat;
 import org.apache.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
 import org.apache.asterix.formats.nontagged.AqlLinearizeComparatorFactoryProvider;
@@ -100,8 +99,6 @@ import org.apache.asterix.om.util.AsterixAppContextInfo;
 import org.apache.asterix.om.util.AsterixClusterProperties;
 import org.apache.asterix.om.util.NonTaggedFormatUtil;
 import org.apache.asterix.runtime.base.AsterixTupleFilterFactory;
-import org.apache.asterix.runtime.external.ExternalBTreeSearchOperatorDescriptor;
-import org.apache.asterix.runtime.external.ExternalRTreeSearchOperatorDescriptor;
 import org.apache.asterix.runtime.formats.FormatUtils;
 import org.apache.asterix.runtime.formats.NonTaggedDataFormat;
 import org.apache.asterix.runtime.job.listener.JobEventListenerFactory;
@@ -153,11 +150,8 @@ import org.apache.hyracks.api.job.JobSpecification;
 import org.apache.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
 import org.apache.hyracks.data.std.primitive.ShortPointable;
 import org.apache.hyracks.dataflow.common.data.marshalling.ShortSerializerDeserializer;
-import org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider;
-import org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
 import org.apache.hyracks.dataflow.std.file.FileSplit;
 import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
 import org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
 import org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
 import org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory;
@@ -202,8 +196,6 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
 
     private final AsterixStorageProperties storageProperties;
 
-    public static final Map<String, String> adapterFactoryMapping = initializeAdapterFactoryMapping();
-
     public String getPropertyValue(String propertyName) {
         return config.get(propertyName);
     }
@@ -490,10 +482,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
     private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildLoadableDatasetScan(JobSpecification jobSpec,
             LoadableDataSource alds, IAdapterFactory adapterFactory, RecordDescriptor rDesc, boolean isPKAutoGenerated,
             List<List<String>> primaryKeys, ARecordType recType, int pkIndex) throws AlgebricksException {
-        if (!(adapterFactory.getSupportedOperations().equals(SupportedOperation.READ)
-                || adapterFactory.getSupportedOperations().equals(SupportedOperation.READ_WRITE))) {
-            throw new AlgebricksException(" External dataset adapter does not support read operation");
-        }
+
         ExternalDataScanOperatorDescriptor dataScanner = new ExternalDataScanOperatorDescriptor(jobSpec, rDesc,
                 adapterFactory);
         AlgebricksPartitionConstraint constraint;
@@ -552,24 +541,9 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
     private IAdapterFactory getConfiguredAdapterFactory(Dataset dataset, String adapterName,
             Map<String, String> configuration, IAType itemType, boolean isPKAutoGenerated,
             List<List<String>> primaryKeys) throws AlgebricksException {
-        IAdapterFactory adapterFactory;
-        DatasourceAdapter adapterEntity;
-        String adapterFactoryClassname;
         try {
-            adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, MetadataConstants.METADATA_DATAVERSE_NAME,
-                    adapterName);
-            if (adapterEntity != null) {
-                adapterFactoryClassname = adapterEntity.getClassname();
-                adapterFactory = (IAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
-            } else {
-                adapterFactoryClassname = adapterFactoryMapping.get(adapterName);
-                if (adapterFactoryClassname == null) {
-                    throw new AlgebricksException(" Unknown adapter :" + adapterName);
-                }
-                adapterFactory = (IAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
-            }
-
-            adapterFactory.configure(configuration, (ARecordType) itemType);
+            IAdapterFactory adapterFactory = AdapterFactoryProvider.getAdapterFactory(adapterName, configuration,
+                    (ARecordType) itemType);
 
             // check to see if dataset is indexed
             Index filesIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
@@ -602,11 +576,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
             throw new AlgebricksException("Can only scan datasets of records.");
         }
 
-        if (!(adapterFactory.getSupportedOperations().equals(SupportedOperation.READ)
-                || adapterFactory.getSupportedOperations().equals(SupportedOperation.READ_WRITE))) {
-            throw new AlgebricksException(" External dataset adapter does not support read operation");
-        }
-
+        @SuppressWarnings("rawtypes")
         ISerializerDeserializer payloadSerde = format.getSerdeProvider().getSerializerDeserializer(itemType);
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { payloadSerde });
 
@@ -623,33 +593,11 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
         return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(dataScanner, constraint);
     }
 
-    @SuppressWarnings("rawtypes")
-    public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildScannerRuntime(JobSpecification jobSpec,
-            IAType itemType, IParseFileSplitsDecl decl, IDataFormat format) throws AlgebricksException {
-        if (itemType.getTypeTag() != ATypeTag.RECORD) {
-            throw new AlgebricksException("Can only scan datasets of records.");
-        }
-        ARecordType rt = (ARecordType) itemType;
-        ITupleParserFactory tupleParser = format.createTupleParser(rt, decl);
-        FileSplit[] splits = decl.getSplits();
-        IFileSplitProvider scannerSplitProvider = new ConstantFileSplitProvider(splits);
-        ISerializerDeserializer payloadSerde = format.getSerdeProvider().getSerializerDeserializer(itemType);
-        RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { payloadSerde });
-        IOperatorDescriptor scanner = new FileScanOperatorDescriptor(jobSpec, scannerSplitProvider, tupleParser,
-                scannerDesc);
-        String[] locs = new String[splits.length];
-        for (int i = 0; i < splits.length; i++) {
-            locs[i] = splits[i].getNodeName();
-        }
-        AlgebricksPartitionConstraint apc = new AlgebricksAbsolutePartitionConstraint(locs);
-        return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(scanner, apc);
-    }
-
-    public Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IFeedAdapterFactory> buildFeedIntakeRuntime(
+    public Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IAdapterFactory> buildFeedIntakeRuntime(
             JobSpecification jobSpec, PrimaryFeed primaryFeed, FeedPolicyAccessor policyAccessor) throws Exception {
-        Triple<IFeedAdapterFactory, ARecordType, AdapterType> factoryOutput = null;
+        Triple<IAdapterFactory, ARecordType, AdapterType> factoryOutput = null;
         factoryOutput = FeedUtil.getPrimaryFeedFactoryAndOutput(primaryFeed, policyAccessor, mdTxnCtx);
-        IFeedAdapterFactory adapterFactory = factoryOutput.first;
+        IAdapterFactory adapterFactory = factoryOutput.first;
         FeedIntakeOperatorDescriptor feedIngestor = null;
         switch (factoryOutput.third) {
             case INTERNAL:
@@ -665,7 +613,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
         }
 
         AlgebricksPartitionConstraint partitionConstraint = adapterFactory.getPartitionConstraint();
-        return new Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IFeedAdapterFactory>(feedIngestor,
+        return new Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IAdapterFactory>(feedIngestor,
                 partitionConstraint, adapterFactory);
     }
 
@@ -1515,7 +1463,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
                     dataverseName, datasetName, indexName, dataset.getDatasetDetails().isTemp());
 
             // Generate Output Record format
-            ISerializerDeserializer[] tokenKeyPairFields = new ISerializerDeserializer[numTokenKeyPairFields];
+            ISerializerDeserializer<?>[] tokenKeyPairFields = new ISerializerDeserializer[numTokenKeyPairFields];
             ITypeTraits[] tokenKeyPairTypeTraits = new ITypeTraits[numTokenKeyPairFields];
             ISerializerDeserializerProvider serdeProvider = FormatUtils.getDefaultFormat().getSerdeProvider();
 
@@ -2102,7 +2050,6 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
      * Calculate an estimate size of the bloom filter. Note that this is an
      * estimation which assumes that the data is going to be uniformly
      * distributed across all partitions.
-     *
      * @param dataset
      * @return Number of elements that will be used to create a bloom filter per
      *         dataset per partition
@@ -2147,24 +2094,6 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
         return SplitsAndConstraintsUtil.splitsForDataset(mdTxnCtx, dataverseName, datasetName, targetIdxName, temp);
     }
 
-    private static Map<String, String> initializeAdapterFactoryMapping() {
-        Map<String, String> adapterFactoryMapping = new HashMap<String, String>();
-        adapterFactoryMapping.put("org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter",
-                "org.apache.asterix.external.adapter.factory.NCFileSystemAdapterFactory");
-        adapterFactoryMapping.put("org.apache.asterix.external.dataset.adapter.HDFSAdapter",
-                "org.apache.asterix.external.adapter.factory.HDFSAdapterFactory");
-        adapterFactoryMapping.put("org.apache.asterix.external.dataset.adapter.PullBasedTwitterAdapter",
-                "org.apache.asterix.external.dataset.adapter.PullBasedTwitterAdapterFactory");
-        adapterFactoryMapping.put("org.apache.asterix.external.dataset.adapter.RSSFeedAdapter",
-                "org.apache.asterix.external.dataset.adapter..RSSFeedAdapterFactory");
-        adapterFactoryMapping.put("org.apache.asterix.external.dataset.adapter.CNNFeedAdapter",
-                "org.apache.asterix.external.dataset.adapter.CNNFeedAdapterFactory");
-        adapterFactoryMapping.put("org.apache.asterix.tools.external.data.RateControlledFileSystemBasedAdapter",
-                "org.apache.asterix.tools.external.data.RateControlledFileSystemBasedAdapterFactory");
-
-        return adapterFactoryMapping;
-    }
-
     public DatasourceAdapter getAdapter(MetadataTransactionContext mdTxnCtx, String dataverseName, String adapterName)
             throws MetadataException {
         DatasourceAdapter adapter = null;
@@ -2232,35 +2161,6 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
         return FormatUtils.getDefaultFormat();
     }
 
-    /**
-     * Add HDFS scheduler and the cluster location constraint into the scheduler
-     *
-     * @param properties
-     *            the original dataset properties
-     * @return a new map containing the original dataset properties and the
-     *         scheduler/locations
-     */
-    private static Map<String, Object> wrapProperties(Map<String, String> properties) {
-        Map<String, Object> wrappedProperties = new HashMap<String, Object>();
-        wrappedProperties.putAll(properties);
-        // wrappedProperties.put(SCHEDULER, hdfsScheduler);
-        // wrappedProperties.put(CLUSTER_LOCATIONS, getClusterLocations());
-        return wrappedProperties;
-    }
-
-    /**
-     * Adapt the original properties to a string-object map
-     *
-     * @param properties
-     *            the original properties
-     * @return the new stirng-object map
-     */
-    private static Map<String, Object> wrapPropertiesEmpty(Map<String, String> properties) {
-        Map<String, Object> wrappedProperties = new HashMap<String, Object>();
-        wrappedProperties.putAll(properties);
-        return wrappedProperties;
-    }
-
     public Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitProviderAndPartitionConstraintsForFilesIndex(
             String dataverseName, String datasetName, String targetIdxName, boolean create) throws AlgebricksException {
         return SplitsAndConstraintsUtil.splitProviderAndPartitionConstraintsForFilesIndex(mdTxnCtx, dataverseName,
@@ -2284,67 +2184,54 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId, Strin
             IVariableTypeEnvironment typeEnv, List<LogicalVariable> outputVars, IOperatorSchema opSchema,
             JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull)
                     throws AlgebricksException {
-        // Get data type
-        IAType itemType = null;
         try {
+            // Get data type
+            IAType itemType = null;
             itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(),
                     dataset.getDataverseName(), dataset.getItemTypeName()).getDatatype();
-        } catch (MetadataException e) {
-            e.printStackTrace();
-            throw new AlgebricksException("Unable to get item type from metadata " + e);
-        }
-        if (itemType.getTypeTag() != ATypeTag.RECORD) {
-            throw new AlgebricksException("Can only scan datasets of records.");
-        }
 
-        // Create the adapter factory <- right now there is only one. if there are more in the future, we can create a map->
-        ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
-        HDFSLookupAdapterFactory adapterFactory = new HDFSLookupAdapterFactory();
-        adapterFactory.configure(itemType, retainInput, ridIndexes, datasetDetails.getProperties(), retainNull);
+            // Create the adapter factory <- right now there is only one. if there are more in the future, we can create a map->
+            ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
+            LookupAdapterFactory<?> adapterFactory = AdapterFactoryProvider.getAdapterFactory(
+                    datasetDetails.getProperties(), (ARecordType) itemType, ridIndexes, retainInput, retainNull,
+                    context.getNullWriterFactory());
 
-        Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo;
-        try {
-            compactionInfo = DatasetUtils.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
-        } catch (MetadataException e) {
-            throw new AlgebricksException(" Unabel to create merge policy factory for external dataset", e);
-        }
-
-        boolean temp = dataset.getDatasetDetails().isTemp();
-        // Create the file index data flow helper
-        ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory(
-                compactionInfo.first, compactionInfo.second,
-                new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()),
-                AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE,
-                metadataProvider.getStorageProperties().getBloomFilterFalsePositiveRate(),
-                ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, metadataProvider), !temp);
-
-        // Create the out record descriptor, appContext and fileSplitProvider for the files index
-        RecordDescriptor outRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
-        IAsterixApplicationContextInfo appContext = (IAsterixApplicationContextInfo) context.getAppContext();
-        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc;
-        try {
-            spPc = metadataProvider.splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(),
-                    dataset.getDatasetName(), dataset.getDatasetName().
-
-            concat(IndexingConstants.EXTERNAL_FILE_INDEX_NAME_SUFFIX), false);
-        } catch (
+            Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo;
+            try {
+                compactionInfo = DatasetUtils.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
+            } catch (MetadataException e) {
+                throw new AlgebricksException(" Unabel to create merge policy factory for external dataset", e);
+            }
 
-        Exception e)
+            boolean temp = datasetDetails.isTemp();
+            // Create the file index data flow helper
+            ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory(
+                    compactionInfo.first, compactionInfo.second,
+                    new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()),
+                    AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE,
+                    metadataProvider.getStorageProperties().getBloomFilterFalsePositiveRate(),
+                    ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, metadataProvider), !temp);
 
-        {
+            // Create the out record descriptor, appContext and fileSplitProvider for the files index
+            RecordDescriptor outRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
+            IAsterixApplicationContextInfo appContext = (IAsterixApplicationContextInfo) context.getAppContext();
+            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc;
+            spPc = metadataProvider.splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(),
+                    dataset.getDatasetName(),
+                    dataset.getDatasetName().concat(IndexingConstants.EXTERNAL_FILE_INDEX_NAME_SUFFIX), false);
+            ISearchOperationCallbackFactory searchOpCallbackFactory = temp ? NoOpOperationCallbackFactory.INSTANCE
+                    : new SecondaryIndexSearchOperationCallbackFactory();
+            // Create the operator
+            ExternalLookupOperatorDescriptor op = new ExternalLookupOperatorDescriptor(jobSpec, adapterFactory,
+                    outRecDesc, indexDataflowHelperFactory, retainInput, appContext.getIndexLifecycleManagerProvider(),
+                    appContext.getStorageManagerInterface(), spPc.first, dataset.getDatasetId(),
+                    metadataProvider.getStorageProperties().getBloomFilterFalsePositiveRate(), searchOpCallbackFactory,
+                    retainNull, context.getNullWriterFactory());
+
+            // Return value
+            return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(op, spPc.second);
+        } catch (Exception e) {
             throw new AlgebricksException(e);
         }
-
-        ISearchOperationCallbackFactory searchOpCallbackFactory = temp ? NoOpOperationCallbackFactory.INSTANCE
-                : new SecondaryIndexSearchOperationCallbackFactory();
-        // Create the operator
-        ExternalLoopkupOperatorDiscriptor op = new ExternalLoopkupOperatorDiscriptor(jobSpec, adapterFactory,
-                outRecDesc, indexDataflowHelperFactory, retainInput, appContext.getIndexLifecycleManagerProvider(),
-                appContext.getStorageManagerInterface(), spPc.first, dataset.getDatasetId(),
-                metadataProvider.getStorageProperties().getBloomFilterFalsePositiveRate(), searchOpCallbackFactory,
-                retainNull, context.getNullWriterFactory());
-
-        // Return value
-        return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(op, spPc.second);
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapter.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapter.java
deleted file mode 100644
index f4484cf..0000000
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapter.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.metadata.declared;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.dataflow.data.nontagged.serde.ARecordSerializerDeserializer;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.util.NonTaggedFormatUtil;
-import org.apache.hyracks.algebricks.runtime.operators.base.AbstractOneInputOneOutputOneFramePushRuntime;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public class FieldExtractingAdapter implements IDatasourceAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private final RecordDescriptor inRecDesc;
-
-    private final RecordDescriptor outRecDesc;
-
-    private final IDatasourceAdapter wrappedAdapter;
-
-    private final FieldExtractingPushRuntime fefw;
-
-    public FieldExtractingAdapter(IHyracksTaskContext ctx, RecordDescriptor inRecDesc, RecordDescriptor outRecDesc,
-            int[][] extractFields, ARecordType rType, IDatasourceAdapter wrappedAdapter) {
-        this.inRecDesc = inRecDesc;
-        this.outRecDesc = outRecDesc;
-        this.wrappedAdapter = wrappedAdapter;
-        fefw = new FieldExtractingPushRuntime(ctx, extractFields, rType);
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        fefw.setInputRecordDescriptor(0, inRecDesc);
-        fefw.setFrameWriter(0, writer, outRecDesc);
-        fefw.open();
-        try {
-            wrappedAdapter.start(partition, fefw);
-        } catch (Throwable t) {
-            fefw.fail();
-            throw t;
-        } finally {
-            fefw.close();
-        }
-    }
-
-    private static class FieldExtractingPushRuntime extends AbstractOneInputOneOutputOneFramePushRuntime {
-
-        private final IHyracksTaskContext ctx;
-
-        private final int[][] extractFields;
-
-        private final ARecordType rType;
-
-        private final int nullBitmapSize;
-
-        private final ArrayTupleBuilder tb;
-
-        public FieldExtractingPushRuntime(IHyracksTaskContext ctx, int[][] extractFields, ARecordType rType) {
-            this.ctx = ctx;
-            this.extractFields = extractFields;
-            this.rType = rType;
-            nullBitmapSize = ARecordType.computeNullBitmapSize(rType);
-            tb = new ArrayTupleBuilder(extractFields.length + 1);
-        }
-
-        @Override
-        public void open() throws HyracksDataException {
-            initAccessAppendRef(ctx);
-        }
-
-        @Override
-        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
-            tAccess.reset(buffer);
-            for (int i = 0; i < tAccess.getTupleCount(); ++i) {
-                tb.reset();
-                tRef.reset(tAccess, i);
-                byte[] record = tRef.getFieldData(0);
-                int recStart = tRef.getFieldStart(0);
-                int recLength = tRef.getFieldLength(0);
-                for (int f = 0; f < extractFields.length; ++f) {
-                    try {
-                        byte[] subRecord = record;
-                        int subFStart = recStart;
-                        int subFOffset = 0;
-                        boolean isNull = false;
-                        IAType subFType = rType;
-                        int subFLen = recLength;
-                        int subBitMapSize = nullBitmapSize;
-                        byte[] subRecordTmp;
-
-                        for (int j = 0; j < extractFields[f].length; j++) {
-                            //Get offset for subfield
-                            subFOffset = ARecordSerializerDeserializer.getFieldOffsetById(subRecord, subFStart,
-                                    extractFields[f][j], subBitMapSize, ((ARecordType) subFType).isOpen());
-                            if (subFOffset == 0) {
-                                tb.getDataOutput().write(ATypeTag.NULL.serialize());
-                                isNull = true;
-                                break;
-                            } else {
-                                //Get type of subfield
-                                subFType = ((ARecordType) subFType).getFieldTypes()[extractFields[f][j]];
-                                try {
-                                    //Get length of subfield
-                                    subFLen = NonTaggedFormatUtil.getFieldValueLength(subRecord,
-                                            subFStart + subFOffset, subFType.getTypeTag(), false);
-
-                                    if (j < extractFields[f].length - 1) {
-                                        subRecordTmp = new byte[subFLen + 1];
-                                        subRecordTmp[0] = subFType.getTypeTag().serialize();
-                                        System.arraycopy(subRecord, subFStart + subFOffset, subRecordTmp, 1, subFLen);
-                                        subRecord = subRecordTmp;
-                                        subFStart = 0;
-                                        subBitMapSize = ARecordType.computeNullBitmapSize((ARecordType) subFType);
-                                    }
-
-                                } catch (AsterixException e) {
-                                    throw new HyracksDataException(e);
-                                }
-                            }
-                        }
-
-                        if (!isNull) {
-                            tb.getDataOutput().write(subFType.getTypeTag().serialize());
-                            tb.getDataOutput().write(subRecord, subFStart + subFOffset, subFLen);
-                        }
-
-                    } catch (IOException e) {
-                        throw new HyracksDataException(e);
-                    }
-                    tb.addFieldEndOffset();
-                }
-                tb.addField(record, recStart, tRef.getFieldLength(0));
-                appendToFrameFromTupleBuilder(tb);
-            }
-        }
-
-        @Override
-        public void close() throws HyracksDataException {
-            flushIfNotFailed();
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapterFactory.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapterFactory.java
deleted file mode 100644
index 989e4a3..0000000
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/FieldExtractingAdapterFactory.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.metadata.declared;
-
-import java.util.Map;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory;
-import org.apache.asterix.external.adapter.factory.IAdapterFactory.SupportedOperation;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-
-public class FieldExtractingAdapterFactory implements IAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private final IAdapterFactory wrappedAdapterFactory;
-
-    private final RecordDescriptor inRecDesc;
-
-    private final RecordDescriptor outRecDesc;
-
-    private final int[][] extractFields;
-
-    private final ARecordType rType;
-
-    public FieldExtractingAdapterFactory(IAdapterFactory wrappedAdapterFactory, RecordDescriptor inRecDesc,
-            RecordDescriptor outRecDesc, int[][] extractFields, ARecordType rType) {
-        this.wrappedAdapterFactory = wrappedAdapterFactory;
-        this.inRecDesc = inRecDesc;
-        this.outRecDesc = outRecDesc;
-        this.extractFields = extractFields;
-        this.rType = rType;
-    }
-
-    @Override
-    public SupportedOperation getSupportedOperations() {
-        return wrappedAdapterFactory.getSupportedOperations();
-    }
-
-    @Override
-    public String getName() {
-        return "FieldExtractingAdapter[ " + wrappedAdapterFactory.getName() + " ]";
-    }
-
-  
-    @Override
-    public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
-        return wrappedAdapterFactory.getPartitionConstraint();
-    }
-
-    @Override
-    public IDatasourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws Exception {
-        IDatasourceAdapter wrappedAdapter = wrappedAdapterFactory.createAdapter(ctx, partition);
-        return new FieldExtractingAdapter(ctx, inRecDesc, outRecDesc, extractFields, rType, wrappedAdapter);
-    }
-    
-    @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType) throws Exception {
-        wrappedAdapterFactory.configure(configuration, outputType);
-    }
-
-    @Override
-    public ARecordType getAdapterOutputType() {
-        return wrappedAdapterFactory.getAdapterOutputType();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapter.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapter.java
deleted file mode 100644
index e0c5fc0..0000000
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/PKGeneratingAdapter.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.metadata.declared;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.asterix.builders.RecordBuilder;
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.om.base.AMutableUUID;
-import org.apache.asterix.om.base.AUUID;
-import org.apache.asterix.om.pointables.ARecordVisitablePointable;
-import org.apache.asterix.om.pointables.PointableAllocator;
-import org.apache.asterix.om.pointables.base.IVisitablePointable;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.algebricks.runtime.operators.base.AbstractOneInputOneOutputOneFramePushRuntime;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public class PKGeneratingAdapter implements IDatasourceAdapter {
-
-    private static final long serialVersionUID = 1L;
-    private final RecordDescriptor inRecDesc;
-    private final RecordDescriptor outRecDesc;
-    private final IDatasourceAdapter wrappedAdapter;
-    private final PKGeneratingPushRuntime pkRuntime;
-    private final int pkIndex;
-
-    public PKGeneratingAdapter(IHyracksTaskContext ctx, RecordDescriptor inRecDesc, RecordDescriptor outRecDesc,
-            ARecordType inRecType, ARecordType outRecType, IDatasourceAdapter wrappedAdapter, int pkIndex) {
-        this.inRecDesc = inRecDesc;
-        this.outRecDesc = outRecDesc;
-        this.wrappedAdapter = wrappedAdapter;
-        this.pkRuntime = new PKGeneratingPushRuntime(ctx, inRecType, outRecType);
-        this.pkIndex = pkIndex;
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        pkRuntime.setInputRecordDescriptor(0, inRecDesc);
-        pkRuntime.setFrameWriter(0, writer, outRecDesc);
-        pkRuntime.open();
-        try {
-            wrappedAdapter.start(partition, pkRuntime);
-        } catch (Throwable t) {
-            pkRuntime.fail();
-            throw t;
-        } finally {
-            pkRuntime.close();
-        }
-    }
-
-    private class PKGeneratingPushRuntime extends AbstractOneInputOneOutputOneFramePushRuntime {
-        private final IHyracksTaskContext ctx;
-        private final ARecordType outRecType;
-        private final ArrayTupleBuilder tb;
-        private final AMutableUUID aUUID = new AMutableUUID(0, 0);
-        private final byte AUUIDTag = ATypeTag.UUID.serialize();
-        private final byte[] serializedUUID = new byte[16];
-        private final PointableAllocator pa = new PointableAllocator();
-        private final ARecordVisitablePointable recordPointable;
-        private final IAType[] outClosedTypes;
-
-        private final RecordBuilder recBuilder;
-
-        public PKGeneratingPushRuntime(IHyracksTaskContext ctx, ARecordType inRecType, ARecordType outRecType) {
-            this.ctx = ctx;
-            this.outRecType = outRecType;
-            this.tb = new ArrayTupleBuilder(2);
-            this.recBuilder = new RecordBuilder();
-            this.recordPointable = (ARecordVisitablePointable) pa.allocateRecordValue(inRecType);
-            this.outClosedTypes = outRecType.getFieldTypes();
-        }
-
-        /*
-         * We write this method in low level instead of using pre-existing libraries since this will be called for each record and to avoid 
-         * size validation
-         */
-        private void serializeUUID(AUUID aUUID, byte[] serializedUUID) {
-            long v = aUUID.getLeastSignificantBits();
-            serializedUUID[0] = (byte) (v >>> 56);
-            serializedUUID[1] = (byte) (v >>> 48);
-            serializedUUID[2] = (byte) (v >>> 40);
-            serializedUUID[3] = (byte) (v >>> 32);
-            serializedUUID[4] = (byte) (v >>> 24);
-            serializedUUID[5] = (byte) (v >>> 16);
-            serializedUUID[6] = (byte) (v >>> 8);
-            serializedUUID[7] = (byte) (v >>> 0);
-            v = aUUID.getMostSignificantBits();
-            serializedUUID[8] = (byte) (v >>> 56);
-            serializedUUID[9] = (byte) (v >>> 48);
-            serializedUUID[10] = (byte) (v >>> 40);
-            serializedUUID[11] = (byte) (v >>> 32);
-            serializedUUID[12] = (byte) (v >>> 24);
-            serializedUUID[13] = (byte) (v >>> 16);
-            serializedUUID[14] = (byte) (v >>> 8);
-            serializedUUID[15] = (byte) (v >>> 0);
-        }
-
-        @Override
-        public void open() throws HyracksDataException {
-            initAccessAppendRef(ctx);
-            recBuilder.reset(outRecType);
-            recBuilder.init();
-        }
-
-        @Override
-        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
-            try {
-                tAccess.reset(buffer);
-                for (int i = 0; i < tAccess.getTupleCount(); ++i) {
-                    tb.reset();
-                    tRef.reset(tAccess, i);
-
-                    // We need to do the following:
-                    // 1. generate a UUID
-                    // 2. fill in the first field with the UUID
-                    aUUID.nextUUID();
-                    tb.getDataOutput().writeByte(AUUIDTag);
-                    serializeUUID(aUUID, serializedUUID);
-                    tb.getDataOutput().write(serializedUUID);
-                    tb.addFieldEndOffset();
-                    // 3. fill in the second field with the record after adding to it the UUID
-                    recordPointable.set(tRef.getFieldData(0), tRef.getFieldStart(0), tRef.getFieldLength(0));
-                    // Start by closed fields
-                    int inIndex = 0;
-                    for (int f = 0; f < outClosedTypes.length; f++) {
-                        if (f == pkIndex) {
-                            recBuilder.addField(f, serializedUUID);
-                        } else {
-                            recBuilder.addField(f, recordPointable.getFieldValues().get(inIndex));
-                            inIndex++;
-                        }
-                    }
-
-                    // Add open fields
-                    if (outRecType.isOpen()) {
-                        List<IVisitablePointable> fp = recordPointable.getFieldNames();
-                        if (fp.size() >= outClosedTypes.length) {
-                            int index = outClosedTypes.length - 1;
-                            while (index < fp.size()) {
-                                recBuilder.addField(fp.get(index), recordPointable.getFieldValues().get(index));
-                                index++;
-                            }
-                        }
-                    }
-                    //write the record
-                    recBuilder.write(tb.getDataOutput(), true);
-                    tb.addFieldEndOffset();
-                    appendToFrameFromTupleBuilder(tb);
-                }
-            } catch (Exception e) {
-                throw new HyracksDataException("Error in the auto id generation and merge of the record", e);
-            }
-        }
-
-        @Override
-        public void close() throws HyracksDataException {
-            flushIfNotFailed();
-        }
-    }
-
-}


[18/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IResultCollector.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IResultCollector.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IResultCollector.java
new file mode 100755
index 0000000..9f14ec0
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IResultCollector.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.DataOutput;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.ARecord;
+import org.apache.asterix.om.base.IAObject;
+
+public interface IResultCollector {
+
+    public void writeIntResult(int result) throws AsterixException;
+
+    public void writeFloatResult(float result) throws AsterixException;
+
+    public void writeDoubleResult(double result) throws AsterixException;
+
+    public void writeStringResult(String result) throws AsterixException;
+
+    public void writeRecordResult(ARecord result) throws AsterixException;
+
+    public void writeListResult(AOrderedList list) throws AsterixException;
+
+    public IAObject getComplexTypeResultHolder();
+
+    public DataOutput getDataOutput();
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParser.java
new file mode 100644
index 0000000..31d6317
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParser.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import java.io.DataOutput;
+import java.io.InputStream;
+
+public interface IStreamDataParser extends IDataParser {
+    /**
+     * Sets the inputStream for the parser. Called only for parsers that support InputStreams.
+     */
+    public void setInputStream(InputStream in) throws Exception;
+
+    /**
+     * Parse data into output AsterixDataModel binary records.
+     * Used with parsers that support stream sources
+     *
+     * @param out
+     *            DataOutput instance used for writing the parser output.
+     */
+
+    public boolean parse(DataOutput out) throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParserFactory.java
new file mode 100644
index 0000000..828f71e
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamDataParserFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public interface IStreamDataParserFactory extends IDataParserFactory {
+
+    public IStreamDataParser createInputStreamParser(IHyracksTaskContext ctx, int partition)
+            throws HyracksDataException, AsterixException;
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamFlowController.java
new file mode 100644
index 0000000..d368c48
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/api/IStreamFlowController.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.api;
+
+public interface IStreamFlowController extends IDataFlowController {
+    public void setStreamParser(IStreamDataParser dataParser);
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
new file mode 100644
index 0000000..d06161e
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.asterix.external.api.IDataFlowController;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public abstract class AbstractDataFlowController implements IDataFlowController {
+
+    protected ITupleForwarder tupleForwarder;
+    protected IHyracksTaskContext ctx;
+    protected Map<String, String> configuration;
+
+    @Override
+    public ITupleForwarder getTupleForwarder() {
+        return tupleForwarder;
+    }
+
+    @Override
+    public void setTupleForwarder(ITupleForwarder tupleForwarder) {
+        this.tupleForwarder = tupleForwarder;
+    }
+
+    protected void initializeTupleForwarder(IFrameWriter writer) throws HyracksDataException {
+        tupleForwarder.initialize(ctx, writer);
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration, IHyracksTaskContext ctx) throws IOException {
+        this.configuration = configuration;
+        this.ctx = ctx;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/CounterTimerTupleForwarder.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/CounterTimerTupleForwarder.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/CounterTimerTupleForwarder.java
new file mode 100644
index 0000000..116ec09
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/CounterTimerTupleForwarder.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.hyracks.api.comm.IFrame;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksCommonContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
+
+public class CounterTimerTupleForwarder implements ITupleForwarder {
+
+    public static final String BATCH_SIZE = "batch-size";
+    public static final String BATCH_INTERVAL = "batch-interval";
+
+    private static final Logger LOGGER = Logger.getLogger(CounterTimerTupleForwarder.class.getName());
+
+    private FrameTupleAppender appender;
+    private IFrame frame;
+    private IFrameWriter writer;
+    private int batchSize;
+    private long batchInterval;
+    private int tuplesInFrame = 0;
+    private TimeBasedFlushTask flushTask;
+    private Timer timer;
+    private Object lock = new Object();
+    private boolean activeTimer = false;
+
+    @Override
+    public void configure(Map<String, String> configuration) {
+        String propValue = configuration.get(BATCH_SIZE);
+        if (propValue != null) {
+            batchSize = Integer.parseInt(propValue);
+        } else {
+            batchSize = -1;
+        }
+
+        propValue = configuration.get(BATCH_INTERVAL);
+        if (propValue != null) {
+            batchInterval = Long.parseLong(propValue);
+            activeTimer = true;
+        }
+    }
+
+    @Override
+    public void initialize(IHyracksCommonContext ctx, IFrameWriter writer) throws HyracksDataException {
+        this.appender = new FrameTupleAppender();
+        this.frame = new VSizeFrame(ctx);
+        appender.reset(frame, true);
+        this.writer = writer;
+        if (activeTimer) {
+            this.timer = new Timer();
+            this.flushTask = new TimeBasedFlushTask(writer, lock);
+            timer.scheduleAtFixedRate(flushTask, 0, batchInterval);
+        }
+    }
+
+    @Override
+    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
+        if (activeTimer) {
+            synchronized (lock) {
+                addTupleToFrame(tb);
+            }
+        } else {
+            addTupleToFrame(tb);
+        }
+        tuplesInFrame++;
+    }
+
+    private void addTupleToFrame(ArrayTupleBuilder tb) throws HyracksDataException {
+        if (tuplesInFrame == batchSize
+                || !appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("flushing frame containg (" + tuplesInFrame + ") tuples");
+            }
+            FrameUtils.flushFrame(frame.getBuffer(), writer);
+            tuplesInFrame = 0;
+            appender.reset(frame, true);
+            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+                throw new IllegalStateException();
+            }
+        }
+    }
+
+    @Override
+    public void close() throws HyracksDataException {
+        if (appender.getTupleCount() > 0) {
+            if (activeTimer) {
+                synchronized (lock) {
+                    FrameUtils.flushFrame(frame.getBuffer(), writer);
+                }
+            } else {
+                FrameUtils.flushFrame(frame.getBuffer(), writer);
+            }
+        }
+
+        if (timer != null) {
+            timer.cancel();
+        }
+    }
+
+    private class TimeBasedFlushTask extends TimerTask {
+
+        private IFrameWriter writer;
+        private final Object lock;
+
+        public TimeBasedFlushTask(IFrameWriter writer, Object lock) {
+            this.writer = writer;
+            this.lock = lock;
+        }
+
+        @Override
+        public void run() {
+            try {
+                if (tuplesInFrame > 0) {
+                    if (LOGGER.isLoggable(Level.INFO)) {
+                        LOGGER.info("TTL expired flushing frame (" + tuplesInFrame + ")");
+                    }
+                    synchronized (lock) {
+                        FrameUtils.flushFrame(frame.getBuffer(), writer);
+                        appender.reset(frame, true);
+                        tuplesInFrame = 0;
+                    }
+                }
+            } catch (HyracksDataException e) {
+                e.printStackTrace();
+            }
+        }
+
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/FrameFullTupleForwarder.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/FrameFullTupleForwarder.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/FrameFullTupleForwarder.java
new file mode 100644
index 0000000..36d41b4
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/FrameFullTupleForwarder.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import java.util.Map;
+
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.hyracks.api.comm.IFrame;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksCommonContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
+
+/**
+ * Tuple forwarder that collects tuples in one frame and passes the frame to
+ * the writer only when the next tuple no longer fits, plus once on close for
+ * whatever is left.
+ */
+public class FrameFullTupleForwarder implements ITupleForwarder {
+
+    private FrameTupleAppender appender;
+    private IFrame frame;
+    private IFrameWriter writer;
+
+    /** This forwarder has no configurable properties. */
+    @Override
+    public void configure(Map<String, String> configuration) {
+        // no-op
+    }
+
+    /** Allocates an empty frame and remembers the writer to flush it to. */
+    @Override
+    public void initialize(IHyracksCommonContext ctx, IFrameWriter writer) throws HyracksDataException {
+        appender = new FrameTupleAppender();
+        frame = new VSizeFrame(ctx);
+        this.writer = writer;
+        appender.reset(frame, true);
+    }
+
+    /**
+     * Appends the tuple to the current frame; if it does not fit, the frame
+     * is flushed and the append is retried on the emptied frame.
+     *
+     * @throws IllegalStateException if the tuple does not fit even in an
+     *             emptied frame
+     */
+    @Override
+    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
+        if (tryAppend(tb)) {
+            return;
+        }
+        FrameUtils.flushFrame(frame.getBuffer(), writer);
+        appender.reset(frame, true);
+        if (!tryAppend(tb)) {
+            throw new IllegalStateException();
+        }
+    }
+
+    /** Flushes the frame if it still holds tuples. */
+    @Override
+    public void close() throws HyracksDataException {
+        if (appender.getTupleCount() <= 0) {
+            return;
+        }
+        FrameUtils.flushFrame(frame.getBuffer(), writer);
+    }
+
+    /** Attempts to append the built tuple; returns whether it fit. */
+    private boolean tryAppend(ArrayTupleBuilder tb) throws HyracksDataException {
+        return appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/IndexingDataFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/IndexingDataFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/IndexingDataFlowController.java
new file mode 100644
index 0000000..68c6f9b
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/IndexingDataFlowController.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.api.IIndexingDatasource;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class IndexingDataFlowController<T> extends RecordDataFlowController<T> {
+    IExternalIndexer indexer;
+
+    @Override
+    protected void appendOtherTupleFields(ArrayTupleBuilder tb) throws Exception {
+        indexer.index(tb);
+    }
+
+    @Override
+    public void setRecordReader(IRecordReader<T> recordReader) throws Exception {
+        super.setRecordReader(recordReader);
+        indexer = ((IIndexingDatasource) recordReader).getIndexer();
+        numOfTupleFields += indexer.getNumberOfFields();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RateControlledTupleForwarder.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RateControlledTupleForwarder.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RateControlledTupleForwarder.java
new file mode 100644
index 0000000..99cc3d1
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RateControlledTupleForwarder.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import java.util.Map;
+
+import org.apache.asterix.common.parse.ITupleForwarder;
+import org.apache.hyracks.api.comm.IFrame;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksCommonContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
+
+/**
+ * Tuple forwarder that optionally sleeps for a configured number of
+ * milliseconds before accepting each tuple, and otherwise flushes the frame
+ * to the writer only when it is full (and once more on close).
+ */
+public class RateControlledTupleForwarder implements ITupleForwarder {
+
+    private FrameTupleAppender appender;
+    private IFrame frame;
+    private IFrameWriter writer;
+    private long interTupleInterval;
+    private boolean delayConfigured;
+
+    public static final String INTER_TUPLE_INTERVAL = "tuple-interval";
+
+    /**
+     * Reads the optional {@code tuple-interval} property (milliseconds).
+     * Only a strictly positive interval enables the delay.
+     */
+    @Override
+    public void configure(Map<String, String> configuration) {
+        String propValue = configuration.get(INTER_TUPLE_INTERVAL);
+        if (propValue != null) {
+            interTupleInterval = Long.parseLong(propValue);
+        }
+        // Thread.sleep throws IllegalArgumentException for negative values,
+        // so a negative interval is treated like "no delay" instead of
+        // failing on every tuple.
+        delayConfigured = interTupleInterval > 0;
+    }
+
+    /** Allocates an empty frame and remembers the writer to flush it to. */
+    @Override
+    public void initialize(IHyracksCommonContext ctx, IFrameWriter writer) throws HyracksDataException {
+        this.appender = new FrameTupleAppender();
+        this.frame = new VSizeFrame(ctx);
+        this.writer = writer;
+        appender.reset(frame, true);
+    }
+
+    /**
+     * Waits for the configured interval (if any) and appends the tuple,
+     * flushing the frame first when the tuple does not fit.
+     *
+     * @throws HyracksDataException if the flush fails or the calling thread
+     *             is interrupted while waiting
+     * @throws IllegalStateException if the tuple does not fit even in an
+     *             emptied frame
+     */
+    @Override
+    public void addTuple(ArrayTupleBuilder tb) throws HyracksDataException {
+        if (delayConfigured) {
+            try {
+                Thread.sleep(interTupleInterval);
+            } catch (InterruptedException e) {
+                // Restore the interrupt status and stop forwarding so that the
+                // interruption is visible to the caller instead of being lost.
+                Thread.currentThread().interrupt();
+                throw new HyracksDataException(e);
+            }
+        }
+        boolean success = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
+        if (!success) {
+            FrameUtils.flushFrame(frame.getBuffer(), writer);
+            appender.reset(frame, true);
+            success = appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
+            if (!success) {
+                throw new IllegalStateException(
+                        "Tuple of size " + tb.getSize() + " could not be appended to an empty frame");
+            }
+        }
+    }
+
+    /** Flushes the frame if it still holds tuples. */
+    @Override
+    public void close() throws HyracksDataException {
+        if (appender.getTupleCount() > 0) {
+            FrameUtils.flushFrame(frame.getBuffer(), writer);
+        }
+
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java
new file mode 100644
index 0000000..ad8e791
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordFlowController;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class RecordDataFlowController<T> extends AbstractDataFlowController implements IRecordFlowController<T> {
+
+    protected IRecordDataParser<T> dataParser;
+    protected IRecordReader<? extends T> recordReader;
+    protected int numOfTupleFields = 1;
+
+    @Override
+    public void start(IFrameWriter writer) throws HyracksDataException {
+        try {
+            ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields);
+            initializeTupleForwarder(writer);
+            while (recordReader.hasNext()) {
+                IRawRecord<? extends T> record = recordReader.next();
+                tb.reset();
+                dataParser.parse(record, tb.getDataOutput());
+                tb.addFieldEndOffset();
+                appendOtherTupleFields(tb);
+                tupleForwarder.addTuple(tb);
+            }
+            tupleForwarder.close();
+            recordReader.close();
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    protected void appendOtherTupleFields(ArrayTupleBuilder tb) throws Exception {
+    }
+
+    @Override
+    public boolean stop() {
+        return false;
+    }
+
+    @Override
+    public boolean handleException(Throwable th) {
+        return false;
+    }
+
+    @Override
+    public void setRecordParser(IRecordDataParser<T> dataParser) {
+        this.dataParser = dataParser;
+    }
+
+    @Override
+    public void setRecordReader(IRecordReader<T> recordReader) throws Exception {
+        this.recordReader = recordReader;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java
new file mode 100644
index 0000000..3016470
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataflow;
+
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.api.IStreamFlowController;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+/**
+ * Data flow controller that repeatedly asks an {@link IStreamDataParser} to
+ * write the next record into a single-field tuple and forwards each tuple
+ * until the parser reports that nothing more was parsed.
+ */
+public class StreamDataFlowController extends AbstractDataFlowController implements IStreamFlowController {
+    private static final int NUMBER_OF_TUPLE_FIELDS = 1;
+    private IStreamDataParser dataParser;
+
+    /**
+     * Parses and forwards tuples until the parser returns {@code false}, then
+     * closes the forwarder. Any exception is rethrown wrapped in a
+     * {@link HyracksDataException}.
+     */
+    @Override
+    public void start(IFrameWriter writer) throws HyracksDataException {
+        try {
+            ArrayTupleBuilder builder = new ArrayTupleBuilder(NUMBER_OF_TUPLE_FIELDS);
+            initializeTupleForwarder(writer);
+            // The builder is reset before every parse attempt, including the
+            // final one that ends the loop.
+            builder.reset();
+            while (dataParser.parse(builder.getDataOutput())) {
+                builder.addFieldEndOffset();
+                tupleForwarder.addTuple(builder);
+                builder.reset();
+            }
+            tupleForwarder.close();
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    /** Always returns {@code false}. */
+    @Override
+    public boolean stop() {
+        return false;
+    }
+
+    /** Always returns {@code false}. */
+    @Override
+    public boolean handleException(Throwable th) {
+        return false;
+    }
+
+    /** Sets the parser that produces the tuples to forward. */
+    @Override
+    public void setStreamParser(IStreamDataParser dataParser) {
+        this.dataParser = dataParser;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetEntity.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetEntity.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetEntity.java
deleted file mode 100644
index ede820d..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetEntity.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import com.microsoft.windowsazure.services.table.client.TableServiceEntity;
-
-public class AzureTweetEntity extends TableServiceEntity {
-
-    private String postingType;
-    private String json;
-
-    public AzureTweetEntity() {
-    }
-
-    public AzureTweetEntity(String userID, String postingID) {
-        this.partitionKey = userID;
-        this.rowKey = postingID;
-    }
-
-    public String getPostingType() {
-        return postingType;
-    }
-
-    public void setPostingType(String postingType) {
-        this.postingType = postingType;
-    }
-
-    public void setJSON(String json) {
-        this.json = json;
-    }
-
-    public String getJSON() {
-        return json;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetMetadataEntity.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetMetadataEntity.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetMetadataEntity.java
deleted file mode 100644
index ddda897..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/AzureTweetMetadataEntity.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import com.microsoft.windowsazure.services.table.client.TableServiceEntity;
-
-public class AzureTweetMetadataEntity extends TableServiceEntity {
-    private String creationTimestamp;
-    private String postingType;
-    private String productId;
-    private String ethnicity;
-    private String gender;
-    private String sentiment;
-    private String location;
-
-    public AzureTweetMetadataEntity() {
-    }
-
-    public AzureTweetMetadataEntity(String partitionKey, String rowKey) {
-        this.partitionKey = partitionKey;
-        this.rowKey = rowKey;
-    }
-
-    public String getCreationTimestamp() {
-        return creationTimestamp;
-    }
-
-    public void setCreationTimestamp(String creationTimestamp) {
-        this.creationTimestamp = creationTimestamp;
-    }
-
-    public String getPostingType() {
-        return postingType;
-    }
-
-    public void setPostingType(String postingType) {
-        this.postingType = postingType;
-    }
-
-    public String getProductId() {
-        return productId;
-    }
-
-    public void setProductId(String productId) {
-        this.productId = productId;
-    }
-
-    public String getEthnicity() {
-        return ethnicity;
-    }
-
-    public void setEthnicity(String ethnicity) {
-        this.ethnicity = ethnicity;
-    }
-
-    public String getGender() {
-        return gender;
-    }
-
-    public void setGender(String gender) {
-        this.gender = gender;
-    }
-
-    public String getSentiment() {
-        return sentiment;
-    }
-
-    public void setSentiment(String sentiment) {
-        this.sentiment = sentiment;
-    }
-
-    public String getLocation() {
-        return location;
-    }
-
-    public void setLocation(String location) {
-        this.location = location;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/ClientBasedFeedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/ClientBasedFeedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/ClientBasedFeedAdapter.java
deleted file mode 100644
index a197368..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/ClientBasedFeedAdapter.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.common.feeds.api.IFeedAdapter;
-import org.apache.asterix.common.parse.ITupleForwardPolicy;
-import org.apache.asterix.external.dataset.adapter.IFeedClient.InflowState;
-import org.apache.asterix.external.feeds.FeedPolicyEnforcer;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.comm.IFrame;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-
-/**
- * Acts as an abstract class for all pull-based external data adapters. Captures
- * the common logic for obtaining bytes from an external source and packing them
- * into frames as tuples.
- */
-public abstract class ClientBasedFeedAdapter implements IFeedAdapter {
-
-    private static final long serialVersionUID = 1L;
-    private static final Logger LOGGER = Logger.getLogger(ClientBasedFeedAdapter.class.getName());
-    private static final int timeout = 5; // seconds
-
-    protected ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(1);
-    protected IFeedClient pullBasedFeedClient;
-    protected ARecordType adapterOutputType;
-    protected boolean continueIngestion = true;
-    protected Map<String, String> configuration;
-
-    private FrameTupleAppender appender;
-    private IFrame frame;
-    private long tupleCount = 0;
-    private final IHyracksTaskContext ctx;
-    private int frameTupleCount = 0;
-
-    protected FeedPolicyEnforcer policyEnforcer;
-
-    public FeedPolicyEnforcer getPolicyEnforcer() {
-        return policyEnforcer;
-    }
-
-    public void setFeedPolicyEnforcer(FeedPolicyEnforcer policyEnforcer) {
-        this.policyEnforcer = policyEnforcer;
-    }
-
-    public abstract IFeedClient getFeedClient(int partition) throws Exception;
-
-    public abstract ITupleForwardPolicy getTupleParserPolicy();
-
-    public ClientBasedFeedAdapter(Map<String, String> configuration, IHyracksTaskContext ctx) {
-        this.ctx = ctx;
-        this.configuration = configuration;
-    }
-
-    public long getIngestedRecordsCount() {
-        return tupleCount;
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        appender = new FrameTupleAppender();
-        frame = new VSizeFrame(ctx);
-        appender.reset(frame, true);
-        ITupleForwardPolicy policy = getTupleParserPolicy();
-        policy.configure(configuration);
-        pullBasedFeedClient = getFeedClient(partition);
-        InflowState inflowState = null;
-        policy.initialize(ctx, writer);
-        while (continueIngestion) {
-            tupleBuilder.reset();
-            try {
-                inflowState = pullBasedFeedClient.nextTuple(tupleBuilder.getDataOutput(), timeout);
-                switch (inflowState) {
-                    case DATA_AVAILABLE:
-                        tupleBuilder.addFieldEndOffset();
-                        policy.addTuple(tupleBuilder);
-                        frameTupleCount++;
-                        break;
-                    case NO_MORE_DATA:
-                        if (LOGGER.isLoggable(Level.INFO)) {
-                            LOGGER.info("Reached end of feed");
-                        }
-                        policy.close();
-                        tupleCount += frameTupleCount;
-                        frameTupleCount = 0;
-                        continueIngestion = false;
-                        break;
-                    case DATA_NOT_AVAILABLE:
-                        if (LOGGER.isLoggable(Level.WARNING)) {
-                            LOGGER.warning("Timed out on obtaining data from pull based adapter. Trying again!");
-                        }
-                        break;
-                }
-
-            } catch (Exception failureException) {
-                try {
-                    failureException.printStackTrace();
-                    boolean continueIngestion = policyEnforcer.continueIngestionPostSoftwareFailure(failureException);
-                    if (continueIngestion) {
-                        tupleBuilder.reset();
-                        continue;
-                    } else {
-                        throw failureException;
-                    }
-                } catch (Exception recoveryException) {
-                    throw new Exception(recoveryException);
-                }
-            }
-        }
-    }
-
-    /**
-     * Discontinue the ingestion of data and end the feed.
-     * 
-     * @throws Exception
-     */
-    @Override
-    public void stop() throws Exception {
-        continueIngestion = false;
-    }
-
-    public Map<String, String> getConfiguration() {
-        return configuration;
-    }
-
-    @Override
-    public boolean handleException(Exception e) {
-        return false;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedClient.java
deleted file mode 100644
index e321b67..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedClient.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.asterix.builders.IARecordBuilder;
-import org.apache.asterix.builders.OrderedListBuilder;
-import org.apache.asterix.builders.RecordBuilder;
-import org.apache.asterix.builders.UnorderedListBuilder;
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.dataflow.data.nontagged.serde.ARecordSerializerDeserializer;
-import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
-import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.base.AInt32;
-import org.apache.asterix.om.base.AMutableDateTime;
-import org.apache.asterix.om.base.AMutableInt32;
-import org.apache.asterix.om.base.AMutableOrderedList;
-import org.apache.asterix.om.base.AMutablePoint;
-import org.apache.asterix.om.base.AMutableRecord;
-import org.apache.asterix.om.base.AMutableString;
-import org.apache.asterix.om.base.AMutableUnorderedList;
-import org.apache.asterix.om.base.AString;
-import org.apache.asterix.om.base.IACursor;
-import org.apache.asterix.om.base.IAObject;
-import org.apache.asterix.om.types.AOrderedListType;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.AUnorderedListType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-
-public abstract class FeedClient implements IFeedClient {
-
-    protected static final Logger LOGGER = Logger.getLogger(FeedClient.class.getName());
-
-    protected ARecordSerializerDeserializer recordSerDe;
-    protected AMutableRecord mutableRecord;
-    protected boolean messageReceived;
-    protected boolean continueIngestion = true;
-    protected IARecordBuilder recordBuilder = new RecordBuilder();
-
-    protected AMutableString aString = new AMutableString("");
-    protected AMutableInt32 aInt32 = new AMutableInt32(0);
-    protected AMutablePoint aPoint = new AMutablePoint(0, 0);
-    protected AMutableDateTime aDateTime = new AMutableDateTime(0);
-
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AString> stringSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ASTRING);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<ABoolean> booleanSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ABOOLEAN);
-    @SuppressWarnings("unchecked")
-    protected ISerializerDeserializer<AInt32> int32Serde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT32);
-
-    public abstract InflowState retrieveNextRecord() throws Exception;
-
-    @Override
-    public InflowState nextTuple(DataOutput dataOutput, int timeout) throws AsterixException {
-        try {
-            InflowState state = null;
-            int waitCount = 0;
-            boolean continueWait = true;
-            while ((state == null || state.equals(InflowState.DATA_NOT_AVAILABLE)) && continueWait) {
-                state = retrieveNextRecord();
-                switch (state) {
-                    case DATA_AVAILABLE:
-                        recordBuilder.reset(mutableRecord.getType());
-                        recordBuilder.init();
-                        writeRecord(mutableRecord, dataOutput, recordBuilder);
-                        break;
-                    case DATA_NOT_AVAILABLE:
-                        if (waitCount > timeout) {
-                            continueWait = false;
-                        } else {
-                            if (LOGGER.isLoggable(Level.WARNING)) {
-                                LOGGER.warning("Waiting to obtain data from pull based adaptor");
-                            }
-                            Thread.sleep(1000);
-                            waitCount++;
-                        }
-                        break;
-                    case NO_MORE_DATA:
-                        break;
-                }
-            }
-            return state;
-        } catch (Exception e) {
-            throw new AsterixException(e);
-        }
-
-    }
-
-    private void writeRecord(AMutableRecord record, DataOutput dataOutput, IARecordBuilder recordBuilder)
-            throws IOException, AsterixException {
-        ArrayBackedValueStorage fieldValue = new ArrayBackedValueStorage();
-        int numFields = record.getType().getFieldNames().length;
-        for (int pos = 0; pos < numFields; pos++) {
-            fieldValue.reset();
-            IAObject obj = record.getValueByPos(pos);
-            writeObject(obj, fieldValue.getDataOutput());
-            recordBuilder.addField(pos, fieldValue);
-        }
-        recordBuilder.write(dataOutput, true);
-    }
-
-    private void writeObject(IAObject obj, DataOutput dataOutput) throws IOException, AsterixException {
-        switch (obj.getType().getTypeTag()) {
-            case RECORD: {
-                IARecordBuilder recordBuilder = new RecordBuilder();
-                recordBuilder.reset((ARecordType) obj.getType());
-                recordBuilder.init();
-                writeRecord((AMutableRecord) obj, dataOutput, recordBuilder);
-                break;
-            }
-
-            case ORDEREDLIST: {
-                OrderedListBuilder listBuilder = new OrderedListBuilder();
-                listBuilder.reset((AOrderedListType) ((AMutableOrderedList) obj).getType());
-                IACursor cursor = ((AMutableOrderedList) obj).getCursor();
-                ArrayBackedValueStorage listItemValue = new ArrayBackedValueStorage();
-                while (cursor.next()) {
-                    listItemValue.reset();
-                    IAObject item = cursor.get();
-                    writeObject(item, listItemValue.getDataOutput());
-                    listBuilder.addItem(listItemValue);
-                }
-                listBuilder.write(dataOutput, true);
-                break;
-            }
-
-            case UNORDEREDLIST: {
-                UnorderedListBuilder listBuilder = new UnorderedListBuilder();
-                listBuilder.reset((AUnorderedListType) ((AMutableUnorderedList) obj).getType());
-                IACursor cursor = ((AMutableUnorderedList) obj).getCursor();
-                ArrayBackedValueStorage listItemValue = new ArrayBackedValueStorage();
-                while (cursor.next()) {
-                    listItemValue.reset();
-                    IAObject item = cursor.get();
-                    writeObject(item, listItemValue.getDataOutput());
-                    listBuilder.addItem(listItemValue);
-                }
-                listBuilder.write(dataOutput, true);
-                break;
-            }
-
-            default:
-                AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(obj.getType()).serialize(obj,
-                        dataOutput);
-                break;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FileSystemBasedAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FileSystemBasedAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FileSystemBasedAdapter.java
deleted file mode 100644
index ff9af0c..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FileSystemBasedAdapter.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.asterix.common.feeds.api.IDatasourceAdapter;
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-public abstract class FileSystemBasedAdapter implements IDatasourceAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    public static final String NODE_RESOLVER_FACTORY_PROPERTY = "node.Resolver";
-
-    public abstract InputStream getInputStream(int partition) throws IOException;
-
-    protected final ITupleParserFactory parserFactory;
-    protected ITupleParser tupleParser;
-    protected final IAType sourceDatatype;
-    protected IHyracksTaskContext ctx;
-
-    public FileSystemBasedAdapter(ITupleParserFactory parserFactory, IAType sourceDatatype, IHyracksTaskContext ctx)
-            throws HyracksDataException {
-        this.parserFactory = parserFactory;
-        this.sourceDatatype = sourceDatatype;
-        this.ctx = ctx;
-    }
-
-    @Override
-    public void start(int partition, IFrameWriter writer) throws Exception {
-        tupleParser = parserFactory.createTupleParser(ctx);
-        InputStream in = getInputStream(partition);
-        tupleParser.parse(in, writer);
-    }
-
-    public String getFilename(int partition) {
-        return null;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java
new file mode 100644
index 0000000..74e98dd
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataset.adapter;
+
+import org.apache.asterix.common.feeds.api.IDataSourceAdapter;
+import org.apache.asterix.external.api.IDataFlowController;
+import org.apache.hyracks.api.comm.IFrameWriter;
+
+public class GenericAdapter implements IDataSourceAdapter {
+
+    private static final long serialVersionUID = 1L;
+    private final IDataFlowController controller;
+
+    public GenericAdapter(IDataFlowController controller) {
+        this.controller = controller;
+    }
+
+    @Override
+    public void start(int partition, IFrameWriter writer) throws Exception {
+        controller.start(writer);
+    }
+
+    @Override
+    public boolean stop() throws Exception {
+        return controller.stop();
+    }
+
+    @Override
+    public boolean handleException(Throwable e) {
+        return controller.handleException(e);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSAdapter.java
deleted file mode 100644
index 5f1b1ae..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSAdapter.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.input.GenericFileAwareRecordReader;
-import org.apache.asterix.external.indexing.input.GenericRecordReader;
-import org.apache.asterix.external.indexing.input.TextualDataReader;
-import org.apache.asterix.external.indexing.input.TextualFullScanDataReader;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-/**
- * Provides functionality for fetching external data stored in an HDFS instance.
- */
-
-public class HDFSAdapter extends FileSystemBasedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private transient String[] readSchedule;
-    private transient boolean executed[];
-    private transient InputSplit[] inputSplits;
-    private transient JobConf conf;
-    private transient String nodeName;
-    private transient List<ExternalFile> files;
-    private transient Map<String, String> configuration;
-
-    public HDFSAdapter(IAType atype, String[] readSchedule, boolean[] executed, InputSplit[] inputSplits, JobConf conf,
-            String nodeName, ITupleParserFactory parserFactory, IHyracksTaskContext ctx,
-            Map<String, String> configuration, List<ExternalFile> files) throws HyracksDataException {
-        super(parserFactory, atype, ctx);
-        this.readSchedule = readSchedule;
-        this.executed = executed;
-        this.inputSplits = inputSplits;
-        this.conf = conf;
-        this.nodeName = nodeName;
-        this.files = files;
-        this.configuration = configuration;
-    }
-
-    /*
-     * The method below was modified to take care of the following
-     * 1. when target files are not null, it generates a file aware input stream that validate against the files
-     * 2. if the data is binary, it returns a generic reader
-     */
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        if ((conf.getInputFormat() instanceof TextInputFormat
-                || conf.getInputFormat() instanceof SequenceFileInputFormat)
-                && (AsterixTupleParserFactory.FORMAT_ADM
-                        .equalsIgnoreCase(configuration.get(AsterixTupleParserFactory.KEY_FORMAT))
-                        || AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT
-                                .equalsIgnoreCase(configuration.get(AsterixTupleParserFactory.KEY_FORMAT)))) {
-            if (files != null) {
-                return new TextualDataReader(inputSplits, readSchedule, nodeName, conf, executed, files);
-            } else {
-                return new TextualFullScanDataReader(executed, inputSplits, readSchedule, nodeName, conf);
-            }
-        } else {
-            if (files != null) {
-                return new GenericFileAwareRecordReader(inputSplits, readSchedule, nodeName, conf, executed, files);
-            } else {
-                return new GenericRecordReader(inputSplits, readSchedule, nodeName, conf, executed);
-            }
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSIndexingAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSIndexingAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSIndexingAdapter.java
deleted file mode 100644
index 92a049d0..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HDFSIndexingAdapter.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-
-import org.apache.asterix.external.adapter.factory.HDFSAdapterFactory;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.input.GenericFileAwareRecordReader;
-import org.apache.asterix.external.indexing.input.RCFileDataReader;
-import org.apache.asterix.external.indexing.input.TextualDataReader;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-public class HDFSIndexingAdapter extends FileSystemBasedAdapter {
-
-    private static final long serialVersionUID = 1L;
-    private transient String[] readSchedule;
-    private transient boolean executed[];
-    private transient InputSplit[] inputSplits;
-    private transient JobConf conf;
-    private final List<ExternalFile> files;
-    private transient String nodeName;
-    // file input-format <text, seq, rc>
-    private String inputFormat;
-    // content format <adm, delimited-text, binary>
-    private String format;
-
-    public HDFSIndexingAdapter(IAType atype, String[] readSchedule, boolean[] executed, InputSplit[] inputSplits,
-            JobConf conf, AlgebricksPartitionConstraint clusterLocations, List<ExternalFile> files,
-            ITupleParserFactory parserFactory, IHyracksTaskContext ctx, String nodeName, String inputFormat,
-            String format) throws IOException {
-        super(parserFactory, atype, ctx);
-        this.nodeName = nodeName;
-        this.readSchedule = readSchedule;
-        this.executed = executed;
-        this.inputSplits = inputSplits;
-        this.conf = conf;
-        this.files = files;
-        this.inputFormat = inputFormat;
-        this.format = format;
-    }
-
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        if (inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_RC)) {
-            return new RCFileDataReader(inputSplits, readSchedule, nodeName, conf, executed, files);
-        } else if (format.equals(AsterixTupleParserFactory.FORMAT_ADM)
-                || format.equals(AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT)) {
-            return new TextualDataReader(inputSplits, readSchedule, nodeName, conf, executed, files);
-        } else {
-            return new GenericFileAwareRecordReader(inputSplits, readSchedule, nodeName, conf, executed, files);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HiveAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HiveAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HiveAdapter.java
deleted file mode 100644
index 1b8024d..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/HiveAdapter.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-/**
- * Provides the functionality of fetching data in form of ADM records from a Hive dataset.
- */
-public class HiveAdapter extends FileSystemBasedAdapter {
-
-    private static final long serialVersionUID = 1L;
-
-    private HDFSAdapter hdfsAdapter;
-
-    public HiveAdapter(IAType atype, HDFSAdapter hdfsAdapter, ITupleParserFactory parserFactory, IHyracksTaskContext ctx)
-            throws HyracksDataException {
-        super(parserFactory, atype, ctx);
-        this.hdfsAdapter = hdfsAdapter;
-    }
-
-    @Override
-    public InputStream getInputStream(int partition) throws IOException {
-        return hdfsAdapter.getInputStream(partition);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IControlledAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IControlledAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IControlledAdapter.java
deleted file mode 100644
index e71f10c..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IControlledAdapter.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.Serializable;
-import java.nio.ByteBuffer;
-
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-/**
- *
- * @author alamouda
- *
- */
-public interface IControlledAdapter extends Serializable {
-    
-    /**
-     * 
-     * @param ctx
-     * @param recordDescriptors 
-     * @throws Exception
-     */
-    public void initialize(IHyracksTaskContext ctx, INullWriterFactory iNullWriterFactory) throws Exception;
-
-    /**
-     * 
-     * @param buffer
-     * @param writer
-     * @throws HyracksDataException
-     */
-    public void nextFrame(ByteBuffer buffer, IFrameWriter writer) throws Exception;
-
-    /**
-     * 
-     * @param writer
-     * @throws HyracksDataException
-     */
-    public void close(IFrameWriter writer) throws Exception;
-    
-    /**
-     * Gives the adapter a chance to clean up
-     * @param writer
-     * @throws HyracksDataException
-     */
-    public void fail() throws Exception;
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClient.java
deleted file mode 100644
index 6377960..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClient.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.DataOutput;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-
-public interface IFeedClient {
-
-    public enum InflowState {
-        NO_MORE_DATA,
-        DATA_AVAILABLE,
-        DATA_NOT_AVAILABLE
-    }
-
-    /**
-     * Writes the next fetched tuple into the provided instance of DatatOutput. Invocation of this method blocks until
-     * a new tuple has been written or the specified time has expired.
-     * 
-     * @param dataOutput
-     *            The receiving channel for the feed client to write ADM records to.
-     * @param timeout
-     *            Threshold time (expressed in seconds) for the next tuple to be obtained from the external source.
-     * @return
-     * @throws AsterixException
-     */
-    public InflowState nextTuple(DataOutput dataOutput, int timeout) throws AsterixException;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClientFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClientFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClientFactory.java
deleted file mode 100644
index b175518..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IFeedClientFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.util.Map;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-
-public interface IFeedClientFactory {
-
-    public IPullBasedFeedClient createFeedClient(IHyracksTaskContext ctx, Map<String, String> configuration)
-            throws Exception;
-
-    public ARecordType getRecordType() throws AsterixException;
-
-    public FeedClientType getFeedClientType();
-
-    public enum FeedClientType {
-        GENERIC,
-        TYPED
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IPullBasedFeedClient.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IPullBasedFeedClient.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IPullBasedFeedClient.java
deleted file mode 100644
index f6c9dad..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/IPullBasedFeedClient.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.dataset.adapter;
-
-import java.io.DataOutput;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-
-public interface IPullBasedFeedClient {
-
-    public enum InflowState {
-        NO_MORE_DATA,
-        DATA_AVAILABLE,
-        DATA_NOT_AVAILABLE
-    }
-
-    /**
-     * Writes the next fetched tuple into the provided instance of DatatOutput. Invocation of this method blocks until
-     * a new tuple has been written or the specified time has expired.
-     * 
-     * @param dataOutput
-     *            The receiving channel for the feed client to write ADM records to.
-     * @param timeout
-     *            Threshold time (expressed in seconds) for the next tuple to be obtained from the externa source.
-     * @return
-     * @throws AsterixException
-     */
-    public InflowState nextTuple(DataOutput dataOutput, int timeout) throws AsterixException;
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/LookupAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/LookupAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/LookupAdapter.java
new file mode 100644
index 0000000..ba6f83c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/LookupAdapter.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.dataset.adapter;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.asterix.external.api.ILookupRecordReader;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.indexing.RecordId;
+import org.apache.asterix.external.indexing.RecordIdReader;
+import org.apache.asterix.external.util.DataflowUtils;
+import org.apache.hyracks.api.comm.IFrameWriter;
+import org.apache.hyracks.api.comm.VSizeFrame;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.INullWriter;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+
+public final class LookupAdapter<T> implements IFrameWriter {
+
+    private boolean propagateInput;
+    private int[] propagatedFields;
+    private boolean retainNull;
+    private ArrayTupleBuilder tb;
+    private FrameTupleAppender appender;
+    private IRecordDataParser<T> dataParser;
+    private ILookupRecordReader<? extends T> recordReader;
+    private RecordIdReader ridReader;
+    private FrameTupleAccessor tupleAccessor;
+    private IFrameWriter writer;
+    private FrameTupleReference frameTuple;
+    private ArrayTupleBuilder nullTupleBuild;
+
+    public LookupAdapter(IRecordDataParser<T> dataParser, ILookupRecordReader<? extends T> recordReader,
+            RecordDescriptor inRecDesc, RecordIdReader ridReader, boolean propagateInput, int[] propagatedFields,
+            boolean retainNull, INullWriterFactory iNullWriterFactory, IHyracksTaskContext ctx, IFrameWriter writer)
+                    throws HyracksDataException {
+        this.dataParser = dataParser;
+        this.recordReader = recordReader;
+        this.propagateInput = propagateInput;
+        this.propagatedFields = propagatedFields;
+        this.retainNull = retainNull;
+        this.tupleAccessor = new FrameTupleAccessor(inRecDesc);
+        this.ridReader = ridReader;
+        ridReader.set(tupleAccessor, inRecDesc);
+        configurePropagation(iNullWriterFactory);
+        appender = new FrameTupleAppender(new VSizeFrame(ctx));
+        this.writer = writer;
+    }
+
+    private void configurePropagation(INullWriterFactory iNullWriterFactory) {
+        if (propagateInput) {
+            tb = new ArrayTupleBuilder(propagatedFields.length + 1);
+            frameTuple = new FrameTupleReference();
+        } else {
+            tb = new ArrayTupleBuilder(1);
+        }
+        if (retainNull) {
+            INullWriter nullWriter = iNullWriterFactory.createNullWriter();
+            nullTupleBuild = new ArrayTupleBuilder(1);
+            DataOutput out = nullTupleBuild.getDataOutput();
+            try {
+                nullWriter.writeNull(out);
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        } else {
+            nullTupleBuild = null;
+        }
+    }
+
+    @Override
+    public void fail() throws HyracksDataException {
+        try {
+            recordReader.fail();
+        } catch (Throwable th) {
+            throw new HyracksDataException(th);
+        } finally {
+            writer.fail();
+        }
+    }
+
+    @Override
+    public void open() throws HyracksDataException {
+        writer.open();
+
+    }
+
+    @Override
+    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+        try {
+            tupleAccessor.reset(buffer);
+            int tupleIndex = 0;
+            int tupleCount = tupleAccessor.getTupleCount();
+            while (tupleIndex < tupleCount) {
+                IRawRecord<? extends T> record = null;
+                RecordId rid = ridReader.read(tupleIndex);
+                if (rid != null) {
+                    record = recordReader.read(rid);
+                }
+                tb.reset();
+                if (propagateInput) {
+                    propagate(tupleIndex);
+                }
+                if (record != null) {
+                    dataParser.parse(record, tb.getDataOutput());
+                    tb.addFieldEndOffset();
+                    DataflowUtils.addTupleToFrame(appender, tb, writer);
+                } else if (retainNull) {
+                    tb.getDataOutput().write(nullTupleBuild.getByteArray());
+                    tb.addFieldEndOffset();
+                    DataflowUtils.addTupleToFrame(appender, tb, writer);
+                }
+                tupleIndex++;
+            }
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    private void propagate(int idx) throws IOException {
+        frameTuple.reset(tupleAccessor, idx);
+        for (int i = 0; i < propagatedFields.length; i++) {
+            tb.getDataOutput().write(frameTuple.getFieldData(propagatedFields[i]),
+                    frameTuple.getFieldStart(propagatedFields[i]), frameTuple.getFieldLength(propagatedFields[i]));
+            tb.addFieldEndOffset();
+        }
+    }
+
+    @Override
+    public void close() throws HyracksDataException {
+        try {
+            appender.flush(writer, true);
+        } finally {
+            writer.close();
+        }
+    }
+}



[16/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RCRecordIdReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RCRecordIdReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RCRecordIdReader.java
new file mode 100644
index 0000000..07d09db
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RCRecordIdReader.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import org.apache.asterix.om.base.AInt32;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+public class RCRecordIdReader extends RecordIdReader {
+
+    public RCRecordIdReader(int[] ridFields) {
+        super(ridFields);
+    }
+
+    @Override
+    public RecordId read(int index) throws HyracksDataException {
+        if (super.read(index) == null) {
+            return null;
+        }
+        // Get row number
+        bbis.setByteBuffer(frameBuffer, tupleStartOffset
+                + tupleAccessor.getFieldStartOffset(index, ridFields[IndexingConstants.ROW_NUMBER_FIELD_INDEX]));
+        rid.setRow(
+                ((AInt32) inRecDesc.getFields()[ridFields[IndexingConstants.ROW_NUMBER_FIELD_INDEX]].deserialize(dis))
+                        .getIntegerValue());
+        return rid;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordColumnarIndexer.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordColumnarIndexer.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordColumnarIndexer.java
new file mode 100644
index 0000000..14235c0
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordColumnarIndexer.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import java.io.IOException;
+
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.input.record.reader.HDFSRecordReader;
+import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.asterix.om.base.AMutableInt64;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class RecordColumnarIndexer implements IExternalIndexer {
+
+    private static final long serialVersionUID = 1L;
+    public static final int NUM_OF_FIELDS = 3;
+    protected AMutableInt32 fileNumber = new AMutableInt32(0);
+    protected AMutableInt64 offset = new AMutableInt64(0);
+    protected long nextOffset;
+    protected AMutableInt32 rowNumber = new AMutableInt32(0);
+    protected RecordReader<?, Writable> recordReader;
+
+    @SuppressWarnings("unchecked")
+    private ISerializerDeserializer<IAObject> intSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT32);
+    @SuppressWarnings("unchecked")
+    private ISerializerDeserializer<IAObject> longSerde = AqlSerializerDeserializerProvider.INSTANCE
+            .getSerializerDeserializer(BuiltinType.AINT64);
+
+    @Override
+    public void reset(IRecordReader<?> reader) throws IOException {
+        //TODO: Make this more generic. right now, it works because we only index hdfs files.
+        @SuppressWarnings("unchecked")
+        HDFSRecordReader<?, Writable> hdfsReader = (HDFSRecordReader<?, Writable>) reader;
+        fileNumber.setValue(hdfsReader.getSnapshot().get(hdfsReader.getCurrentSplitIndex()).getFileNumber());
+        recordReader = hdfsReader.getReader();
+        offset.setValue(recordReader.getPos());
+        nextOffset = offset.getLongValue();
+        rowNumber.setValue(0);
+    }
+
+    @Override
+    public void index(ArrayTupleBuilder tb) throws IOException {
+        if (recordReader.getPos() != nextOffset) {
+            // start of a new group
+            offset.setValue(nextOffset);
+            nextOffset = recordReader.getPos();
+            rowNumber.setValue(0);
+        }
+        tb.addField(intSerde, fileNumber);
+        tb.addField(longSerde, offset);
+        tb.addField(intSerde, rowNumber);
+        rowNumber.setValue(rowNumber.getIntegerValue() + 1);
+    }
+
+    @Override
+    public int getNumberOfFields() {
+        return NUM_OF_FIELDS;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordId.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordId.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordId.java
new file mode 100644
index 0000000..9027101
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordId.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+public class RecordId {
+    public static enum RecordIdType {
+        OFFSET,
+        RC
+    }
+
+    private int fileId;
+    private long offset;
+    private int row;
+
+    public int getFileId() {
+        return fileId;
+    }
+
+    public void setFileId(int fileId) {
+        this.fileId = fileId;
+    }
+
+    public long getOffset() {
+        return offset;
+    }
+
+    public void setOffset(long offset) {
+        this.offset = offset;
+    }
+
+    public int getRow() {
+        return row;
+    }
+
+    public void setRow(int row) {
+        this.row = row;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReader.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReader.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReader.java
new file mode 100644
index 0000000..2b4cc9c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReader.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import java.io.DataInputStream;
+import java.nio.ByteBuffer;
+
+import org.apache.asterix.om.base.AInt32;
+import org.apache.asterix.om.base.AInt64;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+
+public class RecordIdReader {
+
+    private final static byte nullByte = ATypeTag.NULL.serialize();
+    protected FrameTupleAccessor tupleAccessor;
+    protected int fieldSlotsLength;
+    protected int[] ridFields;
+    protected RecordId rid;
+    protected RecordDescriptor inRecDesc;
+    protected ByteBufferInputStream bbis;
+    protected DataInputStream dis;
+    protected int tupleStartOffset;
+    protected ByteBuffer frameBuffer;
+
+    public RecordIdReader(int[] ridFields) {
+        this.ridFields = ridFields;
+        this.rid = new RecordId();
+    }
+
+    public void set(FrameTupleAccessor accessor, RecordDescriptor inRecDesc) {
+        this.tupleAccessor = accessor;
+        this.fieldSlotsLength = accessor.getFieldSlotsLength();
+        this.inRecDesc = inRecDesc;
+        this.bbis = new ByteBufferInputStream();
+        this.dis = new DataInputStream(bbis);
+    }
+
+    public RecordId read(int index) throws HyracksDataException {
+        tupleStartOffset = tupleAccessor.getTupleStartOffset(index) + fieldSlotsLength;
+        int fileNumberStartOffset = tupleAccessor.getFieldStartOffset(index,
+                ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]);
+        frameBuffer = tupleAccessor.getBuffer();
+        if (frameBuffer.get(tupleStartOffset + fileNumberStartOffset) == nullByte) {
+            return null;
+        }
+        // Get file number
+        bbis.setByteBuffer(frameBuffer, tupleStartOffset + fileNumberStartOffset);
+        rid.setFileId(
+                ((AInt32) inRecDesc.getFields()[ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]].deserialize(dis))
+                        .getIntegerValue());
+        // Get record group offset
+        bbis.setByteBuffer(frameBuffer, tupleStartOffset
+                + tupleAccessor.getFieldStartOffset(index, ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]));
+        rid.setOffset(((AInt64) inRecDesc.getFields()[ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]]
+                .deserialize(dis)).getLongValue());
+        return rid;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReaderFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReaderFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReaderFactory.java
new file mode 100644
index 0000000..d0bf2ff
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/RecordIdReaderFactory.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.indexing;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.util.HDFSUtils;
+
+public class RecordIdReaderFactory {
+
+    public static RecordIdReader create(Map<String, String> configuration, int[] ridFields) throws AsterixException {
+        switch (HDFSUtils.getRecordIdType(configuration)) {
+            case OFFSET:
+                return new RecordIdReader(ridFields);
+            case RC:
+                return new RCRecordIdReader(ridFields);
+            default:
+                throw new AsterixException("Unknown Record Id type: " + HDFSUtils.getRecordIdType(configuration));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AbstractIndexingTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AbstractIndexingTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AbstractIndexingTupleParser.java
deleted file mode 100644
index 07e09bd..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AbstractIndexingTupleParser.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.DataOutput;
-import java.io.InputStream;
-
-import org.apache.asterix.external.indexing.input.AbstractHDFSReader;
-import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
-import org.apache.asterix.om.base.AMutableInt32;
-import org.apache.asterix.om.base.AMutableInt64;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-
-public abstract class AbstractIndexingTupleParser implements ITupleParser{
-
-    protected ArrayTupleBuilder tb;
-    protected DataOutput dos;
-    protected final FrameTupleAppender appender;
-    protected final ARecordType recType;
-    protected final IHyracksCommonContext ctx;
-    protected final IAsterixHDFSRecordParser deserializer;
-    protected final AMutableInt32 aMutableInt = new AMutableInt32(0);
-    protected final AMutableInt64 aMutableLong = new AMutableInt64(0);
-    
-    @SuppressWarnings("rawtypes")
-    protected final ISerializerDeserializer intSerde = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);
-    @SuppressWarnings("rawtypes")
-    protected final ISerializerDeserializer longSerde = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);
-    
-    public AbstractIndexingTupleParser(IHyracksCommonContext ctx, ARecordType recType, IAsterixHDFSRecordParser
-            deserializer) throws HyracksDataException {
-        appender = new FrameTupleAppender(new VSizeFrame(ctx));
-        this.recType = recType;
-        this.ctx = ctx;
-        this.deserializer = deserializer;
-    }
-
-    @Override
-    public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
-        AbstractHDFSReader inReader = (AbstractHDFSReader) in;
-        Object record;
-        try {
-            inReader.initialize();
-            record = inReader.readNext();
-            while (record != null) {
-                tb.reset();
-                deserializer.parse(record, tb.getDataOutput());
-                tb.addFieldEndOffset();
-                //append indexing fields
-                appendIndexingData(tb, inReader);
-                addTupleToFrame(writer);
-                record = inReader.readNext();
-            }
-            appender.flush(writer, true);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    protected abstract void appendIndexingData(ArrayTupleBuilder tb,
-            AbstractHDFSReader inReader) throws Exception;
-
-    protected void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-            appender.flush(writer, true);
-            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-                throw new IllegalStateException("Record is too big to fit in a frame");
-            }
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedControlledTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedControlledTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedControlledTupleParser.java
deleted file mode 100644
index c94be6a..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedControlledTupleParser.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.asterix.external.indexing.IndexingConstants;
-import org.apache.asterix.external.indexing.input.AbstractHDFSLookupInputStream;
-import org.apache.asterix.om.base.AInt32;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.runtime.operators.file.IDataParser;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.INullWriter;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
-import org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-
-/**
- * Implementation of {@link IControlledTupleParser} for external data stored in a text format
- * (adm or delimited text). For every tuple of an incoming frame it reads the record id fields
- * (file number and record offset), asks the lookup input stream to fetch that record, parses it
- * and packs the formed tuples into output frames.
- */
-public class AdmOrDelimitedControlledTupleParser implements IControlledTupleParser {
-
-    private ArrayTupleBuilder tb;
-    private transient DataOutput dos;
-    private final FrameTupleAppender appender;
-    protected final ARecordType recType;
-    private IDataParser parser;
-    private boolean propagateInput;
-    private int[] propagatedFields;
-    private int[] ridFields;
-    private RecordDescriptor inRecDesc;
-    private FrameTupleAccessor tupleAccessor;
-    private FrameTupleReference frameTuple;
-    private ByteBufferInputStream bbis;
-    private DataInputStream dis;
-    private AbstractHDFSLookupInputStream in;
-    private boolean parserInitialized = false;
-    private boolean retainNull;
-    protected byte nullByte;
-    protected ArrayTupleBuilder nullTupleBuild;
-
-    /**
-     * @param ctx
-     *            task context used to allocate the output frame
-     * @param recType
-     *            record type handed to the data parser on initialization
-     * @param in
-     *            lookup stream positioned on a record by {@code fetchRecord}
-     * @param propagateInput
-     *            whether fields of the input tuple are copied in front of the parsed record
-     * @param inRecDesc
-     *            descriptor of the incoming tuples
-     * @param parser
-     *            data parser that reads records from {@code in}
-     * @param propagatedFields
-     *            indexes of the input fields to copy when {@code propagateInput} is set
-     * @param ridFields
-     *            indexes of the record id fields in the input tuple
-     * @param retainNull
-     *            whether a null record is emitted for input tuples with no matching record
-     * @param iNullWriterFactory
-     *            factory for the writer that serializes the null record; only used when
-     *            {@code retainNull} is set
-     * @throws HyracksDataException
-     *             if the output frame cannot be allocated or the null record cannot be serialized
-     */
-    public AdmOrDelimitedControlledTupleParser(IHyracksTaskContext ctx, ARecordType recType,
-            AbstractHDFSLookupInputStream in, boolean propagateInput, RecordDescriptor inRecDesc, IDataParser parser,
-            int[] propagatedFields, int[] ridFields, boolean retainNull, INullWriterFactory iNullWriterFactory)
-                    throws HyracksDataException {
-        this.recType = recType;
-        this.in = in;
-        this.propagateInput = propagateInput;
-        this.retainNull = retainNull;
-        this.inRecDesc = inRecDesc;
-        this.propagatedFields = propagatedFields;
-        this.ridFields = ridFields;
-        this.parser = parser;
-        this.tupleAccessor = new FrameTupleAccessor(inRecDesc);
-        appender = new FrameTupleAppender(new VSizeFrame(ctx));
-        if (propagateInput) {
-            tb = new ArrayTupleBuilder(propagatedFields.length + 1);
-        } else {
-            tb = new ArrayTupleBuilder(1);
-        }
-        frameTuple = new FrameTupleReference();
-        dos = tb.getDataOutput();
-        bbis = new ByteBufferInputStream();
-        dis = new DataInputStream(bbis);
-        nullByte = ATypeTag.NULL.serialize();
-        if (retainNull) {
-            INullWriter nullWriter = iNullWriterFactory.createNullWriter();
-            nullTupleBuild = new ArrayTupleBuilder(1);
-            DataOutput out = nullTupleBuild.getDataOutput();
-            try {
-                nullWriter.writeNull(out);
-            } catch (IOException e) {
-                // Without a serialized null the retain-null path would emit an empty field;
-                // fail here instead of printing the stack trace and carrying on.
-                throw new HyracksDataException(e);
-            }
-        } else {
-            nullTupleBuild = null;
-        }
-    }
-
-    /**
-     * Closes the lookup input stream and flushes the tuples still buffered in the output frame.
-     */
-    @Override
-    public void close(IFrameWriter writer) throws Exception {
-        try {
-            in.close();
-            appender.flush(writer, true);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    /**
-     * Looks up and parses the record referenced by each tuple of {@code frameBuffer}. Tuples whose
-     * record is found produce an output tuple (propagated fields, if any, followed by the parsed
-     * record). Tuples with no record produce a tuple ending in a null record only when both
-     * {@code propagateInput} and {@code retainNull} are set; otherwise they are dropped.
-     */
-    @Override
-    public void parseNext(IFrameWriter writer, ByteBuffer frameBuffer) throws HyracksDataException {
-        try {
-            tupleAccessor.reset(frameBuffer);
-            int tupleCount = tupleAccessor.getTupleCount();
-            int fieldSlotsLength = tupleAccessor.getFieldSlotsLength();
-            // Loop over tuples
-            for (int tupleIndex = 0; tupleIndex < tupleCount; tupleIndex++) {
-                boolean found = false;
-                int tupleStartOffset = tupleAccessor.getTupleStartOffset(tupleIndex) + fieldSlotsLength;
-                int fileNumberStartOffset = tupleAccessor.getFieldStartOffset(tupleIndex,
-                        ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]);
-                // A null file number (outer join) means there is no record to look up
-                if (frameBuffer.get(tupleStartOffset + fileNumberStartOffset) != nullByte) {
-                    // Get file number
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + fileNumberStartOffset);
-                    int fileNumber = ((AInt32) inRecDesc
-                            .getFields()[ridFields[IndexingConstants.FILE_NUMBER_FIELD_INDEX]].deserialize(dis))
-                                    .getIntegerValue();
-                    // Get record offset
-                    bbis.setByteBuffer(frameBuffer, tupleStartOffset + tupleAccessor.getFieldStartOffset(tupleIndex,
-                            ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]));
-                    long recordOffset = ((AInt64) inRecDesc
-                            .getFields()[ridFields[IndexingConstants.RECORD_OFFSET_FIELD_INDEX]].deserialize(dis))
-                                    .getLongValue();
-                    found = in.fetchRecord(fileNumber, recordOffset);
-                }
-                if (found) {
-                    // Since we now know the inputStream is ready, we can safely initialize the parser
-                    // We can't do that earlier since the parser will start pulling from the stream and if it is not ready,
-                    // The parser will automatically release its resources
-                    if (!parserInitialized) {
-                        parser.initialize(in, recType, true);
-                        parserInitialized = true;
-                    }
-                    tb.reset();
-                    if (propagateInput) {
-                        copyPropagatedFields(tupleIndex);
-                    }
-                    parser.parse(tb.getDataOutput());
-                    tb.addFieldEndOffset();
-                    addTupleToFrame(writer);
-                } else if (propagateInput && retainNull) {
-                    tb.reset();
-                    copyPropagatedFields(tupleIndex);
-                    // Write only the bytes of the serialized null; the builder's array is used
-                    // together with an explicit size everywhere else, so it may be longer.
-                    dos.write(nullTupleBuild.getByteArray(), 0, nullTupleBuild.getSize());
-                    tb.addFieldEndOffset();
-                    addTupleToFrame(writer);
-                }
-            }
-        } catch (Exception e) {
-            // unexpected error, we try to close the input stream and throw an exception
-            try {
-                in.close();
-            } catch (IOException e1) {
-                // The original failure is the one reported to the caller; the close failure is only printed.
-                e1.printStackTrace();
-            }
-            throw new HyracksDataException(e);
-        }
-    }
-
-    /**
-     * Copies the propagated fields of the input tuple at {@code tupleIndex} into {@code tb},
-     * one output field per propagated field.
-     */
-    private void copyPropagatedFields(int tupleIndex) throws IOException {
-        frameTuple.reset(tupleAccessor, tupleIndex);
-        for (int i = 0; i < propagatedFields.length; i++) {
-            dos.write(frameTuple.getFieldData(propagatedFields[i]), frameTuple.getFieldStart(propagatedFields[i]),
-                    frameTuple.getFieldLength(propagatedFields[i]));
-            tb.addFieldEndOffset();
-        }
-    }
-
-    // For debugging: dumps the offsets and deserialized value of every field of every tuple to System.err
-    public void prettyPrint(FrameTupleAccessor tupleAccessor, RecordDescriptor recDesc) {
-        ByteBufferInputStream bbis = new ByteBufferInputStream();
-        DataInputStream dis = new DataInputStream(bbis);
-        int tc = tupleAccessor.getTupleCount();
-        System.err.println("TC: " + tc);
-        for (int i = 0; i < tc; ++i) {
-            System.err.print(
-                    i + ":(" + tupleAccessor.getTupleStartOffset(i) + ", " + tupleAccessor.getTupleEndOffset(i) + ")[");
-            for (int j = 0; j < tupleAccessor.getFieldCount(); ++j) {
-                System.err.print(j + ":(" + tupleAccessor.getFieldStartOffset(i, j) + ", "
-                        + tupleAccessor.getFieldEndOffset(i, j) + ") ");
-                System.err.print("{");
-                bbis.setByteBuffer(tupleAccessor.getBuffer(), tupleAccessor.getTupleStartOffset(i)
-                        + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(i, j));
-                try {
-                    byte tag = dis.readByte();
-                    if (tag == nullByte) {
-                        System.err.print("NULL");
-                    } else {
-                        // Rewind to the start of the field: the tag byte was consumed by the peek above
-                        bbis.setByteBuffer(tupleAccessor.getBuffer(), tupleAccessor.getTupleStartOffset(i)
-                                + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(i, j));
-                        System.err.print(recDesc.getFields()[j].deserialize(dis));
-                    }
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-                System.err.print("}");
-            }
-            System.err.println("]");
-        }
-    }
-
-    /**
-     * Adds the tuple held in {@code tb} to the output frame, flushing the frame to {@code writer}
-     * and retrying once if it is full.
-     *
-     * @throws IllegalStateException
-     *             if the tuple does not fit even after the frame was flushed
-     */
-    protected void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-            appender.flush(writer, true);
-            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-                throw new IllegalStateException("Record is too big to fit in a frame");
-            }
-        }
-
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedIndexingTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedIndexingTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedIndexingTupleParser.java
deleted file mode 100644
index 6abcbb8..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/AdmOrDelimitedIndexingTupleParser.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.InputStream;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.indexing.input.AbstractHDFSReader;
-import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
-import org.apache.asterix.om.base.AMutableInt32;
-import org.apache.asterix.om.base.AMutableInt64;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.asterix.runtime.operators.file.IDataParser;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-
-public class AdmOrDelimitedIndexingTupleParser implements ITupleParser {
-
-    private ArrayTupleBuilder tb;
-    private final FrameTupleAppender appender;
-    private final ARecordType recType;
-    private final IDataParser parser;
-    private final AMutableInt32 aMutableInt = new AMutableInt32(0);
-    private final AMutableInt64 aMutableLong = new AMutableInt64(0);
-
-    @SuppressWarnings("rawtypes")
-    private ISerializerDeserializer intSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT32);
-    @SuppressWarnings("rawtypes")
-    private ISerializerDeserializer longSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT64);
-
-    public AdmOrDelimitedIndexingTupleParser(IHyracksCommonContext ctx, ARecordType recType, IDataParser parser)
-            throws HyracksDataException {
-        this.parser = parser;
-        this.recType = recType;
-        appender = new FrameTupleAppender(new VSizeFrame(ctx));
-        tb = new ArrayTupleBuilder(3);
-    }
-
-    @Override
-    public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
-        // Cast the input stream to a record reader
-        AbstractHDFSReader inReader = (AbstractHDFSReader) in;
-        try {
-            parser.initialize(in, recType, true);
-            while (true) {
-                tb.reset();
-                if (!parser.parse(tb.getDataOutput())) {
-                    break;
-                }
-                tb.addFieldEndOffset();
-                appendIndexingData(tb, inReader);
-                addTupleToFrame(writer);
-            }
-            appender.flush(writer, true);
-        } catch (AsterixException ae) {
-            throw new HyracksDataException(ae);
-        } catch (Exception ioe) {
-            throw new HyracksDataException(ioe);
-        }
-    }
-
-    // This function is used to append RID to Hyracks tuple
-    @SuppressWarnings("unchecked")
-    private void appendIndexingData(ArrayTupleBuilder tb, AbstractHDFSReader inReader) throws Exception {
-        aMutableInt.setValue(inReader.getFileNumber());
-        aMutableLong.setValue(inReader.getReaderPosition());
-        tb.addField(intSerde, aMutableInt);
-        tb.addField(longSerde, aMutableLong);
-    }
-
-    private void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-            appender.flush(writer, true);
-            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-                throw new IllegalStateException("Record is too big to fit in a frame");
-            }
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
deleted file mode 100644
index 9271ebe..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.IOException;
-
-import org.apache.asterix.builders.RecordBuilder;
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.indexing.ExternalFile;
-import org.apache.asterix.external.indexing.FilesIndexDescription;
-import org.apache.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
-import org.apache.asterix.om.base.ADateTime;
-import org.apache.asterix.om.base.AInt64;
-import org.apache.asterix.om.base.AMutableDateTime;
-import org.apache.asterix.om.base.AMutableInt32;
-import org.apache.asterix.om.base.AMutableInt64;
-import org.apache.asterix.om.base.AMutableString;
-import org.apache.asterix.om.base.AString;
-import org.apache.asterix.om.types.BuiltinType;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
-import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-
-/**
- * Builds the files-index tuple for an {@link ExternalFile}: field 0 is the file number and
- * field 1 is a record holding the file name, size and modification time. The returned tuple
- * reference is reused between calls.
- */
-@SuppressWarnings("unchecked")
-public class FileIndexTupleTranslator {
-    private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
-    private ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(
-            filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFieldCount());
-    private RecordBuilder recordBuilder = new RecordBuilder();
-    private ArrayBackedValueStorage fieldValue = new ArrayBackedValueStorage();
-    private AMutableInt32 aInt32 = new AMutableInt32(0);
-    private AMutableInt64 aInt64 = new AMutableInt64(0);
-    private AMutableString aString = new AMutableString(null);
-    private AMutableDateTime aDateTime = new AMutableDateTime(0);
-    private ISerializerDeserializer<AString> stringSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ASTRING);
-    private ISerializerDeserializer<ADateTime> dateTimeSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.ADATETIME);
-    private ISerializerDeserializer<AInt64> longSerde = AqlSerializerDeserializerProvider.INSTANCE
-            .getSerializerDeserializer(BuiltinType.AINT64);
-    private ArrayTupleReference tuple = new ArrayTupleReference();
-
-    public ITupleReference getTupleFromFile(ExternalFile file) throws IOException, AsterixException {
-        tupleBuilder.reset();
-        writeFileNumber(file);
-        writeFileRecord(file);
-        tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
-        return tuple;
-    }
-
-    // Tuple field 0: the file number
-    private void writeFileNumber(ExternalFile file) throws IOException, AsterixException {
-        aInt32.setValue(file.getFileNumber());
-        filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFields()[0].serialize(aInt32,
-                tupleBuilder.getDataOutput());
-        tupleBuilder.addFieldEndOffset();
-    }
-
-    // Tuple field 1: the file record (name, size, modification date)
-    private void writeFileRecord(ExternalFile file) throws IOException, AsterixException {
-        recordBuilder.reset(filesIndexDescription.EXTERNAL_FILE_RECORD_TYPE);
-
-        // record field 0 (File Name)
-        fieldValue.reset();
-        aString.setValue(file.getFileName());
-        stringSerde.serialize(aString, fieldValue.getDataOutput());
-        recordBuilder.addField(0, fieldValue);
-
-        // record field 1 (File Size)
-        fieldValue.reset();
-        aInt64.setValue(file.getSize());
-        longSerde.serialize(aInt64, fieldValue.getDataOutput());
-        recordBuilder.addField(1, fieldValue);
-
-        // record field 2 (File Mod Date)
-        fieldValue.reset();
-        aDateTime.setValue(file.getLastModefiedTime().getTime());
-        dateTimeSerde.serialize(aDateTime, fieldValue.getDataOutput());
-        recordBuilder.addField(2, fieldValue);
-
-        // close the record and make it the second tuple field
-        recordBuilder.write(tupleBuilder.getDataOutput(), true);
-        tupleBuilder.addFieldEndOffset();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
deleted file mode 100644
index b38b835..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.util.Map;
-
-import org.apache.asterix.external.adapter.factory.HDFSAdapterFactory;
-import org.apache.asterix.external.adapter.factory.HDFSIndexingAdapterFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.runtime.operators.file.ADMDataParser;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.DelimitedDataParser;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-/**
- * This is the parser factory for parsers used to do indexing. The parser is chosen from the
- * content format (adm, delimited-text or binary) and, for binary content, from the file
- * input format (text, sequence or rc).
- */
-public class HDFSIndexingParserFactory implements ITupleParserFactory {
-
-    private static final long serialVersionUID = 1L;
-    // file input-format <text, seq, rc>
-    private final String inputFormat;
-    // content format <adm, delimited-text, binary>
-    private final String format;
-    // delimiter in case of delimited text
-    private final char delimiter;
-    // quote in case of delimited text
-    private final char quote;
-    // parser class name in case of binary format
-    private final String parserClassName;
-    // the expected data type
-    private final ARecordType atype;
-    // the hadoop job conf
-    private transient JobConf jobConf;
-    // adapter arguments
-    private Map<String, String> arguments;
-
-    public HDFSIndexingParserFactory(ARecordType atype, String inputFormat, String format, char delimiter, char quote,
-            String parserClassName) {
-        this.inputFormat = inputFormat;
-        this.format = format;
-        this.parserClassName = parserClassName;
-        this.delimiter = delimiter;
-        this.quote = quote;
-        this.atype = atype;
-    }
-
-    /**
-     * Creates the indexing tuple parser matching the configured formats.
-     *
-     * @throws IllegalArgumentException
-     *             if the data format or the input format is missing, if the input format is not
-     *             one of rc, text or sequence, or if binary content has no parser class name
-     * @throws HyracksDataException
-     *             if the binary object parser cannot be created or initialized
-     */
-    @Override
-    public ITupleParser createTupleParser(IHyracksCommonContext ctx) throws HyracksDataException {
-        if (format == null) {
-            throw new IllegalArgumentException("Unspecified data format");
-        }
-        if (inputFormat == null) {
-            throw new IllegalArgumentException("Unspecified input format");
-        }
-        if (!inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_RC)
-                && !inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_TEXT)
-                && !inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE)) {
-            throw new IllegalArgumentException("External Indexing not supported for format " + inputFormat);
-        }
-        /*
-         * Choices are:
-         * 1. TxtOrSeq (Object) indexing tuple parser
-         * 2. RC indexing tuple parser
-         * 3. textual data tuple parser
-         */
-        if (format.equalsIgnoreCase(AsterixTupleParserFactory.FORMAT_ADM)) {
-            // choice 3 with adm data parser
-            ADMDataParser dataParser = new ADMDataParser();
-            return new AdmOrDelimitedIndexingTupleParser(ctx, atype, dataParser);
-        } else if (format.equalsIgnoreCase(AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT)) {
-            // choice 3 with delimited data parser
-            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDelimitedDataParser(atype, delimiter, quote);
-            return new AdmOrDelimitedIndexingTupleParser(ctx, atype, dataParser);
-        }
-
-        // binary data with a special parser --> create the parser
-        IAsterixHDFSRecordParser objectParser = createObjectParser();
-        if (inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_RC)) {
-            // Case 2
-            return new RCFileIndexingTupleParser(ctx, atype, objectParser);
-        } else {
-            // Case 1
-            return new TextOrSeqIndexingTupleParser(ctx, atype, objectParser);
-        }
-    }
-
-    // Instantiates and initializes the record parser used for binary content
-    private IAsterixHDFSRecordParser createObjectParser() throws HyracksDataException {
-        if (parserClassName == null) {
-            // Report the missing setting instead of failing with a bare NullPointerException below
-            throw new IllegalArgumentException("Unspecified parser class for data format " + format);
-        }
-        IAsterixHDFSRecordParser objectParser;
-        if (parserClassName.equalsIgnoreCase(HDFSAdapterFactory.PARSER_HIVE)) {
-            objectParser = new HiveObjectParser();
-        } else {
-            try {
-                objectParser = (IAsterixHDFSRecordParser) Class.forName(parserClassName).newInstance();
-            } catch (Exception e) {
-                throw new HyracksDataException("Unable to create object parser", e);
-            }
-        }
-        try {
-            objectParser.initialize(atype, arguments, jobConf);
-        } catch (Exception e) {
-            throw new HyracksDataException("Unable to initialize object parser", e);
-        }
-        return objectParser;
-    }
-
-    public JobConf getJobConf() {
-        return jobConf;
-    }
-
-    public void setJobConf(JobConf jobConf) {
-        this.jobConf = jobConf;
-    }
-
-    public Map<String, String> getArguments() {
-        return arguments;
-    }
-
-    public void setArguments(Map<String, String> arguments) {
-        this.arguments = arguments;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapter.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
deleted file mode 100644
index d9ce7aa..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.ByteBuffer;
-import java.util.Map;
-
-import org.apache.asterix.external.adapter.factory.HDFSAdapterFactory;
-import org.apache.asterix.external.adapter.factory.HDFSIndexingAdapterFactory;
-import org.apache.asterix.external.dataset.adapter.IControlledAdapter;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.asterix.external.indexing.input.RCFileLookupReader;
-import org.apache.asterix.external.indexing.input.SequenceFileLookupInputStream;
-import org.apache.asterix.external.indexing.input.SequenceFileLookupReader;
-import org.apache.asterix.external.indexing.input.TextFileLookupInputStream;
-import org.apache.asterix.external.indexing.input.TextFileLookupReader;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.runtime.operators.file.ADMDataParser;
-import org.apache.asterix.runtime.operators.file.AsterixTupleParserFactory;
-import org.apache.asterix.runtime.operators.file.DelimitedDataParser;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
-public class HDFSLookupAdapter implements IControlledAdapter, Serializable {
-
-    private static final long serialVersionUID = 1L;
-
-    private RecordDescriptor inRecDesc;
-    private boolean propagateInput;
-    private int[] ridFields;
-    private int[] propagatedFields;
-    private IAType atype;
-    private Map<String, String> configuration;
-    private IHyracksTaskContext ctx;
-    private IControlledTupleParser parser;
-    private ExternalFileIndexAccessor fileIndexAccessor;
-    private boolean retainNull;
-
-    public HDFSLookupAdapter(IAType atype, RecordDescriptor inRecDesc, Map<String, String> adapterConfiguration,
-            boolean propagateInput, int[] ridFields, int[] propagatedFields, IHyracksTaskContext ctx,
-            ExternalFileIndexAccessor fileIndexAccessor, boolean retainNull) {
-        this.configuration = adapterConfiguration;
-        this.atype = atype;
-        this.ctx = ctx;
-        this.inRecDesc = inRecDesc;
-        this.propagatedFields = propagatedFields;
-        this.propagateInput = propagateInput;
-        this.propagatedFields = propagatedFields;
-        this.fileIndexAccessor = fileIndexAccessor;
-        this.ridFields = ridFields;
-        this.retainNull = retainNull;
-    }
-
-    /*
-     * This function is not easy to read and could be refactored into a better structure but for now it works
-     */
-    @Override
-    public void initialize(IHyracksTaskContext ctx, INullWriterFactory iNullWriterFactory) throws Exception {
-        JobConf jobConf = HDFSAdapterFactory.configureJobConf(configuration);
-        // Create the lookup reader and the controlled parser
-        if (configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_RC)) {
-            configureRCFile(jobConf, iNullWriterFactory);
-        } else if (configuration.get(AsterixTupleParserFactory.KEY_FORMAT)
-                .equals(AsterixTupleParserFactory.FORMAT_ADM)) {
-            // create an adm parser
-            ADMDataParser dataParser = new ADMDataParser();
-            if (configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT)) {
-                // Text input format
-                TextFileLookupInputStream in = new TextFileLookupInputStream(fileIndexAccessor, jobConf);
-                parser = new AdmOrDelimitedControlledTupleParser(ctx, (ARecordType) atype, in, propagateInput,
-                        inRecDesc, dataParser, propagatedFields, ridFields, retainNull, iNullWriterFactory);
-            } else {
-                // Sequence input format
-                SequenceFileLookupInputStream in = new SequenceFileLookupInputStream(fileIndexAccessor, jobConf);
-                parser = new AdmOrDelimitedControlledTupleParser(ctx, (ARecordType) atype, in, propagateInput,
-                        inRecDesc, dataParser, propagatedFields, ridFields, retainNull, iNullWriterFactory);
-            }
-        } else if (configuration.get(AsterixTupleParserFactory.KEY_FORMAT)
-                .equals(AsterixTupleParserFactory.FORMAT_DELIMITED_TEXT)) {
-            // create a delimited text parser
-            char delimiter = AsterixTupleParserFactory.getDelimiter(configuration);
-            char quote = AsterixTupleParserFactory.getQuote(configuration, delimiter);
-
-            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDelimitedDataParser((ARecordType) atype,
-                    delimiter, quote);
-            if (configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT)) {
-                // Text input format
-                TextFileLookupInputStream in = new TextFileLookupInputStream(fileIndexAccessor, jobConf);
-                parser = new AdmOrDelimitedControlledTupleParser(ctx, (ARecordType) atype, in, propagateInput,
-                        inRecDesc, dataParser, propagatedFields, ridFields, retainNull, iNullWriterFactory);
-            } else {
-                // Sequence input format
-                SequenceFileLookupInputStream in = new SequenceFileLookupInputStream(fileIndexAccessor, jobConf);
-                parser = new AdmOrDelimitedControlledTupleParser(ctx, (ARecordType) atype, in, propagateInput,
-                        inRecDesc, dataParser, propagatedFields, ridFields, retainNull, iNullWriterFactory);
-            }
-        } else {
-            configureGenericSeqOrText(jobConf, iNullWriterFactory);
-        }
-    }
-
-    private void configureGenericSeqOrText(JobConf jobConf, INullWriterFactory iNullWriterFactory) throws IOException {
-        if (configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT)) {
-            // Text input format
-            TextFileLookupReader reader = new TextFileLookupReader(fileIndexAccessor, jobConf);
-            parser = new SeqOrTxtControlledTupleParser(ctx, createRecordParser(jobConf), reader, propagateInput,
-                    propagatedFields, inRecDesc, ridFields, retainNull, iNullWriterFactory);
-        } else {
-            // Sequence input format
-            SequenceFileLookupReader reader = new SequenceFileLookupReader(fileIndexAccessor, jobConf);
-            parser = new SeqOrTxtControlledTupleParser(ctx, createRecordParser(jobConf), reader, propagateInput,
-                    propagatedFields, inRecDesc, ridFields, retainNull, iNullWriterFactory);
-        }
-    }
-
-    @Override
-    public void nextFrame(ByteBuffer buffer, IFrameWriter writer) throws Exception {
-        parser.parseNext(writer, buffer);
-    }
-
-    @Override
-    public void close(IFrameWriter writer) throws Exception {
-        parser.close(writer);
-    }
-
-    @Override
-    public void fail() throws Exception {
-        // Do nothing
-    }
-
-    private void configureRCFile(Configuration jobConf, INullWriterFactory iNullWriterFactory)
-            throws IOException, Exception {
-        // RCFileLookupReader
-        RCFileLookupReader reader = new RCFileLookupReader(fileIndexAccessor,
-                HDFSAdapterFactory.configureJobConf(configuration));
-        parser = new RCFileControlledTupleParser(ctx, createRecordParser(jobConf), reader, propagateInput,
-                propagatedFields, inRecDesc, ridFields, retainNull, iNullWriterFactory);
-    }
-
-    private IAsterixHDFSRecordParser createRecordParser(Configuration jobConf) throws HyracksDataException {
-        // Create the record parser
-        // binary data with a special parser --> create the parser
-        IAsterixHDFSRecordParser objectParser;
-        if (configuration.get(HDFSAdapterFactory.KEY_PARSER).equals(HDFSAdapterFactory.PARSER_HIVE)) {
-            objectParser = new HiveObjectParser();
-        } else {
-            try {
-                objectParser = (IAsterixHDFSRecordParser) Class
-                        .forName(configuration.get(HDFSAdapterFactory.KEY_PARSER)).newInstance();
-            } catch (Exception e) {
-                throw new HyracksDataException("Unable to create object parser", e);
-            }
-        }
-        // initialize the parser
-        try {
-            objectParser.initialize((ARecordType) atype, configuration, jobConf);
-        } catch (Exception e) {
-            throw new HyracksDataException("Unable to initialize object parser", e);
-        }
-
-        return objectParser;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java
deleted file mode 100644
index fab507d..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.util.Map;
-
-import org.apache.asterix.external.adapter.factory.IControlledAdapterFactory;
-import org.apache.asterix.external.dataset.adapter.IControlledAdapter;
-import org.apache.asterix.external.indexing.ExternalFileIndexAccessor;
-import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.api.context.IHyracksTaskContext;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-
-// This class takes care of creating the adapter based on the formats and input format
-public class HDFSLookupAdapterFactory implements IControlledAdapterFactory {
-
-    private static final long serialVersionUID = 1L;
-
-    private Map<String, String> adapterConfiguration;
-    private IAType atype;
-    private boolean propagateInput;
-    private int[] ridFields;
-    private int[] propagatedFields;
-    private boolean retainNull;
-
-    @Override
-    public void configure(IAType atype, boolean propagateInput, int[] ridFields,
-            Map<String, String> adapterConfiguration, boolean retainNull) {
-        this.adapterConfiguration = adapterConfiguration;
-        this.atype = atype;
-        this.propagateInput = propagateInput;
-        this.ridFields = ridFields;
-        this.retainNull = retainNull;
-    }
-
-    @Override
-    public IControlledAdapter createAdapter(IHyracksTaskContext ctx, ExternalFileIndexAccessor fileIndexAccessor,
-            RecordDescriptor inRecDesc) {
-        if (propagateInput) {
-            configurePropagatedFields(inRecDesc);
-        }
-        return new HDFSLookupAdapter(atype, inRecDesc, adapterConfiguration, propagateInput, ridFields,
-                propagatedFields, ctx, fileIndexAccessor, retainNull);
-    }
-
-    private void configurePropagatedFields(RecordDescriptor inRecDesc) {
-        int ptr = 0;
-        boolean skip = false;
-        propagatedFields = new int[inRecDesc.getFieldCount() - ridFields.length];
-        for (int i = 0; i < inRecDesc.getFieldCount(); i++) {
-            if (ptr < ridFields.length) {
-                skip = false;
-                for (int j = 0; j < ridFields.length; j++) {
-                    if (ridFields[j] == i) {
-                        ptr++;
-                        skip = true;
-                        break;
-                    }
-                }
-                if (!skip)
-                    propagatedFields[i - ptr] = i;
-            } else {
-                propagatedFields[i - ptr] = i;
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParser.java
deleted file mode 100644
index f42a6d1..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParser.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.io.InputStream;
-
-import org.apache.asterix.common.exceptions.AsterixException;
-import org.apache.asterix.external.indexing.input.AbstractHDFSReader;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.comm.IFrameWriter;
-import org.apache.hyracks.api.comm.VSizeFrame;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-
-/*
- * This tuple parser is used with objects read from HDFS (via an AbstractHDFSReader) rather than with raw HDFS stream data
- */
-public class HDFSObjectTupleParser implements ITupleParser{
-
-    private ArrayTupleBuilder tb;
-    private final FrameTupleAppender appender;
-    private IAsterixHDFSRecordParser deserializer;
-
-    public HDFSObjectTupleParser(IHyracksCommonContext ctx, ARecordType recType, IAsterixHDFSRecordParser deserializer)
-            throws HyracksDataException {
-        appender = new FrameTupleAppender(new VSizeFrame(ctx));
-        this.deserializer = deserializer;
-        tb = new ArrayTupleBuilder(1);
-    }
-
-    @Override
-    public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
-        AbstractHDFSReader reader = (AbstractHDFSReader) in;
-        Object object;
-        try {
-            reader.initialize();
-            object = reader.readNext();
-            while (object!= null) {
-                tb.reset();
-                deserializer.parse(object, tb.getDataOutput());
-                tb.addFieldEndOffset();
-                addTupleToFrame(writer);
-                object = reader.readNext();
-            }
-            appender.flush(writer, true);
-        } catch (AsterixException ae) {
-            throw new HyracksDataException(ae);
-        } catch (Exception e) {
-            throw new HyracksDataException(e);
-        }
-    }
-
-    protected void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
-        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-            appender.flush(writer, true);
-            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
-                throw new IllegalStateException();
-            }
-        }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParserFactory.java
deleted file mode 100644
index ac3a92f..0000000
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSObjectTupleParserFactory.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.indexing.dataflow;
-
-import java.util.Map;
-
-import org.apache.asterix.external.adapter.factory.HDFSAdapterFactory;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.hyracks.api.context.IHyracksCommonContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.dataflow.std.file.ITupleParser;
-import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;
-
-public class HDFSObjectTupleParserFactory implements ITupleParserFactory{
-    private static final long serialVersionUID = 1L;
-    // parser class name in case of binary format
-    private String parserClassName;
-    // the expected data type
-    private ARecordType atype;
-    // the adapter factory that provides the hadoop job conf
-    private HDFSAdapterFactory adapterFactory;
-    // adapter arguments
-    private Map<String,String> arguments;
-
-    public HDFSObjectTupleParserFactory(ARecordType atype, HDFSAdapterFactory adapterFactory, Map<String,String> arguments){
-        this.parserClassName = (String) arguments.get(HDFSAdapterFactory.KEY_PARSER);
-        this.atype = atype;
-        this.arguments = arguments;
-        this.adapterFactory = adapterFactory;
-    }
-
-    @Override
-    public ITupleParser createTupleParser(IHyracksCommonContext ctx) throws HyracksDataException {
-        IAsterixHDFSRecordParser objectParser;
-        if (parserClassName.equals(HDFSAdapterFactory.PARSER_HIVE)) {
-            objectParser = new HiveObjectParser();
-        } else {
-            try {
-                objectParser = (IAsterixHDFSRecordParser) Class.forName(parserClassName).newInstance();
-            } catch (Exception e) {
-                throw new HyracksDataException("Unable to create object parser", e);
-            }
-        }
-        try {
-            objectParser.initialize(atype, arguments, adapterFactory.getJobConf());
-        } catch (Exception e) {
-            throw new HyracksDataException("Unable to initialize object parser", e);
-        }
-
-        return new HDFSObjectTupleParser(ctx, atype, objectParser);
-    }
-
-}


[09/21] incubator-asterixdb git commit: First stage of external data cleanup

Posted by am...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
new file mode 100644
index 0000000..146064a
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Map;
+
+import org.apache.asterix.builders.IARecordBuilder;
+import org.apache.asterix.builders.RecordBuilder;
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.dataflow.data.nontagged.serde.ANullSerializerDeserializer;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.base.ANull;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.util.NonTaggedFormatUtil;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
+import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
+
+public class DelimitedDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
+
+    private final IValueParserFactory[] valueParserFactories;
+    private final char fieldDelimiter;
+    private final char quote;
+    private final boolean hasHeader;
+    private ARecordType recordType;
+    private IARecordBuilder recBuilder;
+    private ArrayBackedValueStorage fieldValueBuffer;
+    private DataOutput fieldValueBufferOutput;
+    private IValueParser[] valueParsers;
+    private FieldCursorForDelimitedDataParser cursor;
+    private byte[] fieldTypeTags;
+    private int[] fldIds;
+    private ArrayBackedValueStorage[] nameBuffers;
+    private boolean areAllNullFields;
+    private boolean isStreamParser = true;
+
+    public DelimitedDataParser(IValueParserFactory[] valueParserFactories, char fieldDelimter, char quote,
+            boolean hasHeader) {
+        this.valueParserFactories = valueParserFactories;
+        this.fieldDelimiter = fieldDelimter;
+        this.quote = quote;
+        this.hasHeader = hasHeader;
+    }
+
+    @Override
+    public boolean parse(DataOutput out) throws AsterixException, IOException {
+        while (cursor.nextRecord()) {
+            parseRecord(out);
+            if (!areAllNullFields) {
+                recBuilder.write(out, true);
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private void parseRecord(DataOutput out) throws AsterixException, IOException {
+        recBuilder.reset(recordType);
+        recBuilder.init();
+        areAllNullFields = true;
+
+        for (int i = 0; i < valueParsers.length; ++i) {
+            if (!cursor.nextField()) {
+                break;
+            }
+            fieldValueBuffer.reset();
+
+            if (cursor.fStart == cursor.fEnd && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
+                    && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) {
+                // if the field is empty and the type is optional, insert
+                // NULL. Note that string type can also process empty field as an
+                // empty string
+                if (!NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i])) {
+                    throw new AsterixException("At record: " + cursor.recordCount + " - Field " + cursor.fieldCount
+                            + " is not an optional type so it cannot accept null value. ");
+                }
+                fieldValueBufferOutput.writeByte(ATypeTag.NULL.serialize());
+                ANullSerializerDeserializer.INSTANCE.serialize(ANull.NULL, out);
+            } else {
+                fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
+                // Eliminate double quotes in the field that we are going to parse
+                if (cursor.isDoubleQuoteIncludedInThisField) {
+                    cursor.eliminateDoubleQuote(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart);
+                    cursor.fEnd -= cursor.doubleQuoteCount;
+                    cursor.isDoubleQuoteIncludedInThisField = false;
+                }
+                valueParsers[i].parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart,
+                        fieldValueBufferOutput);
+                areAllNullFields = false;
+            }
+            if (fldIds[i] < 0) {
+                recBuilder.addField(nameBuffers[i], fieldValueBuffer);
+            } else {
+                recBuilder.addField(fldIds[i], fieldValueBuffer);
+            }
+        }
+    }
+
+    protected void fieldNameToBytes(String fieldName, AMutableString str, ArrayBackedValueStorage buffer)
+            throws HyracksDataException {
+        buffer.reset();
+        DataOutput out = buffer.getDataOutput();
+        str.setValue(fieldName);
+        try {
+            stringSerde.serialize(str, out);
+        } catch (IOException e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return isStreamParser ? DataSourceType.STREAM : DataSourceType.RECORDS;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType recordType) throws HyracksDataException {
+        this.recordType = recordType;
+        valueParsers = new IValueParser[valueParserFactories.length];
+        for (int i = 0; i < valueParserFactories.length; ++i) {
+            valueParsers[i] = valueParserFactories[i].createValueParser();
+        }
+
+        fieldValueBuffer = new ArrayBackedValueStorage();
+        fieldValueBufferOutput = fieldValueBuffer.getDataOutput();
+        recBuilder = new RecordBuilder();
+        recBuilder.reset(recordType);
+        recBuilder.init();
+
+        int n = recordType.getFieldNames().length;
+        fieldTypeTags = new byte[n];
+        for (int i = 0; i < n; i++) {
+            ATypeTag tag = recordType.getFieldTypes()[i].getTypeTag();
+            fieldTypeTags[i] = tag.serialize();
+        }
+
+        fldIds = new int[n];
+        nameBuffers = new ArrayBackedValueStorage[n];
+        AMutableString str = new AMutableString(null);
+        for (int i = 0; i < n; i++) {
+            String name = recordType.getFieldNames()[i];
+            fldIds[i] = recBuilder.getFieldId(name);
+            if (fldIds[i] < 0) {
+                if (!recordType.isOpen()) {
+                    throw new HyracksDataException("Illegal field " + name + " in closed type " + recordType);
+                } else {
+                    nameBuffers[i] = new ArrayBackedValueStorage();
+                    fieldNameToBytes(name, str, nameBuffers[i]);
+                }
+            }
+        }
+        isStreamParser = ExternalDataUtils.isDataSourceStreamProvider(configuration);
+        if (!isStreamParser) {
+            cursor = new FieldCursorForDelimitedDataParser(null, fieldDelimiter, quote);
+        }
+    }
+
+    @Override
+    public void parse(IRawRecord<? extends char[]> record, DataOutput out) throws Exception {
+        cursor.nextRecord(record.get(), record.size());
+        parseRecord(out);
+        if (!areAllNullFields) {
+            recBuilder.write(out, true);
+        }
+    }
+
+    @Override
+    public Class<? extends char[]> getRecordClass() {
+        return char[].class;
+    }
+
+    @Override
+    public void setInputStream(InputStream in) throws Exception {
+        cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote);
+        if (in != null && hasHeader) {
+            cursor.nextRecord();
+            while (cursor.nextField());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/HiveRecordParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/HiveRecordParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/HiveRecordParser.java
new file mode 100644
index 0000000..fb61339
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/HiveRecordParser.java
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.asterix.builders.IARecordBuilder;
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.builders.RecordBuilder;
+import org.apache.asterix.builders.UnorderedListBuilder;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
+import org.apache.asterix.om.types.AUnorderedListType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.om.util.NonTaggedFormatUtil;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.util.string.UTF8StringWriter;
+
+/**
+ * An {@link IRecordDataParser} that converts Hive records into ADM records.
+ * <p>
+ * Each incoming {@link Writable} is deserialized with the Hive {@link SerDe} whose class name is given by
+ * {@link ExternalDataConstants#KEY_HIVE_SERDE}. The struct fields of the deserialized object are then written,
+ * position by position, into a record of the configured {@link ARecordType}. Supported field types are the
+ * primitives handled by {@code parseItem} plus ordered and unordered lists of those primitives. A field may be
+ * declared optional (a nullable union); a Hive null in such a field is stored as an ADM null.
+ * <p>
+ * Instances reuse internal buffers and builders between calls.
+ */
+@SuppressWarnings("deprecation")
+public class HiveRecordParser implements IRecordDataParser<Writable> {
+
+    // Holds the serialized form (type tag followed by value) of the field currently being parsed.
+    private final ArrayBackedValueStorage fieldValueBuffer = new ArrayBackedValueStorage();
+    // Holds the serialized form (type tag followed by value) of the list item currently being parsed.
+    private final ArrayBackedValueStorage listItemBuffer = new ArrayBackedValueStorage();
+    private final UTF8StringWriter utf8Writer = new UTF8StringWriter();
+
+    private ARecordType aRecord;
+    private SerDe hiveSerde;
+    private StructObjectInspector oi;
+    private IARecordBuilder recBuilder;
+    // Per field: serialized tag of the actual (union-unwrapped) type.
+    private byte[] fieldTypeTags;
+    // Per field: the actual (union-unwrapped) type.
+    private IAType[] fieldTypes;
+    // Per field: whether the declared type is an optional union, i.e. whether null values are accepted.
+    private boolean[] fieldNullable;
+    private OrderedListBuilder orderedListBuilder;
+    private UnorderedListBuilder unorderedListBuilder;
+    private List<? extends StructField> fieldRefs;
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    /**
+     * Instantiates and initializes the Hive serde for the given record type and precomputes the per-field
+     * type information used by {@link #parse(IRawRecord, DataOutput)}.
+     *
+     * @param configuration
+     *            adapter configuration; must contain {@link ExternalDataConstants#KEY_HIVE_SERDE}
+     * @param recordType
+     *            the type of the records to produce; must declare at least one field
+     * @throws HyracksDataException
+     *             wrapping any failure, including a missing serde class name or an unsupported field type
+     */
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType recordType) throws HyracksDataException {
+        try {
+            this.aRecord = recordType;
+            JobConf hadoopConfiguration = HDFSUtils.configureHDFSJobConf(configuration);
+            // Describe the record type to the serde as a hive table schema.
+            Properties tbl = new Properties();
+            tbl.put(Constants.LIST_COLUMNS, getCommaDelimitedColNames(aRecord));
+            tbl.put(Constants.LIST_COLUMN_TYPES, getColTypes(aRecord));
+            String hiveSerdeClassName = configuration.get(ExternalDataConstants.KEY_HIVE_SERDE);
+            if (hiveSerdeClassName == null) {
+                throw new IllegalArgumentException("no hive serde provided for hive deserialized records");
+            }
+            hiveSerde = (SerDe) Class.forName(hiveSerdeClassName).newInstance();
+            hiveSerde.initialize(hadoopConfiguration, tbl);
+            oi = (StructObjectInspector) hiveSerde.getObjectInspector();
+            fieldRefs = oi.getAllStructFieldRefs();
+
+            recBuilder = new RecordBuilder();
+            recBuilder.reset(aRecord);
+            recBuilder.init();
+
+            // Resolve optional unions once, so parsing dispatches on (and tags values with) the actual type.
+            IAType[] declaredTypes = aRecord.getFieldTypes();
+            int n = aRecord.getFieldNames().length;
+            fieldTypes = new IAType[n];
+            fieldTypeTags = new byte[n];
+            fieldNullable = new boolean[n];
+            for (int i = 0; i < n; i++) {
+                fieldNullable[i] = declaredTypes[i].getTypeTag() == ATypeTag.UNION;
+                fieldTypes[i] = getActualType(declaredTypes[i]);
+                fieldTypeTags[i] = fieldTypes[i].getTypeTag().serialize();
+            }
+        } catch (Exception e) {
+            throw new HyracksDataException(e);
+        }
+    }
+
+    /**
+     * Deserializes one Hive record and writes it to {@code out} as a tagged ADM record.
+     *
+     * @param record
+     *            the raw Hive record
+     * @param out
+     *            the output the serialized record is written to
+     * @throws Exception
+     *             if deserialization fails, a non-optional field is null, or a field has an unsupported type
+     */
+    @Override
+    public void parse(IRawRecord<? extends Writable> record, DataOutput out) throws Exception {
+        Writable hiveRawRecord = record.get();
+        Object hiveObject = hiveSerde.deserialize(hiveRawRecord);
+        List<Object> attributesValues = oi.getStructFieldsDataAsList(hiveObject);
+        recBuilder.reset(aRecord);
+        recBuilder.init();
+        for (int i = 0; i < fieldTypes.length; i++) {
+            final Object value = attributesValues.get(i);
+            fieldValueBuffer.reset();
+            final DataOutput dataOutput = fieldValueBuffer.getDataOutput();
+            if (value == null) {
+                if (!fieldNullable[i]) {
+                    throw new HyracksDataException(
+                            "can't parse null field " + aRecord.getFieldNames()[i] + " of non-optional type");
+                }
+                // A null value is represented by the NULL type tag alone.
+                dataOutput.writeByte(ATypeTag.NULL.serialize());
+            } else {
+                // The type tag goes first; the parse methods append only the raw value.
+                dataOutput.writeByte(fieldTypeTags[i]);
+                parseItem(fieldTypes[i], value, fieldRefs.get(i).getFieldObjectInspector(), dataOutput, false);
+            }
+            recBuilder.addField(i, fieldValueBuffer);
+        }
+        recBuilder.write(out, true);
+    }
+
+    /**
+     * Writes the untagged serialized form of {@code value} to {@code dataOutput}, dispatching on the ADM type.
+     *
+     * @param itemType
+     *            the (non-union) ADM type of the value
+     * @param value
+     *            the non-null Hive value
+     * @param foi
+     *            the Hive object inspector for the value; it is cast to the inspector matching the ADM type
+     * @param dataOutput
+     *            the output to append to; the caller has already written the value's type tag
+     * @param primitiveOnly
+     *            when {@code true}, list types are rejected (used for list items)
+     */
+    private void parseItem(IAType itemType, Object value, ObjectInspector foi, DataOutput dataOutput,
+            boolean primitiveOnly) throws IOException {
+        switch (itemType.getTypeTag()) {
+            case BOOLEAN:
+                parseBoolean(value, (BooleanObjectInspector) foi, dataOutput);
+                break;
+            case TIME:
+                parseTime(value, (TimestampObjectInspector) foi, dataOutput);
+                break;
+            case DATE:
+                parseDate(value, (TimestampObjectInspector) foi, dataOutput);
+                break;
+            case DATETIME:
+                parseDateTime(value, (TimestampObjectInspector) foi, dataOutput);
+                break;
+            case DOUBLE:
+                parseDouble(value, (DoubleObjectInspector) foi, dataOutput);
+                break;
+            case FLOAT:
+                parseFloat(value, (FloatObjectInspector) foi, dataOutput);
+                break;
+            case INT8:
+                parseInt8(value, (ByteObjectInspector) foi, dataOutput);
+                break;
+            case INT16:
+                parseInt16(value, (ShortObjectInspector) foi, dataOutput);
+                break;
+            case INT32:
+                parseInt32(value, (IntObjectInspector) foi, dataOutput);
+                break;
+            case INT64:
+                parseInt64(value, (LongObjectInspector) foi, dataOutput);
+                break;
+            case STRING:
+                parseString(value, (StringObjectInspector) foi, dataOutput);
+                break;
+            case ORDEREDLIST:
+                if (primitiveOnly) {
+                    throw new HyracksDataException("doesn't support hive data with list of non-primitive types");
+                }
+                parseOrderedList((AOrderedListType) itemType, value, (ListObjectInspector) foi, dataOutput);
+                break;
+            case UNORDEREDLIST:
+                if (primitiveOnly) {
+                    throw new HyracksDataException("doesn't support hive data with list of non-primitive types");
+                }
+                parseUnorderedList((AUnorderedListType) itemType, value, (ListObjectInspector) foi, dataOutput);
+                break;
+            default:
+                throw new HyracksDataException("Can't get hive type for field of type " + itemType.getTypeTag());
+        }
+    }
+
+    @Override
+    public Class<? extends Writable> getRecordClass() {
+        return Writable.class;
+    }
+
+    /**
+     * Returns the type to parse a field as: the type itself, or for an optional union the type it wraps.
+     *
+     * @throws NotImplementedException
+     *             if the type is a union that is not an optional type
+     */
+    private static IAType getActualType(IAType type) {
+        if (type.getTypeTag() != ATypeTag.UNION) {
+            return type;
+        }
+        if (!NonTaggedFormatUtil.isOptional(type)) {
+            throw new NotImplementedException("Non-optional UNION type is not supported.");
+        }
+        return ((AUnionType) type).getNullableType();
+    }
+
+    /**
+     * Builds the comma-separated list of hive type names for the fields of {@code record}.
+     */
+    private String getColTypes(ARecordType record) throws Exception {
+        final IAType[] types = record.getFieldTypes();
+        if (types.length < 1) {
+            throw new HyracksDataException("Failed to get columns of record");
+        }
+        StringBuilder cols = new StringBuilder(getHiveTypeString(types, 0));
+        for (int i = 1; i < types.length; i++) {
+            cols.append(',').append(getHiveTypeString(types, i));
+        }
+        return cols.toString();
+    }
+
+    /**
+     * Builds the comma-separated list of the field names of {@code record}.
+     */
+    private String getCommaDelimitedColNames(ARecordType record) throws Exception {
+        final String[] names = record.getFieldNames();
+        if (names.length < 1) {
+            throw new HyracksDataException("Can't deserialize hive records with no closed columns");
+        }
+        StringBuilder cols = new StringBuilder(names[0]);
+        for (int i = 1; i < names.length; i++) {
+            cols.append(',').append(names[i]);
+        }
+        return cols.toString();
+    }
+
+    /**
+     * Maps the ADM type of field {@code i} to the corresponding hive type name. Optional types map to the
+     * hive type of the type they wrap.
+     */
+    private String getHiveTypeString(IAType[] types, int i) throws Exception {
+        ATypeTag tag = getActualType(types[i]).getTypeTag();
+        if (tag == null) {
+            throw new NotImplementedException("Failed to get the type information for field " + i + ".");
+        }
+        switch (tag) {
+            case BOOLEAN:
+                return Constants.BOOLEAN_TYPE_NAME;
+            case DATE:
+                return Constants.DATE_TYPE_NAME;
+            case DATETIME:
+                return Constants.DATETIME_TYPE_NAME;
+            case DOUBLE:
+                return Constants.DOUBLE_TYPE_NAME;
+            case FLOAT:
+                return Constants.FLOAT_TYPE_NAME;
+            case INT16:
+                return Constants.SMALLINT_TYPE_NAME;
+            case INT32:
+                return Constants.INT_TYPE_NAME;
+            case INT64:
+                return Constants.BIGINT_TYPE_NAME;
+            case INT8:
+                return Constants.TINYINT_TYPE_NAME;
+            case ORDEREDLIST:
+                return Constants.LIST_TYPE_NAME;
+            case STRING:
+                return Constants.STRING_TYPE_NAME;
+            case TIME:
+                return Constants.DATETIME_TYPE_NAME;
+            case UNORDEREDLIST:
+                return Constants.LIST_TYPE_NAME;
+            default:
+                throw new HyracksDataException("Can't get hive type for field of type " + tag);
+        }
+    }
+
+    // The parse* methods below append the raw (untagged) value; null values are rejected by their callers.
+
+    private void parseInt64(Object obj, LongObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeLong(foi.get(obj));
+    }
+
+    private void parseInt32(Object obj, IntObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeInt(foi.get(obj));
+    }
+
+    private void parseInt16(Object obj, ShortObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeShort(foi.get(obj));
+    }
+
+    private void parseInt8(Object obj, ByteObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeByte(foi.get(obj));
+    }
+
+    private void parseFloat(Object obj, FloatObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeFloat(foi.get(obj));
+    }
+
+    private void parseDouble(Object obj, DoubleObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeDouble(foi.get(obj));
+    }
+
+    private void parseBoolean(Object obj, BooleanObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeBoolean(foi.get(obj));
+    }
+
+    private void parseString(Object obj, StringObjectInspector foi, DataOutput dataOutput) throws IOException {
+        utf8Writer.writeUTF8(foi.getPrimitiveJavaObject(obj), dataOutput);
+    }
+
+    /** Writes the timestamp as milliseconds since the epoch. */
+    private void parseDateTime(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
+        dataOutput.writeLong(foi.getPrimitiveJavaObject(obj).getTime());
+    }
+
+    /** Writes the timestamp as whole days since the epoch, rounding towards negative infinity. */
+    private void parseDate(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
+        long chrononTimeInMs = foi.getPrimitiveJavaObject(obj).getTime();
+        long days = chrononTimeInMs / GregorianCalendarSystem.CHRONON_OF_DAY;
+        // Integer division truncates towards zero; step back one day for pre-epoch instants inside a day.
+        if (chrononTimeInMs < 0 && chrononTimeInMs % GregorianCalendarSystem.CHRONON_OF_DAY != 0) {
+            days--;
+        }
+        dataOutput.writeInt((int) days);
+    }
+
+    /** Writes the timestamp's millisecond offset within its day, always in the range [0, one day). */
+    private void parseTime(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {
+        long millisOfDay = foi.getPrimitiveJavaObject(obj).getTime() % GregorianCalendarSystem.CHRONON_OF_DAY;
+        // The remainder is negative for pre-epoch timestamps; shift it into the valid time-of-day range.
+        if (millisOfDay < 0) {
+            millisOfDay += GregorianCalendarSystem.CHRONON_OF_DAY;
+        }
+        dataOutput.writeInt((int) millisOfDay);
+    }
+
+    /**
+     * Appends an ordered list of primitive items to {@code dataOutput}.
+     * <p>
+     * The list's own type tag is not written because the caller has already written it.
+     */
+    private void parseOrderedList(AOrderedListType aOrderedListType, Object obj, ListObjectInspector foi,
+            DataOutput dataOutput) throws IOException {
+        OrderedListBuilder listBuilder = getOrderedListBuilder();
+        IAType itemType = aOrderedListType.getItemType();
+        byte tagByte = itemType.getTypeTag().serialize();
+        listBuilder.reset(aOrderedListType);
+
+        ObjectInspector eoi = foi.getListElementObjectInspector();
+        int n = foi.getListLength(obj);
+        for (int i = 0; i < n; i++) {
+            Object element = foi.getListElement(obj, i);
+            if (element == null) {
+                throw new HyracksDataException("can't parse hive list with null values");
+            }
+            // Each item is handed to the builder as a fresh tag + value pair.
+            listItemBuffer.reset();
+            final DataOutput itemOutput = listItemBuffer.getDataOutput();
+            itemOutput.writeByte(tagByte);
+            parseItem(itemType, element, eoi, itemOutput, true);
+            listBuilder.addItem(listItemBuffer);
+        }
+        listBuilder.write(dataOutput, false);
+    }
+
+    /**
+     * Appends an unordered list of primitive items to {@code dataOutput}.
+     * <p>
+     * The list's own type tag is not written because the caller has already written it.
+     */
+    private void parseUnorderedList(AUnorderedListType uoltype, Object obj, ListObjectInspector oi,
+            DataOutput dataOutput) throws IOException {
+        UnorderedListBuilder listBuilder = getUnorderedListBuilder();
+        IAType itemType = uoltype.getItemType();
+        byte tagByte = itemType.getTypeTag().serialize();
+        listBuilder.reset(uoltype);
+
+        ObjectInspector eoi = oi.getListElementObjectInspector();
+        int n = oi.getListLength(obj);
+        for (int i = 0; i < n; i++) {
+            Object element = oi.getListElement(obj, i);
+            if (element == null) {
+                throw new HyracksDataException("can't parse hive list with null values");
+            }
+            // Each item is handed to the builder as a fresh tag + value pair.
+            listItemBuffer.reset();
+            final DataOutput itemOutput = listItemBuffer.getDataOutput();
+            itemOutput.writeByte(tagByte);
+            parseItem(itemType, element, eoi, itemOutput, true);
+            listBuilder.addItem(listItemBuffer);
+        }
+        listBuilder.write(dataOutput, false);
+    }
+
+    /** Returns the ordered list builder, creating it on first use. */
+    private OrderedListBuilder getOrderedListBuilder() {
+        if (orderedListBuilder == null) {
+            orderedListBuilder = new OrderedListBuilder();
+        }
+        return orderedListBuilder;
+    }
+
+    /** Returns the unordered list builder, creating it on first use. */
+    private UnorderedListBuilder getUnorderedListBuilder() {
+        if (unorderedListBuilder == null) {
+            unorderedListBuilder = new UnorderedListBuilder();
+        }
+        return unorderedListBuilder;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/RSSParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/RSSParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/RSSParser.java
new file mode 100644
index 0000000..4d93dc5
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/RSSParser.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.builders.RecordBuilder;
+import org.apache.asterix.external.api.IDataParser;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.om.base.AMutableRecord;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+import com.sun.syndication.feed.synd.SyndEntryImpl;
+
+/**
+ * An {@link IRecordDataParser} that converts RSS feed entries into records with four string fields, in this
+ * order: a generated id, the entry title, the entry description and the entry link.
+ */
+public class RSSParser implements IRecordDataParser<SyndEntryImpl> {
+    // Number of values extracted from every feed entry: id, title, description, link.
+    private static final int NUM_FIELDS = 4;
+
+    // Sequence number appended to idPrefix to form the id of the next record.
+    private long id = 0;
+    // NOTE(review): never assigned in this class, so generated ids currently start with "null:".
+    private String idPrefix;
+    private AMutableString[] mutableFields;
+    private String[] tupleFieldValues;
+    private AMutableRecord mutableRecord;
+    private final RecordBuilder recordBuilder = new RecordBuilder();
+    private int numFields;
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    /**
+     * Allocates the reusable record and field holders for the given record type.
+     *
+     * @param configuration
+     *            adapter configuration; not read by this parser
+     * @param recordType
+     *            the type of the records to produce; must declare exactly four fields
+     * @throws HyracksDataException
+     *             if the record type does not declare exactly four fields
+     */
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType recordType)
+            throws HyracksDataException, IOException {
+        numFields = recordType.getFieldNames().length;
+        // parse() fills positions 0..3 and copies numFields values; any other arity would only fail later
+        // with an ArrayIndexOutOfBoundsException on the first parsed entry.
+        if (numFields != NUM_FIELDS) {
+            throw new HyracksDataException("RSS records require a type with exactly " + NUM_FIELDS
+                    + " fields (id, title, description, link) but the given type has " + numFields);
+        }
+        mutableFields = new AMutableString[NUM_FIELDS];
+        for (int i = 0; i < NUM_FIELDS; i++) {
+            mutableFields[i] = new AMutableString(null);
+        }
+        mutableRecord = new AMutableRecord(recordType, mutableFields);
+        tupleFieldValues = new String[NUM_FIELDS];
+    }
+
+    /**
+     * Writes one feed entry to {@code out} as a record and advances the id sequence.
+     *
+     * @param record
+     *            the raw record wrapping the feed entry
+     * @param out
+     *            the output the serialized record is written to
+     */
+    @Override
+    public void parse(IRawRecord<? extends SyndEntryImpl> record, DataOutput out) throws Exception {
+        SyndEntryImpl entry = record.get();
+        tupleFieldValues[0] = idPrefix + ":" + id;
+        tupleFieldValues[1] = entry.getTitle();
+        tupleFieldValues[2] = entry.getDescription().getValue();
+        tupleFieldValues[3] = entry.getLink();
+        for (int i = 0; i < numFields; i++) {
+            mutableFields[i].setValue(tupleFieldValues[i]);
+            mutableRecord.setValueAtPos(i, mutableFields[i]);
+        }
+        recordBuilder.reset(mutableRecord.getType());
+        recordBuilder.init();
+        IDataParser.writeRecord(mutableRecord, out, recordBuilder);
+        id++;
+    }
+
+    @Override
+    public Class<? extends SyndEntryImpl> getRecordClass() {
+        return SyndEntryImpl.class;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
new file mode 100644
index 0000000..b9cd60b
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/TweetParser.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.asterix.builders.RecordBuilder;
+import org.apache.asterix.external.api.IDataParser;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.library.java.JObjectUtil;
+import org.apache.asterix.external.util.Datatypes.Tweet;
+import org.apache.asterix.om.base.AMutableDouble;
+import org.apache.asterix.om.base.AMutableInt32;
+import org.apache.asterix.om.base.AMutableRecord;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+import twitter4j.Status;
+import twitter4j.User;
+
+public class TweetParser implements IRecordDataParser<Status> {
+
+    private IAObject[] mutableTweetFields;
+    private IAObject[] mutableUserFields;
+    private AMutableRecord mutableRecord;
+    private AMutableRecord mutableUser;
+    private final Map<String, Integer> userFieldNameMap = new HashMap<>();
+    private final Map<String, Integer> tweetFieldNameMap = new HashMap<>();
+    private RecordBuilder recordBuilder = new RecordBuilder();
+
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    @Override
+    public void configure(Map<String, String> configuration, ARecordType recordType)
+            throws HyracksDataException, IOException {
+        initFieldNames(recordType);
+        mutableUserFields = new IAObject[] { new AMutableString(null), new AMutableString(null), new AMutableInt32(0),
+                new AMutableInt32(0), new AMutableString(null), new AMutableInt32(0) };
+        mutableUser = new AMutableRecord((ARecordType) recordType.getFieldTypes()[tweetFieldNameMap.get(Tweet.USER)],
+                mutableUserFields);
+
+        mutableTweetFields = new IAObject[] { new AMutableString(null), mutableUser, new AMutableDouble(0),
+                new AMutableDouble(0), new AMutableString(null), new AMutableString(null) };
+        mutableRecord = new AMutableRecord(recordType, mutableTweetFields);
+
+    }
+
+    // Initialize the hashmap values for the field names and positions
+    private void initFieldNames(ARecordType recordType) {
+        String tweetFields[] = recordType.getFieldNames();
+        for (int i = 0; i < tweetFields.length; i++) {
+            tweetFieldNameMap.put(tweetFields[i], i);
+            if (tweetFields[i].equals(Tweet.USER)) {
+                IAType fieldType = recordType.getFieldTypes()[i];
+                if (fieldType.getTypeTag() == ATypeTag.RECORD) {
+                    String userFields[] = ((ARecordType) fieldType).getFieldNames();
+                    for (int j = 0; j < userFields.length; j++) {
+                        userFieldNameMap.put(userFields[j], j);
+                    }
+                }
+
+            }
+        }
+    }
+
+    @Override
+    public void parse(IRawRecord<? extends Status> record, DataOutput out) throws Exception {
+        Status tweet = record.get();
+        User user = tweet.getUser();
+        // Tweet user data
+        ((AMutableString) mutableUserFields[userFieldNameMap.get(Tweet.SCREEN_NAME)])
+                .setValue(JObjectUtil.getNormalizedString(user.getScreenName()));
+        ((AMutableString) mutableUserFields[userFieldNameMap.get(Tweet.LANGUAGE)])
+                .setValue(JObjectUtil.getNormalizedString(user.getLang()));
+        ((AMutableInt32) mutableUserFields[userFieldNameMap.get(Tweet.FRIENDS_COUNT)]).setValue(user.getFriendsCount());
+        ((AMutableInt32) mutableUserFields[userFieldNameMap.get(Tweet.STATUS_COUNT)]).setValue(user.getStatusesCount());
+        ((AMutableString) mutableUserFields[userFieldNameMap.get(Tweet.NAME)])
+                .setValue(JObjectUtil.getNormalizedString(user.getName()));
+        ((AMutableInt32) mutableUserFields[userFieldNameMap.get(Tweet.FOLLOWERS_COUNT)])
+                .setValue(user.getFollowersCount());
+
+        // Tweet data
+        ((AMutableString) mutableTweetFields[tweetFieldNameMap.get(Tweet.ID)]).setValue(String.valueOf(tweet.getId()));
+
+        int userPos = tweetFieldNameMap.get(Tweet.USER);
+        for (int i = 0; i < mutableUserFields.length; i++) {
+            ((AMutableRecord) mutableTweetFields[userPos]).setValueAtPos(i, mutableUserFields[i]);
+        }
+        if (tweet.getGeoLocation() != null) {
+            ((AMutableDouble) mutableTweetFields[tweetFieldNameMap.get(Tweet.LATITUDE)])
+                    .setValue(tweet.getGeoLocation().getLatitude());
+            ((AMutableDouble) mutableTweetFields[tweetFieldNameMap.get(Tweet.LONGITUDE)])
+                    .setValue(tweet.getGeoLocation().getLongitude());
+        } else {
+            ((AMutableDouble) mutableTweetFields[tweetFieldNameMap.get(Tweet.LATITUDE)]).setValue(0);
+            ((AMutableDouble) mutableTweetFields[tweetFieldNameMap.get(Tweet.LONGITUDE)]).setValue(0);
+        }
+        ((AMutableString) mutableTweetFields[tweetFieldNameMap.get(Tweet.CREATED_AT)])
+                .setValue(JObjectUtil.getNormalizedString(tweet.getCreatedAt().toString()));
+        ((AMutableString) mutableTweetFields[tweetFieldNameMap.get(Tweet.MESSAGE)])
+                .setValue(JObjectUtil.getNormalizedString(tweet.getText()));
+
+        for (int i = 0; i < mutableTweetFields.length; i++) {
+            mutableRecord.setValueAtPos(i, mutableTweetFields[i]);
+        }
+        recordBuilder.reset(mutableRecord.getType());
+        recordBuilder.init();
+        IDataParser.writeRecord(mutableRecord, out, recordBuilder);
+    }
+
+    @Override
+    public Class<? extends Status> getRecordClass() {
+        return Status.class;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
new file mode 100644
index 0000000..4634278
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.factory;
+
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.parser.ADMDataParser;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * Factory for {@link ADMDataParser}s. The same parser type serves both as a record parser over
+ * {@code char[]} records and as an input stream parser; either way it is configured with this factory's
+ * configuration and record type.
+ */
+public class ADMDataParserFactory extends AbstractRecordStreamParserFactory<char[]> {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public Class<? extends char[]> getRecordClass() {
+        return char[].class;
+    }
+
+    @Override
+    public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException {
+        return newConfiguredParser();
+    }
+
+    @Override
+    public IStreamDataParser createInputStreamParser(IHyracksTaskContext ctx, int partition)
+            throws HyracksDataException {
+        return newConfiguredParser();
+    }
+
+    /**
+     * Creates a parser and configures it with this factory's settings.
+     *
+     * @throws HyracksDataException
+     *             wrapping any exception raised while creating or configuring the parser
+     */
+    private ADMDataParser newConfiguredParser() throws HyracksDataException {
+        final ADMDataParser admParser;
+        try {
+            admParser = new ADMDataParser();
+            admParser.configure(configuration, recordType);
+        } catch (Exception cause) {
+            throw new HyracksDataException(cause);
+        }
+        return admParser;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java
new file mode 100644
index 0000000..43af455
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/AbstractRecordStreamParserFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.factory;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.api.IStreamDataParserFactory;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.om.types.ARecordType;
+
+/**
+ * Common base for parser factories that implement both {@link IStreamDataParserFactory} and
+ * {@link IRecordDataParserFactory}. It keeps the configuration map and the record type it is given and
+ * derives the data source type from the configuration via {@code ExternalDataUtils.getDataSourceType}.
+ *
+ * @param <T> the type argument forwarded to {@link IRecordDataParserFactory}
+ */
+public abstract class AbstractRecordStreamParserFactory<T>
+        implements IStreamDataParserFactory, IRecordDataParserFactory<T> {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Record type most recently passed to {@link #setRecordType(ARecordType)}. */
+    protected ARecordType recordType;
+
+    /** Configuration map most recently passed to {@link #configure(Map)}. */
+    protected Map<String, String> configuration;
+
+    /**
+     * Stores the given configuration map; the reference is kept as-is, no copy is made.
+     *
+     * @param configuration the configuration to use from now on
+     */
+    @Override
+    public void configure(Map<String, String> configuration) {
+        this.configuration = configuration;
+    }
+
+    /**
+     * Stores the record type that subclasses pass to the parsers they create.
+     *
+     * @param recordType the record type to use from now on
+     */
+    @Override
+    public void setRecordType(ARecordType recordType) {
+        this.recordType = recordType;
+    }
+
+    /**
+     * Resolves the data source type from the stored configuration.
+     *
+     * @return the result of {@code ExternalDataUtils.getDataSourceType(configuration)}
+     * @throws AsterixException if the utility method reports a problem
+     */
+    @Override
+    public DataSourceType getDataSourceType() throws AsterixException {
+        return ExternalDataUtils.getDataSourceType(configuration);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
new file mode 100644
index 0000000..fa63d45
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.factory;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.parser.DelimitedDataParser;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+
+/**
+ * Parser factory that builds {@link DelimitedDataParser} instances for both the record and the stream
+ * parsing paths. The delimiter, quote and header settings are read from the factory's configuration map
+ * every time a parser is created.
+ */
+public class DelimitedDataParserFactory extends AbstractRecordStreamParserFactory<char[]> {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates a configured parser for the record parsing path.
+     *
+     * @param ctx the task context; not used by this factory
+     * @return a newly created and configured {@link DelimitedDataParser}
+     * @throws HyracksDataException declared for parser construction and configuration
+     * @throws AsterixException for example when the configured delimiter or quote is invalid
+     */
+    @Override
+    public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx)
+            throws HyracksDataException, AsterixException {
+        return createParser();
+    }
+
+    /**
+     * Creates a configured parser for the stream parsing path.
+     *
+     * @param ctx the task context; not used by this factory
+     * @param partition the partition number; not used by this factory
+     * @return a newly created and configured {@link DelimitedDataParser}
+     * @throws HyracksDataException declared for parser construction and configuration
+     * @throws AsterixException for example when the configured delimiter or quote is invalid
+     */
+    @Override
+    public IStreamDataParser createInputStreamParser(IHyracksTaskContext ctx, int partition)
+            throws HyracksDataException, AsterixException {
+        return createParser();
+    }
+
+    /**
+     * @return {@code char[].class}, the record class this factory's parsers work on
+     */
+    @Override
+    public Class<? extends char[]> getRecordClass() {
+        return char[].class;
+    }
+
+    /**
+     * Builds a {@link DelimitedDataParser} from the stored record type and configuration, then configures
+     * it with both.
+     */
+    private DelimitedDataParser createParser() throws HyracksDataException, AsterixException {
+        IValueParserFactory[] valueParserFactories = ExternalDataUtils.getValueParserFactories(recordType);
+        // Kept as a primitive: getDelimiter returns char and getQuote takes char, so boxing buys nothing.
+        char delimiter = getDelimiter(configuration);
+        char quote = getQuote(configuration, delimiter);
+        boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
+        DelimitedDataParser parser = new DelimitedDataParser(valueParserFactories, delimiter, quote, hasHeader);
+        parser.configure(configuration, recordType);
+        return parser;
+    }
+
+    /**
+     * Reads the delimiter from the given configuration.
+     *
+     * @param configuration the configuration map, looked up under {@code ExternalDataConstants.KEY_DELIMITER}
+     * @return the first character of the configured value, or of
+     *         {@code ExternalDataConstants.DEFAULT_DELIMITER} when no value is configured
+     * @throws AsterixException if a value is configured and its length is not exactly 1
+     */
+    public static char getDelimiter(Map<String, String> configuration) throws AsterixException {
+        String delimiterValue = configuration.get(ExternalDataConstants.KEY_DELIMITER);
+        if (delimiterValue == null) {
+            delimiterValue = ExternalDataConstants.DEFAULT_DELIMITER;
+        } else if (delimiterValue.length() != 1) {
+            throw new AsterixException(
+                    "'" + delimiterValue + "' is not a valid delimiter. The length of a delimiter should be 1.");
+        }
+        return delimiterValue.charAt(0);
+    }
+
+    /**
+     * Reads the quote character from the given configuration and checks it against the delimiter.
+     *
+     * @param configuration the configuration map, looked up under {@code ExternalDataConstants.KEY_QUOTE}
+     * @param delimiter the delimiter already in use; the quote must be a different character
+     * @return the first character of the configured value, or of {@code ExternalDataConstants.DEFAULT_QUOTE}
+     *         when no value is configured
+     * @throws AsterixException if a value is configured and its length is not exactly 1, or if the quote
+     *         is the same character as the delimiter
+     */
+    public static char getQuote(Map<String, String> configuration, char delimiter) throws AsterixException {
+        String quoteValue = configuration.get(ExternalDataConstants.KEY_QUOTE);
+        if (quoteValue == null) {
+            quoteValue = ExternalDataConstants.DEFAULT_QUOTE;
+        } else if (quoteValue.length() != 1) {
+            throw new AsterixException("'" + quoteValue + "' is not a valid quote. The length of a quote should be 1.");
+        }
+
+        // The delimiter is a primitive char and therefore never null, so the only remaining check is
+        // that quote and delimiter are not the same character.
+        char quote = quoteValue.charAt(0);
+        if (quote == delimiter) {
+            throw new AsterixException(
+                    "Quote '" + quoteValue + "' cannot be used with the delimiter '" + delimiter + "'. ");
+        }
+
+        return quote;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
new file mode 100644
index 0000000..f07ba4c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.factory;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.parser.HiveRecordParser;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hadoop.io.Writable;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * Record parser factory for {@link Writable} records that hands out configured {@link HiveRecordParser}
+ * instances.
+ */
+public class HiveDataParserFactory implements IRecordDataParserFactory<Writable> {
+
+    private static final long serialVersionUID = 1L;
+    private Map<String, String> configuration;
+    private ARecordType recordType;
+
+    /**
+     * Stores the configuration map that is later passed to every parser this factory creates.
+     *
+     * @param configuration the configuration to use from now on
+     */
+    @Override
+    public void configure(Map<String, String> configuration) {
+        this.configuration = configuration;
+    }
+
+    /**
+     * Stores the record type that is later passed to every parser this factory creates.
+     *
+     * @param recordType the record type to use from now on
+     */
+    @Override
+    public void setRecordType(ARecordType recordType) {
+        this.recordType = recordType;
+    }
+
+    /**
+     * @return always {@link DataSourceType#RECORDS}
+     */
+    @Override
+    public DataSourceType getDataSourceType() {
+        return DataSourceType.RECORDS;
+    }
+
+    /**
+     * @return {@code Writable.class}, the record class this factory's parsers work on
+     */
+    @Override
+    public Class<? extends Writable> getRecordClass() {
+        return Writable.class;
+    }
+
+    /**
+     * Creates a {@link HiveRecordParser} and configures it with the stored configuration and record type.
+     *
+     * @param ctx the task context; not used by this factory
+     * @return the configured parser
+     */
+    @Override
+    public IRecordDataParser<Writable> createRecordParser(IHyracksTaskContext ctx)
+            throws HyracksDataException, AsterixException {
+        HiveRecordParser parser = new HiveRecordParser();
+        parser.configure(configuration, recordType);
+        return parser;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
new file mode 100644
index 0000000..fecb0de
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.factory;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.parser.RSSParser;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+import com.sun.syndication.feed.synd.SyndEntryImpl;
+
+/**
+ * Record parser factory for {@link SyndEntryImpl} records that hands out configured {@link RSSParser}
+ * instances.
+ */
+public class RSSParserFactory implements IRecordDataParserFactory<SyndEntryImpl> {
+
+    private static final long serialVersionUID = 1L;
+    private ARecordType recordType;
+    private Map<String, String> configuration;
+
+    /**
+     * Stores the configuration map that is later passed to every parser this factory creates.
+     *
+     * @param configuration the configuration to use from now on
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+    }
+
+    /**
+     * Stores the record type that is later passed to every parser this factory creates.
+     *
+     * @param recordType the record type to use from now on
+     */
+    @Override
+    public void setRecordType(ARecordType recordType) {
+        this.recordType = recordType;
+    }
+
+    /**
+     * @return always {@link DataSourceType#RECORDS}
+     */
+    @Override
+    public DataSourceType getDataSourceType() throws AsterixException {
+        return DataSourceType.RECORDS;
+    }
+
+    /**
+     * @return {@code SyndEntryImpl.class}, the record class this factory's parsers work on
+     */
+    @Override
+    public Class<? extends SyndEntryImpl> getRecordClass() {
+        return SyndEntryImpl.class;
+    }
+
+    /**
+     * Creates an {@link RSSParser} and configures it with the stored configuration and record type.
+     *
+     * @param ctx the task context; not used by this factory
+     * @return the configured parser
+     */
+    @Override
+    public IRecordDataParser<SyndEntryImpl> createRecordParser(IHyracksTaskContext ctx)
+            throws AsterixException, IOException {
+        RSSParser parser = new RSSParser();
+        parser.configure(configuration, recordType);
+        return parser;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
new file mode 100644
index 0000000..0f3b309
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.factory;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.parser.TweetParser;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+import twitter4j.Status;
+
+/**
+ * Record parser factory for {@link Status} records that hands out configured {@link TweetParser}
+ * instances.
+ */
+public class TweetParserFactory implements IRecordDataParserFactory<Status> {
+
+    private static final long serialVersionUID = 1L;
+    private ARecordType recordType;
+    private Map<String, String> configuration;
+
+    /**
+     * Stores the configuration map that is later passed to every parser this factory creates.
+     *
+     * @param configuration the configuration to use from now on
+     */
+    @Override
+    public void configure(Map<String, String> configuration) throws Exception {
+        this.configuration = configuration;
+    }
+
+    /**
+     * Stores the record type that is later passed to every parser this factory creates.
+     *
+     * @param recordType the record type to use from now on
+     */
+    @Override
+    public void setRecordType(ARecordType recordType) {
+        this.recordType = recordType;
+    }
+
+    /**
+     * @return always {@link DataSourceType#RECORDS}
+     */
+    @Override
+    public DataSourceType getDataSourceType() throws AsterixException {
+        return DataSourceType.RECORDS;
+    }
+
+    /**
+     * @return {@code Status.class}, the record class this factory's parsers work on
+     */
+    @Override
+    public Class<? extends Status> getRecordClass() {
+        return Status.class;
+    }
+
+    /**
+     * Creates a {@link TweetParser} and configures it with the stored configuration and record type.
+     *
+     * @param ctx the task context; not used by this factory
+     * @return the configured parser
+     */
+    @Override
+    public IRecordDataParser<Status> createRecordParser(IHyracksTaskContext ctx) throws AsterixException, IOException {
+        TweetParser parser = new TweetParser();
+        parser.configure(configuration, recordType);
+        return parser;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
new file mode 100644
index 0000000..649ca43
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/AdapterFactoryProvider.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.provider;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.adapter.factory.GenericAdapterFactory;
+import org.apache.asterix.external.adapter.factory.LookupAdapterFactory;
+import org.apache.asterix.external.api.IAdapterFactory;
+import org.apache.asterix.external.api.IIndexingAdapterFactory;
+import org.apache.asterix.external.dataset.adapter.GenericAdapter;
+import org.apache.asterix.external.indexing.ExternalFile;
+import org.apache.asterix.external.library.ExternalLibraryManager;
+import org.apache.asterix.external.runtime.GenericSocketFeedAdapter;
+import org.apache.asterix.external.runtime.GenericSocketFeedAdapterFactory;
+import org.apache.asterix.external.runtime.SocketClientAdapter;
+import org.apache.asterix.external.runtime.SocketClientAdapterFactory;
+import org.apache.asterix.external.util.ExternalDataCompatibilityUtils;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.dataflow.value.INullWriterFactory;
+
+/**
+ * Registry that maps adapter names (class names and aliases) to {@link IAdapterFactory} classes and
+ * instantiates and configures a factory on request.
+ */
+public class AdapterFactoryProvider {
+
+    /**
+     * Adapter name to adapter factory class. Filled with the built-in entries when this class is
+     * initialized and extended by {@link #addNewAdapter}.
+     */
+    public static final Map<String, Class<? extends IAdapterFactory>> adapterFactories = initializeAdapterFactoryMapping();
+
+    /** Builds the initial name-to-factory-class mapping. */
+    private static Map<String, Class<? extends IAdapterFactory>> initializeAdapterFactoryMapping() {
+        Map<String, Class<? extends IAdapterFactory>> mapping = new HashMap<>();
+        // Class names
+        mapping.put(GenericAdapter.class.getName(), GenericAdapterFactory.class);
+        mapping.put(GenericSocketFeedAdapter.class.getName(), GenericSocketFeedAdapterFactory.class);
+        mapping.put(SocketClientAdapter.class.getName(), SocketClientAdapterFactory.class);
+
+        // Aliases
+        mapping.put(ExternalDataConstants.ALIAS_GENERIC_ADAPTER, GenericAdapterFactory.class);
+        mapping.put(ExternalDataConstants.ALIAS_HDFS_ADAPTER, GenericAdapterFactory.class);
+        mapping.put(ExternalDataConstants.ALIAS_LOCALFS_ADAPTER, GenericAdapterFactory.class);
+        mapping.put(ExternalDataConstants.ALIAS_SOCKET_ADAPTER, GenericSocketFeedAdapterFactory.class);
+        mapping.put(ExternalDataConstants.ALIAS_SOCKET_CLIENT_ADAPTER, SocketClientAdapterFactory.class);
+        mapping.put(ExternalDataConstants.ALIAS_FILE_FEED_ADAPTER, GenericAdapterFactory.class);
+
+        // Compatibility
+        mapping.put(ExternalDataConstants.ADAPTER_HDFS_CLASSNAME, GenericAdapterFactory.class);
+        mapping.put(ExternalDataConstants.ADAPTER_LOCALFS_CLASSNAME, GenericAdapterFactory.class);
+        return mapping;
+    }
+
+    /**
+     * Instantiates and configures the adapter factory registered under the given name.
+     *
+     * @param adapterClassname the adapter class name or alias to look up
+     * @param configuration the adapter configuration; compatibility parameters may be added to it
+     * @param itemType the record type passed to the factory
+     * @return the configured factory
+     * @throws Exception if no factory is registered under the name ({@link AsterixException}), or if
+     *         instantiation or configuration fails
+     */
+    public static IAdapterFactory getAdapterFactory(String adapterClassname, Map<String, String> configuration,
+            ARecordType itemType) throws Exception {
+        ExternalDataCompatibilityUtils.addCompatabilityParameters(adapterClassname, itemType, configuration);
+        // Single lookup: a missing (or null-mapped) entry is reported as an unknown adapter, not as an NPE.
+        Class<? extends IAdapterFactory> factoryClass = adapterFactories.get(adapterClassname);
+        if (factoryClass == null) {
+            throw new AsterixException("Unknown adapter: " + adapterClassname);
+        }
+        IAdapterFactory adapterFactory = factoryClass.newInstance();
+        adapterFactory.configure(configuration, itemType);
+        return adapterFactory;
+    }
+
+    /**
+     * Instantiates and configures the indexing adapter factory registered under the given name.
+     *
+     * @param adapterClassname the adapter class name or alias to look up
+     * @param configuration the adapter configuration; compatibility parameters may be added to it
+     * @param itemType the record type passed to the factory
+     * @param snapshot the external files passed to {@code setSnapshot}
+     * @param indexingOp the flag passed to {@code setSnapshot}
+     * @return the configured factory
+     * @throws AsterixException if no factory is registered under the name, or if the factory cannot be
+     *         instantiated, cast to {@link IIndexingAdapterFactory} or configured
+     */
+    public static IIndexingAdapterFactory getAdapterFactory(String adapterClassname, Map<String, String> configuration,
+            ARecordType itemType, List<ExternalFile> snapshot, boolean indexingOp)
+                    throws AsterixException, InstantiationException, IllegalAccessException {
+        ExternalDataCompatibilityUtils.addCompatabilityParameters(adapterClassname, itemType, configuration);
+        Class<? extends IAdapterFactory> factoryClass = adapterFactories.get(adapterClassname);
+        if (factoryClass == null) {
+            // Name the adapter, as the non-indexing overload does, so the failure can be diagnosed.
+            throw new AsterixException("Unknown adapter: " + adapterClassname);
+        }
+        try {
+            IIndexingAdapterFactory adapterFactory = (IIndexingAdapterFactory) factoryClass.newInstance();
+            adapterFactory.setSnapshot(snapshot, indexingOp);
+            adapterFactory.configure(configuration, itemType);
+            return adapterFactory;
+        } catch (Exception e) {
+            throw new AsterixException("Failed to create indexing adapter factory.", e);
+        }
+    }
+
+    /**
+     * Loads an adapter factory class through the class loader of the given library and registers it
+     * under both the adapter class name and the adapter alias.
+     *
+     * @param dataverseName the dataverse used to find the library class loader
+     * @param adapterClassName the first key to register the factory class under
+     * @param adapterAlias the second key to register the factory class under
+     * @param adapterFactoryClassName the name of the factory class to load
+     * @param libraryName the library used to find the library class loader
+     * @throws ClassNotFoundException if the factory class cannot be loaded
+     */
+    @SuppressWarnings("unchecked")
+    public static void addNewAdapter(String dataverseName, String adapterClassName, String adapterAlias,
+            String adapterFactoryClassName, String libraryName) throws ClassNotFoundException {
+        ClassLoader classLoader = ExternalLibraryManager.getLibraryClassLoader(dataverseName, libraryName);
+        Class<? extends IAdapterFactory> adapterFactoryClass = (Class<? extends IAdapterFactory>) classLoader
+                .loadClass(adapterFactoryClassName);
+        adapterFactories.put(adapterClassName, adapterFactoryClass);
+        adapterFactories.put(adapterAlias, adapterFactoryClass);
+    }
+
+    /**
+     * Creates a {@link LookupAdapterFactory} from the given arguments and configures it.
+     *
+     * @return the configured lookup adapter factory
+     * @throws Exception if configuring the factory fails
+     */
+    public static LookupAdapterFactory<?> getAdapterFactory(Map<String, String> configuration, ARecordType recordType,
+            int[] ridFields, boolean retainInput, boolean retainNull, INullWriterFactory iNullWriterFactory)
+                    throws Exception {
+        LookupAdapterFactory<?> adapterFactory = new LookupAdapterFactory<>(recordType, ridFields, retainInput,
+                retainNull, iNullWriterFactory);
+        adapterFactory.configure(configuration);
+        return adapterFactory;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DataflowControllerProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DataflowControllerProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DataflowControllerProvider.java
new file mode 100644
index 0000000..68a3942
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DataflowControllerProvider.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.provider;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IDataFlowController;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.api.IExternalDataSourceFactory;
+import org.apache.asterix.external.api.IInputStreamProvider;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.api.IRecordDataParser;
+import org.apache.asterix.external.api.IRecordDataParserFactory;
+import org.apache.asterix.external.api.IRecordReader;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.api.IStreamDataParser;
+import org.apache.asterix.external.api.IStreamDataParserFactory;
+import org.apache.asterix.external.dataflow.IndexingDataFlowController;
+import org.apache.asterix.external.dataflow.RecordDataFlowController;
+import org.apache.asterix.external.dataflow.StreamDataFlowController;
+import org.apache.asterix.external.util.DataflowUtils;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Builds the {@link IDataFlowController} that matches the type of an external data source.
+ */
+public class DataflowControllerProvider {
+
+    /*
+     * Order of calls made on a controller:
+     * 1. Constructor()
+     * 2. configure(configuration, ctx)
+     * 3. setTupleForwarder(forwarder)
+     * 4. for a record flow controller: set the record reader, then the record parser;
+     *    otherwise: set the stream parser
+     * 5. start(writer) -- not called here
+     */
+
+    /**
+     * Creates and wires a data flow controller for the given data source factory.
+     *
+     * @param recordType the record type the parser is configured with
+     * @param ctx the task context passed to the controller, reader/stream provider and parser factories
+     * @param partition the partition passed to the reader/stream provider and stream parser factories
+     * @param dataSourceFactory the data source factory; its data source type selects the controller kind
+     * @param dataParserFactory the parser factory; cast to the record or stream variant as needed
+     * @param configuration the configuration passed to the controller and the parser
+     * @param indexingOp for record sources, whether an {@link IndexingDataFlowController} is created
+     * @return the wired controller
+     * @throws Exception if the data source type is unknown ({@link AsterixException}) or any of the
+     *         involved components fails
+     */
+    @SuppressWarnings({ "rawtypes", "unchecked" })
+    public static IDataFlowController getDataflowController(ARecordType recordType, IHyracksTaskContext ctx,
+            int partition, IExternalDataSourceFactory dataSourceFactory, IDataParserFactory dataParserFactory,
+            Map<String, String> configuration, boolean indexingOp) throws Exception {
+        switch (dataSourceFactory.getDataSourceType()) {
+            case RECORDS:
+                return createRecordController(recordType, ctx, partition, dataSourceFactory, dataParserFactory,
+                        configuration, indexingOp);
+            case STREAM:
+                return createStreamController(recordType, ctx, partition, dataSourceFactory, dataParserFactory,
+                        configuration);
+            default:
+                throw new AsterixException("Unknown data source type: " + dataSourceFactory.getDataSourceType());
+        }
+    }
+
+    /** Wires a record-based controller: configure, tuple forwarder, reader, parser. */
+    @SuppressWarnings({ "rawtypes", "unchecked" })
+    private static IDataFlowController createRecordController(ARecordType recordType, IHyracksTaskContext ctx,
+            int partition, IExternalDataSourceFactory dataSourceFactory, IDataParserFactory dataParserFactory,
+            Map<String, String> configuration, boolean indexingOp) throws Exception {
+        RecordDataFlowController controller;
+        if (indexingOp) {
+            controller = new IndexingDataFlowController();
+        } else {
+            controller = new RecordDataFlowController();
+        }
+        controller.configure(configuration, ctx);
+        controller.setTupleForwarder(DataflowUtils.getTupleForwarder(configuration));
+
+        IRecordReaderFactory<?> readerFactory = (IRecordReaderFactory<?>) dataSourceFactory;
+        IRecordReader<?> reader = readerFactory.createRecordReader(ctx, partition);
+
+        IRecordDataParserFactory<?> parserFactory = (IRecordDataParserFactory<?>) dataParserFactory;
+        IRecordDataParser<?> parser = parserFactory.createRecordParser(ctx);
+        parser.configure(configuration, recordType);
+
+        controller.setRecordReader(reader);
+        controller.setRecordParser(parser);
+        return controller;
+    }
+
+    /** Wires a stream-based controller: configure, tuple forwarder, stream provider, parser. */
+    private static IDataFlowController createStreamController(ARecordType recordType, IHyracksTaskContext ctx,
+            int partition, IExternalDataSourceFactory dataSourceFactory, IDataParserFactory dataParserFactory,
+            Map<String, String> configuration) throws Exception {
+        StreamDataFlowController controller = new StreamDataFlowController();
+        controller.configure(configuration, ctx);
+        controller.setTupleForwarder(DataflowUtils.getTupleForwarder(configuration));
+
+        IInputStreamProviderFactory providerFactory = (IInputStreamProviderFactory) dataSourceFactory;
+        IInputStreamProvider provider = providerFactory.createInputStreamProvider(ctx, partition);
+
+        IStreamDataParserFactory parserFactory = (IStreamDataParserFactory) dataParserFactory;
+        parserFactory.configure(configuration);
+        IStreamDataParser parser = parserFactory.createInputStreamParser(ctx, partition);
+        parser.configure(configuration, recordType);
+        parser.setInputStream(provider.getInputStream());
+
+        controller.setStreamParser(parser);
+        return controller;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java
new file mode 100644
index 0000000..c69e12c
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.provider;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalDataSourceFactory;
+import org.apache.asterix.external.api.IInputStreamProviderFactory;
+import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.input.HDFSDataSourceFactory;
+import org.apache.asterix.external.input.record.reader.factory.LineRecordReaderFactory;
+import org.apache.asterix.external.input.record.reader.factory.SemiStructuredRecordReaderFactory;
+import org.apache.asterix.external.input.stream.factory.LocalFSInputStreamProviderFactory;
+import org.apache.asterix.external.input.stream.factory.SocketInputStreamProviderFactory;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+
+/**
+ * Provides the data source factories (record readers and input stream providers) used to read external data.
+ * The factory to create is chosen from entries of the adapter configuration map.
+ */
+public class DatasourceFactoryProvider {
+
+    /**
+     * Creates the data source factory for the data source type found in the configuration.
+     *
+     * @param configuration
+     *            the adapter configuration
+     * @return a record reader factory for RECORDS sources, or an input stream provider factory for STREAM sources
+     * @throws Exception
+     *             if the factory cannot be created, or the data source type is neither RECORDS nor STREAM
+     */
+    public static IExternalDataSourceFactory getExternalDataSourceFactory(Map<String, String> configuration)
+            throws Exception {
+        switch (ExternalDataUtils.getDataSourceType(configuration)) {
+            case RECORDS:
+                return DatasourceFactoryProvider.getRecordReaderFactory(configuration);
+            case STREAM:
+                return DatasourceFactoryProvider
+                        .getInputStreamFactory(configuration.get(ExternalDataConstants.KEY_STREAM), configuration);
+            default:
+                // Fail here rather than returning null for the caller to trip over later.
+                throw new AsterixException(
+                        "Unknown data source type: " + ExternalDataUtils.getDataSourceType(configuration));
+        }
+    }
+
+    /**
+     * Creates the input stream provider factory for the given stream name.
+     *
+     * @param stream
+     *            the stream name; either one of the built-in STREAM_* constants or a name that
+     *            {@code ExternalDataUtils.isExternal} reports as external
+     * @param configuration
+     *            the adapter configuration; the dataverse is read from it for external factories
+     * @return the input stream provider factory
+     * @throws Exception
+     *             if an external factory cannot be created, or the stream name is not recognized
+     */
+    public static IInputStreamProviderFactory getInputStreamFactory(String stream, Map<String, String> configuration)
+            throws Exception {
+        IInputStreamProviderFactory streamFactory;
+        if (ExternalDataUtils.isExternal(stream)) {
+            String dataverse = ExternalDataUtils.getDataverse(configuration);
+            streamFactory = ExternalDataUtils.createExternalInputStreamFactory(dataverse, stream);
+        } else {
+            switch (stream) {
+                case ExternalDataConstants.STREAM_HDFS:
+                    streamFactory = new HDFSDataSourceFactory();
+                    break;
+                case ExternalDataConstants.STREAM_LOCAL_FILESYSTEM:
+                    streamFactory = new LocalFSInputStreamProviderFactory();
+                    break;
+                case ExternalDataConstants.STREAM_SOCKET:
+                    streamFactory = new SocketInputStreamProviderFactory();
+                    break;
+                default:
+                    throw new AsterixException("unknown input stream factory");
+            }
+        }
+        return streamFactory;
+    }
+
+    /**
+     * Creates the record reader factory named by the KEY_READER entry of the configuration.
+     *
+     * @param configuration
+     *            the adapter configuration
+     * @return the record reader factory; the ADM, semi-structured and delimited readers are given the input stream
+     *         provider factory they read from
+     * @throws Exception
+     *             if an external factory cannot be created, or the reader name is not recognized
+     */
+    public static IRecordReaderFactory<?> getRecordReaderFactory(Map<String, String> configuration) throws Exception {
+        String reader = configuration.get(ExternalDataConstants.KEY_READER);
+        IRecordReaderFactory<?> readerFactory;
+        if (ExternalDataUtils.isExternal(reader)) {
+            String dataverse = ExternalDataUtils.getDataverse(configuration);
+            readerFactory = ExternalDataUtils.createExternalRecordReaderFactory(dataverse, reader);
+        } else {
+            switch (reader) {
+                case ExternalDataConstants.READER_HDFS:
+                    readerFactory = new HDFSDataSourceFactory();
+                    break;
+                case ExternalDataConstants.READER_ADM:
+                case ExternalDataConstants.READER_SEMISTRUCTURED:
+                    readerFactory = new SemiStructuredRecordReaderFactory()
+                            .setInputStreamFactoryProvider(DatasourceFactoryProvider.getInputStreamFactory(
+                                    ExternalDataUtils.getRecordReaderStreamName(configuration), configuration));
+                    break;
+                case ExternalDataConstants.READER_DELIMITED:
+                    readerFactory = new LineRecordReaderFactory()
+                            .setInputStreamFactoryProvider(DatasourceFactoryProvider.getInputStreamFactory(
+                                    ExternalDataUtils.getRecordReaderStreamName(configuration), configuration));
+                    break;
+                default:
+                    throw new AsterixException("unknown record reader factory: " + reader);
+            }
+        }
+        return readerFactory;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ExternalIndexerProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ExternalIndexerProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ExternalIndexerProvider.java
new file mode 100644
index 0000000..3c090a6
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ExternalIndexerProvider.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.provider;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IExternalIndexer;
+import org.apache.asterix.external.indexing.FileOffsetIndexer;
+import org.apache.asterix.external.indexing.RecordColumnarIndexer;
+import org.apache.asterix.external.util.ExternalDataConstants;
+
+/**
+ * Chooses the external indexer that matches the input format named in an adapter configuration.
+ */
+public class ExternalIndexerProvider {
+
+    /**
+     * Creates the indexer for the input format named by the KEY_INPUT_FORMAT entry of the configuration. The
+     * format is trimmed and matched case-insensitively against both the short format names and the input format
+     * class names.
+     *
+     * @param configuration
+     *            the adapter configuration
+     * @return a {@link FileOffsetIndexer} for the text and sequence formats, a {@link RecordColumnarIndexer} for
+     *         the RC format
+     * @throws AsterixException
+     *             if the input format is missing, or no indexer exists for it
+     */
+    public static IExternalIndexer getIndexer(Map<String, String> configuration) throws AsterixException {
+        String inputFormatParameter = configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT);
+        if (inputFormatParameter == null) {
+            // Without this check a missing entry surfaces as a NullPointerException from trim().
+            throw new AsterixException(
+                    "Unable to create indexer: " + ExternalDataConstants.KEY_INPUT_FORMAT + " is not specified");
+        }
+        inputFormatParameter = inputFormatParameter.trim();
+        if (inputFormatParameter.equalsIgnoreCase(ExternalDataConstants.INPUT_FORMAT_TEXT)
+                || inputFormatParameter.equalsIgnoreCase(ExternalDataConstants.CLASS_NAME_TEXT_INPUT_FORMAT)
+                || inputFormatParameter.equalsIgnoreCase(ExternalDataConstants.INPUT_FORMAT_SEQUENCE)
+                || inputFormatParameter.equalsIgnoreCase(ExternalDataConstants.CLASS_NAME_SEQUENCE_INPUT_FORMAT)) {
+            return new FileOffsetIndexer();
+        } else if (inputFormatParameter.equalsIgnoreCase(ExternalDataConstants.INPUT_FORMAT_RC)
+                || inputFormatParameter.equalsIgnoreCase(ExternalDataConstants.CLASS_NAME_RC_INPUT_FORMAT)) {
+            return new RecordColumnarIndexer();
+        } else {
+            throw new AsterixException("Unable to create indexer for data with format: " + inputFormatParameter);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/284590ed/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
new file mode 100644
index 0000000..f5a0512
--- /dev/null
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.provider;
+
+import java.util.Map;
+
+import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.external.api.IDataParserFactory;
+import org.apache.asterix.external.parser.factory.ADMDataParserFactory;
+import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory;
+import org.apache.asterix.external.parser.factory.HiveDataParserFactory;
+import org.apache.asterix.external.parser.factory.RSSParserFactory;
+import org.apache.asterix.external.parser.factory.TweetParserFactory;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+
+/**
+ * Chooses the data parser factory for an external data adapter from its configuration.
+ */
+public class ParserFactoryProvider {
+
+    /**
+     * Creates the data parser factory for the given configuration. When the KEY_DATA_PARSER entry names an external
+     * parser factory, that factory is created; otherwise a built-in factory is selected by record format.
+     *
+     * @param configuration
+     *            the adapter configuration
+     * @return the data parser factory
+     * @throws InstantiationException
+     *             if thrown while creating an external parser factory
+     * @throws IllegalAccessException
+     *             if thrown while creating an external parser factory
+     * @throws ClassNotFoundException
+     *             if thrown while creating an external parser factory
+     * @throws AsterixException
+     *             if the record format is missing or is not supported
+     */
+    public static IDataParserFactory getDataParserFactory(Map<String, String> configuration)
+            throws InstantiationException, IllegalAccessException, ClassNotFoundException, AsterixException {
+        String parserFactoryName = configuration.get(ExternalDataConstants.KEY_DATA_PARSER);
+        if (parserFactoryName != null && ExternalDataUtils.isExternal(parserFactoryName)) {
+            return ExternalDataUtils.createExternalParserFactory(ExternalDataUtils.getDataverse(configuration),
+                    parserFactoryName);
+        }
+        return ParserFactoryProvider.getParserFactory(configuration);
+    }
+
+    /**
+     * Selects the built-in parser factory for the record format of the configuration.
+     *
+     * @param configuration
+     *            the adapter configuration
+     * @return a new parser factory for the record format
+     * @throws AsterixException
+     *             if the record format is missing or is not one of the supported formats
+     */
+    private static IDataParserFactory getParserFactory(Map<String, String> configuration) throws AsterixException {
+        String recordFormat = ExternalDataUtils.getRecordFormat(configuration);
+        if (recordFormat == null) {
+            // NOTE(review): assumes getRecordFormat may return null; switch on a null String would throw an NPE.
+            throw new AsterixException("Unknown data format: the record format is not specified");
+        }
+        switch (recordFormat) {
+            case ExternalDataConstants.FORMAT_ADM:
+            case ExternalDataConstants.FORMAT_JSON:
+                return new ADMDataParserFactory();
+            case ExternalDataConstants.FORMAT_DELIMITED_TEXT:
+                return new DelimitedDataParserFactory();
+            case ExternalDataConstants.FORMAT_HIVE:
+                return new HiveDataParserFactory();
+            case ExternalDataConstants.FORMAT_TWEET:
+                return new TweetParserFactory();
+            case ExternalDataConstants.FORMAT_RSS:
+                return new RSSParserFactory();
+            default:
+                throw new AsterixException("Unknown data format");
+        }
+    }
+}