You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/16 19:48:53 UTC
[37/51] [partial] nutch git commit: NUTCH-2292 : Mavenize the build
for nutch-core and nutch-plugins
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java
new file mode 100644
index 0000000..04821e7
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.impl;
+
+import java.text.MessageFormat;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Nutch;
+import org.apache.nutch.service.model.request.JobConfig;
+import org.apache.nutch.service.model.response.JobInfo;
+import org.apache.nutch.service.model.response.JobInfo.State;
+import org.apache.nutch.service.resources.ConfigResource;
+import org.apache.nutch.util.NutchTool;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class JobWorker implements Runnable{
+
+ private JobInfo jobInfo;
+ private JobConfig jobConfig;
+ private static final Logger LOG = LoggerFactory.getLogger(JobWorker.class);
+ private NutchTool tool;
+
+ /**
+ * To initialize JobWorker thread with the Job Configurations provided by user.
+ * @param jobConfig
+ * @param conf
+ * @param tool - NutchTool to run
+ */
+ public JobWorker(JobConfig jobConfig, Configuration conf, NutchTool tool) {
+ this.jobConfig = jobConfig;
+ this.tool = tool;
+ if (jobConfig.getConfId() == null) {
+ jobConfig.setConfId(ConfigResource.DEFAULT);
+ }
+
+ jobInfo = new JobInfo(generateId(), jobConfig, State.IDLE, "idle");
+ if (jobConfig.getCrawlId() != null) {
+ conf.set(Nutch.CRAWL_ID_KEY, jobConfig.getCrawlId());
+ }
+ }
+
+ private String generateId() {
+ if (jobConfig.getCrawlId() == null) {
+ return MessageFormat.format("{0}-{1}-{2}", jobConfig.getConfId(),
+ jobConfig.getType(), String.valueOf(hashCode()));
+ }
+ return MessageFormat.format("{0}-{1}-{2}-{3}", jobConfig.getCrawlId(),
+ jobConfig.getConfId(), jobConfig.getType(), String.valueOf(hashCode()));
+ }
+
+ @Override
+ public void run() {
+ try {
+ getInfo().setState(State.RUNNING);
+ getInfo().setMsg("OK");
+ getInfo().setResult(tool.run(getInfo().getArgs(), getInfo().getCrawlId()));
+ getInfo().setState(State.FINISHED);
+ } catch (Exception e) {
+ LOG.error("Cannot run job worker!", e);
+ getInfo().setMsg("ERROR: " + e.toString());
+ getInfo().setState(State.FAILED);
+ }
+ }
+
+ public JobInfo getInfo() {
+ return jobInfo;
+ }
+
+ /**
+ * To stop the executing job
+ * @return boolean true/false
+ */
+ public boolean stopJob() {
+ getInfo().setState(State.STOPPING);
+ try {
+ return tool.stopJob();
+ } catch (Exception e) {
+ throw new RuntimeException(
+ "Cannot stop job with id " + getInfo().getId(), e);
+ }
+ }
+
+ public boolean killJob() {
+ getInfo().setState(State.KILLING);
+ try {
+ boolean result = tool.killJob();
+ getInfo().setState(State.KILLED);
+ return result;
+ } catch (Exception e) {
+ throw new RuntimeException(
+ "Cannot kill job with id " + getInfo().getId(), e);
+ }
+ }
+
+ public void setInfo(JobInfo jobInfo) {
+ this.jobInfo = jobInfo;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java
new file mode 100644
index 0000000..cc88501
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.impl;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.ws.rs.WebApplicationException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.scoring.webgraph.LinkDatum;
+import org.apache.nutch.service.NutchReader;
+
+public class LinkReader implements NutchReader{
+
+ @Override
+ public List read(String path) throws FileNotFoundException {
+ List<HashMap> rows=new ArrayList<HashMap>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ LinkDatum value = new LinkDatum();
+
+ while(reader.next(key, value)) {
+ try {
+ HashMap<String, String> t_row = getLinksRow(key,value);
+ rows.add(t_row);
+ }
+ catch (Exception e) {
+ }
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return rows;
+ }
+
+ @Override
+ public List head(String path, int nrows) throws FileNotFoundException {
+ List<HashMap> rows=new ArrayList<HashMap>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ LinkDatum value = new LinkDatum();
+ int i = 0;
+ while(reader.next(key, value) && i<nrows) {
+
+ HashMap<String, String> t_row = getLinksRow(key,value);
+ rows.add(t_row);
+
+ i++;
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return rows;
+ }
+
+ @Override
+ public List slice(String path, int start, int end)
+ throws FileNotFoundException {
+ List<HashMap> rows=new ArrayList<HashMap>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ LinkDatum value = new LinkDatum();
+ int i = 0;
+
+ for(;i<start && reader.next(key, value);i++){} // increment to read start position
+ while(reader.next(key, value) && i<end) {
+ HashMap<String, String> t_row = getLinksRow(key,value);
+ rows.add(t_row);
+
+ i++;
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return rows;
+ }
+
+ @Override
+ public int count(String path) throws FileNotFoundException {
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ int i = 0;
+ try {
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Writable value = (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf);
+
+ while(reader.next(key, value)) {
+ i++;
+ }
+ reader.close();
+ } catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+ return i;
+ }
+
+ private HashMap<String, String> getLinksRow(Writable key, LinkDatum value) {
+ HashMap<String, String> t_row = new HashMap<String, String>();
+ t_row.put("key_url", key.toString());
+ t_row.put("url", value.getUrl());
+ t_row.put("anchor", value.getAnchor());
+ t_row.put("score", String.valueOf(value.getScore()));
+ t_row.put("timestamp", String.valueOf(value.getTimestamp()));
+ t_row.put("linktype", String.valueOf(value.getLinkType()));
+
+ return t_row;
+ }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java
new file mode 100644
index 0000000..2155a16
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java
@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.impl;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.ws.rs.WebApplicationException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.nutch.scoring.webgraph.Node;
+import org.apache.nutch.service.NutchReader;
+
+public class NodeReader implements NutchReader {
+
+ @Override
+ public List read(String path) throws FileNotFoundException {
+ // TODO Auto-generated method stub
+ List<HashMap> rows=new ArrayList<HashMap>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Node value = new Node();
+
+ while(reader.next(key, value)) {
+ try {
+ HashMap<String, String> t_row = getNodeRow(key,value);
+ rows.add(t_row);
+ }
+ catch (Exception e) {
+ }
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return rows;
+
+ }
+
+ @Override
+ public List head(String path, int nrows) throws FileNotFoundException {
+ List<HashMap> rows=new ArrayList<HashMap>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Node value = new Node();
+ int i = 0;
+ while(reader.next(key, value) && i<nrows) {
+ HashMap<String, String> t_row = getNodeRow(key,value);
+ rows.add(t_row);
+
+ i++;
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return rows;
+ }
+
+ @Override
+ public List slice(String path, int start, int end)
+ throws FileNotFoundException {
+ List<HashMap> rows=new ArrayList<HashMap>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Node value = new Node();
+ int i = 0;
+
+ for(;i<start && reader.next(key, value);i++){} // increment to read start position
+ while(reader.next(key, value) && i<end) {
+ HashMap<String, String> t_row = getNodeRow(key,value);
+ rows.add(t_row);
+
+ i++;
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return rows;
+ }
+
+ @Override
+ public int count(String path) throws FileNotFoundException {
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ int i =0;
+ try{
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key = (Writable)
+ ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Node value = new Node();
+
+ while(reader.next(key, value)) {
+ i++;
+ }
+ reader.close();
+
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+
+ return i;
+ }
+
+ private HashMap<String, String> getNodeRow(Writable key, Node value) {
+ HashMap<String, String> t_row = new HashMap<String, String>();
+ t_row.put("key_url", key.toString());
+ t_row.put("num_inlinks", String.valueOf(value.getNumInlinks()) );
+ t_row.put("num_outlinks", String.valueOf(value.getNumOutlinks()) );
+ t_row.put("inlink_score", String.valueOf(value.getInlinkScore()));
+ t_row.put("outlink_score", String.valueOf(value.getOutlinkScore()));
+ t_row.put("metadata", value.getMetadata().toString());
+
+ return t_row;
+ }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java
new file mode 100644
index 0000000..3fc5ba3
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.impl;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Queue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.nutch.service.model.response.JobInfo;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Queues;
+
+
+
+public class NutchServerPoolExecutor extends ThreadPoolExecutor{
+
+ private Queue<JobWorker> workersHistory;
+ private Queue<JobWorker> runningWorkers;
+
+ public NutchServerPoolExecutor(int corePoolSize, int maxPoolSize, long keepAliveTime, TimeUnit unit, BlockingQueue<Runnable> workQueue){
+ super(corePoolSize, maxPoolSize, keepAliveTime, unit, workQueue);
+ workersHistory = Queues.newArrayBlockingQueue(maxPoolSize);
+ runningWorkers = Queues.newArrayBlockingQueue(maxPoolSize);
+ }
+
+ @Override
+ protected void beforeExecute(Thread thread, Runnable runnable) {
+ super.beforeExecute(thread, runnable);
+ synchronized (runningWorkers) {
+ runningWorkers.offer(((JobWorker) runnable));
+ }
+ }
+ @Override
+ protected void afterExecute(Runnable runnable, Throwable throwable) {
+ super.afterExecute(runnable, throwable);
+ synchronized (runningWorkers) {
+ runningWorkers.remove(((JobWorker) runnable).getInfo());
+ }
+ JobWorker worker = ((JobWorker) runnable);
+ addStatusToHistory(worker);
+ }
+
+ private void addStatusToHistory(JobWorker worker) {
+ synchronized (workersHistory) {
+ if (!workersHistory.offer(worker)) {
+ workersHistory.poll();
+ workersHistory.add(worker);
+ }
+ }
+ }
+
+ /**
+ * Find the Job Worker Thread
+ * @param jobId
+ * @return
+ */
+ public JobWorker findWorker(String jobId) {
+ synchronized (runningWorkers) {
+ for (JobWorker worker : runningWorkers) {
+ if (StringUtils.equals(worker.getInfo().getId(), jobId)) {
+ return worker;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Gives the Job history
+ * @return
+ */
+ public Collection<JobInfo> getJobHistory() {
+ return getJobsInfo(workersHistory);
+ }
+
+ /**
+ * Gives the list of currently running jobs
+ * @return
+ */
+ public Collection<JobInfo> getJobRunning() {
+ return getJobsInfo(runningWorkers);
+ }
+
+ /**
+ * Gives all jobs(currently running and completed)
+ * @return
+ */
+ @SuppressWarnings("unchecked")
+ public Collection<JobInfo> getAllJobs() {
+ return CollectionUtils.union(getJobRunning(), getJobHistory());
+ }
+
+ private Collection<JobInfo> getJobsInfo(Collection<JobWorker> workers) {
+ List<JobInfo> jobsInfo = Lists.newLinkedList();
+ for (JobWorker worker : workers) {
+ jobsInfo.add(worker.getInfo());
+ }
+ return jobsInfo;
+ }
+
+
+ public JobInfo getInfo(String jobId) {
+ for (JobInfo jobInfo : getAllJobs()) {
+ if (StringUtils.equals(jobId, jobInfo.getId())) {
+ return jobInfo;
+ }
+ }
+ return null;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java
new file mode 100644
index 0000000..ce5d120
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.impl;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.ws.rs.WebApplicationException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Reader;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.service.NutchReader;
+
+/**
+ * Enables reading a sequence file and methods provide different
+ * ways to read the file.
+ * @author Sujen Shah
+ *
+ */
+public class SequenceReader implements NutchReader {
+
+ @Override
+ public List<List<String>> read(String path) throws FileNotFoundException {
+ // TODO Auto-generated method stub
+ List<List<String>> rows=new ArrayList<List<String>>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try {
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key =
+ (Writable)ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Writable value =
+ (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf);
+
+ while(reader.next(key, value)) {
+ List<String> row =new ArrayList<String>();
+ row.add(key.toString());
+ row.add(value.toString());
+ rows.add(row);
+ }
+ reader.close();
+ }catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+ return rows;
+ }
+
+ @Override
+ public List<List<String>> head(String path, int nrows)
+ throws FileNotFoundException {
+ // TODO Auto-generated method stub
+
+ List<List<String>> rows=new ArrayList<List<String>>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try {
+
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key =
+ (Writable)ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Writable value =
+ (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf);
+ int i = 0;
+ while(reader.next(key, value) && i<nrows) {
+ List<String> row =new ArrayList<String>();
+ row.add(key.toString());
+ row.add(value.toString());
+ rows.add(row);
+ i++;
+ }
+ reader.close();
+ } catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+ return rows;
+ }
+
+ @Override
+ public List<List<String>> slice(String path, int start, int end)
+ throws FileNotFoundException {
+ List<List<String>> rows=new ArrayList<List<String>>();
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ try {
+
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key =
+ (Writable)ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Writable value =
+ (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf);
+ int i = 0;
+
+ for(;i<start && reader.next(key, value);i++){} // increment to read start position
+ while(reader.next(key, value) && i<end) {
+ List<String> row =new ArrayList<String>();
+ row.add(key.toString());
+ row.add(value.toString());
+ rows.add(row);
+ i++;
+ }
+ reader.close();
+ } catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+ return rows;
+ }
+
+ @Override
+ public int count(String path) throws FileNotFoundException {
+ Path file = new Path(path);
+ SequenceFile.Reader reader;
+ int i = 0;
+ try {
+ reader = new SequenceFile.Reader(conf, Reader.file(file));
+ Writable key =
+ (Writable)ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ Writable value =
+ (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf);
+
+ while(reader.next(key, value)) {
+ i++;
+ }
+ reader.close();
+ } catch(FileNotFoundException fne){
+ throw new FileNotFoundException();
+ }catch (IOException e) {
+ // TODO Auto-generated catch block
+ LOG.error("Error occurred while reading file {} : ", file,
+ StringUtils.stringifyException(e));
+ throw new WebApplicationException();
+ }
+ return i;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java
new file mode 100644
index 0000000..5d069dc
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.model.request;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Request bean describing a database query: the configuration and crawl it
+ * refers to, the query type and a map of string arguments.
+ */
+public class DbQuery {
+
+  private String crawlId;
+  private String confId;
+  private String type;
+  /** Starts out as an empty, mutable map. */
+  private Map<String, String> args = new HashMap<String, String>();
+
+  /** @return the crawl id, or {@code null} if none was set */
+  public String getCrawlId() {
+    return this.crawlId;
+  }
+
+  /** @param crawlId the crawl id to store */
+  public void setCrawlId(String crawlId) {
+    this.crawlId = crawlId;
+  }
+
+  /** @return the configuration id, or {@code null} if none was set */
+  public String getConfId() {
+    return this.confId;
+  }
+
+  /** @param confId the configuration id to store */
+  public void setConfId(String confId) {
+    this.confId = confId;
+  }
+
+  /** @return the query type, or {@code null} if none was set */
+  public String getType() {
+    return this.type;
+  }
+
+  /** @param type the query type to store */
+  public void setType(String type) {
+    this.type = type;
+  }
+
+  /** @return the argument map held by this bean (not a copy) */
+  public Map<String, String> getArgs() {
+    return this.args;
+  }
+
+  /** @param args the argument map to store; kept by reference */
+  public void setArgs(Map<String, String> args) {
+    this.args = args;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java
new file mode 100644
index 0000000..af6c945
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.service.model.request;
+
+import java.util.Map;
+
+import org.apache.nutch.service.JobManager.JobType;
+
+
+/**
+ * Request bean describing a job to run: the crawl and configuration it
+ * belongs to, its type, an optional job class name and its arguments.
+ */
+public class JobConfig {
+
+  private String crawlId;
+  private String confId;
+  private JobType type;
+  private String jobClassName;
+  private Map<String, Object> args;
+
+  /** @return the crawl id, or {@code null} if none was set */
+  public String getCrawlId() {
+    return this.crawlId;
+  }
+
+  /** @param crawlId the crawl id to store */
+  public void setCrawlId(String crawlId) {
+    this.crawlId = crawlId;
+  }
+
+  /** @return the configuration id, or {@code null} if none was set */
+  public String getConfId() {
+    return this.confId;
+  }
+
+  /** @param confId the configuration id to store */
+  public void setConfId(String confId) {
+    this.confId = confId;
+  }
+
+  /** @return the job type, or {@code null} if none was set */
+  public JobType getType() {
+    return this.type;
+  }
+
+  /** @param type the job type to store */
+  public void setType(JobType type) {
+    this.type = type;
+  }
+
+  /** @return the job class name, or {@code null} if none was set */
+  public String getJobClassName() {
+    return this.jobClassName;
+  }
+
+  /** @param jobClass the job class name to store */
+  public void setJobClassName(String jobClass) {
+    this.jobClassName = jobClass;
+  }
+
+  /** @return the argument map, or {@code null} if none was set */
+  public Map<String, Object> getArgs() {
+    return this.args;
+  }
+
+  /** @param args the argument map to store; kept by reference */
+  public void setArgs(Map<String, Object> args) {
+    this.args = args;
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java
new file mode 100644
index 0000000..ffa9e3e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.model.request;
+
+import java.util.Map;
+
+import java.util.Collections;
+
+/**
+ * Mutable request bean for a configuration: a configuration id, a
+ * {@code force} flag (initially {@code false}) and a map of string
+ * parameters.
+ * <p>
+ * The parameter map is never {@code null}: it starts out as an empty map and
+ * {@link #setParams(Map)} replaces a {@code null} argument with an empty map.
+ */
+public class NutchConfig {
+  private String configId;
+  private boolean force = false;
+  private Map<String, String> params = Collections.emptyMap();
+
+  /** @return the parameter map; never {@code null} */
+  public Map<String, String> getParams() {
+    return params;
+  }
+
+  /**
+   * Stores the parameter map.
+   *
+   * @param params the parameters to keep (the reference is stored, not
+   *          copied); {@code null} is treated as "no parameters"
+   */
+  public void setParams(Map<String, String> params) {
+    if (params == null) {
+      // Keep the non-null invariant established by the field initializer so
+      // that readers of getParams() never have to null-check.
+      this.params = Collections.emptyMap();
+    } else {
+      this.params = params;
+    }
+  }
+
+  /** @return the configuration id, or {@code null} if none has been set */
+  public String getConfigId() {
+    return configId;
+  }
+
+  /** @param configId the configuration id to store */
+  public void setConfigId(String configId) {
+    this.configId = configId;
+  }
+
+  /** @return the current value of the {@code force} flag */
+  public boolean isForce() {
+    return force;
+  }
+
+  /** @param force the new value of the {@code force} flag */
+  public void setForce(boolean force) {
+    this.force = force;
+  }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java
new file mode 100644
index 0000000..81d7440
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.model.request;
+
+/**
+ * Mutable request bean holding a single {@code path} string.
+ */
+public class ReaderConfig {
+
+  private String path;
+
+  /** @return the stored path, or {@code null} if none has been set */
+  public String getPath() {
+    return this.path;
+  }
+
+  /** @param path the path to store; kept as given */
+  public void setPath(String path) {
+    this.path = path;
+  }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedList.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedList.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedList.java
new file mode 100644
index 0000000..bbb3e2a
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedList.java
@@ -0,0 +1,93 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.nutch.service.model.request;
+
+import java.io.Serializable;
+import java.util.Collection;
+
+import org.apache.commons.collections4.CollectionUtils;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonManagedReference;
+
+/**
+ * Serializable bean for a named collection of {@link SeedUrl}s.
+ * <p>
+ * Identity ({@link #equals(Object)} / {@link #hashCode()}) is derived from
+ * {@code id} only; the name and the URLs do not take part in it.
+ */
+public class SeedList implements Serializable {
+
+  private Long id;
+
+  private String name;
+
+  @JsonManagedReference
+  private Collection<SeedUrl> seedUrls;
+
+  /** @return the id, or {@code null} if none has been set */
+  public Long getId() {
+    return this.id;
+  }
+
+  /** @param id the id to store */
+  public void setId(Long id) {
+    this.id = id;
+  }
+
+  /** @return the name, or {@code null} if none has been set */
+  public String getName() {
+    return this.name;
+  }
+
+  /** @param name the name to store */
+  public void setName(String name) {
+    this.name = name;
+  }
+
+  /** @return the seed URL collection exactly as it was set (may be {@code null}) */
+  public Collection<SeedUrl> getSeedUrls() {
+    return this.seedUrls;
+  }
+
+  /** @param seedUrls the seed URLs to store; the reference is kept as is */
+  public void setSeedUrls(Collection<SeedUrl> seedUrls) {
+    this.seedUrls = seedUrls;
+  }
+
+  /**
+   * Counts the seed URLs; excluded from JSON via {@code @JsonIgnore}.
+   *
+   * @return the collection size, or 0 when the collection is null or empty
+   */
+  @JsonIgnore
+  public int getSeedUrlsCount() {
+    return CollectionUtils.isEmpty(seedUrls) ? 0 : seedUrls.size();
+  }
+
+  /** Hash code computed from {@code id} alone (a null id contributes 0). */
+  @Override
+  public int hashCode() {
+    int idHash = (id == null) ? 0 : id.hashCode();
+    return 31 + idHash;
+  }
+
+  /**
+   * Two seed lists are equal when they are of exactly the same class and
+   * their ids are equal (two null ids count as equal).
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    SeedList that = (SeedList) obj;
+    return (id == null) ? that.id == null : id.equals(that.id);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java
new file mode 100644
index 0000000..b1c93a8
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java
@@ -0,0 +1,89 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.nutch.service.model.request;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonBackReference;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+/**
+ * Serializable bean for one seed URL, holding a back reference to the
+ * {@link SeedList} that owns it.
+ * <p>
+ * Identity ({@link #equals(Object)} / {@link #hashCode()}) is derived from
+ * {@code id} only; the URL string and the owning list do not take part in it.
+ */
+public class SeedUrl implements Serializable {
+
+  private Long id;
+
+  @JsonBackReference
+  private SeedList seedList;
+
+  private String url;
+
+  /** Creates an instance with every field unset. */
+  public SeedUrl() {}
+
+  /**
+   * Creates an instance for the given URL.
+   *
+   * @param url the URL string to store
+   */
+  public SeedUrl(String url) {
+    this.url = url;
+  }
+
+  /** @return the id, or {@code null} if none has been set */
+  public Long getId() {
+    return this.id;
+  }
+
+  /** @param id the id to store */
+  public void setId(Long id) {
+    this.id = id;
+  }
+
+  /** @return the URL string, or {@code null} if none has been set */
+  public String getUrl() {
+    return this.url;
+  }
+
+  /** @param url the URL string to store */
+  public void setUrl(String url) {
+    this.url = url;
+  }
+
+  /** @return the owning seed list (may be {@code null}); ignored by JSON */
+  @JsonIgnore
+  public SeedList getSeedList() {
+    return this.seedList;
+  }
+
+  /** @param seedList the owning seed list to store; ignored by JSON */
+  @JsonIgnore
+  public void setSeedList(SeedList seedList) {
+    this.seedList = seedList;
+  }
+
+  /** Hash code computed from {@code id} alone (a null id contributes 0). */
+  @Override
+  public int hashCode() {
+    int idHash = (id == null) ? 0 : id.hashCode();
+    return 31 + idHash;
+  }
+
+  /**
+   * Two seed URLs are equal when they are of exactly the same class and
+   * their ids are equal (two null ids count as equal).
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    SeedUrl that = (SeedUrl) obj;
+    return (id == null) ? that.id == null : id.equals(that.id);
+  }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java b/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java
new file mode 100644
index 0000000..267b50b
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.model.response;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.nutch.parse.Outlink;
+
+/**
+ * Response bean describing one fetched node: its URL, an integer status, the
+ * number of outlinks and a list of {@link ChildNode}s built from those
+ * outlinks.
+ */
+public class FetchNodeDbInfo {
+
+  private String url;
+  private int status;
+  private int numOfOutlinks;
+  private List<ChildNode> children = new ArrayList<ChildNode>();
+
+  /** @return the URL, or {@code null} if none has been set */
+  public String getUrl() {
+    return url;
+  }
+
+  /** @param url the URL to store */
+  public void setUrl(String url) {
+    this.url = url;
+  }
+
+  /** @return the stored status value */
+  public int getStatus() {
+    return status;
+  }
+
+  /** @param status the status value to store */
+  public void setStatus(int status) {
+    this.status = status;
+  }
+
+  /** @return the stored number of outlinks */
+  public int getNumOfOutlinks() {
+    return numOfOutlinks;
+  }
+
+  /** @param numOfOutlinks the number of outlinks to store */
+  public void setNumOfOutlinks(int numOfOutlinks) {
+    this.numOfOutlinks = numOfOutlinks;
+  }
+
+  /**
+   * Appends one {@link ChildNode} (target URL plus anchor text) per outlink
+   * to the list of children. Despite the "set" name, children that are
+   * already present are kept.
+   *
+   * @param links the outlinks to convert; a {@code null} array and
+   *          {@code null} elements are skipped
+   */
+  public void setChildNodes(Outlink[] links) {
+    if (links == null) {
+      return;
+    }
+    if (children == null) {
+      // setChildren(null) may have cleared the list; recreate it so the
+      // append below cannot fail.
+      children = new ArrayList<ChildNode>();
+    }
+    for (Outlink outlink : links) {
+      if (outlink != null) {
+        children.add(new ChildNode(outlink.getToUrl(), outlink.getAnchor()));
+      }
+    }
+  }
+
+  /**
+   * A single outlink target: the child URL and its anchor text.
+   * <p>
+   * Declared {@code public static} because the type appears in the public
+   * signatures of {@link #getChildren()} and {@link #setChildren(List)}, and
+   * because it never uses the enclosing instance.
+   */
+  public static class ChildNode {
+    private String childUrl;
+    private String anchorText;
+
+    /**
+     * @param childUrl the outlink target URL
+     * @param anchorText the outlink anchor text
+     */
+    public ChildNode(String childUrl, String anchorText) {
+      this.childUrl = childUrl;
+      this.anchorText = anchorText;
+    }
+
+    /** @return the anchor text */
+    public String getAnchorText() {
+      return anchorText;
+    }
+
+    /** @param anchorText the anchor text to store */
+    public void setAnchorText(String anchorText) {
+      this.anchorText = anchorText;
+    }
+
+    /** @return the child URL */
+    public String getChildUrl() {
+      return childUrl;
+    }
+
+    /** @param childUrl the child URL to store */
+    public void setChildUrl(String childUrl) {
+      this.childUrl = childUrl;
+    }
+  }
+
+  /** @return the list of children exactly as stored (live reference) */
+  public List<ChildNode> getChildren() {
+    return children;
+  }
+
+  /** @param children the list of children to store; the reference is kept */
+  public void setChildren(List<ChildNode> children) {
+    this.children = children;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java b/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java
new file mode 100644
index 0000000..c2e185d
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.model.response;
+
+import java.util.Map;
+
+import org.apache.nutch.service.JobManager.JobType;
+import org.apache.nutch.service.model.request.JobConfig;
+
+/**
+ * Response bean carrying information about a job: its id, type,
+ * configuration id, crawl id, arguments, result map, state and a message.
+ */
+public class JobInfo {
+
+  /** States a job can be reported in. */
+  public enum State {
+    IDLE, RUNNING, FINISHED, FAILED, KILLED, STOPPING, KILLING, ANY
+  }
+
+  private String id;
+  private JobType type;
+  private String confId;
+  private Map<String, Object> args;
+  private Map<String, Object> result;
+  private State state;
+  private String msg;
+  private String crawlId;
+
+  /**
+   * Builds the info object from a job configuration. Type, configuration id,
+   * crawl id and arguments are copied from {@code jobConfig}; the result map
+   * is left unset.
+   *
+   * @param generateId the id to assign to this job info
+   * @param jobConfig the configuration the job was created from; must not be
+   *          {@code null}
+   * @param state the initial state
+   * @param msg the initial message
+   */
+  public JobInfo(String generateId, JobConfig jobConfig, State state,
+      String msg) {
+    this.id = generateId;
+    this.type = jobConfig.getType();
+    this.confId = jobConfig.getConfId();
+    this.crawlId = jobConfig.getCrawlId();
+    this.args = jobConfig.getArgs();
+    this.msg = msg;
+    this.state = state;
+  }
+
+  /** @return the job id */
+  public String getId() {
+    return this.id;
+  }
+
+  /** @param id the job id to store */
+  public void setId(String id) {
+    this.id = id;
+  }
+
+  /** @return the job type */
+  public JobType getType() {
+    return this.type;
+  }
+
+  /** @param type the job type to store */
+  public void setType(JobType type) {
+    this.type = type;
+  }
+
+  /** @return the configuration id */
+  public String getConfId() {
+    return this.confId;
+  }
+
+  /** @param confId the configuration id to store */
+  public void setConfId(String confId) {
+    this.confId = confId;
+  }
+
+  /** @return the argument map exactly as stored (may be {@code null}) */
+  public Map<String, Object> getArgs() {
+    return this.args;
+  }
+
+  /** @param args the argument map to store; the reference is kept as is */
+  public void setArgs(Map<String, Object> args) {
+    this.args = args;
+  }
+
+  /** @return the result map, or {@code null} if none has been set */
+  public Map<String, Object> getResult() {
+    return this.result;
+  }
+
+  /** @param result the result map to store; the reference is kept as is */
+  public void setResult(Map<String, Object> result) {
+    this.result = result;
+  }
+
+  /** @return the current state */
+  public State getState() {
+    return this.state;
+  }
+
+  /** @param state the state to store */
+  public void setState(State state) {
+    this.state = state;
+  }
+
+  /** @return the current message */
+  public String getMsg() {
+    return this.msg;
+  }
+
+  /** @param msg the message to store */
+  public void setMsg(String msg) {
+    this.msg = msg;
+  }
+
+  /** @return the crawl id */
+  public String getCrawlId() {
+    return this.crawlId;
+  }
+
+  /** @param crawlId the crawl id to store */
+  public void setCrawlId(String crawlId) {
+    this.crawlId = crawlId;
+  }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java b/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java
new file mode 100644
index 0000000..f8867e6
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.model.response;
+
+import java.util.Collection;
+import java.util.Date;
+import java.util.Set;
+
+/**
+ * Response bean summarising the server: a start date, a set of
+ * configuration names, and two collections of {@link JobInfo} (all jobs and
+ * running jobs). All values are stored and returned by reference.
+ */
+public class NutchServerInfo {
+
+  private Date startDate;
+  private Set<String> configuration;
+  private Collection<JobInfo> jobs;
+  private Collection<JobInfo> runningJobs;
+
+  /** @return the start date, or {@code null} if none has been set */
+  public Date getStartDate() {
+    return this.startDate;
+  }
+
+  /** @param startDate the start date to store */
+  public void setStartDate(Date startDate) {
+    this.startDate = startDate;
+  }
+
+  /** @return the configuration set, or {@code null} if none has been set */
+  public Set<String> getConfiguration() {
+    return this.configuration;
+  }
+
+  /** @param configuration the configuration set to store */
+  public void setConfiguration(Set<String> configuration) {
+    this.configuration = configuration;
+  }
+
+  /** @return the job collection, or {@code null} if none has been set */
+  public Collection<JobInfo> getJobs() {
+    return this.jobs;
+  }
+
+  /** @param jobs the job collection to store */
+  public void setJobs(Collection<JobInfo> jobs) {
+    this.jobs = jobs;
+  }
+
+  /** @return the running-job collection, or {@code null} if none has been set */
+  public Collection<JobInfo> getRunningJobs() {
+    return this.runningJobs;
+  }
+
+  /** @param runningJobs the running-job collection to store */
+  public void setRunningJobs(Collection<JobInfo> runningJobs) {
+    this.runningJobs = runningJobs;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java
new file mode 100644
index 0000000..ebe4138
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.resources;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.nutch.service.ConfManager;
+import org.apache.nutch.service.JobManager;
+import org.apache.nutch.service.NutchServer;
+
+@Produces(MediaType.APPLICATION_JSON)
+public abstract class AbstractResource {
+
+ protected JobManager jobManager;
+ protected ConfManager configManager;
+ protected NutchServer server;
+
+ public AbstractResource() {
+ server = NutchServer.getInstance();
+ configManager = NutchServer.getInstance().getConfManager();
+ jobManager = NutchServer.getInstance().getJobManager();
+ }
+
+ protected void throwBadRequestException(String message) {
+ throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).build());
+ }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java
new file mode 100644
index 0000000..3f0189e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.resources;
+
+import java.util.Date;
+
+import javax.ws.rs.GET;
+import javax.ws.rs.Path;
+import javax.ws.rs.QueryParam;
+
+import org.apache.nutch.service.model.response.JobInfo.State;
+import org.apache.nutch.service.model.response.NutchServerInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Administrative endpoints under {@code /admin}: report the server status and
+ * request a server stop.
+ */
+@Path(value="/admin")
+public class AdminResource extends AbstractResource{
+
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AdminResource.class);
+
+  /** Delay, in seconds, between a stop request and the actual stop. */
+  private static final int DELAY_SEC = 1;
+
+  /**
+   * Reports the status of the Nutch server.
+   *
+   * @return the configuration names, the start date, all jobs and the
+   *         running jobs
+   */
+  @GET
+  @Path(value="/")
+  public NutchServerInfo getServerStatus(){
+    NutchServerInfo status = new NutchServerInfo();
+    status.setConfiguration(configManager.list());
+    status.setStartDate(new Date(server.getStarted()));
+    status.setJobs(jobManager.list(null, State.ANY));
+    status.setRunningJobs(jobManager.list(null, State.RUNNING));
+    return status;
+  }
+
+  /**
+   * Stops the Nutch server if {@code server.canStop(force)} allows it.
+   *
+   * @param force If set to true, it will kill any running jobs
+   * @return a message saying either that the stop was scheduled or that
+   *         jobs are still running
+   */
+  @GET
+  @Path(value="/stop")
+  public String stopServer(@QueryParam("force") boolean force){
+    if (server.canStop(force)) {
+      scheduleServerStop();
+      return "Stopping in server on port " + server.getPort();
+    }
+    return "Jobs still running -- Cannot stop server now" ;
+  }
+
+  /**
+   * Starts a daemon thread that sleeps for {@code DELAY_SEC} seconds and then
+   * stops the server, so that the calling request can return first.
+   */
+  private void scheduleServerStop() {
+    LOG.info("Shutting down server in {} sec", DELAY_SEC);
+    Runnable delayedStop = new Runnable() {
+      @Override
+      public void run() {
+        try {
+          Thread.sleep(DELAY_SEC*1000);
+        } catch (InterruptedException e) {
+          // Restore the interrupt flag; the server is stopped regardless.
+          Thread.currentThread().interrupt();
+        }
+        server.stop();
+        LOG.info("Service stopped.");
+      }
+    };
+    Thread stopper = new Thread(delayedStop);
+    stopper.setDaemon(true);
+    stopper.start();
+    LOG.info("Service shutting down...");
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java
new file mode 100644
index 0000000..6afd621
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.service.resources;
+
+
+import java.util.Map;
+import java.util.Set;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.nutch.service.model.request.NutchConfig;
+import com.fasterxml.jackson.jaxrs.annotation.JacksonFeatures;
+import com.fasterxml.jackson.databind.SerializationFeature;
+
+@Path("/config")
+public class ConfigResource extends AbstractResource{
+
+ public static final String DEFAULT = "default";
+
+ /**
+ * Returns a list of all configurations created.
+ * @return List of configurations
+ */
+ @GET
+ @Path("/")
+ @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT })
+ public Set<String> getConfigs() {
+ return configManager.list();
+ }
+
+ /**
+ * Get configuration properties
+ * @param configId The configuration ID to fetch
+ * @return HashMap of the properties set within the given configId
+ */
+ @GET
+ @Path("/{configId}")
+ @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT })
+ public Map<String, String> getConfig(@PathParam("configId") String configId) {
+ return configManager.getAsMap(configId);
+ }
+
+ /**
+ * Get property
+ * @param configId The ID of the configuration
+ * @param propertyId The name(key) of the property
+ * @return value of the specified property in the provided configId.
+ */
+ @GET
+ @Path("/{configId}/{propertyId}")
+ @Produces(MediaType.TEXT_PLAIN)
+ @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT })
+ public String getProperty(@PathParam("configId") String configId,
+ @PathParam("propertyId") String propertyId) {
+ return configManager.getAsMap(configId).get(propertyId);
+ }
+
+ /**
+ * Removes the configuration from the list of known configurations.
+ * @param configId The ID of the configuration to delete
+ */
+ @DELETE
+ @Path("/{configId}")
+ public void deleteConfig(@PathParam("configId") String configId) {
+ configManager.delete(configId);
+ }
+
+ /**
+ * Create new configuration.
+ * @param newConfig
+ * @return The name of the new configuration created
+ */
+ @POST
+ @Path("/create")
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.TEXT_PLAIN)
+ public Response createConfig(NutchConfig newConfig) {
+ if (newConfig == null) {
+ return Response.status(400)
+ .entity("Nutch configuration cannot be empty!").build();
+ }
+ try{
+ configManager.create(newConfig);
+ }catch(Exception e){
+ return Response.status(400)
+ .entity(e.getMessage()).build();
+ }
+ return Response.ok(newConfig.getConfigId()).build();
+ }
+
+ /**
+ * Adds/Updates a particular property value in the configuration
+ * @param confId Configuration ID whose property needs to be updated. Make sure that the given
+ * confId exists to prevent errors.
+ * @param propertyKey Name of the property
+ * @param value Value as a simple text
+ * @return Success code
+ */
+ @PUT
+ @Path("/{configId}/{propertyId}")
+ @Consumes(MediaType.TEXT_PLAIN)
+ public Response updateProperty(@PathParam("configId")String confId,
+ @PathParam("propertyId")String propertyKey, String value) {
+ try{
+ configManager.setProperty(confId, propertyKey, value);
+ }catch(Exception e) {
+ return Response.status(400).entity(e.getMessage()).build();
+ }
+ return Response.ok().build();
+ }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java
new file mode 100644
index 0000000..2672fcc
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.resources;
+
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DefaultValue;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.crawl.CrawlDbReader;
+import org.apache.nutch.fetcher.FetchNode;
+import org.apache.nutch.fetcher.FetchNodeDb;
+import org.apache.nutch.service.model.request.DbQuery;
+import org.apache.nutch.service.model.response.FetchNodeDbInfo;
+
+@Path(value = "/db")
+public class DbResource extends AbstractResource {
+
+ @POST
+ @Path(value = "/crawldb")
+ @Consumes(MediaType.APPLICATION_JSON)
+ public Response readdb(DbQuery dbQuery){
+ if(dbQuery == null)
+ return Response.status(Status.BAD_REQUEST).build();
+
+ Configuration conf = configManager.get(dbQuery.getConfId());
+ if(conf == null){
+ conf = configManager.get(ConfigResource.DEFAULT);
+ }
+ if(dbQuery.getCrawlId() == null || dbQuery.getType() == null){
+ return Response.status(Status.BAD_REQUEST).build();
+ }
+ String type = dbQuery.getType();
+
+ if(type.equalsIgnoreCase("stats")){
+ return crawlDbStats(conf, dbQuery.getArgs(), dbQuery.getCrawlId());
+ }
+ if(type.equalsIgnoreCase("dump")){
+ return crawlDbDump(conf, dbQuery.getArgs(), dbQuery.getCrawlId());
+ }
+ if(type.equalsIgnoreCase("topN")){
+ return crawlDbTopN(conf, dbQuery.getArgs(), dbQuery.getCrawlId());
+ }
+ if(type.equalsIgnoreCase("url")){
+ return crawlDbUrl(conf, dbQuery.getArgs(), dbQuery.getCrawlId());
+ }
+ return null;
+
+ }
+
+ @GET
+ @Path(value="/fetchdb")
+ public List<FetchNodeDbInfo> fetchDb(@DefaultValue("0")@QueryParam("to")int to, @DefaultValue("0")@QueryParam("from")int from){
+ List<FetchNodeDbInfo> listOfFetchedNodes = new ArrayList<FetchNodeDbInfo>();
+ Map<Integer, FetchNode> fetchNodedbMap = FetchNodeDb.getInstance().getFetchNodeDb();
+
+ if(to ==0 || to>fetchNodedbMap.size()){
+ to = fetchNodedbMap.size();
+ }
+ for(int i=from;i<=to;i++){
+ if(!fetchNodedbMap.containsKey(i)){
+ continue;
+ }
+ FetchNode node = fetchNodedbMap.get(i);
+ FetchNodeDbInfo fdbInfo = new FetchNodeDbInfo();
+ fdbInfo.setUrl(node.getUrl().toString());
+ fdbInfo.setStatus(node.getStatus());
+ fdbInfo.setNumOfOutlinks(node.getOutlinks().length);
+ fdbInfo.setChildNodes(node.getOutlinks());
+ listOfFetchedNodes.add(fdbInfo);
+ }
+
+ return listOfFetchedNodes;
+ }
+ @SuppressWarnings("resource")
+ private Response crawlDbStats(Configuration conf, Map<String, String> args, String crawlId){
+ CrawlDbReader dbr = new CrawlDbReader();
+ try{
+ return Response.ok(dbr.query(args, conf, "stats", crawlId)).build();
+ }catch(Exception e){
+ e.printStackTrace();
+ return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build();
+ }
+ }
+
+ @Produces(MediaType.APPLICATION_OCTET_STREAM)
+ private Response crawlDbDump(Configuration conf, Map<String, String> args, String crawlId){
+ CrawlDbReader dbr = new CrawlDbReader();
+ try{
+ return Response.ok(dbr.query(args, conf, "dump", crawlId), MediaType.APPLICATION_OCTET_STREAM).build();
+ }catch(Exception e){
+ e.printStackTrace();
+ return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build();
+ }
+ }
+
+ @Produces(MediaType.APPLICATION_OCTET_STREAM)
+ private Response crawlDbTopN(Configuration conf, Map<String, String> args, String crawlId) {
+ CrawlDbReader dbr = new CrawlDbReader();
+ try{
+ return Response.ok(dbr.query(args, conf, "topN", crawlId), MediaType.APPLICATION_OCTET_STREAM).build();
+ }catch(Exception e){
+ e.printStackTrace();
+ return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build();
+ }
+ }
+
+ private Response crawlDbUrl(Configuration conf, Map<String, String> args, String crawlId){
+ CrawlDbReader dbr = new CrawlDbReader();
+ try{
+ return Response.ok(dbr.query(args, conf, "url", crawlId)).build();
+ }catch(Exception e){
+ e.printStackTrace();
+ return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java
new file mode 100644
index 0000000..b142d73
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.resources;
+
+import java.util.Collection;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.core.MediaType;
+
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.jaxrs.annotation.JacksonFeatures;
+import org.apache.nutch.service.model.request.JobConfig;
+import org.apache.nutch.service.model.response.JobInfo;
+import org.apache.nutch.service.model.response.JobInfo.State;
+
+@Path("/job")
+public class JobResource extends AbstractResource {
+
+ /**
+ * Lists jobs in any state.
+ * @param crawlId crawl identifier forwarded to the job manager
+ * @return the jobs the job manager reports for {@link State#ANY},
+ * serialized with indented output
+ */
+ @GET
+ @Path("/")
+ @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT })
+ public Collection<JobInfo> getJobs(@QueryParam("crawlId") String crawlId) {
+ final Collection<JobInfo> history = jobManager.list(crawlId, State.ANY);
+ return history;
+ }
+
+ /**
+ * Looks up a single job.
+ * @param id job identifier taken from the request path
+ * @param crawlId crawl identifier taken from the query string
+ * @return whatever the job manager returns for this crawl/job pair,
+ * serialized with indented output
+ */
+ @GET
+ @Path("/{id}")
+ @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT })
+ public JobInfo getInfo(@PathParam("id") String id,
+ @QueryParam("crawlId") String crawlId) {
+ final JobInfo info = jobManager.get(crawlId, id);
+ return info;
+ }
+
+ /**
+ * Asks the job manager to stop a job.
+ * @param id job identifier taken from the request path
+ * @param crawlId crawl identifier taken from the query string
+ * @return the boolean result reported by the job manager's stop call
+ */
+ @GET
+ @Path("/{id}/stop")
+ public boolean stop(@PathParam("id") String id,
+ @QueryParam("crawlId") String crawlId) {
+ final boolean stopped = jobManager.stop(crawlId, id);
+ return stopped;
+ }
+
+ /**
+ * Asks the job manager to abort a job.
+ * @param id job identifier taken from the request path
+ * @param crawlId crawl identifier taken from the query string
+ * @return the boolean result reported by the job manager's abort call
+ */
+ @GET
+ @Path("/{id}/abort")
+ public boolean abort(@PathParam("id") String id,
+ @QueryParam("crawlId") String crawlId) {
+ final boolean aborted = jobManager.abort(crawlId, id);
+ return aborted;
+ }
+
+ /**
+ * Creates a job from the JSON request body.
+ * @param config job parameters; a missing body is reported through
+ * throwBadRequestException
+ * @return the job description returned by the job manager
+ */
+ @POST
+ @Path("/create")
+ @Consumes(MediaType.APPLICATION_JSON)
+ public JobInfo create(JobConfig config) {
+ if (null == config) {
+ throwBadRequestException("Job configuration is required!");
+ }
+ final JobInfo created = jobManager.create(config);
+ return created;
+ }
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java
new file mode 100644
index 0000000..030999e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.resources;
+
+import java.util.HashMap;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DefaultValue;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.nutch.service.NutchReader;
+import org.apache.nutch.service.impl.LinkReader;
+import org.apache.nutch.service.impl.NodeReader;
+import org.apache.nutch.service.impl.SequenceReader;
+import org.apache.nutch.service.model.request.ReaderConfig;
+
+/**
+ * The Reader endpoint enables a user to read sequence files,
+ * nodes and links from the Nutch webgraph.
+ * @author Sujen Shah
+ *
+ */
+@Path("/reader")
+public class ReaderResouce {
+
+ /**
+ * Read a sequence file
+ * @param readerConf Request body holding the path of the file to read
+ * @param nrows Number of rows to read. If not specified all rows will be read
+ * @param start Specify a starting line number to read the file from
+ * @param end The line number to read the file till
+ * @param count Boolean value. If true, this endpoint will return the number of lines in the file
+ * @return Appropriate HTTP response based on the query; 400 when the
+ * request body is missing or the read fails
+ */
+ @Path("/sequence/read")
+ @POST
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response seqRead(ReaderConfig readerConf,
+ @DefaultValue("-1")@QueryParam("nrows") int nrows,
+ @DefaultValue("-1")@QueryParam("start") int start,
+ @QueryParam("end")int end, @QueryParam("count") boolean count) {
+
+ return readFrom(new SequenceReader(), readerConf, nrows, start, end, count);
+ }
+
+ /**
+ * Get Link Reader response schema
+ * @return JSON object specifying the schema of the responses returned by the Link Reader
+ */
+ @Path("/link")
+ @GET
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response linkRead() {
+ HashMap<String, String> schema = new HashMap<>();
+ schema.put("key_url","string");
+ schema.put("timestamp", "int");
+ schema.put("score","float");
+ schema.put("anchor","string");
+ schema.put("linktype","string");
+ schema.put("url","string");
+ return Response.ok(schema).type(MediaType.APPLICATION_JSON).build();
+ }
+
+ /**
+ * Read link objects
+ * @param readerConf Request body holding the path to read from
+ * @param nrows Number of rows to read. If not specified all rows will be read
+ * @param start Specify a starting line number to read from
+ * @param end The line number to read till
+ * @param count Boolean value. If true, only the count is returned
+ * @return Appropriate HTTP response based on the query; 400 when the
+ * request body is missing or the read fails
+ */
+ @Path("/link/read")
+ @POST
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response linkRead(ReaderConfig readerConf,
+ @DefaultValue("-1")@QueryParam("nrows") int nrows,
+ @DefaultValue("-1")@QueryParam("start") int start,
+ @QueryParam("end") int end, @QueryParam("count") boolean count) {
+
+ return readFrom(new LinkReader(), readerConf, nrows, start, end, count);
+ }
+
+ /**
+ * Get schema of the Node object
+ * @return JSON object specifying the schema of the responses returned by the Node Reader
+ */
+ @Path("/node")
+ @GET
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response nodeRead() {
+ HashMap<String, String> schema = new HashMap<>();
+ schema.put("key_url","string");
+ schema.put("num_inlinks", "int");
+ schema.put("num_outlinks","int");
+ schema.put("inlink_score","float");
+ schema.put("outlink_score","float");
+ schema.put("metadata","string");
+ return Response.ok(schema).type(MediaType.APPLICATION_JSON).build();
+ }
+
+
+ /**
+ * Read Node object as stored in the Nutch Webgraph
+ * @param readerConf Request body holding the path to read from
+ * @param nrows Number of rows to read. If not specified all rows will be read
+ * @param start Specify a starting line number to read from
+ * @param end The line number to read till
+ * @param count Boolean value. If true, only the count is returned
+ * @return Appropriate HTTP response based on the query; 400 when the
+ * request body is missing or the read fails
+ */
+ @Path("/node/read")
+ @POST
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response nodeRead(ReaderConfig readerConf,
+ @DefaultValue("-1")@QueryParam("nrows") int nrows,
+ @DefaultValue("-1")@QueryParam("start") int start,
+ @QueryParam("end") int end, @QueryParam("count") boolean count) {
+
+ return readFrom(new NodeReader(), readerConf, nrows, start, end, count);
+ }
+
+ /**
+ * Validates the request body shared by all read endpoints and then
+ * delegates to {@link #performRead}.
+ * @return 400 when no reader configuration was posted, otherwise the
+ * result of the read
+ */
+ private Response readFrom(NutchReader reader, ReaderConfig readerConf,
+ int nrows, int start, int end, boolean count) {
+ // A request without a JSON body would otherwise fail with a
+ // NullPointerException on getPath().
+ if (readerConf == null) {
+ return Response.status(Status.BAD_REQUEST)
+ .entity("Reader configuration is required!")
+ .type(MediaType.TEXT_PLAIN).build();
+ }
+ return performRead(reader, readerConf.getPath(), nrows, start, end, count);
+ }
+
+ /**
+ * Runs one reader operation chosen from the query parameters, in this
+ * order of precedence: count, slice (start &gt; -1 and end &gt; 0),
+ * head (nrows &gt; -1), full read.
+ * @return plain-text count, JSON read result, or 400 "File not found"
+ * when the reader throws
+ */
+ private Response performRead(NutchReader reader, String path,
+ int nrows, int start, int end, boolean count) {
+ try{
+ if(count){
+ Object total = reader.count(path);
+ return Response.ok(total).type(MediaType.TEXT_PLAIN).build();
+ }
+ Object result;
+ if(start>-1 && end>0) {
+ result = reader.slice(path, start, end);
+ }
+ else if(nrows>-1) {
+ result = reader.head(path, nrows);
+ }
+ else {
+ result = reader.read(path);
+ }
+ return Response.ok(result).type(MediaType.APPLICATION_JSON).build();
+ }catch(Exception e){
+ // NOTE(review): every failure is reported as "File not found";
+ // kept unchanged because clients may rely on this response.
+ return Response.status(Status.BAD_REQUEST).entity("File not found").build();
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java
new file mode 100644
index 0000000..5261139
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.service.resources;
+
+import static javax.ws.rs.core.Response.status;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Collection;
+
+import javax.ws.rs.Consumes;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.nutch.service.model.request.SeedList;
+import org.apache.nutch.service.model.request.SeedUrl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.io.Files;
+
+@Path("/seed")
+public class SeedResource extends AbstractResource {
+ // Log under this class's own name (was AdminResource, a copy-paste slip).
+ private static final Logger log = LoggerFactory
+ .getLogger(SeedResource.class);
+
+ /**
+ * Creates a seed list file in a new temporary directory, writing one URL
+ * per line, and returns the path of that directory.
+ * @param seedList seed list posted as JSON; its URL collection may be empty
+ * @return OK response with the temporary directory path as plain text, or
+ * a BAD_REQUEST response when no seed list was posted
+ * @throws WebApplicationException with INTERNAL_SERVER_ERROR status when
+ * the seed file cannot be created or written
+ */
+ @POST
+ @Path("/create")
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.TEXT_PLAIN)
+ public Response createSeedFile(SeedList seedList) {
+ if (seedList == null) {
+ return Response.status(Status.BAD_REQUEST)
+ .entity("Seed list cannot be empty!").build();
+ }
+ File seedFile = createSeedFile();
+ Collection<SeedUrl> seedUrls = seedList.getSeedUrls();
+
+ // try-with-resources closes (and thereby flushes) the writer on every
+ // path; previously the writer was never closed.
+ try (BufferedWriter writer = getWriter(seedFile)) {
+ if (CollectionUtils.isNotEmpty(seedUrls)) {
+ for (SeedUrl seedUrl : seedUrls) {
+ writeUrl(writer, seedUrl);
+ }
+ }
+ } catch (IOException e) {
+ // Raised by the implicit close(); failures inside the block already
+ // surface as WebApplicationException.
+ throw handleException(e);
+ }
+
+ return Response.ok().entity(seedFile.getParent()).build();
+ }
+
+ /**
+ * Appends one seed URL followed by a line separator.
+ */
+ private void writeUrl(BufferedWriter writer, SeedUrl seedUrl) {
+ try {
+ writer.write(seedUrl.getUrl());
+ writer.newLine();
+ } catch (IOException e) {
+ throw handleException(e);
+ }
+ }
+
+ /**
+ * Opens a buffered writer on the seed file; the caller must close it.
+ */
+ private BufferedWriter getWriter(File seedFile) {
+ try {
+ return new BufferedWriter(new FileWriter(seedFile));
+ } catch (IOException e) {
+ // FileNotFoundException is an IOException, so one handler suffices.
+ throw handleException(e);
+ }
+ }
+
+ /**
+ * Creates an empty "seed*.txt" file inside a newly created temporary directory.
+ */
+ private File createSeedFile() {
+ try {
+ return File.createTempFile("seed", ".txt", Files.createTempDir());
+ } catch (IOException e) {
+ throw handleException(e);
+ }
+ }
+
+ /**
+ * Logs the failure and converts it into the exception returned to the client.
+ */
+ private RuntimeException handleException(Exception e) {
+ log.error("Cannot create seed file!", e);
+ return new WebApplicationException(status(Status.INTERNAL_SERVER_ERROR)
+ .entity("Cannot create seed file!").build());
+ }
+
+}