You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by su...@apache.org on 2016/10/25 16:02:22 UTC
nutch git commit: Fix for NUTCH-2327: Seeds injected in REST must be ingested into HDFS, this closes #155
Repository: nutch
Updated Branches:
refs/heads/master 9092e233f -> 24cc2aa9c
Fix for NUTCH-2327: Seeds injected in REST must be ingested into HDFS, this closes #155
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/24cc2aa9
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/24cc2aa9
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/24cc2aa9
Branch: refs/heads/master
Commit: 24cc2aa9c68fa356e4e926b6bf86bac99d52e38c
Parents: 9092e23
Author: Sujen Shah <su...@gmail.com>
Authored: Tue Oct 18 21:36:27 2016 -0700
Committer: Sujen Shah <su...@apache.org>
Committed: Tue Oct 25 09:02:00 2016 -0700
----------------------------------------------------------------------
.../nutch/service/resources/SeedResource.java | 75 +++++++-------------
1 file changed, 27 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nutch/blob/24cc2aa9/src/java/org/apache/nutch/service/resources/SeedResource.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/service/resources/SeedResource.java b/src/java/org/apache/nutch/service/resources/SeedResource.java
index 638af33..61a0526 100644
--- a/src/java/org/apache/nutch/service/resources/SeedResource.java
+++ b/src/java/org/apache/nutch/service/resources/SeedResource.java
@@ -16,13 +16,7 @@
*/
package org.apache.nutch.service.resources;
-import static javax.ws.rs.core.Response.status;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileWriter;
-import java.io.IOException;
+import java.io.OutputStream;
import java.util.Collection;
import java.util.Map;
@@ -31,19 +25,19 @@ import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.nutch.service.NutchServer;
import org.apache.nutch.service.model.request.SeedList;
import org.apache.nutch.service.model.request.SeedUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.io.Files;
@Path("/seed")
public class SeedResource extends AbstractResource {
@@ -77,58 +71,43 @@ public class SeedResource extends AbstractResource {
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.TEXT_PLAIN)
public Response createSeedFile(SeedList seedList) {
+ try {
if (seedList == null) {
return Response.status(Status.BAD_REQUEST)
.entity("Seed list cannot be empty!").build();
}
- File seedFile = createSeedFile();
- BufferedWriter writer = getWriter(seedFile);
-
Collection<SeedUrl> seedUrls = seedList.getSeedUrls();
- if (CollectionUtils.isNotEmpty(seedUrls)) {
- for (SeedUrl seedUrl : seedUrls) {
- writeUrl(writer, seedUrl);
- }
- }
- String seedFilePath = seedFile.getParent();
+
+ String seedFilePath = writeToSeedFile(seedUrls);
seedList.setSeedFilePath(seedFilePath);
NutchServer.getInstance().getSeedManager().
setSeedList(seedList.getName(), seedList);
return Response.ok().entity(seedFilePath).build();
- }
-
- private void writeUrl(BufferedWriter writer, SeedUrl seedUrl) {
- try {
- writer.write(seedUrl.getUrl());
- writer.newLine();
- writer.flush();
- } catch (IOException e) {
- throw handleException(e);
+ } catch (Exception e) {
+ log.warn("Error while creating seed : {}", e.getMessage());
}
+ return Response.serverError().build();
}
- private BufferedWriter getWriter(File seedFile) {
- try {
- return new BufferedWriter(new FileWriter(seedFile));
- } catch (FileNotFoundException e) {
- throw handleException(e);
- } catch (IOException e) {
- throw handleException(e);
+ private String writeToSeedFile(Collection<SeedUrl> seedUrls) throws Exception {
+ String seedFilePath = "seedFiles/seed-" + System.currentTimeMillis();
+ org.apache.hadoop.fs.Path seedFolder = new org.apache.hadoop.fs.Path(seedFilePath);
+ FileSystem fs = FileSystem.get(new Configuration());
+ if(!fs.exists(seedFolder)) {
+ if(!fs.mkdirs(seedFolder)) {
+ throw new Exception("Could not create seed folder at : " + seedFolder);
+ }
}
- }
-
- private File createSeedFile() {
- try {
- return File.createTempFile("seed", ".txt", Files.createTempDir());
- } catch (IOException e) {
- throw handleException(e);
+ String filename = seedFilePath + System.getProperty("file.separator") + "urls";
+ org.apache.hadoop.fs.Path seedPath = new org.apache.hadoop.fs.Path(filename);
+ OutputStream os = fs.create(seedPath);
+ if (CollectionUtils.isNotEmpty(seedUrls)) {
+ for (SeedUrl seedUrl : seedUrls) {
+ os.write(seedUrl.getUrl().getBytes());
+ os.write("\n".getBytes());
+ }
}
+ os.close();
+ return seedPath.getParent().toString();
}
-
- private RuntimeException handleException(Exception e) {
- log.error("Cannot create seed file!", e);
- return new WebApplicationException(status(Status.INTERNAL_SERVER_ERROR)
- .entity("Cannot create seed file!").build());
- }
-
}