You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2016/05/14 16:10:30 UTC
[06/41] jena git commit: Fix line-ending changes.
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java
index 912cae1..3ccb34c 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractQuadSplitWithNodesTests.java
@@ -1,30 +1,30 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
-import org.apache.jena.datatypes.xsd.XSDDatatype ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.sparql.core.Quad ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.sparql.core.Quad ;
/**
* Abstract tests for {@link AbstractNodeTupleSplitToNodesMapper}
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java
index 91d671e..5d21bde 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitToNodesTests.java
@@ -1,29 +1,29 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
-import org.apache.jena.datatypes.xsd.XSDDatatype ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
/**
* Abstract tests for {@link AbstractNodeTupleSplitToNodesMapper}
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java
index 327a821..4648a83 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractTripleSplitWithNodesTests.java
@@ -1,29 +1,29 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
-import org.apache.jena.datatypes.xsd.XSDDatatype ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.hadoop.rdf.mapreduce.split.AbstractNodeTupleSplitToNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
/**
* Abstract tests for {@link AbstractNodeTupleSplitToNodesMapper}
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java
index 79f73de..61058c6 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapperTest.java
@@ -1,28 +1,28 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitToNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitToNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java
index b50cdbb..a171ffb 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapperTest.java
@@ -1,28 +1,28 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitWithNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.mapreduce.split.QuadSplitWithNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java
index 38b6c72..d91efca 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapperTest.java
@@ -1,28 +1,28 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java
index 9731f07..3b71f40 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapperTest.java
@@ -1,29 +1,29 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.split;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitWithNodesMapper;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitToNodesMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.split.TripleSplitWithNodesMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java
index ad7b0f2..e6167c0 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapperTest.java
@@ -1,37 +1,37 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.transform;
import java.io.IOException;
-
+
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
-import org.apache.jena.datatypes.xsd.XSDDatatype ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
-import org.apache.jena.hadoop.rdf.mapreduce.transform.QuadsToTriplesMapper;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.sparql.core.Quad ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
+import org.apache.jena.hadoop.rdf.mapreduce.transform.QuadsToTriplesMapper;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.sparql.core.Quad ;
import org.junit.Test;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java
index 6fb1279..e52ea6f 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapperTest.java
@@ -1,37 +1,37 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.transform;
import java.io.IOException;
-
+
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
-import org.apache.jena.datatypes.xsd.XSDDatatype ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
-import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsBySubjectMapper;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.sparql.core.Quad ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
+import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsBySubjectMapper;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.sparql.core.Quad ;
import org.junit.Test;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java
index af9cf20..0a73623 100644
--- a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java
+++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapperTest.java
@@ -1,37 +1,37 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.mapreduce.transform;
import java.io.IOException;
-
+
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
-import org.apache.jena.datatypes.xsd.XSDDatatype ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
-import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.sparql.core.Quad ;
+import org.apache.jena.datatypes.xsd.XSDDatatype ;
+import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.graph.Triple ;
+import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapperTests;
+import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.sparql.core.Quad ;
import org.junit.Test;
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
index 7425f42..c62c50b 100644
--- a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
+++ b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/RdfStats.java
@@ -1,421 +1,421 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.hadoop.rdf.stats;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-
-import javax.inject.Inject;
-
-import org.apache.commons.io.output.CloseShieldOutputStream;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory;
-
-import com.github.rvesse.airline.HelpOption;
-import com.github.rvesse.airline.SingleCommand;
-import com.github.rvesse.airline.annotations.Arguments;
-import com.github.rvesse.airline.annotations.Command;
-import com.github.rvesse.airline.annotations.Option;
-import com.github.rvesse.airline.annotations.restrictions.AllowedRawValues;
-import com.github.rvesse.airline.annotations.restrictions.Required;
-import com.github.rvesse.airline.help.Help;
-import com.github.rvesse.airline.io.colors.BasicColor;
-import com.github.rvesse.airline.io.output.AnsiBasicColorizedOutputStream;
-import com.github.rvesse.airline.io.output.ColorizedOutputStream;
-import com.github.rvesse.airline.model.CommandMetadata;
-import com.github.rvesse.airline.parser.errors.ParseException;
-
-/**
- * Entry point for the Hadoop job, handles launching all the relevant Hadoop
- * jobs
- */
-@Command(name = "hadoop jar PATH_TO_JAR org.apache.jena.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop")
-public class RdfStats implements Tool {
- //@formatter:off
- private static final String DATA_TYPE_TRIPLES = "triples",
- DATA_TYPE_QUADS = "quads",
- DATA_TYPE_MIXED = "mixed";
- //@formatter:on
-
- /**
- * Help option
- */
- @Inject
- public HelpOption<RdfStats> helpOption;
-
- /**
- * Gets/Sets whether all available statistics will be calculated
- */
- @Option(name = { "-a", "--all" }, description = "Requests that all available statistics be calculated")
- public boolean all = false;
-
- /**
- * Gets/Sets whether node usage counts will be calculated
- */
- @Option(name = { "-n", "--node-count" }, description = "Requests that node usage counts be calculated")
- public boolean nodeCount = false;
-
- /**
- * Gets/Sets whether characteristic sets will be calculated
- */
- @Option(name = { "-c",
- "--characteristic-sets" }, hidden = true, description = "Requests that characteristic sets be calculated (hidden as this has scalability issues)")
- public boolean characteristicSets = false;
-
- /**
- * Gets/Sets whether type counts will be calculated
- */
- @Option(name = { "-t", "--type-count" }, description = "Requests that rdf:type usage counts be calculated")
- public boolean typeCount = false;
-
- /**
- * Gets/Sets whether data type counts will be calculated
- */
- @Option(name = { "-d", "--data-types" }, description = "Requests that literal data type usage counts be calculated")
- public boolean dataTypeCount = false;
-
- /**
- * Gets/Sets whether namespace counts will be calculated
- */
- @Option(name = { "--namespaces" }, description = "Requests that namespace usage counts be calculated")
- public boolean namespaceCount = false;
-
- @Option(name = { "-g", "--graph-sizes" }, description = "Requests that the size of each named graph be counted")
- public boolean graphSize = false;
-
- /**
- * Gets/Sets the input data type used
- */
- @Option(name = {
- "--input-type" }, description = "Specifies whether the input data is a mixture of quads and triples, just quads or just triples. Using the most specific data type will yield the most accurate statistics")
- @AllowedRawValues(allowedValues = { DATA_TYPE_MIXED, DATA_TYPE_QUADS, DATA_TYPE_TRIPLES })
- public String inputType = DATA_TYPE_MIXED;
-
- /**
- * Gets/Sets the output path
- */
- @Option(name = { "-o", "--output" }, title = "OutputPath", description = "Sets the output path", arity = 1)
- @Required
- public String outputPath = null;
-
- /**
- * Gets/Sets the input path(s)
- */
- @Arguments(description = "Sets the input path(s)", title = "InputPath")
- @Required
- public List<String> inputPaths = new ArrayList<String>();
-
- private Configuration config;
-
- /**
- * Entry point method
- *
- * @param args
- * Arguments
- */
- public static void main(String[] args) {
- ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream(
- new CloseShieldOutputStream(System.err));
- try {
- // Run and exit with result code if no errors bubble up
- // Note that the exit code may still be a error code
- int res = ToolRunner.run(new Configuration(true), new RdfStats(), args);
- System.exit(res);
- } catch (Throwable e) {
- // This will only happen if Hadoop option parsing errors
- // The run() method will handle its error itself
- error.setForegroundColor(BasicColor.RED);
- error.println(e.getMessage());
- e.printStackTrace(error);
- } finally {
- error.close();
- }
- // If any errors bubble up exit with non-zero code
- System.exit(1);
- }
-
- private static void showUsage() throws IOException {
- CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata();
- Help.help(metadata, System.err);
- System.exit(1);
- }
-
- @Override
- public void setConf(Configuration conf) {
- this.config = conf;
- }
-
- @Override
- public Configuration getConf() {
- return this.config;
- }
-
- @Override
- public int run(String[] args) {
- ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream(
- new CloseShieldOutputStream(System.err));
- try {
- if (args.length == 0) {
- showUsage();
- }
-
- // Parse custom arguments
- RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args);
-
- // Copy Hadoop configuration across
- cmd.setConf(this.getConf());
-
- // Show help if requested and exit with success
- if (cmd.helpOption.showHelpIfRequested()) {
- return 0;
- }
-
- // Run the command and exit with success
- cmd.run();
- return 0;
- } catch (ParseException e) {
- error.setForegroundColor(BasicColor.RED);
- error.println(e.getMessage());
- error.println();
- } catch (Throwable e) {
- error.setForegroundColor(BasicColor.RED);
- error.println(e.getMessage());
- e.printStackTrace(error);
- error.println();
- } finally {
- error.close();
- }
- return 1;
- }
-
- private void run() throws Throwable {
- if (!this.outputPath.endsWith("/")) {
- this.outputPath += "/";
- }
-
- // If all statistics requested turn on all statistics
- if (this.all) {
- this.nodeCount = true;
- this.characteristicSets = true;
- this.typeCount = true;
- this.dataTypeCount = true;
- this.namespaceCount = true;
- }
-
- // How many statistics were requested?
- int statsRequested = 0;
- if (this.nodeCount)
- statsRequested++;
- if (this.characteristicSets)
- statsRequested++;
- if (this.typeCount)
- statsRequested++;
- if (this.dataTypeCount)
- statsRequested++;
- if (this.namespaceCount)
- statsRequested++;
- if (this.graphSize)
- statsRequested++;
-
- // Error if no statistics requested
- if (statsRequested == 0) {
- System.err.println(
- "You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed");
- return;
- }
- int statsComputed = 1;
-
- // Compute statistics
- if (this.nodeCount) {
- Job job = this.selectNodeCountJob();
- statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
- }
- if (this.graphSize) {
- Job job = this.selectGraphSizeJob();
- statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
- }
- if (this.typeCount) {
- Job[] jobs = this.selectTypeCountJobs();
- statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
- }
- if (this.dataTypeCount) {
- Job job = this.selectDataTypeCountJob();
- statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
- }
- if (this.namespaceCount) {
- Job job = this.selectNamespaceCountJob();
- statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
- }
- if (this.characteristicSets) {
- Job[] jobs = this.selectCharacteristicSetJobs();
- statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
- }
- }
-
- private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable {
- System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
- this.runJob(job);
- System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
- System.out.println();
- return ++statsComputed;
- }
-
- private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed,
- int statsRequested) {
- System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
- this.runJobSequence(jobs, continueOnFailure, continueOnError);
- System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
- System.out.println();
- return ++statsComputed;
- }
-
- private boolean runJob(Job job) throws Throwable {
- System.out.println("Submitting Job " + job.getJobName());
- long start = System.nanoTime();
- try {
- job.submit();
- if (job.monitorAndPrintJob()) {
- System.out.println("Job " + job.getJobName() + " succeeded");
- return true;
- } else {
- System.out.println("Job " + job.getJobName() + " failed");
- return false;
- }
- } catch (Throwable e) {
- System.out.println("Unexpected failure in Job " + job.getJobName());
- throw e;
- } finally {
- long end = System.nanoTime();
- System.out.println("Job " + job.getJobName() + " finished after "
- + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start)));
- System.out.println();
- }
- }
-
- private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) {
- for (int i = 0; i < jobs.length; i++) {
- Job job = jobs[i];
- try {
- boolean success = this.runJob(job);
- if (!success && !continueOnFailure)
- throw new IllegalStateException(
- "Unable to complete job sequence because Job " + job.getJobName() + " failed");
- } catch (IllegalStateException e) {
- throw e;
- } catch (Throwable e) {
- if (!continueOnError)
- throw new IllegalStateException(
- "Unable to complete job sequence because job " + job.getJobName() + " errorred", e);
- }
- }
- }
-
- private Job selectNodeCountJob() throws IOException {
- String realOutputPath = outputPath + "node-counts/";
- String[] inputs = new String[this.inputPaths.size()];
- this.inputPaths.toArray(inputs);
-
- if (DATA_TYPE_QUADS.equals(this.inputType)) {
- return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath);
- } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
- return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath);
- } else {
- return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath);
- }
- }
-
- private Job selectGraphSizeJob() throws IOException {
- String realOutputPath = outputPath + "graph-sizes/";
- String[] inputs = new String[this.inputPaths.size()];
- this.inputPaths.toArray(inputs);
-
- if (DATA_TYPE_QUADS.equals(this.inputType)) {
- return JobFactory.getQuadGraphSizesJob(this.config, inputs, realOutputPath);
- } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
- return JobFactory.getTripleGraphSizesJob(this.config, inputs, realOutputPath);
- } else {
- return JobFactory.getGraphSizesJob(this.config, inputs, realOutputPath);
- }
- }
-
- private Job selectDataTypeCountJob() throws IOException {
- String realOutputPath = outputPath + "data-type-counts/";
- String[] inputs = new String[this.inputPaths.size()];
- this.inputPaths.toArray(inputs);
-
- if (DATA_TYPE_QUADS.equals(this.inputType)) {
- return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath);
- } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
- return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath);
- } else {
- return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath);
- }
- }
-
- private Job selectNamespaceCountJob() throws IOException {
- String realOutputPath = outputPath + "namespace-counts/";
- String[] inputs = new String[this.inputPaths.size()];
- this.inputPaths.toArray(inputs);
-
- if (DATA_TYPE_QUADS.equals(this.inputType)) {
- return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath);
- } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
- return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath);
- } else {
- return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath);
- }
- }
-
- private Job[] selectCharacteristicSetJobs() throws IOException {
- String intermediateOutputPath = outputPath + "characteristics/intermediate/";
- String finalOutputPath = outputPath + "characteristics/final/";
- String[] inputs = new String[this.inputPaths.size()];
- this.inputPaths.toArray(inputs);
-
- if (DATA_TYPE_QUADS.equals(this.inputType)) {
- return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath,
- finalOutputPath);
- } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
- return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath,
- finalOutputPath);
- } else {
- return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
- }
- }
-
- private Job[] selectTypeCountJobs() throws IOException {
- String intermediateOutputPath = outputPath + "type-declarations/";
- String finalOutputPath = outputPath + "type-counts/";
- String[] inputs = new String[this.inputPaths.size()];
- this.inputPaths.toArray(inputs);
-
- if (DATA_TYPE_QUADS.equals(this.inputType)) {
- return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
- } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
- return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
- } else {
- return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.stats;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import javax.inject.Inject;
+
+import org.apache.commons.io.output.CloseShieldOutputStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.jena.hadoop.rdf.stats.jobs.JobFactory;
+
+import com.github.rvesse.airline.HelpOption;
+import com.github.rvesse.airline.SingleCommand;
+import com.github.rvesse.airline.annotations.Arguments;
+import com.github.rvesse.airline.annotations.Command;
+import com.github.rvesse.airline.annotations.Option;
+import com.github.rvesse.airline.annotations.restrictions.AllowedRawValues;
+import com.github.rvesse.airline.annotations.restrictions.Required;
+import com.github.rvesse.airline.help.Help;
+import com.github.rvesse.airline.io.colors.BasicColor;
+import com.github.rvesse.airline.io.output.AnsiBasicColorizedOutputStream;
+import com.github.rvesse.airline.io.output.ColorizedOutputStream;
+import com.github.rvesse.airline.model.CommandMetadata;
+import com.github.rvesse.airline.parser.errors.ParseException;
+
+/**
+ * Entry point for the Hadoop job, handles launching all the relevant Hadoop
+ * jobs
+ */
+@Command(name = "hadoop jar PATH_TO_JAR org.apache.jena.hadoop.rdf.stats.RdfStats", description = "A command which computes statistics on RDF data using Hadoop")
+public class RdfStats implements Tool {
+ //@formatter:off
+ private static final String DATA_TYPE_TRIPLES = "triples",
+ DATA_TYPE_QUADS = "quads",
+ DATA_TYPE_MIXED = "mixed";
+ //@formatter:on
+
+ /**
+ * Help option
+ */
+ @Inject
+ public HelpOption<RdfStats> helpOption;
+
+ /**
+ * Gets/Sets whether all available statistics will be calculated
+ */
+ @Option(name = { "-a", "--all" }, description = "Requests that all available statistics be calculated")
+ public boolean all = false;
+
+ /**
+ * Gets/Sets whether node usage counts will be calculated
+ */
+ @Option(name = { "-n", "--node-count" }, description = "Requests that node usage counts be calculated")
+ public boolean nodeCount = false;
+
+ /**
+ * Gets/Sets whether characteristic sets will be calculated
+ */
+ @Option(name = { "-c",
+ "--characteristic-sets" }, hidden = true, description = "Requests that characteristic sets be calculated (hidden as this has scalability issues)")
+ public boolean characteristicSets = false;
+
+ /**
+ * Gets/Sets whether type counts will be calculated
+ */
+ @Option(name = { "-t", "--type-count" }, description = "Requests that rdf:type usage counts be calculated")
+ public boolean typeCount = false;
+
+ /**
+ * Gets/Sets whether data type counts will be calculated
+ */
+ @Option(name = { "-d", "--data-types" }, description = "Requests that literal data type usage counts be calculated")
+ public boolean dataTypeCount = false;
+
+ /**
+ * Gets/Sets whether namespace counts will be calculated
+ */
+ @Option(name = { "--namespaces" }, description = "Requests that namespace usage counts be calculated")
+ public boolean namespaceCount = false;
+
+ @Option(name = { "-g", "--graph-sizes" }, description = "Requests that the size of each named graph be counted")
+ public boolean graphSize = false;
+
+ /**
+ * Gets/Sets the input data type used
+ */
+ @Option(name = {
+ "--input-type" }, description = "Specifies whether the input data is a mixture of quads and triples, just quads or just triples. Using the most specific data type will yield the most accurate statistics")
+ @AllowedRawValues(allowedValues = { DATA_TYPE_MIXED, DATA_TYPE_QUADS, DATA_TYPE_TRIPLES })
+ public String inputType = DATA_TYPE_MIXED;
+
+ /**
+ * Gets/Sets the output path
+ */
+ @Option(name = { "-o", "--output" }, title = "OutputPath", description = "Sets the output path", arity = 1)
+ @Required
+ public String outputPath = null;
+
+ /**
+ * Gets/Sets the input path(s)
+ */
+ @Arguments(description = "Sets the input path(s)", title = "InputPath")
+ @Required
+ public List<String> inputPaths = new ArrayList<String>();
+
+ private Configuration config;
+
+ /**
+ * Entry point method
+ *
+ * @param args
+ * Arguments
+ */
+ public static void main(String[] args) {
+ ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream(
+ new CloseShieldOutputStream(System.err));
+ try {
+ // Run and exit with result code if no errors bubble up
+ // Note that the exit code may still be a error code
+ int res = ToolRunner.run(new Configuration(true), new RdfStats(), args);
+ System.exit(res);
+ } catch (Throwable e) {
+ // This will only happen if Hadoop option parsing errors
+ // The run() method will handle its error itself
+ error.setForegroundColor(BasicColor.RED);
+ error.println(e.getMessage());
+ e.printStackTrace(error);
+ } finally {
+ error.close();
+ }
+ // If any errors bubble up exit with non-zero code
+ System.exit(1);
+ }
+
+ private static void showUsage() throws IOException {
+ CommandMetadata metadata = SingleCommand.singleCommand(RdfStats.class).getCommandMetadata();
+ Help.help(metadata, System.err);
+ System.exit(1);
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.config = conf;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return this.config;
+ }
+
+ @Override
+ public int run(String[] args) {
+ ColorizedOutputStream<BasicColor> error = new AnsiBasicColorizedOutputStream(
+ new CloseShieldOutputStream(System.err));
+ try {
+ if (args.length == 0) {
+ showUsage();
+ }
+
+ // Parse custom arguments
+ RdfStats cmd = SingleCommand.singleCommand(RdfStats.class).parse(args);
+
+ // Copy Hadoop configuration across
+ cmd.setConf(this.getConf());
+
+ // Show help if requested and exit with success
+ if (cmd.helpOption.showHelpIfRequested()) {
+ return 0;
+ }
+
+ // Run the command and exit with success
+ cmd.run();
+ return 0;
+ } catch (ParseException e) {
+ error.setForegroundColor(BasicColor.RED);
+ error.println(e.getMessage());
+ error.println();
+ } catch (Throwable e) {
+ error.setForegroundColor(BasicColor.RED);
+ error.println(e.getMessage());
+ e.printStackTrace(error);
+ error.println();
+ } finally {
+ error.close();
+ }
+ return 1;
+ }
+
+ private void run() throws Throwable {
+ if (!this.outputPath.endsWith("/")) {
+ this.outputPath += "/";
+ }
+
+ // If all statistics requested turn on all statistics
+ if (this.all) {
+ this.nodeCount = true;
+ this.characteristicSets = true;
+ this.typeCount = true;
+ this.dataTypeCount = true;
+ this.namespaceCount = true;
+ }
+
+ // How many statistics were requested?
+ int statsRequested = 0;
+ if (this.nodeCount)
+ statsRequested++;
+ if (this.characteristicSets)
+ statsRequested++;
+ if (this.typeCount)
+ statsRequested++;
+ if (this.dataTypeCount)
+ statsRequested++;
+ if (this.namespaceCount)
+ statsRequested++;
+ if (this.graphSize)
+ statsRequested++;
+
+ // Error if no statistics requested
+ if (statsRequested == 0) {
+ System.err.println(
+ "You did not request any statistics to be calculated, please use one/more of the relevant options to select the statistics to be computed");
+ return;
+ }
+ int statsComputed = 1;
+
+ // Compute statistics
+ if (this.nodeCount) {
+ Job job = this.selectNodeCountJob();
+ statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
+ }
+ if (this.graphSize) {
+ Job job = this.selectGraphSizeJob();
+ statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
+ }
+ if (this.typeCount) {
+ Job[] jobs = this.selectTypeCountJobs();
+ statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
+ }
+ if (this.dataTypeCount) {
+ Job job = this.selectDataTypeCountJob();
+ statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
+ }
+ if (this.namespaceCount) {
+ Job job = this.selectNamespaceCountJob();
+ statsComputed = this.computeStatistic(job, statsComputed, statsRequested);
+ }
+ if (this.characteristicSets) {
+ Job[] jobs = this.selectCharacteristicSetJobs();
+ statsComputed = this.computeStatistic(jobs, false, false, statsComputed, statsRequested);
+ }
+ }
+
+ private int computeStatistic(Job job, int statsComputed, int statsRequested) throws Throwable {
+ System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
+ this.runJob(job);
+ System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
+ System.out.println();
+ return ++statsComputed;
+ }
+
+ private int computeStatistic(Job[] jobs, boolean continueOnFailure, boolean continueOnError, int statsComputed,
+ int statsRequested) {
+ System.out.println(String.format("Computing Statistic %d of %d requested", statsComputed, statsRequested));
+ this.runJobSequence(jobs, continueOnFailure, continueOnError);
+ System.out.println(String.format("Computed Statistic %d of %d requested", statsComputed, statsRequested));
+ System.out.println();
+ return ++statsComputed;
+ }
+
+ private boolean runJob(Job job) throws Throwable {
+ System.out.println("Submitting Job " + job.getJobName());
+ long start = System.nanoTime();
+ try {
+ job.submit();
+ if (job.monitorAndPrintJob()) {
+ System.out.println("Job " + job.getJobName() + " succeeded");
+ return true;
+ } else {
+ System.out.println("Job " + job.getJobName() + " failed");
+ return false;
+ }
+ } catch (Throwable e) {
+ System.out.println("Unexpected failure in Job " + job.getJobName());
+ throw e;
+ } finally {
+ long end = System.nanoTime();
+ System.out.println("Job " + job.getJobName() + " finished after "
+ + String.format("%,d milliseconds", TimeUnit.NANOSECONDS.toMillis(end - start)));
+ System.out.println();
+ }
+ }
+
+ private void runJobSequence(Job[] jobs, boolean continueOnFailure, boolean continueOnError) {
+ for (int i = 0; i < jobs.length; i++) {
+ Job job = jobs[i];
+ try {
+ boolean success = this.runJob(job);
+ if (!success && !continueOnFailure)
+ throw new IllegalStateException(
+ "Unable to complete job sequence because Job " + job.getJobName() + " failed");
+ } catch (IllegalStateException e) {
+ throw e;
+ } catch (Throwable e) {
+ if (!continueOnError)
+ throw new IllegalStateException(
+ "Unable to complete job sequence because job " + job.getJobName() + " errorred", e);
+ }
+ }
+ }
+
+ private Job selectNodeCountJob() throws IOException {
+ String realOutputPath = outputPath + "node-counts/";
+ String[] inputs = new String[this.inputPaths.size()];
+ this.inputPaths.toArray(inputs);
+
+ if (DATA_TYPE_QUADS.equals(this.inputType)) {
+ return JobFactory.getQuadNodeCountJob(this.config, inputs, realOutputPath);
+ } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
+ return JobFactory.getTripleNodeCountJob(this.config, inputs, realOutputPath);
+ } else {
+ return JobFactory.getNodeCountJob(this.config, inputs, realOutputPath);
+ }
+ }
+
+ private Job selectGraphSizeJob() throws IOException {
+ String realOutputPath = outputPath + "graph-sizes/";
+ String[] inputs = new String[this.inputPaths.size()];
+ this.inputPaths.toArray(inputs);
+
+ if (DATA_TYPE_QUADS.equals(this.inputType)) {
+ return JobFactory.getQuadGraphSizesJob(this.config, inputs, realOutputPath);
+ } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
+ return JobFactory.getTripleGraphSizesJob(this.config, inputs, realOutputPath);
+ } else {
+ return JobFactory.getGraphSizesJob(this.config, inputs, realOutputPath);
+ }
+ }
+
+ private Job selectDataTypeCountJob() throws IOException {
+ String realOutputPath = outputPath + "data-type-counts/";
+ String[] inputs = new String[this.inputPaths.size()];
+ this.inputPaths.toArray(inputs);
+
+ if (DATA_TYPE_QUADS.equals(this.inputType)) {
+ return JobFactory.getQuadDataTypeCountJob(this.config, inputs, realOutputPath);
+ } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
+ return JobFactory.getTripleDataTypeCountJob(this.config, inputs, realOutputPath);
+ } else {
+ return JobFactory.getDataTypeCountJob(this.config, inputs, realOutputPath);
+ }
+ }
+
+ private Job selectNamespaceCountJob() throws IOException {
+ String realOutputPath = outputPath + "namespace-counts/";
+ String[] inputs = new String[this.inputPaths.size()];
+ this.inputPaths.toArray(inputs);
+
+ if (DATA_TYPE_QUADS.equals(this.inputType)) {
+ return JobFactory.getQuadNamespaceCountJob(this.config, inputs, realOutputPath);
+ } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
+ return JobFactory.getTripleNamespaceCountJob(this.config, inputs, realOutputPath);
+ } else {
+ return JobFactory.getNamespaceCountJob(this.config, inputs, realOutputPath);
+ }
+ }
+
+ private Job[] selectCharacteristicSetJobs() throws IOException {
+ String intermediateOutputPath = outputPath + "characteristics/intermediate/";
+ String finalOutputPath = outputPath + "characteristics/final/";
+ String[] inputs = new String[this.inputPaths.size()];
+ this.inputPaths.toArray(inputs);
+
+ if (DATA_TYPE_QUADS.equals(this.inputType)) {
+ return JobFactory.getQuadCharacteristicSetJobs(this.config, inputs, intermediateOutputPath,
+ finalOutputPath);
+ } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
+ return JobFactory.getTripleCharacteristicSetJobs(this.config, inputs, intermediateOutputPath,
+ finalOutputPath);
+ } else {
+ return JobFactory.getCharacteristicSetJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
+ }
+ }
+
+ private Job[] selectTypeCountJobs() throws IOException {
+ String intermediateOutputPath = outputPath + "type-declarations/";
+ String finalOutputPath = outputPath + "type-counts/";
+ String[] inputs = new String[this.inputPaths.size()];
+ this.inputPaths.toArray(inputs);
+
+ if (DATA_TYPE_QUADS.equals(this.inputType)) {
+ return JobFactory.getQuadTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
+ } else if (DATA_TYPE_TRIPLES.equals(this.inputType)) {
+ return JobFactory.getTripleTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
+ } else {
+ return JobFactory.getTypeCountJobs(this.config, inputs, intermediateOutputPath, finalOutputPath);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
----------------------------------------------------------------------
diff --git a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
index b0ed898..0d6bf18 100644
--- a/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
+++ b/jena-elephas/jena-elephas-stats/src/main/java/org/apache/jena/hadoop/rdf/stats/jobs/JobFactory.java
@@ -1,25 +1,25 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.hadoop.rdf.stats.jobs;
import java.io.IOException;
-
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
@@ -28,7 +28,7 @@ import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
+import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
@@ -36,40 +36,40 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
-import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat;
-import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat;
-import org.apache.jena.hadoop.rdf.io.output.nquads.NQuadsOutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat;
-import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat;
-import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
-import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
-import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadGraphCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper;
-import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper;
-import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
-import org.apache.jena.hadoop.rdf.types.NodeWritable;
-import org.apache.jena.hadoop.rdf.types.QuadWritable;
-import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.vocabulary.RDF ;
+import org.apache.jena.hadoop.rdf.io.input.QuadsInputFormat;
+import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat;
+import org.apache.jena.hadoop.rdf.io.input.TriplesOrQuadsInputFormat;
+import org.apache.jena.hadoop.rdf.io.input.nquads.NQuadsInputFormat;
+import org.apache.jena.hadoop.rdf.io.input.ntriples.NTriplesInputFormat;
+import org.apache.jena.hadoop.rdf.io.output.nquads.NQuadsOutputFormat;
+import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat;
+import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesOutputFormat;
+import org.apache.jena.hadoop.rdf.mapreduce.KeyMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
+import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.characteristics.QuadCharacteristicSetGeneratingReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.QuadDataTypeCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.datatypes.TripleDataTypeCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.QuadNamespaceCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.namespaces.TripleNamespaceCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadGraphCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.positional.QuadObjectCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.count.positional.TripleObjectCountMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.QuadFilterByPredicateMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.filter.positional.TripleFilterByPredicateUriMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.group.QuadGroupBySubjectMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper;
+import org.apache.jena.hadoop.rdf.mapreduce.transform.TriplesToQuadsConstantGraphMapper;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.vocabulary.RDF ;
/**
* Factory that can produce {@link Job} instances for computing various RDF
@@ -101,7 +101,7 @@ public class JobFactory {
public static Job getTripleNodeCountJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
Job job = Job.getInstance(config);
job.setJarByClass(JobFactory.class);
- job.setJobName("RDF Triples Node Usage Count");
+ job.setJobName("RDF Triples Node Usage Count");
// Map/Reduce classes
job.setMapperClass(TripleNodeCountMapper.class);
@@ -180,67 +180,67 @@ public class JobFactory {
FileOutputFormat.setOutputPath(job, new Path(outputPath));
return job;
- }
-
- public static Job getTripleGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
- Job job = Job.getInstance(config);
- job.setJarByClass(JobFactory.class);
- job.setJobName("RDF Triples Graph Sizes");
-
- // Map/Reduce classes
- ChainMapper.addMapper(job, TriplesToQuadsConstantGraphMapper.class, LongWritable.class, TripleWritable.class, LongWritable.class, QuadWritable.class, config);
- ChainMapper.addMapper(job, QuadGraphCountMapper.class, LongWritable.class, QuadWritable.class, NodeWritable.class, LongWritable.class, config);
- job.setMapOutputKeyClass(NodeWritable.class);
- job.setMapOutputValueClass(LongWritable.class);
- job.setReducerClass(NodeCountReducer.class);
-
- // Input and Output
- job.setInputFormatClass(TriplesInputFormat.class);
- job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
- FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
- FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
- return job;
- }
-
- public static Job getQuadGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
- Job job = Job.getInstance(config);
- job.setJarByClass(JobFactory.class);
- job.setJobName("RDF Quads Graph Sizes");
-
- // Map/Reduce classes
- job.setMapperClass(QuadGraphCountMapper.class);
- job.setMapOutputKeyClass(NodeWritable.class);
- job.setMapOutputValueClass(LongWritable.class);
- job.setReducerClass(NodeCountReducer.class);
-
- // Input and Output
- job.setInputFormatClass(QuadsInputFormat.class);
- job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
- FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
- FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
- return job;
- }
-
- public static Job getGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
- Job job = Job.getInstance(config);
- job.setJarByClass(JobFactory.class);
- job.setJobName("RDF Graph Sizes");
-
- // Map/Reduce classes
- job.setMapperClass(QuadGraphCountMapper.class);
- job.setMapOutputKeyClass(NodeWritable.class);
- job.setMapOutputValueClass(LongWritable.class);
- job.setReducerClass(NodeCountReducer.class);
-
- // Input and Output
- job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
- job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
- FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
- FileOutputFormat.setOutputPath(job, new Path(outputPath));
-
- return job;
+ }
+
+ public static Job getTripleGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
+ Job job = Job.getInstance(config);
+ job.setJarByClass(JobFactory.class);
+ job.setJobName("RDF Triples Graph Sizes");
+
+ // Map/Reduce classes
+ ChainMapper.addMapper(job, TriplesToQuadsConstantGraphMapper.class, LongWritable.class, TripleWritable.class, LongWritable.class, QuadWritable.class, config);
+ ChainMapper.addMapper(job, QuadGraphCountMapper.class, LongWritable.class, QuadWritable.class, NodeWritable.class, LongWritable.class, config);
+ job.setMapOutputKeyClass(NodeWritable.class);
+ job.setMapOutputValueClass(LongWritable.class);
+ job.setReducerClass(NodeCountReducer.class);
+
+ // Input and Output
+ job.setInputFormatClass(TriplesInputFormat.class);
+ job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
+ FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
+ FileOutputFormat.setOutputPath(job, new Path(outputPath));
+
+ return job;
+ }
+
+ public static Job getQuadGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
+ Job job = Job.getInstance(config);
+ job.setJarByClass(JobFactory.class);
+ job.setJobName("RDF Quads Graph Sizes");
+
+ // Map/Reduce classes
+ job.setMapperClass(QuadGraphCountMapper.class);
+ job.setMapOutputKeyClass(NodeWritable.class);
+ job.setMapOutputValueClass(LongWritable.class);
+ job.setReducerClass(NodeCountReducer.class);
+
+ // Input and Output
+ job.setInputFormatClass(QuadsInputFormat.class);
+ job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
+ FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
+ FileOutputFormat.setOutputPath(job, new Path(outputPath));
+
+ return job;
+ }
+
+ public static Job getGraphSizesJob(Configuration config, String[] inputPaths, String outputPath) throws IOException {
+ Job job = Job.getInstance(config);
+ job.setJarByClass(JobFactory.class);
+ job.setJobName("RDF Graph Sizes");
+
+ // Map/Reduce classes
+ job.setMapperClass(QuadGraphCountMapper.class);
+ job.setMapOutputKeyClass(NodeWritable.class);
+ job.setMapOutputValueClass(LongWritable.class);
+ job.setReducerClass(NodeCountReducer.class);
+
+ // Input and Output
+ job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
+ job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
+ FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
+ FileOutputFormat.setOutputPath(job, new Path(outputPath));
+
+ return job;
}
/**
http://git-wip-us.apache.org/repos/asf/jena/blob/3d70d735/jena-fuseki1/Data/books.ttl
----------------------------------------------------------------------
diff --git a/jena-fuseki1/Data/books.ttl b/jena-fuseki1/Data/books.ttl
index f341edc..7957323 100644
--- a/jena-fuseki1/Data/books.ttl
+++ b/jena-fuseki1/Data/books.ttl
@@ -1,62 +1,62 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-@prefix dc: <http://purl.org/dc/elements/1.1/> .
-@prefix vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> .
-@prefix ns: <http://example.org/ns#> .
-
-@prefix : <http://example.org/book/> .
-
-# A small dataset for usage examples of Joseki
-# This data is intentionaly irregular (e.g. different ways to
-# record the book creator) as if the information is either an
-# aggregation or was created at different times.
-
-:book1
- dc:title "Harry Potter and the Philosopher's Stone" ;
- dc:creator "J.K. Rowling" ;
- .
-
-:book2
- dc:title "Harry Potter and the Chamber of Secrets" ;
- dc:creator _:a .
-
-:book3
- dc:title "Harry Potter and the Prisoner Of Azkaban" ;
- dc:creator _:a .
-
-:book4
- dc:title "Harry Potter and the Goblet of Fire" .
-
-:book5
- dc:title "Harry Potter and the Order of the Phoenix";
- dc:creator "J.K. Rowling" ;
- .
-
-:book6
- dc:title "Harry Potter and the Half-Blood Prince";
- dc:creator "J.K. Rowling" .
-
-:book7
- dc:title "Harry Potter and the Deathly Hallows" ;
- dc:creator "J.K. Rowling" .
-_:a
- vcard:FN "J.K. Rowling" ;
- vcard:N
- [ vcard:Family "Rowling" ;
- vcard:Given "Joanna"
- ]
- .
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> .
+@prefix ns: <http://example.org/ns#> .
+
+@prefix : <http://example.org/book/> .
+
+# A small dataset for usage examples of Joseki
+# This data is intentionaly irregular (e.g. different ways to
+# record the book creator) as if the information is either an
+# aggregation or was created at different times.
+
+:book1
+ dc:title "Harry Potter and the Philosopher's Stone" ;
+ dc:creator "J.K. Rowling" ;
+ .
+
+:book2
+ dc:title "Harry Potter and the Chamber of Secrets" ;
+ dc:creator _:a .
+
+:book3
+ dc:title "Harry Potter and the Prisoner Of Azkaban" ;
+ dc:creator _:a .
+
+:book4
+ dc:title "Harry Potter and the Goblet of Fire" .
+
+:book5
+ dc:title "Harry Potter and the Order of the Phoenix";
+ dc:creator "J.K. Rowling" ;
+ .
+
+:book6
+ dc:title "Harry Potter and the Half-Blood Prince";
+ dc:creator "J.K. Rowling" .
+
+:book7
+ dc:title "Harry Potter and the Deathly Hallows" ;
+ dc:creator "J.K. Rowling" .
+_:a
+ vcard:FN "J.K. Rowling" ;
+ vcard:N
+ [ vcard:Family "Rowling" ;
+ vcard:Given "Joanna"
+ ]
+ .