You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by jg...@apache.org on 2017/09/13 10:55:27 UTC

[4/6] any23 git commit: Merge branch 'master' into ANY23-311

Merge branch 'master' into ANY23-311

- Resolve conflict in YAMLExtractor.java

Signed-off-by: Jacek Grzebyta <gr...@gmail.com>

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/94caa68e
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/94caa68e
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/94caa68e

Branch: refs/heads/master
Commit: 94caa68ec6f0a86281c147667e75bbd044e4f658
Parents: a56d549 c40b788
Author: Jacek Grzebyta <gr...@gmail.com>
Authored: Tue Aug 29 12:41:16 2017 +0100
Committer: Jacek Grzebyta <gr...@gmail.com>
Committed: Tue Aug 29 12:41:16 2017 +0100

----------------------------------------------------------------------
 .../configuration/DefaultConfiguration.java     |  23 +-
 .../DefaultModifiableConfiguration.java         |   4 +-
 .../apache/any23/extractor/ExtractorGroup.java  |  14 +-
 .../java/org/apache/any23/vocab/Vocabulary.java |  26 +-
 .../resources/default-configuration.properties  |   4 +
 cli/pom.xml                                     |  22 +
 .../org/apache/any23/cli/PluginVerifier.java    |   8 +-
 .../main/java/org/apache/any23/cli/Rover.java   |  10 +-
 .../java/org/apache/any23/cli/ToolRunner.java   |   7 +-
 .../org/apache/any23/cli/ToolRunnerTest.java    |  15 +-
 .../any23/extractor/ExtractorRegistryImpl.java  |  30 +-
 .../extractor/SingleDocumentExtraction.java     |   6 +-
 .../extractor/html/EmbeddedJSONLDExtractor.java |   4 +-
 .../any23/extractor/html/GeoExtractor.java      |   7 +-
 .../any23/extractor/html/TagSoupParser.java     |   2 -
 .../any23/extractor/xpath/XPathExtractor.java   |   3 +-
 .../any23/extractor/yaml/YAMLExtractor.java     |  18 +-
 .../java/org/apache/any23/rdf/RDFUtils.java     |  50 +-
 .../java/org/apache/any23/util/StreamUtils.java |  69 +-
 .../any23/extractor/ExtractionAPITest.java      |   4 +-
 .../extractor/ExtractionResultImplTest.java     |   2 +-
 csvutils/pom.xml                                |   2 +-
 encoding/pom.xml                                |   2 +-
 openie/pom.xml                                  | 153 +++++
 .../any23/extractor/openie/OpenIEExtractor.java | 130 ++++
 .../openie/OpenIEExtractorFactory.java          |  52 ++
 .../org.apache.any23.extractor.ExtractorFactory |   1 +
 .../any23/openie/OpenIEExtractorTest.java       |  88 +++
 plugins/basic-crawler/pom.xml                   |  53 +-
 plugins/html-scraper/pom.xml                    |  19 -
 plugins/integration-test/pom.xml                |  16 +-
 .../java/org/apache/any23/plugin/PluginIT.java  |  40 +-
 plugins/office-scraper/pom.xml                  |  19 -
 pom.xml                                         |  19 +-
 service/pom.xml                                 |   2 +-
 src/site/apt/any23-plugins.apt                  |  16 +-
 src/site/apt/configuration.apt                  |   8 +-
 src/site/apt/dev-csv-extractor.apt              |   2 +-
 src/site/apt/dev-data-conversion.apt            |  20 +-
 src/site/apt/dev-data-extraction.apt            |  20 +-
 src/site/apt/dev-microformat-extractors.apt     |  12 +-
 src/site/apt/dev-validation-fix.apt             |  12 +-
 src/site/apt/dev-xpath-extractor.apt            |   2 +-
 src/site/apt/extractors.apt                     |  50 +-
 src/site/apt/getting-started.apt                |   2 +-
 src/site/apt/plugin-basic-crawler.apt           |   4 +-
 src/site/apt/plugin-office-scraper.apt          |   2 +-
 .../any23/extractor/openie/example-openie.html  | 638 +++++++++++++++++++
 48 files changed, 1451 insertions(+), 261 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/94caa68e/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
----------------------------------------------------------------------
diff --cc core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
index bf70b63,1e968c0..4eae6b9
--- a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
@@@ -75,10 -74,10 +76,10 @@@ public class YAMLExtractor implements E
  
          // Iterate over page(s)
          for (Object p : docIterate) {
-             Resource pageNode = YAMLExtractor.this.makeUri("document", documentURI);
+             Resource pageNode = RDFUtils.makeIRI("document", documentIRI, true);
              out.writeTriple(documentRoot, vocab.contains, pageNode);
              out.writeTriple(pageNode, RDF.TYPE, vocab.document);
-             buildNode(documentURI, p, out, pageNode);
 -            out.writeTriple(pageNode, vocab.contains, buildNode(documentIRI, p, out));
++            buildNode(documentIRI, p, out, pageNode);
          }
  
      }
@@@ -117,13 -116,12 +118,14 @@@
          }
      }
  
 -    private Value processMap(IRI file, Map<String, Object> node, ExtractionResult out) {
 -        Resource nodeURI = RDFUtils.makeIRI(file);
 +    private Value processMap(IRI file, Map<String, Object> node, ExtractionResult out, Resource... parent) {
 +        Resource nodeURI = Arrays.asList(parent).isEmpty() ? YAMLExtractor.this.makeUri(file) : parent[0];
 +        
++
          for (String k : node.keySet()) {
-             Resource predicate = makeUri(k, file, false);
+             Resource predicate = RDFUtils.makeIRI(k, file, true);
              Value value = buildNode(file, node.get(k), out);
 -            out.writeTriple(nodeURI, RDF.TYPE, vocab.node);
 +            out.writeTriple(nodeURI, RDF.TYPE, vocab.mapping);
              out.writeTriple(nodeURI, (IRI) predicate, value);
              out.writeTriple(predicate, RDF.TYPE, RDF.PREDICATE);
              out.writeTriple(predicate, RDFS.LABEL, RDFUtils.literal(k));
@@@ -172,36 -158,4 +174,36 @@@
          nodeId++;
          return bnode;
      }
 +
 +    private Resource makeUri(IRI docUri) {
 +        return makeUri("node", docUri);
-     }
++}
 +
 +    private Resource makeUri(String type, IRI docUri) {
 +        return makeUri(type, docUri, true);
 +    }
 +
 +    private Resource makeUri(String type, IRI docUri, boolean addId) {
 +
 +        // preprocess string: converts - -> _
 +        //                    converts <space>: word1 word2 -> word1Word2
 +        String newType = StringUtils.implementJavaNaming(type);
 +
 +        String uriString;
 +        if (docUri.toString().endsWith("/")) {
 +            uriString = docUri.toString() + newType;
 +        } else {
 +            uriString = docUri.toString() + "#" + newType;
 +        }
 +
 +        if (addId) {
 +            uriString = uriString + "_" + Integer.toString(nodeId);
 +        }
 +
 +        Resource node = RDFUtils.uri(uriString);
 +        if (addId) {
 +            nodeId++;
 +        }
 +        return node;
 +    }
  }