You are viewing a plain text version of this content. The canonical link for it is here.
Posted to doxia-commits@maven.apache.org by lt...@apache.org on 2009/06/02 11:43:33 UTC

svn commit: r780986 - in /maven/doxia/doxia/trunk/doxia-core/src: main/java/org/apache/maven/doxia/document/ main/java/org/apache/maven/doxia/document/DocumentModelSink.java test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java

Author: ltheussl
Date: Tue Jun  2 09:43:33 2009
New Revision: 780986

URL: http://svn.apache.org/viewvc?rev=780986&view=rev
Log:
Add a DocumentModelSink to extract meta information from documents.

Added:
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/
    maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java   (with props)
    maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java   (with props)

Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java?rev=780986&view=auto
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java (added)
+++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java Tue Jun  2 09:43:33 2009
@@ -0,0 +1,328 @@
+package org.apache.maven.doxia.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.text.ParseException;
+
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Locale;
+
+import javax.swing.text.html.HTML.Attribute;
+
+import org.apache.maven.doxia.sink.SinkAdapter;
+import org.apache.maven.doxia.sink.SinkEventAttributes;
+import org.apache.maven.doxia.util.DoxiaUtils;
+
+import org.codehaus.plexus.util.StringUtils;
+
+/**
+ * A Sink that collects meta-information emitted by a parser and stores it in a DocumentModel.
+ *
+ * <p>Use like:</p>
+ *
+ * <pre>
+ * DocumentModelSink sink = new DocumentModelSink();
+ * parser.parse( reader, sink );
+ * DocumentModel model = sink.getModel();
+ * </pre>
+ *
+ * <p>The sink only collects information from the <code>title()</code>, <code>author</code>
+ * and <code>date</code> events, as well as meta-information emitted via <code>unknown()</code>,
+ * all other events are ignored.</p>
+ *
+ * @author ltheussl
+ * @version $Id$
+ * @since 1.1.1.
+ */
+
+public class DocumentModelSink
+        extends SinkAdapter
+{
+    private final DocumentModel model;
+
+    private StringBuffer buffer;
+    private DocumentAuthor author;
+
+    /**
+     * Create a DocumentModelSink.
+     */
+    public DocumentModelSink()
+    {
+        this.model = new DocumentModel();
+        model.setMeta( new DocumentMeta() );
+    }
+
+    /**
+     * Retrieve the DocumentModel created by this Sink.
+     *
+     * @return the DocumentModel.
+     */
+    public DocumentModel getModel()
+    {
+        return model;
+    }
+
+    /** Start recording a title. */
+    public void title()
+    {
+        title( null );
+    }
+
+    /**
+     * Start recording a title. Only text events within a title event are recorded.
+     *
+     * @param attributes ignored.
+     */
+    public void title( SinkEventAttributes attributes )
+    {
+        this.buffer = new StringBuffer();
+    }
+
+    /** End recording a title. */
+    public void title_()
+    {
+        String title = buffer.toString();
+
+        if ( StringUtils.isNotEmpty( title ) )
+        {
+            getModel().getMeta().setTitle( buffer.toString() );
+        }
+
+        this.buffer = null;
+    }
+
+    /** Start recording an author. */
+    public void author()
+    {
+        author( null );
+    }
+
+    /**
+     * Start recording an author. Only text events within an author event are recorded.
+     *
+     * @param attributes only email attribute is recognized.
+     */
+    public void author( SinkEventAttributes attributes )
+    {
+        this.buffer = new StringBuffer();
+        this.author = new DocumentAuthor();
+
+        if ( attributes != null )
+        {
+            for ( Enumeration e = attributes.getAttributeNames() ; e.hasMoreElements() ; )
+            {
+                String name = e.nextElement().toString();
+
+                if ( name.equals( SinkEventAttributes.EMAIL ) )
+                {
+                    author.setEmail( attributes.getAttribute( name ).toString() );
+                }
+                else
+                {
+                    getLog().warn( "Ignoring unknown author attribute: " + name );
+                }
+            }
+        }
+    }
+
+    /** End recording an author. */
+    public void author_()
+    {
+        String auth = buffer.toString();
+
+        if ( StringUtils.isNotEmpty( auth ) )
+        {
+            author.setName( buffer.toString() );
+            model.getMeta().addAuthor( author );
+        }
+
+        this.author = null;
+        this.buffer = null;
+    }
+
+    /** Start recording a date. */
+    public void date()
+    {
+        date( null );
+    }
+
+    /**
+     * Start recording a date. Only text events within a date event are recorded.
+     *
+     * @param attributes ignored.
+     */
+    public void date( SinkEventAttributes attributes )
+    {
+        this.buffer = new StringBuffer();
+    }
+
+    /** End recording a date. */
+    public void date_()
+    {
+        String dat = buffer.toString();
+
+        if ( StringUtils.isNotEmpty( dat ) )
+        {
+            try
+            {
+                Date date = DoxiaUtils.parseDate( buffer.toString() );
+                model.getMeta().setDate( date );
+            }
+            catch ( ParseException ex )
+            {
+                getLog().warn( "Could not parse date: " + this.buffer.toString(), ex );
+            }
+        }
+
+        this.buffer = null;
+    }
+
+    /**
+     * Record a text.
+     *
+     * @param text the text to record.
+     */
+    public void text( String text )
+    {
+        text( text, null );
+    }
+
+    /**
+     * Record a text.
+     *
+     * @param text the text to record.
+     * @param attributes ignored.
+     */
+    public void text( String text, SinkEventAttributes attributes )
+    {
+        if ( this.buffer != null && StringUtils.isNotEmpty( text ) )
+        {
+            this.buffer.append( text );
+        }
+    }
+
+    /**
+     * Record a text.
+     *
+     * @param text the text to record.
+     */
+    public void rawText( String text )
+    {
+        if ( this.buffer != null && StringUtils.isNotEmpty( text ) )
+        {
+            this.buffer.append( text );
+        }
+    }
+
+    /**
+     * Record an unknown event. Only "meta" events are currently recognized.
+     *
+     * @param name the name of the event. If this is not "meta", the event is ignored.
+     * @param requiredParams ignored.
+     * @param attributes has to contain "name" and "content" attributes.
+     */
+    public void unknown( String name, Object[] requiredParams, SinkEventAttributes attributes )
+    {
+        if ( "meta".equals( name ) )
+        {
+            Object metaName = attributes.getAttribute( Attribute.NAME.toString() );
+            Object metaContent = attributes.getAttribute( Attribute.CONTENT.toString() );
+
+            if ( metaName == null || metaContent == null )
+            {
+                getLog().warn( "Missing name and/or content in meta, ignoring!" );
+                return;
+            }
+
+            handleMeta( metaName.toString().toLowerCase( Locale.ENGLISH ),
+                    metaContent.toString().toLowerCase( Locale.ENGLISH ) );
+        }
+    }
+
+    private boolean handleMeta( String name, String content )
+    {
+        if ( "author".equals( name ) )
+        {
+            this.author( null );
+            this.text( content );
+            this.author_();
+        }
+        else if ( "date".equals( name ) )
+        {
+            this.date( null );
+            this.text( content );
+            this.date_();
+        }
+        else if ( "keywords".equals( name ) )
+        {
+            String[] keywords = StringUtils.split( content, "," );
+
+            for ( int i = 0; i < keywords.length; i++ )
+            {
+                model.getMeta().addKeyWord( keywords[i].trim() );
+            }
+        }
+        else if ( "description".equals( name ) )
+        {
+            model.getMeta().setDescription( content );
+        }
+        else if ( "generator".equals( name ) )
+        {
+            model.getMeta().setGenerator( content );
+        }
+        else if ( "language".equals( name ) || "lang".equals( name ) )
+        {
+            model.getMeta().setLanguage( content );
+        }
+        else if ( "creator".equals( name ) )
+        {
+            model.getMeta().setCreator( content );
+        }
+        else if ( "creation_date".equals( name ) )
+        {
+            try
+            {
+                model.getMeta().setCreationDate( DoxiaUtils.parseDate( content ) );
+            }
+            catch ( ParseException ex )
+            {
+                getLog().warn( "Could not parse date: " + content, ex );
+            }
+        }
+        else if ( "date-creation-yyyymmdd".equals( name ) )
+        {
+            try
+            {
+                model.getMeta().setCreationDate( DoxiaUtils.parseDate( content ) );
+            }
+            catch ( ParseException ex )
+            {
+                getLog().warn( "Could not parse date: " + content, ex );
+            }
+        }
+        else
+        {
+            getLog().warn( "Unknown meta: " + name );
+        }
+
+        return false;
+    }
+}

Propchange: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java
------------------------------------------------------------------------------
    svn:keywords = "Author Date Id Revision"

Added: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java
URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java?rev=780986&view=auto
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java (added)
+++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java Tue Jun  2 09:43:33 2009
@@ -0,0 +1,196 @@
+package org.apache.maven.doxia.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.maven.doxia.sink.SinkEventAttributeSet;
+
+import org.codehaus.plexus.PlexusTestCase;
+
+/**
+ * Test DocumentModelSink.
+ *
+ * @author ltheussl
+ * @version $Id$
+ * @since 1.1.1
+ */
+public class DocumentModelSinkTest
+        extends PlexusTestCase
+{
+    /**
+     * Test of title method, of class DocumentModelSink.
+     * An empty title must not be stored, a non-empty one must be.
+     */
+    public void testTitle()
+    {
+        final DocumentModelSink sink = new DocumentModelSink();
+
+        sink.title();
+        sink.rawText( "" );
+        sink.title_();
+
+        assertNull( sink.getModel().getMeta().getTitle() );
+
+        sink.title();
+        sink.rawText( "Title" );
+        sink.title_();
+
+        assertEquals( "Title", sink.getModel().getMeta().getTitle() );
+    }
+
+    /**
+     * Test of author method, of class DocumentModelSink.
+     * Authors with an empty name must be dropped, the email attribute must be recorded
+     * and other attributes ("hobby") must be ignored.
+     */
+    public void testAuthor()
+    {
+        final DocumentModelSink sink = new DocumentModelSink();
+        final SinkEventAttributeSet email =
+                new SinkEventAttributeSet( new String[] {"email", "yo@com", "hobby", "breathing"} );
+
+        sink.author();
+        sink.text( "Author" );
+        sink.text( "" );
+        sink.author_();
+
+        // an author without a name is not added to the model
+        sink.author();
+        sink.text( "" );
+        sink.author_();
+
+        sink.author( email );
+        sink.text( "Author with email" );
+        sink.rawText( "" );
+        sink.author_();
+
+        assertEquals( "Author, Author with email", sink.getModel().getMeta().getAllAuthorNames() );
+
+        final List authors = sink.getModel().getMeta().getAuthors();
+        assertEquals( 2, authors.size() );
+
+        for ( final Iterator it = authors.iterator(); it.hasNext(); )
+        {
+            final DocumentAuthor author = (DocumentAuthor) it.next();
+            final String name = author.getName();
+            assertTrue( "Author".equals( name ) || "Author with email".equals( name ) );
+
+            if ( "Author with email".equals( name ) )
+            {
+                assertEquals( "yo@com", author.getEmail() );
+            }
+        }
+    }
+
+    /**
+     * Test of date method, of class DocumentModelSink.
+     * Empty and unparseable dates must leave the model untouched.
+     */
+    public void testDate()
+    {
+        final DocumentModelSink sink = new DocumentModelSink();
+
+        sink.date();
+        sink.text( "" );
+        sink.date_();
+
+        assertNull( sink.getModel().getMeta().getDate() );
+
+        sink.date();
+        sink.text( "heute" );
+        sink.date_();
+
+        assertNull( sink.getModel().getMeta().getDate() );
+
+        sink.date();
+        sink.text( "1973-02-27" );
+        sink.date_();
+
+        // Compute the expected value in the default time zone instead of hard-coding it:
+        // the former constant 99615600000L is midnight of 1973-02-27 at UTC+1 only.
+        // NOTE(review): assumes DoxiaUtils.parseDate() reads a plain date as midnight in the
+        // default time zone, which is what the former constant suggests - confirm.
+        final long feb27 =
+                new java.util.GregorianCalendar( 1973, java.util.Calendar.FEBRUARY, 27 ).getTime().getTime();
+        assertEquals( feb27, sink.getModel().getMeta().getDate().getTime() );
+    }
+
+    /**
+     * Test of unknown method, of class DocumentModelSink.
+     * The same attribute set is re-used for every "meta" event, only its
+     * "name" and "content" values are replaced in between.
+     */
+    public void testUnknown()
+    {
+        final String id = "meta";
+        final String name = "name";
+        final String content = "content";
+
+        final SinkEventAttributeSet meta =
+                new SinkEventAttributeSet( new String[] {name, "generator", content, "me"} );
+
+        final DocumentModelSink sink = new DocumentModelSink();
+
+        sink.unknown( id, null, meta );
+        assertEquals( "me", sink.getModel().getMeta().getGenerator() );
+
+        meta.addAttribute( name, "lang" );
+        meta.addAttribute( content, "en-us" );
+        sink.unknown( id, null, meta );
+        assertEquals( "en-us", sink.getModel().getMeta().getLanguage() );
+
+        meta.addAttribute( name, "language" );
+        meta.addAttribute( content, "de-at" );
+        sink.unknown( id, null, meta );
+        assertEquals( "de-at", sink.getModel().getMeta().getLanguage() );
+
+        meta.addAttribute( name, "creator" );
+        meta.addAttribute( content, "yo" );
+        sink.unknown( id, null, meta );
+        assertEquals( "yo", sink.getModel().getMeta().getCreator() );
+
+        meta.addAttribute( name, "creation_date" );
+        meta.addAttribute( content, "today" );
+        sink.unknown( id, null, meta );
+        assertNotNull( sink.getModel().getMeta().getCreationDate() );
+
+        // reset, so that the next assertion really checks the alternative meta name
+        sink.getModel().getMeta().setCreationDate( null );
+        meta.addAttribute( name, "date-creation-yyyymmdd" );
+        meta.addAttribute( content, "20000101" );
+        sink.unknown( id, null, meta );
+        assertNotNull( sink.getModel().getMeta().getCreationDate() );
+
+        meta.addAttribute( name, "description" );
+        meta.addAttribute( content, "hot air" );
+        sink.unknown( id, null, meta );
+        assertEquals( "hot air", sink.getModel().getMeta().getDescription() );
+
+        meta.addAttribute( name, "keywords" );
+        meta.addAttribute( content, "a, b, c" );
+        sink.unknown( id, null, meta );
+        assertEquals( 3, sink.getModel().getMeta().getKeyWords().size() );
+        assertEquals( "a, b, c", sink.getModel().getMeta().getAllKeyWords() );
+
+        meta.addAttribute( name, "date" );
+        meta.addAttribute( content, "today" );
+        sink.unknown( id, null, meta );
+        assertNotNull( sink.getModel().getMeta().getDate() );
+
+        meta.addAttribute( name, "author" );
+        meta.addAttribute( content, "me" );
+        sink.unknown( id, null, meta );
+        assertEquals( "me", sink.getModel().getMeta().getAllAuthorNames() );
+
+        meta.addAttribute( name, "unknownmeta" );
+        meta.addAttribute( content, "unknowncontent" );
+        sink.unknown( id, null, meta );
+        // unknown meta should log a warning
+    }
+}

Propchange: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java
------------------------------------------------------------------------------
    svn:keywords = "Author Date Id Revision"