You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by fm...@apache.org on 2012/05/20 12:49:41 UTC

svn commit: r1340668 - in /sling/site/tools: ./ README.txt conversion/ conversion/README.txt conversion/convert_cwiki_markup.pl conversion/convert_export_template.pl conversion/export_site.pl

Author: fmeschbe
Date: Sun May 20 10:49:41 2012
New Revision: 1340668

URL: http://svn.apache.org/viewvc?rev=1340668&view=rev
Log:
SLING-2002 Add conversion tools folder

Added:
    sling/site/tools/   (with props)
    sling/site/tools/README.txt
    sling/site/tools/conversion/
    sling/site/tools/conversion/README.txt
    sling/site/tools/conversion/convert_cwiki_markup.pl   (with props)
    sling/site/tools/conversion/convert_export_template.pl   (with props)
    sling/site/tools/conversion/export_site.pl   (with props)

Propchange: sling/site/tools/
------------------------------------------------------------------------------
--- svn:externals (added)
+++ svn:externals Sun May 20 10:49:41 2012
@@ -0,0 +1 @@
+build http://svn.apache.org/repos/infra/websites/cms/build

Added: sling/site/tools/README.txt
URL: http://svn.apache.org/viewvc/sling/site/tools/README.txt?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/README.txt (added)
+++ sling/site/tools/README.txt Sun May 20 10:49:41 2012
@@ -0,0 +1,10 @@
+CMS Conversion and Build Tools
+------------------------------
+
+* The conversion folder has an adapted convert_cwiki_markup.pl
+  script which handles more Confluence markup than the original
+  one. See conversion/README.txt for details.
+
+* The build folder is externally linked to the source repository
+  of the CMS build scripts. This can be used to locally test
+  content, template and/or script changes.

Added: sling/site/tools/conversion/README.txt
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/README.txt?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/README.txt (added)
+++ sling/site/tools/conversion/README.txt Sun May 20 10:49:41 2012
@@ -0,0 +1,61 @@
+            Confluence Auto-Export to CMS Conversion Tools
+            -----------------------------------------------
+
+These tools exist to help you migrate a site from the Confluence wiki
+(cwiki) auto-export to the CMS.
+
+The steps are:
+
+1. Set up CMS site structure in SVN
+ Details on this are available from http://www.staging.apache.org/dev/cms.html
+
+2. Set up CMS view and path
+ These will handle turning the markdown into nice HTML
+ See https://svn.apache.org/repos/asf/comdev/site/trunk for an example
+
+3. Convert your CWiki auto-export template
+ You need to turn your CWiki auto-export template into two CMS templates.
+ One template handles the main layout, and is available for use by DTL
+  powered HTML pages
+ The second allows markdown formatted text to be rendered
+
+ Use <convert_export_template.pl> to handle this
+
+ eg
+   cd templates
+   ~/apache/cms/conversion-utilities/cwiki/convert_export_template.pl /tmp/export.xml standard.html standard_markdown.html
+   cd ..
+
+4. Convert your CWiki pages to markdown
+ You need to spider your CWiki site, and download the wiki pages in their raw
+  CWiki markup. These pages then need to be converted into Markdown syntax.
+
+ The markup translation is done by <convert_cwiki_markup.pl>
+ The spidering tool is <export_site.pl>
+
+ export_site.pl will handle downloading all the pages, and running them
+  through convert_cwiki_markup.pl for you. In theory it will do everything
+  you need.
+
+5. Test the site generation
+ Use build/build_site.pl to generate the HTML version of the site
+
+6. Tweak the markdown pages as required
+
+7. Delete the cwiki markup files
+ The cwiki markup files will have been saved in the content directory for
+  you to review when converting. When you're happy, these should be removed,
+  and probably shouldn't ever be committed to svn
+
+
+Changes by fmeschbe to convert_cwiki_markup.pl:
+
+  * convert {toc} to [TOC]
+  * convert {excerpt} to Excerpt header
+      and respect hidden=true
+  * convert {{..}} to `..` (code)
+  * convert _.._ to *..* (italic)
+  * convert {display-footnotes} to ///Footnotes Go Here///
+      (actual footnote definitions are not converted)
+  * properly convert tables
+      for headers a separator line is added

Added: sling/site/tools/conversion/convert_cwiki_markup.pl
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/convert_cwiki_markup.pl?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/convert_cwiki_markup.pl (added)
+++ sling/site/tools/conversion/convert_cwiki_markup.pl Sun May 20 10:49:41 2012
@@ -0,0 +1,252 @@
+#!/usr/bin/perl
+# Converts a CWiki markup to MarkDown syntax
+use strict;
+use warnings;
+
+use Text::Wrap qw/wrap $huge/;
+$huge = "overflow";
+
+# Arguments: page title, CWiki markup file to read, optional output file
+my $pageName = shift;
+my $source = shift;
+unless(defined $pageName && $source && -f $source) {
+   print "Use:\n";
+   print "  $0 <page name> <cwiki markup file> [output file]\n";
+   exit 1;
+}
+# Page name without whitespace; only taken once the name is known to be set
+my $compressedPageName = $pageName;
+$compressedPageName =~ s/\s//g;
+
+# Without an explicit output file, write next to the source as <source>.html
+my $dest = shift;
+$dest = $source.".html" unless($dest);
+
+# Turn a CWiki link target into the URL used in the generated markdown.
+# Absolute URLs (http, https, ftp, mailto) and same-page #anchors are
+#  returned as they are; anything else is taken to be a page name, where
+#  whitespace becomes "-" and the result is lower-cased with ".html" added
+sub convertURL {
+   my $url = shift;
+   return $url if $url =~ m{^(?:https?://|ftp://|mailto:|#)}i;
+   $url =~ s/\s/-/g;
+   return lc($url).".html";
+}
+
+open(INP, "<", $source) or die "Cannot read $source: $!\n";
+open(OUT, ">", $dest) or die "Cannot write $dest: $!\n";
+
+print OUT "Title: $pageName\n";
+
+# Load the file into a temporary array, moving {excerpt} text into an
+#  "Excerpt:" header line on the way (the line wrapping below is disabled)
+my @contents;
+my $excerpt = "";        # text gathered so far for a multi-line excerpt
+my $excerptEx = 0;       # true while inside a multi-line excerpt
+my $excerptHidden = 0;   # true when that excerpt was opened with hidden=true
+while(my $line = <INP>) {
+   $line =~ s/\r//;
+#   unless($line =~ /^\s*$/) {
+#      my @parts = split(/( \[)/, $line);
+#      $line = "";
+#      foreach my $p (@parts) {
+#         if($p =~ /^(.*?\])(.*)$/) {
+#            $p = $1."\n".wrap("","",$2);
+#         } else {
+#            $p = wrap("","",$p);
+#         }
+#         $line .= $p;
+#      }
+#      unless($line =~ /\n$/s) { $line .= "\n"; }
+#   }
+
+   # convert excerpt to header
+   if ($excerptEx) {
+        my $hidden = $excerptHidden;   # the closing marker resets the flag below
+        if ($line =~ s/{excerpt}\s*\n$/\n/) {
+            $excerpt .= " $line";      # text before the marker is excerpt text too
+            $excerpt =~ s/\n//g;
+            $excerpt =~ s/^\s+|\s+$//g;
+            print OUT "Excerpt: $excerpt\n";
+            $excerpt = "";
+            $excerptEx = 0;
+            $excerptHidden = 0;
+        } else {
+            $excerpt .= " $line";
+        }
+        next if ($hidden);
+   } elsif($line =~ s/^{excerpt}//) {
+        if ($line =~ s/{excerpt}\s*\n$/\n/) {
+            print OUT "Excerpt: $line";
+        } else {
+            $excerpt = $line;
+            $excerptEx = 1;
+            $excerptHidden = 0;
+        }
+   } elsif($line =~ s/^{excerpt:hidden=true}//) {
+        if ($line =~ s/{excerpt}\s*\n$/\n/) {
+            print OUT "Excerpt: $line";
+        } else {
+            $excerpt = $line;
+            $excerptEx = 1;
+            $excerptHidden = 1;
+        }
+        next;
+   }
+   push(@contents, $line);
+}
+
+# Header Separator: a blank line ends the Title/Excerpt header block
+print OUT "\n";
+
+# Convert the buffered lines one at a time. $in names the verbatim block
+#  ("noformat", "code:xml" or "code:text") the current line is inside, if any
+my $in = "";
+foreach my $line (@contents) {
+   if($in eq "noformat") {
+      if($line =~ /^\s*^{noformat}/) {
+         print OUT "\n";
+         $in = "";
+      } else {
+         print OUT "    ".$line;
+      }
+      next;
+   } elsif($in eq "code:xml" || $in eq "code:text") {
+      if($line =~ /^\s*^{code}/) {
+         print OUT "\n";
+         $in = "";
+      } else {
+         print OUT "    ".$line;
+      }
+      next;
+#   } elsif($in eq "table") {
+#      if($line =~ /^\s*$/) {
+#         print OUT "</table>\n";
+#         $in = "";
+#      }
+   } elsif($in) {
+      warn("Unexpected block '$in' for $line");
+      $in = "";
+   }
+
+   # Numbered lists: every item is written as "1. "
+   if($line =~ /^\s*#/) {
+      $line =~ s/^\s*#\s*/1. /;
+   }
+
+   # Headings: "hN. text" becomes N "#" characters followed by the text
+   if($line =~ /^h(\d)\.\s?(.*)$/) {
+      # Build the heading entry
+      $line = ("#"x$1)." ".$2."\n";
+      # Now replicate the a name
+      my $name = $2;
+      $name =~ s/\s//g;
+#      $line = "<a name=\"$compressedPageName-$name\"></a>\n".$line;
+   }
+
+   # Links: rewrite every [target] or [text|target] on the line (only the
+   #  first one used to be converted); the target goes through convertURL
+   $line =~ s{\[(.*?)\]}{
+      my ($text,$link) = ($1,"");
+      if($text =~ /^(.*?)\|(.*)/) {
+         ($text,$link) = ($1,$2);
+      }
+      "[$text](".convertURL($link ? $link : $text).")";
+   }ge;
+
+   # Inline code ({{..}}) and italic (_.._)
+   # NOTE(review): the italic rule also rewrites underscores inside link URLs,
+   #  and {{{..}}} is tried only after {{..}} was rewritten - confirm both
+   $line =~ s/\{\{(.*?)\}\}/`$1`/g;
+   $line =~ s/_(.*?)_/*$1*/g;
+   # $line =~ s/\{\{(.*?)\}\}/*$1*/g;
+   $line =~ s/\{\{\{(.*?)\}\}\}/**$1**/g;
+
+   # No-Format
+   if($line =~ /^\s*^{noformat}/) {
+      $in = "noformat";
+      $line = "\n";
+   }
+
+   # Code blocks: {code:title=..}, {code:xml}, plain {code} and any other
+   #  {code:<language>} opener (those used to be left as text, which made
+   #  their closing {code} open a block instead of ending one)
+   if($line =~ /^\s*^{code:title=(.*?)}/) {
+      # NOTE(review): the DIVs opened here are never closed by this script
+      $in = "code:text";
+      $line = '<DIV class="code panel" style="border-style: solid;border-width: 1px;"><DIV class="codeHeader panelHeader" style="border-bottom-width: 1px;border-bottom-style: solid;"><B>'.$1.'</B></DIV><DIV class="codeContent panelContent">'."\n";
+   } elsif($line =~ /^\s*^{code:xml}/) {
+      $in = "code:xml";
+      $line = "\n";
+   } elsif($line =~ /^\s*^{code(?::[^}]*)?}/) {
+      $in = "code:text";
+      $line = "\n";
+   }
+
+   # Forced breaks
+   # NOTE(review): this replaces the whole line, other text on it is dropped
+   if($line =~ /\\\\/) {
+      $line = "  \n  \n";
+   }
+
+   # Tables: a header row (written with ||) gets a separator line with one
+   #  cell per header cell (it used to be fixed at two columns)
+   if($line =~ /^\|/) {
+#      unless($in eq "table") {
+#         print OUT "<table>\n";
+#         $in = "table";
+#      }
+      my $th = ($line =~ /\|\|/);
+      $line =~ s/\|\|/\|/g;
+      if ($th) {
+        my $cols = () = $line =~ /\|(?=.*\S)/g;
+        $line .= "|".("--|" x ($cols || 2))."\n";
+      }
+   }
+
+   # Table of contents
+   if($line =~ /\s*^{toc.*}/) {
+      $line = "[TOC]\n";
+#      my @headings = grep(/^h/, @contents);
+#      $line = "";
+#      foreach my $h (@headings) { 
+#         $h =~ /^\s*h(\d).\s+(.*?)\s*$/s;
+#         my $hnum = $1;
+#         my $text = $2;
+#         my $name = $2;
+#         $name =~ s/\s//g;
+#
+#         my $l = "   "x($hnum-1);
+#         $l .= "* [$text](#$compressedPageName-$name)\n";
+#         $line .= $l;
+#      }
+   }
+
+   # Footnotes display
+   if($line =~/{display-footnotes}/) {
+       $line = "///Footnotes Go Here///\n";
+   }
+
+   print OUT $line;
+}
+
+close INP;
+close(OUT) or die "Cannot finish writing $dest: $!\n";
+
+=head1 LICENSE
+
+           Licensed to the Apache Software Foundation (ASF) under one
+           or more contributor license agreements.  See the NOTICE file
+           distributed with this work for additional information
+           regarding copyright ownership.  The ASF licenses this file
+           to you under the Apache License, Version 2.0 (the
+           "License"); you may not use this file except in compliance
+           with the License.  You may obtain a copy of the License at
+
+             http://www.apache.org/licenses/LICENSE-2.0
+
+           Unless required by applicable law or agreed to in writing,
+           software distributed under the License is distributed on an
+           "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+           KIND, either express or implied.  See the License for the
+           specific language governing permissions and limitations
+           under the License.

Propchange: sling/site/tools/conversion/convert_cwiki_markup.pl
------------------------------------------------------------------------------
    svn:executable = *

Added: sling/site/tools/conversion/convert_export_template.pl
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/convert_export_template.pl?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/convert_export_template.pl (added)
+++ sling/site/tools/conversion/convert_export_template.pl Sun May 20 10:49:41 2012
@@ -0,0 +1,96 @@
+#!/usr/bin/perl
+# Converts a CWiki export template to a DTL template
+use strict;
+use warnings;
+
+# Arguments: the CWiki export template, then optional names for both outputs
+my $source = shift;
+unless($source && -f $source) {
+   print "Use:\n";
+   print "  $0 <cwiki template.xml> [export.html] [export_markdown.html]\n";
+   exit 1;
+}
+# Default layout template: the source with .xml swapped for .html, or with
+#  .html appended when there is no .xml to swap (else it would equal the source)
+my $dest = shift;
+unless($dest) {
+   $dest = $source;
+   $dest .= ".html" unless($dest =~ s/\.xml/\.html/);
+}
+# Default markdown template: derived from $dest, always under a different name
+my $mdest = shift;
+unless($mdest) {
+   $mdest = $dest;
+   $mdest .= "_markdown.html" unless($mdest =~ s/\.html/_markdown.html/);
+}
+# Never overwrite an existing template
+foreach my $target ($dest, $mdest) {
+   next unless(-f $target);
+   print "Destination file $target already exists\n";
+   exit 1;
+}
+
+# Prepare to convert: one input, two outputs; stop if any cannot be opened
+open(my $inp, "<", $source) or die "Cannot read $source: $!\n";
+open(my $out, ">", $dest) or die "Cannot write $dest: $!\n";
+open(my $mout, ">", $mdest) or die "Cannot write $mdest: $!\n";
+
+# The markdown template extends the layout template and fills in its blocks
+print $mout "{% extends \"$dest\" %}\n";
+my $done_title = 0;
+my $done_body = 0;
+while(my $line = <$inp>) {
+   # lines starting with # are not copied to the layout template
+   if($line =~ /^#/) { next; }
+
+   # the first $page.title becomes the title block, later ones are removed
+   if($line =~ /\$page\.title/) {
+      $done_title++;
+      if($done_title > 1) {
+         warn("Found \$page.title multiple times, but only converting once\n");
+         $line =~ s/\$page\.title//;
+      } else {
+         $line =~ s/\$page\.title/{% block title %}{% endblock %}/;
+         print $mout '{% block title %}{{ headers.title }}{% endblock %}'."\n";
+      }
+   }
+
+   # likewise the first $body becomes the content block
+   if($line =~ /\$body/) {
+      $done_body++;
+      if($done_body > 1) {
+         warn("Found \$body multiple times, but only converting once\n");
+         $line =~ s/\$body//;
+      } else {
+         $line =~ s/\$body/{% block content %}{% endblock %}/;
+         print $mout '{% block content %}{{ content|markdown }}{% endblock %}'."\n";
+      }
+   }
+   $line =~ s/\$autoexport\.breadcrumbs\(\$page\)/{{ breadcrumbs|safe }}/;
+   print $out $line;
+}
+
+close($inp);
+close($out) or die "Cannot finish writing $dest: $!\n";
+close($mout) or die "Cannot finish writing $mdest: $!\n";
+print "Generated $dest\n";
+print "Generated $mdest\n";
+
+=head1 LICENSE
+
+           Licensed to the Apache Software Foundation (ASF) under one
+           or more contributor license agreements.  See the NOTICE file
+           distributed with this work for additional information
+           regarding copyright ownership.  The ASF licenses this file
+           to you under the Apache License, Version 2.0 (the
+           "License"); you may not use this file except in compliance
+           with the License.  You may obtain a copy of the License at
+
+             http://www.apache.org/licenses/LICENSE-2.0
+
+           Unless required by applicable law or agreed to in writing,
+           software distributed under the License is distributed on an
+           "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+           KIND, either express or implied.  See the License for the
+           specific language governing permissions and limitations
+           under the License.

Propchange: sling/site/tools/conversion/convert_export_template.pl
------------------------------------------------------------------------------
    svn:executable = *

Added: sling/site/tools/conversion/export_site.pl
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/export_site.pl?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/export_site.pl (added)
+++ sling/site/tools/conversion/export_site.pl Sun May 20 10:49:41 2012
@@ -0,0 +1,93 @@
+#!/usr/bin/perl
+# Spiders a site using the cwiki export, and generates markdown files
+#  for each page in cwiki
+use strict;
+use warnings;
+
+use RPC::XML;
+use RPC::XML::Client;
+
+my $confluence = "https://cwiki.apache.org/confluence/";
+my $RPCURL = $confluence."rpc/xmlrpc";
+
+my $siteName = shift;
+my $username = shift;
+my $password = shift;
+unless($siteName && $username && $password) {
+   print "Use:\n";
+   print "   $0 <cwiki site name> <username> <password>\n";
+   print "\n";
+   print " eg for https://cwiki.apache.org/confluence/display/COMDEVxSITE\n";
+   print "    $0 COMDEVxSITE jim securePassword\n";
+   exit 1;
+}
+
+# Check we're in the right place
+unless(-d "content" && -d "lib") {
+   die("Must be run from root of site, containing /content/ and /lib/\n");
+}
+# The converter is looked up next to this script; "." if $0 has no directory
+my ($binPath) = ($0 =~ /^(.*)\/(.*?)$/);
+$binPath = "." unless(defined $binPath);
+
+# Connect to their icky API. (What's wrong with REST we ask...?)
+my $client = RPC::XML::Client->new($RPCURL);
+# Send one request and return its plain value; send_request hands back a
+#  plain error string (not a reference) when the call itself fails
+sub callConfluence {
+   my ($what, @request) = @_;
+   my $response = $client->send_request(@request);
+   ref($response) or die "ERROR: $what: $response\n";
+   $response->is_fault and die "ERROR: $what: ", $response->value->{faultString};
+   return $response->value;
+}
+my $token = callConfluence("could not login as $username", 'confluence1.login', $username, $password);
+
+# Get a list of all the pages
+my @pages = @{callConfluence("could not get pages for $siteName", 'confluence1.getPages', $token, $siteName)};
+
+foreach my $page (@pages) {
+   my $title = $page->{title};
+   print "Fetching $title (".$page->{id}.")\n";
+   my %details = %{callConfluence("could not get page details", 'confluence1.getPage', $token, $siteName, $title)};
+   my $content = $details{content};
+
+   print "Processing $title from ".$page->{url}."\n";
+   # File name: the title with whitespace turned into dashes, lower-cased
+   (my $fileBase = lc($title)) =~ s/\s/-/g;
+   my $cwikiFile = "content/$fileBase.cwiki";
+   open(my $cwiki, ">", $cwikiFile) or die "Cannot write $cwikiFile: $!\n";
+   print $cwiki $content;
+   close($cwiki) or die "Cannot finish writing $cwikiFile: $!\n";
+
+   my $mdFile = $cwikiFile;
+   $mdFile =~ s/\.cwiki/.mdtext/;
+
+   print "  Generating markdown file\n";
+   # List form: no shell, so quotes or $ in a title cannot alter the command
+   system("$binPath/convert_cwiki_markup.pl", $title, $cwikiFile, $mdFile) == 0
+      or warn "  convert_cwiki_markup.pl failed for $title\n";
+   print "  Finished processing $title\n\n";
+}
+
+print "\n";
+print "Done!\n";
+
+=head1 LICENSE
+
+           Licensed to the Apache Software Foundation (ASF) under one
+           or more contributor license agreements.  See the NOTICE file
+           distributed with this work for additional information
+           regarding copyright ownership.  The ASF licenses this file
+           to you under the Apache License, Version 2.0 (the
+           "License"); you may not use this file except in compliance
+           with the License.  You may obtain a copy of the License at
+
+             http://www.apache.org/licenses/LICENSE-2.0
+
+           Unless required by applicable law or agreed to in writing,
+           software distributed under the License is distributed on an
+           "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+           KIND, either express or implied.  See the License for the
+           specific language governing permissions and limitations
+           under the License.

Propchange: sling/site/tools/conversion/export_site.pl
------------------------------------------------------------------------------
    svn:executable = *