You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by fm...@apache.org on 2012/05/20 12:49:41 UTC
svn commit: r1340668 - in /sling/site/tools: ./ README.txt conversion/
conversion/README.txt conversion/convert_cwiki_markup.pl
conversion/convert_export_template.pl conversion/export_site.pl
Author: fmeschbe
Date: Sun May 20 10:49:41 2012
New Revision: 1340668
URL: http://svn.apache.org/viewvc?rev=1340668&view=rev
Log:
SLING-2002 Add conversion tools folder
Added:
sling/site/tools/ (with props)
sling/site/tools/README.txt
sling/site/tools/conversion/
sling/site/tools/conversion/README.txt
sling/site/tools/conversion/convert_cwiki_markup.pl (with props)
sling/site/tools/conversion/convert_export_template.pl (with props)
sling/site/tools/conversion/export_site.pl (with props)
Propchange: sling/site/tools/
------------------------------------------------------------------------------
--- svn:externals (added)
+++ svn:externals Sun May 20 10:49:41 2012
@@ -0,0 +1 @@
+build http://svn.apache.org/repos/infra/websites/cms/build
Added: sling/site/tools/README.txt
URL: http://svn.apache.org/viewvc/sling/site/tools/README.txt?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/README.txt (added)
+++ sling/site/tools/README.txt Sun May 20 10:49:41 2012
@@ -0,0 +1,10 @@
+CMS Conversion and Build Tools
+------------------------------
+
+* The conversion folder has an adapted convert_cwiki_markup.pl
+ script which handles more Confluence markup than the original
+ one. See the README.txt file for details.
+
+* The build folder is externally linked to the source repository
+ of the CMS build scripts. This can be used to locally test
+ content, template and/or script changes.
Added: sling/site/tools/conversion/README.txt
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/README.txt?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/README.txt (added)
+++ sling/site/tools/conversion/README.txt Sun May 20 10:49:41 2012
@@ -0,0 +1,61 @@
+ Confluence Auto-Export to CMS Conversion Tools
+ -----------------------------------------------
+
+These tools exist to help you migrate a site from using confluence wiki
+(cwiki) auto-export to the CMS.
+
+The steps are:
+
+1. Set up the CMS site structure in SVN
+ Details on this are available from http://www.staging.apache.org/dev/cms.html
+
+2. Set up the CMS view and path
+ These will handle turning the markdown into nice HTML
+ See https://svn.apache.org/repos/asf/comdev/site/trunk for an example
+
+3. Convert your CWiki auto-export template
+ You need to turn your CWiki auto-export template into two CMS templates.
+ The first template handles the main layout, and is available for use by
+ DTL-powered HTML pages.
+ The second extends the first so that markdown formatted text can be rendered.
+
+ Use <convert_export_template.pl> to handle this
+
+ eg
+ cd templates
+ ~/apache/cms/conversion-utilities/cwiki/convert_export_template.pl /tmp/export.xml standard.html standard_markdown.html
+ cd ..
+
+4. Convert your CWiki pages to markdown
+ You need to spider your CWiki site, and download the wiki pages in their raw
+ CWiki markup. These pages then need to be converted into MarkDown syntax.
+
+ The markup translation is done by <convert_cwiki_markup.pl>
+ The spidering tool is <export_site.pl>
+
+ export_site.pl will handle downloading all the pages, and running them
+ through convert_cwiki_markup.pl for you. In theory it will do everything
+ you need
+
+5. Test the site generation
+ Use build/build_site.pl to generate the HTML version of the site
+
+6. Tweak the markdown pages as required
+
+7. Delete the cwiki markup files
+ The cwiki markup files will have been saved in the content directory for
+ you to review when converting. When you're happy, these should be removed,
+ and probably shouldn't ever be committed to svn
+
+
+Changes by fmeschbe to convert_cwiki_markup.pl:
+
+ * convert {toc} to [TOC]
+ * convert {excerpt} to Excerpt header
+ and respect hidden=true
+ * convert {{..}} to `..` (code)
+ * convert _.._ to *..* (italic)
+ * convert {display-footnotes} to ///Footnotes Go Here///
+ (actual footnote definitions are not converted)
+ * properly convert tables
+ for headers a separator line is added
Added: sling/site/tools/conversion/convert_cwiki_markup.pl
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/convert_cwiki_markup.pl?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/convert_cwiki_markup.pl (added)
+++ sling/site/tools/conversion/convert_cwiki_markup.pl Sun May 20 10:49:41 2012
@@ -0,0 +1,252 @@
+#!/usr/bin/perl
+# Converts a CWiki markup to MarkDown syntax
+use strict;
+use warnings;
+
+use Text::Wrap qw/wrap $huge/;
+$huge = "overflow";
+
+my $pageName = shift; # page title; written to the "Title:" header of the output
+my $compressedPageName = defined($pageName) ? $pageName : ""; # no "uninitialized" warning when run without arguments
+$compressedPageName =~ s/\s//g; # title without whitespace (currently only used by commented-out anchor code)
+# The source must be an existing file; otherwise print the usage text and fail.
+my $source = shift;
+unless($source && -f $source) {
+ print "Use:\n";
+ print " $0 <page name> <cwiki markup file> [output file]\n";
+ exit 1;
+}
+my $dest = shift; # optional output file
+unless($dest) {
+ $dest = $source; # default: source file name with ".html" appended
+ $dest .= ".html";
+}
+
+sub convertURL { # Map a CWiki link target to the URL used in the generated Markdown.
+ my $url = shift; # link target: an external URL, an in-page anchor or a wiki page title
+ if($url =~ m{^(?:https?|ftp)://|^mailto:|^#}i) { # real URLs and in-page anchors are returned verbatim
+ return $url;
+ }
+ $url =~ s/\s/-/g; # anything else is a page title: whitespace -> "-", lower case, ".html" appended
+ $url = lc($url).".html";
+ return $url;
+}
+
+open(INP, "<", $source) or die "Cannot read $source: $!\n"; # 3-arg open: the file name can never be parsed as a mode
+open(OUT, ">", $dest) or die "Cannot write $dest: $!\n";
+# The output starts with header lines; the page title is the first one.
+print OUT "Title: $pageName\n";
+
+# Load the file into a temporary array, moving {excerpt} macros into
+# "Excerpt:" header lines on the way (the line wrapping code is disabled)
+my @contents; # body lines for the conversion pass
+my $excerpt = ""; # text collected so far of a multi-line excerpt
+my $excerptEx = 0; # true while an {excerpt} is open across lines
+my $excerptHidden = 0; # true if the open excerpt was started with hidden=true
+while(my $line = <INP>) {
+ $line =~ s/\r//; # drop the CR of a CRLF line ending
+# unless($line =~ /^\s*$/) {
+# my @parts = split(/( \[)/, $line);
+# $line = "";
+# foreach my $p (@parts) {
+# if($p =~ /^(.*?\])(.*)$/) {
+# $p = $1."\n".wrap("","",$2);
+# } else {
+# $p = wrap("","",$p);
+# }
+# $line .= $p;
+# }
+# unless($line =~ /\n$/s) { $line .= "\n"; }
+# }
+# Excerpt text always goes to the header; only visible excerpts also stay in the body.
+ # convert excerpt to header
+ if ($excerptEx) {
+ if ($line =~ s/{excerpt}\s*\n$//) { # closing tag ends a multi-line excerpt
+ $excerpt =~ s/\n//g;
+ print OUT "Excerpt: $excerpt$line\n";
+ $excerpt = "";
+ $excerptEx = 0;
+ $line .= "\n"; # give back the line end eaten above, or this line would fuse with the next one
+ } else {
+ $excerpt .= " $line";
+ print STDOUT ": $line";
+ }
+ next if ($excerptHidden); # still set on the closing line, so all of a hidden excerpt is skipped
+ } elsif($line =~ s/^{excerpt}//) {
+ if ($line =~ s/{excerpt}\s*\n$/\n/) { # opened and closed on the same line
+ print OUT "Excerpt: $line";
+ } else {
+ $excerpt = $line;
+ $excerptEx = 1;
+ $excerptHidden = 0;
+ }
+ } elsif($line =~ s/^{excerpt:hidden=true}//) {
+ if ($line =~ s/{excerpt}\s*\n$/\n/) {
+ print OUT "Excerpt: $line";
+ } else {
+ $excerpt = $line;
+ $excerptEx = 1;
+ $excerptHidden = 1;
+ }
+ next; # hidden excerpt text never reaches the page body
+ }
+
+
+ push(@contents, $line);
+}
+
+# Header Separator
+print OUT "\n"; # blank line ends the "Title:"/"Excerpt:" header lines written above
+# Second pass over the buffered lines. $in names the verbatim block we are inside ("" if none).
+# Convert it
+my $in = "";
+foreach my $line (@contents) {
+ if($in eq "noformat") { # inside {noformat}: copy lines behind the indent prefix until the closing tag
+ if($line =~ /^\s*^{noformat}/) {
+ print OUT "\n";
+ $in = "";
+ } else {
+ print OUT " ".$line;
+ }
+ next;
+ } elsif($in eq "code:xml" || $in eq "code:text") { # inside a code block: same treatment, ended by a bare {code}
+ if($line =~ /^\s*^{code}/) {
+ print OUT "\n";
+ $in = "";
+ } else {
+ print OUT " ".$line;
+ }
+ next;
+# } elsif($in eq "table") {
+# if($line =~ /^\s*$/) {
+# print OUT "</table>\n";
+# $in = "";
+# }
+ } elsif($in) {
+ warn("Unexpected block '$in' for $line");
+ $in = "";
+ }
+# From here on the line is ordinary markup; the rewrites below are applied in this fixed order.
+ # Numbered lists
+ if($line =~ /^\s*#/) {
+ $line =~ s/^\s*#\s*/1. /; # only the first "#" is replaced; every item is numbered "1."
+ }
+
+ # Headings
+ if($line =~ /^h(\d)\.\s?(.*)$/) {
+ # Build the heading entry
+ $line = ("#"x$1)." ".$2."\n"; # "hN." becomes N "#" characters
+ # Now replicate the a name
+ my $name = $2; # unused while the anchor line below stays commented out
+ $name =~ s/\s//g;
+# $line = "<a name=\"$compressedPageName-$name\"></a>\n".$line;
+ }
+
+ # Links
+ if($line =~ /(\[(.*?)\])/) { # NOTE(review): no loop or /g, so only the first [..] on a line is rewritten
+ my ($all,$text,$link) = ($1,$2,"");
+ if($text =~ /^(.*?)\|(.*)/) { # [text|target] form
+ ($text,$link) = ($1,$2);
+ }
+
+ my $newlink;
+ if($link) {
+ $newlink = "[$text](".convertURL($link).")";
+ } else {
+ $newlink = "[$text](".convertURL($text).")"; # [Page Title] form: the text doubles as the target
+ }
+ $line =~ s/\Q$all\E/$newlink/;
+ }
+
+ # Old-style bold / italic
+ $line =~ s/\{\{(.*?)\}\}/`$1`/g; # {{..}} -> `..` (code)
+ $line =~ s/_(.*?)_/*$1*/g; # _.._ -> *..* (italic); NOTE(review): also rewrites underscores inside link targets built above
+ # $line =~ s/\{\{(.*?)\}\}/*$1*/g;
+ $line =~ s/\{\{\{(.*?)\}\}\}/**$1**/g; # NOTE(review): looks unreachable for ordinary input, the {{..}} rule above has already consumed the braces
+
+ # No-Format
+ if($line =~ /^\s*^{noformat}/) {
+ $in = "noformat";
+ $line = "\n";
+ }
+
+ # Code blocks
+ if($line =~ /^\s*^{code:xml}/) { # NOTE(review): other {code:LANG} variants are not recognised here
+ $in = "code:xml";
+ $line = "\n";
+ }
+ if($line =~ /^\s*^{code:title=(.*?)}/) {
+ $in = "code:text";
+ $line = '<DIV class="code panel" style="border-style: solid;border-width: 1px;"><DIV class="codeHeader panelHeader" style="border-bottom-width: 1px;border-bottom-style: solid;"><B>'.$1.'</B></DIV><DIV class="codeContent panelContent">'."\n"; # titled block: HTML panel header; NOTE(review): no closing </DIV> is written when the block ends
+ }
+ if($line =~ /^\s*^{code}/) {
+ $in = "code:text";
+ $line = "\n";
+ }
+
+ # Forced breaks
+ if($line =~ /\\\\/) {
+ $line = " \n \n"; # NOTE(review): replaces the whole line, any text around the break marker is dropped
+ }
+
+ # Tables
+ if($line =~ /^\|/) {
+# unless($in eq "table") {
+# print OUT "<table>\n";
+# $in = "table";
+# }
+
+ my $th = ($line =~ /\|\|/); # "||" marks a header row
+ $line =~ s/\|\|/\|/g;
+ if ($th) {
+ $line .= "|--|--|\n"; # separator line after a header row; NOTE(review): fixed at two columns
+ }
+ }
+
+ # Table of contents
+ if($line =~ /\s*^{toc.*}/) {
+ $line = "[TOC]\n";
+# my @headings = grep(/^h/, @contents);
+# $line = "";
+# foreach my $h (@headings) {
+# $h =~ /^\s*h(\d).\s+(.*?)\s*$/s;
+# my $hnum = $1;
+# my $text = $2;
+# my $name = $2;
+# $name =~ s/\s//g;
+#
+# my $l = " "x($hnum-1);
+# $l .= "* [$text](#$compressedPageName-$name)\n";
+# $line .= $l;
+# }
+ }
+
+ # Footnotes display
+ if($line =~/{display-footnotes}/) {
+ $line = "///Footnotes Go Here///\n"; # placeholder only; footnote definitions are not converted
+ }
+
+ print OUT $line;
+}
+
+close INP;
+close OUT or die "Cannot finish writing $dest: $!\n"; # buffered write errors only show up at close
+
+=head1 LICENSE
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
Propchange: sling/site/tools/conversion/convert_cwiki_markup.pl
------------------------------------------------------------------------------
svn:executable = *
Added: sling/site/tools/conversion/convert_export_template.pl
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/convert_export_template.pl?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/convert_export_template.pl (added)
+++ sling/site/tools/conversion/convert_export_template.pl Sun May 20 10:49:41 2012
@@ -0,0 +1,96 @@
+#!/usr/bin/perl
+# Converts a CWiki export template to a DTL template
+use strict;
+use warnings;
+
+my $source = shift; # CWiki auto-export template to convert
+unless($source && -f $source) {
+ print "Use:\n";
+ print " $0 <cwiki template.xml> [export.html] [export_markdown.html]\n";
+ exit 1;
+}
+# Layout template: defaults to the source name with a trailing ".xml" replaced by ".html".
+my $dest = shift;
+unless($dest) {
+ $dest = $source;
+ $dest .= ".html" unless $dest =~ s/\.xml$/.html/; # a source without ".xml" gets ".html" appended, never its own name
+}
+if(-f $dest) { # existing files are never overwritten
+ print "Destination file $dest already exists\n";
+ exit 1;
+}
+# Markdown template: defaults to the layout template name with "_markdown" before a trailing ".html".
+my $mdest = shift;
+unless($mdest) {
+ $mdest = $dest;
+ $mdest .= "_markdown" unless $mdest =~ s/\.html$/_markdown.html/; # must differ from $dest, both files are written
+}
+if(-f $mdest) {
+ print "Destination file $mdest already exists\n";
+ exit 1;
+}
+
+# Prepare to convert
+open(INP, "<", $source) or die "Cannot read $source: $!\n"; # 3-arg open: the file name can never be parsed as a mode
+open(OUT, ">", $dest) or die "Cannot write $dest: $!\n";
+open(MOUT, ">", $mdest) or die "Cannot write $mdest: $!\n";
+# The markdown template starts by extending the layout template.
+print MOUT "{% extends \"$dest\" %}\n";
+
+my $done_title = 0; # number of lines seen so far that contain $page.title
+my $done_body = 0; # number of lines seen so far that contain $body
+while(my $line = <INP>) {
+ if($line =~ /^#/) { next; } # lines starting with "#" are not copied to the output
+# The first $page.title becomes the "title" block; later ones are removed with a warning.
+ if($line =~ /\$page\.title/) { # dot escaped so only the literal placeholder matches
+ $done_title++;
+ if($done_title > 1) {
+ warn("Found \$page.title multiple times, but only converting once\n");
+ $line =~ s/\$page\.title//g;
+ } else {
+ $line =~ s/\$page\.title/{% block title %}{% endblock %}/;
+ print MOUT '{% block title %}{{ headers.title }}{% endblock %}'."\n";
+ }
+ }
+# Same scheme for $body, which becomes the "content" block.
+ if($line =~ /\$body/) {
+ $done_body++;
+ if($done_body > 1) {
+ warn("Found \$body multiple times, but only converting once\n");
+ $line =~ s/\$body//g;
+ } else {
+ $line =~ s/\$body/{% block content %}{% endblock %}/;
+ print MOUT '{% block content %}{{ content|markdown }}{% endblock %}'."\n";
+ }
+ }
+# The breadcrumbs call is replaced by the corresponding template variable.
+ $line =~ s/\$autoexport\.breadcrumbs\(\$page\)/{{ breadcrumbs|safe }}/;
+
+ print OUT $line;
+}
+
+close INP;
+close OUT or die "Cannot finish writing $dest: $!\n"; # buffered write errors only show up at close
+close MOUT or die "Cannot finish writing $mdest: $!\n";
+# Report the two generated templates
+print "Generated $dest\n";
+print "Generated $mdest\n";
+
+=head1 LICENSE
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
Propchange: sling/site/tools/conversion/convert_export_template.pl
------------------------------------------------------------------------------
svn:executable = *
Added: sling/site/tools/conversion/export_site.pl
URL: http://svn.apache.org/viewvc/sling/site/tools/conversion/export_site.pl?rev=1340668&view=auto
==============================================================================
--- sling/site/tools/conversion/export_site.pl (added)
+++ sling/site/tools/conversion/export_site.pl Sun May 20 10:49:41 2012
@@ -0,0 +1,93 @@
+#!/usr/bin/perl
+# Spiders a site using the cwiki export, and generates markdown files
+# for each page in cwiki
+use strict;
+use warnings;
+
+use RPC::XML;
+use RPC::XML::Client;
+
+my $confluence = "https://cwiki.apache.org/confluence/"; # base URL of the Confluence instance
+my $RPCURL = $confluence."rpc/xmlrpc"; # XML-RPC endpoint below that base URL
+# All three arguments are required.
+my $siteName = shift;
+my $username = shift;
+my $password = shift;
+unless($siteName && $username && $password) {
+ print "Use:\n";
+ print " $0 <cwiki site name> <username> <password>\n";
+ print "\n";
+ print " eg for https://cwiki.apache.org/confluence/display/COMDEVxSITE\n";
+ print " $0 COMDEVxSITE jim securePassword\n";
+ exit 1;
+}
+
+# Check we're in the right place
+unless(-d "content" && -d "lib") {
+ die("Must be run from root of site, containing /content/ and /lib/\n");
+}
+# The converter script is run from the directory of this script; "." when $0 carries no directory part
+my $binPath = ($0 =~ /^(.*)\/(.*?)$/) ? $1 : ".";
+
+# Connect to their icky API. (What's wrong with REST we ask...?)
+my $client = RPC::XML::Client->new($RPCURL);
+my $response = $client->send_request('confluence1.login', $username, $password);
+ref($response) or die "ERROR: could not login as $username: $response"; # a non-reference return is a transport error message
+$response->is_fault and die "ERROR: could not login as $username: ", $response->value->{faultString};
+my $token = $response->value; # login token, passed to every later call
+# Get a list of all the pages
+$response = $client->send_request('confluence1.getPages', $token, $siteName);
+ref($response) or die "ERROR: could not get pages for $siteName: $response";
+$response->is_fault and die "ERROR: could not get pages for $siteName: ", $response->value->{faultString};
+my @pages = @{$response->value};
+
+foreach my $page (@pages) { # writes one raw .cwiki file and one converted .mdtext file per wiki page
+ my $title = $page->{title};
+
+ print "Fetching $title (".$page->{id}.")\n";
+ $response = $client->send_request('confluence1.getPage', $token, $siteName, $title);
+ ref($response) or die "ERROR: could not get page details: $response"; # a non-reference return is a transport error message
+ $response->is_fault and die "ERROR: could not get page details: ", $response->value->{faultString};
+ my %details = %{$response->value};
+ my $content = $details{content};
+
+ print "Processing $title from ".$page->{url}."\n";
+ # File name: lower-cased title with whitespace turned into "-"
+ my $fileBase = $title;
+ $fileBase =~ s/\s/-/g;
+
+ my $cwikiFile = "content/".lc($fileBase).".cwiki";
+ open(CWIKI, ">", $cwikiFile) or die "ERROR: could not write $cwikiFile: $!\n";
+ print CWIKI $content;
+ close CWIKI or die "ERROR: could not write $cwikiFile: $!\n";
+
+ my $mdFile = $cwikiFile;
+ $mdFile =~ s/\.cwiki$/.mdtext/;
+
+ print " Generating markdown file\n";
+ system("$binPath/convert_cwiki_markup.pl", $title, $cwikiFile, $mdFile) == 0 or warn " Conversion of $title failed\n"; # list form: no shell, so quotes or "\$" in a title are harmless; converter output is no longer swallowed
+
+ print " Finished processing $title\n\n";
+}
+
+print "\n";
+print "Done!\n";
+
+=head1 LICENSE
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
Propchange: sling/site/tools/conversion/export_site.pl
------------------------------------------------------------------------------
svn:executable = *