You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@openwebbeans.apache.org by wa...@apache.org on 2021/06/07 18:34:00 UTC

[openwebbeans-site] 04/16: fixup html content in blogs

This is an automated email from the ASF dual-hosted git repository.

wave pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/openwebbeans-site.git

commit b084c395697a8078f3e5d95cbd330887a0da43a5
Author: Dave Fisher <da...@davefisher.tech>
AuthorDate: Mon Jun 7 09:32:59 2021 -0700

    fixup html content in blogs
---
 theme/plugins/asfdata.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/theme/plugins/asfdata.py b/theme/plugins/asfdata.py
index 6095537..7b72ee2 100644
--- a/theme/plugins/asfdata.py
+++ b/theme/plugins/asfdata.py
@@ -24,6 +24,7 @@ import os.path
 import sys
 import random
 import json
+import re
 import traceback
 import operator
 import pprint
@@ -37,12 +38,18 @@ import xml.dom.minidom
 import pelican.plugins.signals
 import pelican.utils
 
+from bs4 import BeautifulSoup
 
 ASF_DATA = {
     'metadata': { },
     'debug': False,
 }
 
+FIXUP_HTML = [
+    (re.compile(r'&lt;'),'<'),
+    (re.compile(r'&gt;'),'>'),
+]
+
 # read the asfdata configuration in order to get data load and transformation instructions.
 def read_config(config_yaml):
     with pelican.utils.pelican_open(config_yaml) as text:
@@ -362,6 +369,12 @@ def process_blog(feed, count, words, debug):
         content_text = ''
         if words:
             content_text = ' '.join(get_element_text(entry, 'content').split(' ')[:words]) + "..."
+            for regex, replace in FIXUP_HTML:
+                m = regex.search(content_text)
+                if m:
+                    content_text = re.sub(regex, replace, content_text)
+            tree_soup = BeautifulSoup(content_text, 'html.parser')
+            content_text = tree_soup.decode(formatter='html')
         # we want the title and href
         v.append(
             {