You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openwebbeans.apache.org by wa...@apache.org on 2021/06/07 18:34:00 UTC
[openwebbeans-site] 04/16: fixup html content in blogs
This is an automated email from the ASF dual-hosted git repository.
wave pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/openwebbeans-site.git
commit b084c395697a8078f3e5d95cbd330887a0da43a5
Author: Dave Fisher <da...@davefisher.tech>
AuthorDate: Mon Jun 7 09:32:59 2021 -0700
fixup html content in blogs
---
theme/plugins/asfdata.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/theme/plugins/asfdata.py b/theme/plugins/asfdata.py
index 6095537..7b72ee2 100644
--- a/theme/plugins/asfdata.py
+++ b/theme/plugins/asfdata.py
@@ -24,6 +24,7 @@ import os.path
import sys
import random
import json
+import re
import traceback
import operator
import pprint
@@ -37,12 +38,18 @@ import xml.dom.minidom
import pelican.plugins.signals
import pelican.utils
+from bs4 import BeautifulSoup
ASF_DATA = {
'metadata': { },
'debug': False,
}
+FIXUP_HTML = [
+ (re.compile(r'&lt;'),'<'),
+ (re.compile(r'&gt;'),'>'),
+]
+
# read the asfdata configuration in order to get data load and transformation instructions.
def read_config(config_yaml):
with pelican.utils.pelican_open(config_yaml) as text:
@@ -362,6 +369,12 @@ def process_blog(feed, count, words, debug):
content_text = ''
if words:
content_text = ' '.join(get_element_text(entry, 'content').split(' ')[:words]) + "..."
+ for regex, replace in FIXUP_HTML:
+ m = regex.search(content_text)
+ if m:
+ content_text = re.sub(regex, replace, content_text)
+ tree_soup = BeautifulSoup(content_text, 'html.parser')
+ content_text = tree_soup.decode(formatter='html')
# we want the title and href
v.append(
{