Clean out the <a> tags from RSS with a single regexp,
author Deryck Hodge <deryck@samba.org>
Sun, 20 Mar 2005 14:31:47 +0000 (14:31 +0000)
committer Deryck Hodge <deryck@samba.org>
Sun, 20 Mar 2005 14:31:47 +0000 (14:31 +0000)
and find all possible forms of <a>, including across
multiple lines.

deryck

git-svn-id: file:///home/svn/samba-web/trunk@596 44aeb9d7-1cd8-0310-b257-a505e0beeac2

scripts/updateNews.py

index 314196b04cf24609f8c74f8d20191f2911c80b86..68f548697070c9c55b880a9a8db645d3fe7a87f6 100755 (executable)
@@ -212,10 +212,8 @@ for date in post_dates:
         end = item_text.find('</p>') 
         descrip = item_text[begin:end]
         # Remove links to avoid malformed XML
-        a_begin = re.compile('<a href="(.*)">')
-        a_end = re.compile('</a>')
-        descrip = a_begin.sub('', descrip)
-        descrip = a_end.sub('', descrip)
+        atags = re.compile('<a href="(.*)">|</a>|<a>|<a|href="(.*)">')
+        descrip = atags.sub('', descrip)
 
         feed.write('<item>\n')
         feed.write('<title>' + title.group(0) + '</title>\n')