Correct fix for RSS parsing errors caused by HTML encoded
author Deryck Hodge <deryck@samba.org>
Tue, 12 Apr 2005 21:24:41 +0000 (21:24 +0000)
committer Deryck Hodge <deryck@samba.org>
Tue, 12 Apr 2005 21:24:41 +0000 (21:24 +0000)
characters -- replace all ampersands with '&amp;'.

deryck

git-svn-id: file:///home/svn/samba-web/trunk@624 44aeb9d7-1cd8-0310-b257-a505e0beeac2

scripts/updateNews.py

index 68f548697070c9c55b880a9a8db645d3fe7a87f6..2928f15c635833b8862d5e2b8e50c61fea130af2 100755 (executable)
@@ -192,15 +192,12 @@ feed.write('<title>news.samba.org</title>\n')
 feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
 feed.write('<link>http://news.samba.org/</link>\n\n')
 
-# Characters to avoid as "undefined entities" in XML
-ents = { '&mdash;' : '--' }
-
 count = 10
 for date in post_dates:
     item_text = all_stories[date]
-    if '&' in item_text and ';' in item_text:
-        for ent in ents.keys():
-            item_text = item_text.replace(ent, ents[ent])
+    # Encode *all* ampersands
+    if '&' in item_text:
+            item_text = item_text.replace('&', '&amp;')
                 
     if count > 0:
         title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)