#!/usr/bin/python
"""Fix raw ampersands inside href URLs in Blogger post bodies.

Scans every post of the named blog; any href attribute whose quoted URL
contains a bare '&' (one that does not start a character/entity reference)
is rewritten so the ampersand is escaped as '&amp;', and the corrected
entry is pushed back to Blogger.

Usage: fixblogamps.py <email> <password> <blog name>
"""

__author__ = "David Wragg "
__copyright__ = "Copyright 2007, David Wragg"
__license__ = "http://www.apache.org/licenses/LICENSE-2.0"

try:
    from xml.etree import ElementTree  # for Python 2.5
except ImportError:
    from elementtree import ElementTree

import gdata.service
import atom
import sys
import re


def get_blog_uri(service, name, rel='http://schemas.google.com/g/2005#post'):
    """Return the href of the link with relation `rel` for the blog titled `name`.

    Returns None when the account has no blog with that title, or the blog
    entry carries no link with the requested relation.
    """
    feed = service.Get('/feeds/default/blogs')
    blogs = [entry for entry in feed.entry if entry.title.text == name]
    if blogs:
        postlink = [link.href for link in blogs[0].link if link.rel == rel]
        if postlink:
            return postlink[0]
    return None


# An href attribute whose quoted value contains at least one '&'.
href_re = re.compile(r'(href\s*=\s*)([\'\"])([^\'\"]*\&[^\'\"]*)\2')
# A bare '&' that does not begin a character/entity reference
# (i.e. not followed by '#' or by an entity name and ';').
badamper_re = re.compile(r'\&(?!\#|[a-z]+;)')


def fix_uri_ampersands(match):
    """re.sub callback: escape every bare ampersand in a matched href value."""
    (href, quotes, uri) = match.groups()
    # BUG FIX: the replacement must be '&amp;'. Substituting '&' for '&'
    # was a no-op, so origtext always equalled newtext and no post was
    # ever updated.
    return href + quotes + badamper_re.sub('&amp;', uri) + quotes


def fix_blog_uri_ampersands(email, passwd, blog_name):
    """Log in to Blogger and repair href ampersands in every post of `blog_name`.

    Raises ValueError when the account has no blog with that title, instead
    of passing None into gdata.service.Query and failing obscurely.
    """
    service = gdata.service.GDataService(email, passwd)
    service.source = 'Blog Ampersands-in-URLs Fixer'
    service.service = 'blogger'
    service.server = 'www.blogger.com'
    service.ProgrammaticLogin()

    post_uri = get_blog_uri(service, blog_name)
    if post_uri is None:
        raise ValueError("no blog titled %r found for this account" % blog_name)

    # to handle blogs with more than 100 posts, this code should fetch
    # the entries in chunks
    query = gdata.service.Query(post_uri)
    query.max_results = "100"
    feed = service.GetFeed(query.ToUri())
    for entry in feed.entry:
        if entry.content.type == 'html':
            origtext = entry.content.text
            newtext = href_re.sub(fix_uri_ampersands, origtext)
            if origtext != newtext:
                print("Updating %s (%s)" % (entry.title.text,
                                            entry.GetEditLink().href))
                entry.content.text = newtext
                service.Put(entry, entry.GetEditLink().href)
                print("Updated")


if __name__ == '__main__':
    # usage: fixblogamps.py <email> <password> <blog name>
    fix_blog_uri_ampersands(sys.argv[1], sys.argv[2], sys.argv[3])