#!/usr/bin/python
"""Fix raw ampersands inside href URLs in Blogger post bodies.

Scans every post of the named blog; any href attribute whose quoted URL
contains a bare '&' (one that does not start a character/entity reference)
is rewritten so the ampersand is escaped as '&amp;', and the corrected
entry is pushed back to Blogger.

Usage: fixblogamps.py <email> <password> <blog name>
"""

__author__ = "David Wragg "
__copyright__ = "Copyright 2007, David Wragg"
__license__ = "http://www.apache.org/licenses/LICENSE-2.0"

try:
    from xml.etree import ElementTree  # for Python 2.5
except ImportError:
    from elementtree import ElementTree

import gdata.service
import atom
import sys
import re


def get_blog_uri(service, name, rel='http://schemas.google.com/g/2005#post'):
    """Return the href of the link with relation `rel` for the blog titled `name`.

    Returns None when the account has no blog with that title, or the blog
    entry carries no link with the requested relation.
    """
    feed = service.Get('/feeds/default/blogs')
    blogs = [entry for entry in feed.entry if entry.title.text == name]
    if blogs:
        postlink = [link.href for link in blogs[0].link if link.rel == rel]
        if postlink:
            return postlink[0]
    return None


# An href attribute whose quoted value contains at least one '&'.
href_re = re.compile(r'(href\s*=\s*)([\'\"])([^\'\"]*\&[^\'\"]*)\2')
# A bare '&' that does not begin a character/entity reference
# (i.e. not followed by '#' or by an entity name and ';').
badamper_re = re.compile(r'\&(?!\#|[a-z]+;)')


def fix_uri_ampersands(match):
    """re.sub callback: escape every bare ampersand in a matched href value."""
    (href, quotes, uri) = match.groups()
    # BUG FIX: the replacement must be '&amp;'. Substituting '&' for '&'
    # was a no-op, so origtext always equalled newtext and no post was
    # ever updated.
    return href + quotes + badamper_re.sub('&amp;', uri) + quotes


def fix_blog_uri_ampersands(email, passwd, blog_name):
    """Log in to Blogger and repair href ampersands in every post of `blog_name`.

    Raises ValueError when the account has no blog with that title, instead
    of passing None into gdata.service.Query and failing obscurely.
    """
    service = gdata.service.GDataService(email, passwd)
    service.source = 'Blog Ampersands-in-URLs Fixer'
    service.service = 'blogger'
    service.server = 'www.blogger.com'
    service.ProgrammaticLogin()

    post_uri = get_blog_uri(service, blog_name)
    if post_uri is None:
        raise ValueError("no blog titled %r found for this account" % blog_name)

    # to handle blogs with more than 100 posts, this code should fetch
    # the entries in chunks
    query = gdata.service.Query(post_uri)
    query.max_results = "100"
    feed = service.GetFeed(query.ToUri())
    for entry in feed.entry:
        if entry.content.type == 'html':
            origtext = entry.content.text
            newtext = href_re.sub(fix_uri_ampersands, origtext)
            if origtext != newtext:
                print("Updating %s (%s)" % (entry.title.text,
                                            entry.GetEditLink().href))
                entry.content.text = newtext
                service.Put(entry, entry.GetEditLink().href)
                print("Updated")


if __name__ == '__main__':
    # usage: fixblogamps.py <email> <password> <blog name>
    fix_blog_uri_ampersands(sys.argv[1], sys.argv[2], sys.argv[3])