<a href="toto.html" title="<strong>I like it!<strong><br />Jerome">
Toto
</a>
to
<a href="toto.html" title="&lt;strong&gt;I like it!&lt;strong&gt;&lt;br /&gt;Jerome"> Toto </a>
The title attribute is used by Lightbox and similar plugins as the caption.
# one file
python escape.py toto.html
# multiple files
python escape.py *.html
escape.py
# -*- coding: utf-8 -*-
"""Escape HTML markup found inside the ``title`` attribute of ``<a>`` tags.

Usage::

    python escape.py toto.html
    python escape.py *.html

Each file given on the command line is rewritten in place.
"""
import re
import sys

# A line consisting of an opening <a ...> tag whose last attribute is
# title="...". Compiled once at import time instead of on every call.
_TITLE_RE = re.compile(r'^\s*<a.* title="(.*)">$')


def escape(s):
    """Return line *s* with the markup in its ``title`` attribute escaped.

    Only lines matching ``<a ... title="...">`` (optionally preceded by
    whitespace, with ``title`` as the last attribute) are modified; any
    other line is returned unchanged.

    Inside the title value, ``<p>`` and ``</p>`` are removed and the
    remaining ``<`` and ``>`` become ``&lt;`` and ``&gt;``.
    """
    m = _TITLE_RE.match(s)
    if not m:
        return s

    title = m.group(1)
    title = title.replace('<p>', '').replace('</p>', '')
    title = title.replace('<', '&lt;').replace('>', '&gt;')

    # Splice by match position rather than str.replace(), so identical text
    # elsewhere on the line (e.g. in href) is left untouched.
    return s[:m.start(1)] + title + s[m.end(1):]


def process_file(path):
    """Escape anchor titles in the file at *path*, rewriting it in place.

    The resulting content is also printed to standard output.
    """
    # read file
    with open(path) as f:
        # Only lines containing '<a ' are handed to escape().
        s = "".join(escape(l) if '<a ' in l else l for l in f)
    print(s)

    # write file
    with open(path, 'w') as f:
        f.write(s)


def main(argv=None):
    """Process every path in *argv* (defaults to the command-line arguments)."""
    paths = sys.argv[1:] if argv is None else argv
    for path in paths:
        print(path)
        process_file(path)


if __name__ == "__main__":
    main()