"""Re-encode an HTML file from a legacy character encoding to UTF-8.

Usage:
    script CODEC [SOURCE [DEST]]

CODEC is the name of the encoding the source file is stored in (for
example ``cp1140``).  SOURCE defaults to ``example.html`` and DEST
defaults to ``example_u.html``.
"""
from sys import argv
import codecs

DEFAULT_SOURCE = "example.html"
DEFAULT_DEST = "example_u.html"


def transcode_to_utf8(codec, source=DEFAULT_SOURCE, dest=DEFAULT_DEST):
    """Decode *source* using *codec* and write the text to *dest* as UTF-8.

    The whole file is read into memory before the destination is opened,
    so a missing source file or a decoding failure leaves *dest* untouched.

    Args:
        codec: Name of the encoding *source* is stored in.
        source: Path of the file to read.
        dest: Path of the UTF-8 file to create or overwrite.

    Raises:
        LookupError: If *codec* is not a known encoding name.
        UnicodeDecodeError: If *source* is not valid in *codec*.
        IOError/OSError: If either file cannot be opened.
    """
    # codecs.open works on the raw bytes, so line endings pass through
    # exactly as they were decoded (no newline translation).
    with codecs.open(source, 'r', codec) as reader:
        text = reader.read()

    # The context managers guarantee both handles are flushed and closed,
    # even if writing fails part-way through.
    with codecs.open(dest, 'w', 'utf-8') as writer:
        writer.write(text)


def main(args=None):
    """Command-line entry point.

    Args:
        args: Argument vector including the program name; defaults to
            ``sys.argv``.

    Raises:
        SystemExit: With a usage message if the argument count is wrong.
    """
    args = argv if args is None else args
    if not 2 <= len(args) <= 4:
        program = args[0] if args else "transcode"
        raise SystemExit("usage: %s CODEC [SOURCE [DEST]]" % program)
    transcode_to_utf8(*args[1:])


if __name__ == "__main__":
    main()