faqts : Computers : Programming : Languages : Python : Snippets : Web Programming / Manipulating HTML files

+ Search
Add Entry | Alert | Manage Folder | Edit Entry | Add page to http://del.icio.us/
Did You Find This Entry Useful?

7 of 12 people (58%) answered Yes
Recently 5 of 10 people (50%) answered Yes

Entry

Translating text file into preformatted HTML

Aug 9th, 2008 02:19
Sek Tea, Nathan Wallace, unknown unknown, Hans Nowak, Snippet 118, Glyn Webster


"""
Packages: text.html
"""
""" text2html.py -- Translates a text file into preformatted HTML with
links.
Outputs the text file as properly escaped HTML code between <PRE> tags.
All URLs in the text (even informal ones) are made into <A HREF> links.
Usage:
  python text2html.py [-c charset] [-t tabwidth] [input.txt [output.html]]
    charset     = character set of the input text.
                  May be ISO_LATIN_1 (default), IBM_PC, or MAC_ROMAN.
    tabwidth    = width of tabs in the input text. Default: 8.
    input.txt   = the text file to translate. If this is not supplied text
                  is read from the standard input and HTML is written to the
                  standard output.
    output.html = the HTML file to output. Default: same as the 'input.txt'
                  file name, except with the extension 'html'.
Glyn Webster <glyn@ninz.org.nz>, 24 Feb 2021
"""
import os, sys, string, getopt   #from the standard library
import munge, htmlesc, vagueurl  #Glyn's modules
# Tab width handed to string.expandtabs in 'filter'; 'main' replaces it when
# the -t switch is given.
my_tabwidth = 8
# Character set handed to htmlesc.htmlesc by 'my_htmlesc'; 'main' replaces it
# when the -c switch is given.
my_charset  = htmlesc.ISO_LATIN_1
def main():
  """ Parses the command line, then runs the translation.

  Switches:
    -t WIDTH    stores WIDTH (an integer) in the global 'my_tabwidth'.
    -c CHARSET  stores a character set in the global 'my_charset'. CHARSET
                is matched by keyword: 'iso' or 'latin', 'ibm' or 'pc',
                'mac'. Any other value shows the help.
    -h          shows the help.

  File arguments:
    none    filters standard input to standard output.
    one     filters that file to the same name with a '.html' extension.
    two     filters the first file to the second.
    more    shows the help.

  Anything the command line gets wrong ends in 'help', which halts the
  program.
  """
  global my_tabwidth, my_charset
  try:
    switches, files = getopt.getopt(sys.argv[1:], 'c:t:h')
    for (switch, value) in switches:
      if switch == "-t":
        try:
          my_tabwidth = int(value)
        except ValueError:
          # A non-numeric tab width is a usage error, not a crash.
          help()
      if switch == "-c":
        if contains(value, 'iso') or contains(value, 'latin'):
          my_charset = htmlesc.ISO_LATIN_1
        elif contains(value, 'ibm') or contains(value, 'pc'):
          my_charset = htmlesc.IBM_PC
        elif contains(value, 'mac'):
          my_charset = htmlesc.MAC_ROMAN
        else:
          help()
      if switch == "-h":
        help()
  except getopt.error:
    help()
  else:
    if len(files) == 0:
      filter(sys.stdin, sys.stdout)
    elif len(files) == 1:
      filter_files(files[0], new_ext(files[0], ".html"))
    elif len(files) == 2:
      # Compare the argument count, not the list itself, so that an explicit
      # output file name is honoured.
      filter_files(files[0], files[1])
    else:
      help()
def filter(textfile, htmlfile):
  """ Copies 'textfile' to 'htmlfile' line by line, wrapped in <pre> tags.

  Each line has its tabs expanded to 'my_tabwidth' columns and is then
  handed to munge.mungeToFile together with vagueurl.regex, 'HREFtag' and
  'my_htmlesc' (presumably: matches become links, the rest is escaped --
  confirm against the munge module).

  If anything goes wrong part-way, the <pre> element is closed, a visible
  "incomplete" marker is written, and the error is re-raised.
  """
  try:
    htmlfile.write("<pre>")
    while True:
      raw = textfile.readline()
      if not raw:
        # An empty read marks the end of the input.
        break
      expanded = raw.expandtabs(my_tabwidth)
      munge.mungeToFile(htmlfile, expanded, vagueurl.regex, HREFtag, my_htmlesc)
    htmlfile.write("</pre>")
  except:
    # Deliberately catches everything: the output is marked as truncated and
    # the original error still propagates to the caller.
    htmlfile.write("</pre><b>THIS DOCUMENT IS INCOMPLETE</b>")
    raise
def filter_files(textpath, htmlpath):
  """ Opens files to feed to 'filter'.

  'textpath' is opened for reading and 'htmlpath' for writing (an existing
  file is overwritten). Both files are closed again whatever happens,
  including when the output file cannot be opened or 'filter' raises.
  Errors from 'open' and 'filter' propagate to the caller.
  """
  textfile = open(textpath)
  try:
    # Opened inside the outer try so that a failure here still closes
    # 'textfile'.
    htmlfile = open(htmlpath, "w")
    try:
      filter(textfile, htmlfile)
    finally:
      htmlfile.close()
  finally:
    textfile.close()
def my_htmlesc(text):
  """ Passes 'text' through htmlesc.htmlesc using the module-level
  'my_charset', and returns the result.
  """
  active_charset = my_charset
  return htmlesc.htmlesc(text, charset = active_charset)
def HREFtag(url):
  """ Builds an <A HREF> element for an informal URL.

  The link target is vagueurl.regular(url); the visible link text is the
  URL as given, passed through 'my_htmlesc'.
  """
  # NOTE(review): the href value is not passed through 'my_htmlesc' --
  # confirm that vagueurl.regular can never return a double quote.
  target = vagueurl.regular(url)
  label = my_htmlesc(url)
  return '<A HREF="%s">%s</A>' % (target, label)
def new_ext(pathname, ext):
  """ Returns 'pathname' with its extension replaced by 'ext'.

  'ext' is appended as given, so it should include the leading dot. A path
  with no extension simply gains 'ext'.
  """
  stem, _old_ext = os.path.splitext(pathname)
  return stem + ext
def help():
  """ Writes a blank line and this module's docstring to standard error,
  then exits the program with status 0.
  """
  # NOTE(review): __doc__ is the first string literal in the module; in this
  # listing that appears to be the "Packages: text.html" string rather than
  # the usage text -- confirm.
  err = sys.stderr
  err.write('\n')
  err.write(__doc__)
  sys.exit(0)
def contains(str, substr):
  """ Returns true if 'substr' is inside 'str' (case-insensitive).

  Both arguments are lower-cased before the comparison. An empty 'substr'
  is inside every string.
  """
  # The parameter name 'str' shadows the builtin; it is kept so that callers
  # passing it by keyword keep working.
  # A plain membership test replaces the comparison of find()'s result with
  # 1: find() signals "not found" with -1, so the old test was true for
  # nearly every input.
  return substr.lower() in str.lower()
# Run 'main' only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
  main()
http://regalos-de-navidad.blogspot.com/
http://regalosdesanvalentin.blogspot.com/
http://ideas-para-regalar.blogspot.com/