faqts : Computers : Programming : Languages : Python : Snippets

+ Search
Add Entry Alert Manage Folder Edit Entry Add page to http://del.icio.us/
Did You Find This Entry Useful?

3 of 6 people (50%) answered Yes
Recently 1 of 3 people (33%) answered Yes

Entry

Turning indented text into a list of tree structures

Jul 5th, 2000 10:00
Nathan Wallace, Hans Nowak, Snippet 112, Glyn Webster


"""
Packages: text
"""
""" Function to turn indented text into a list of tree structures.
    (Run the example and all will become clear.)
    Glyn Webster <glyn@ninz.org.nz> 2021-04-27
"""
import string, re
# Split a line into indent and text parts, throwing away trailing whitespace.
# The text group is non-greedy: a greedy `(.*)' would swallow the trailing
# whitespace itself and leave nothing for the final `\s*' to discard.
_Split = re.compile(r'^(\s*)(.*?)\s*$')
def _leave_it_alone(s):
    """ Identity function: hands `s' back unchanged. """
    return s
def tree_reader(text, data_constructor = _leave_it_alone):
    """ Transforms indented text into a list of tree structures.
        The tree structures have one node per non-blank line:
           node = (datum, list_of_subnodes)
        The `datum' part is created from the text on the line.
        The `list_of_subnodes' is a list of nodes for the lines
        indented beneath the `datum' line.  If there are none,
        `list_of_subnodes' is an empty list.
        `data_constructor' is a function or class constructor
        that transforms the text for a line into whatever data
        type you want a node to contain. (The default is to just
        leave it as a string.)
        Returns the list of top-level nodes.
    """
    # Use the str method instead of the deprecated `string.split' function:
    # it behaves the same and does not rely on the legacy `string' module API.
    lines = text.split("\n")
    # Start at depth -1 so that lines with no indentation at all (depth 0)
    # still count as deeper than the imaginary root.
    return _tree_reader(lines, data_constructor, -1)
def _tree_reader(lines, data_constructor, indent_depth):
    """ Builds the nodes for the leading lines indented deeper than
        `indent_depth'.
        `lines' is a list of strings, one per line of text.  Lines are
        removed from its front as they are consumed; reading stops at
        the first non-blank line whose indent is `indent_depth' or less,
        and that line is left in `lines' for the caller.
        `data_constructor' is called with the text part of each
        non-blank line to produce that node's datum.
        Returns a list of (datum, list_of_subnodes) tuples.
    """
    # Note: this destroys `lines' as it goes.
    nodes = []
    while lines:
        indent, line = _Split.match(lines[0]).group(1, 2)
        if not line:
            # Blank or whitespace-only line: it carries no node, drop it.
            del lines[0]
            continue
        # Expand tabs (str method, not the deprecated `string.expandtabs'
        # function) so tab-indented and space-indented lines are compared
        # by column.
        depth = len(indent.expandtabs())
        if depth <= indent_depth:
            # This line belongs to an enclosing level; leave it unconsumed.
            break
        del lines[0]
        datum = data_constructor(line)
        # Everything indented deeper than this line becomes its subtree;
        # the recursive call consumes those lines from the shared list.
        subnodes = _tree_reader(lines, data_constructor, depth)
        nodes.append((datum, subnodes))
    return nodes
# Test code:
if __name__ == "__main__":
    # Demonstration: print a sample outline, then the structure parsed from it.
    import pprint
    prettyprinter = pprint.PrettyPrinter(4)
    text = '''
Animals
   Mammals
      Humans
      Fruitbats
   Birds
Monsters
   Big
   Small
      Cats
'''
    # Single-argument parenthesised print produces the same output whether
    # `print' is a statement or a function.
    print(text)
    trees = tree_reader(text)
    print("`tree_reader' thinks that's structured like this:\n")
    prettyprinter.pprint(trees)