Entry
Turning indented text into a list of tree structures
Jul 5th, 2000 10:00
Nathan Wallace, Hans Nowak, Snippet 112, Glyn Webster
"""
Packages: text
"""
""" Function to turn indented text into a list of tree structures.
(Run the example and all will become clear.)
Glyn Webster <glyn@ninz.org.nz> 2021-04-27
"""
import string, re
# Split a line into its leading indent (group 1) and its text (group 2).
# Group 2 has to end on a non-whitespace character, so trailing whitespace
# (including a stray "\r" from CRLF input) falls to the final `\s*' and is
# thrown away.  A blank line yields an empty group 2.
_Split = re.compile(r'^(\s*)((?:.*\S)?)\s*$')


def _leave_it_alone(s):
    """ Return `s' unchanged (the default `data_constructor'). """
    return s


def tree_reader(text, data_constructor=_leave_it_alone):
    """ Transforms indented text into a list of tree structures.

        The tree structures have one node per non-blank line:

            node = (datum, list_of_subnodes)

        The `datum' part is created from the text on the line, with
        the indent and any trailing whitespace removed.

        The `list_of_subnodes' is a list of nodes for the lines
        indented beneath the `datum' line. If there are none,
        `list_of_subnodes' is an empty list.

        `data_constructor' is a function or class constructor
        that transforms the text for a line into whatever data
        type you want a node to contain. (The default is to just
        leave it as a string.)

        Blank and whitespace-only lines are ignored. Tabs in the
        indent are expanded with `str.expandtabs()' (8-column tab
        stops) before depths are compared.
    """
    # -1 is shallower than any real indent, so every line is accepted.
    return _tree_reader(text.split("\n"), data_constructor, -1)


def _tree_reader(lines, data_constructor, indent_depth):
    """ Build the nodes for the lines indented deeper than `indent_depth'.

        Reading stops at the first non-blank line whose indent is
        `indent_depth' columns or less; that line and everything after
        it are left in `lines'.

        Note: this destroys `lines' as it goes -- every line that was
        read (blank ones included) is removed from the front of the
        list before returning. If `data_constructor' raises, `lines'
        is left untouched.

        Returns the list of `(datum, list_of_subnodes)' nodes.
    """
    nodes = []
    # Stack of (depth, child_list) for the nodes that can still receive
    # children.  The bottom entry stands for the caller's level and is
    # never popped, because accepted lines are always deeper than it.
    # An explicit stack keeps this to one pass with no recursion limit.
    open_levels = [(indent_depth, nodes)]
    consumed = 0
    for raw_line in lines:
        indent, line = _Split.match(raw_line).group(1, 2)
        if line:
            depth = len(indent.expandtabs())
            if depth <= indent_depth:
                break
            # Close every open node that is not shallower than this line;
            # what remains on top is this line's parent.
            while open_levels[-1][0] >= depth:
                open_levels.pop()
            subnodes = []
            open_levels[-1][1].append((data_constructor(line), subnodes))
            open_levels.append((depth, subnodes))
        consumed += 1
    # Drop the consumed prefix in one step instead of one `del' per line.
    del lines[:consumed]
    return nodes
# Test code: prints a sample outline, then the trees parsed from it.
if __name__ == "__main__":
    import pprint

    prettyprinter = pprint.PrettyPrinter(4)

    # NOTE(review): the indentation of this sample was lost in the copy of
    # the file this was restored from; the nesting below is a plausible
    # reconstruction so that the demo actually shows a tree -- confirm it
    # against the original snippet if that matters.
    text = '''
    Animals
        Mammals
            Humans
            Fruitbats
        Birds
    Monsters
        Big
        Small
            Cats
    '''

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(text)
    trees = tree_reader(text)
    print("`tree_reader' thinks that's structured like this:\n")
    prettyprinter.pprint(trees)