Entry
Process text in and around regular expressions
Jul 5th, 2000 10:00
Nathan Wallace, unknown unknown, Hans Nowak, Snippet 116, Glyn Webster
"""
Packages: text.regular_expressions
"""
""" Functions to process text in and around regular expressions.
Use this when you want to process the text that matches a regex
in one way and the text that doesn't match in another way.
(NOTE: If you don't want to process non-matching text then you
are better off using the standard function `re.sub`.)
XXX this should have a less generic name.
Glyn Webster <glyn@ninz.org.nz> 2021-04-27
"""
def munge(text, regex, match_munger, non_match_munger):
    """Return 'text' with matching and non-matching parts munged separately.

    Every substring that matches 'regex' is run through 'match_munger';
    every stretch of text between matches (including the text before the
    first match and after the last one) is run through 'non_match_munger'.
    The munged pieces are joined back together in their original order.

    Args:
        text: The string to process.
        regex: A compiled regular expression object (must provide
            'finditer').
        match_munger: Function called with each matched substring; must
            return a string.
        non_match_munger: Function called with each non-matching stretch;
            must return a string. It receives '' when two matches are
            adjacent or when a match touches either end of 'text'.

    Returns:
        The concatenation of all munged pieces, as one string.
    """
    pieces = []
    cursor = 0
    # 'finditer' is used instead of repeatedly calling 'search' from the end
    # of the previous match: it keeps advancing after a zero-length match
    # (e.g. r'x*'), whereas restarting the search at 'match.end()' would
    # find the same empty match forever.
    for match in regex.finditer(text):
        pieces.append(non_match_munger(text[cursor:match.start()]))
        pieces.append(match_munger(match.group()))
        cursor = match.end()
    # Whatever follows the last match (possibly '') is non-matching text.
    pieces.append(non_match_munger(text[cursor:]))
    return ''.join(pieces)
def mungeToFile(file, text, regex, match_munger, non_match_munger):
    """Like 'munge', but write the munged pieces straight to 'file'.

    Each piece is written as soon as it is produced, so no result string
    is built up in memory. (This may be faster than 'munge' for that
    reason.)

    Args:
        file: An object with a 'write' method that accepts the strings
            returned by the munger functions.
        text: The string to process.
        regex: A compiled regular expression object (must provide
            'finditer').
        match_munger: Function called with each matched substring; its
            result is written to 'file'.
        non_match_munger: Function called with each non-matching stretch
            (possibly ''); its result is written to 'file'.

    Returns:
        None.
    """
    cursor = 0
    # 'finditer' is used instead of repeatedly calling 'search' from the end
    # of the previous match: it keeps advancing after a zero-length match
    # (e.g. r'x*'), whereas restarting the search at 'match.end()' would
    # find the same empty match forever.
    for match in regex.finditer(text):
        file.write(non_match_munger(text[cursor:match.start()]))
        file.write(match_munger(match.group()))
        cursor = match.end()
    # Whatever follows the last match (possibly '') is non-matching text.
    file.write(non_match_munger(text[cursor:]))
# Test:
if __name__ == "__main__":
    # Self-test: munge a sample string both ways and print the results.
    import re
    import sys

    def QuoteXs(s):
        """Wrap a run of x's in angle brackets."""
        return '<%s>' % s

    def QuoteOtherStuff(s):
        """Wrap everything that is not a run of x's in braces."""
        return '{%s}' % s

    FindXs = re.compile(r'x+')
    Test = 'yxyyxxyyyxxxyyyy'

    # First via the string-returning version...
    s = munge(Test, FindXs, QuoteXs, QuoteOtherStuff)
    sys.stdout.write(s + '\n')

    # ...then via the version that writes directly to a file object.
    mungeToFile(sys.stdout, Test, FindXs, QuoteXs, QuoteOtherStuff)
    sys.stdout.write('\n')

    # Expected output (the same line, twice):
    #   {y}<x>{yy}<xx>{yyy}<xxx>{yyyy}
    #   {y}<x>{yy}<xx>{yyy}<xxx>{yyyy}