faqts : Computers : Programming : Languages : Python : Snippets : Regular Expressions

+ Search
Add Entry | Alert | Manage Folder | Edit Entry | Add page to http://del.icio.us/
Did You Find This Entry Useful?

2 of 4 people (50%) answered Yes
Recently 0 of 2 people (0%) answered Yes

Entry

Process text in and around regular expressions

Jul 5th, 2000 10:00
Nathan Wallace, unknown unknown, Hans Nowak, Snippet 116, Glyn Webster


"""
Packages: text.regular_expressions
"""
""" Functions to process text in and around regular expressions.
    Use this when you want to process the text that matches a regex
    in one way and the text that doesn't match in another way.
    (NOTE: If you don't want to process non-matching text then you
    are better off using the standard function 're.sub'.)
    XXX this should have a less generic name.
    Glyn Webster <glyn@ninz.org.nz> 2021-04-27
"""
def munge(text, regex, match_munger, non_match_munger):
  """ Return 'text' with matching and non-matching substrings transformed.

      Every substring that matches 'regex' is run through 'match_munger'
      and every stretch of text between matches is run through
      'non_match_munger'.  The text before the first match and after the
      last match is also passed to 'non_match_munger', even when it is
      empty.  The transformed pieces are joined in their original order.

      text             -- the string to process.
      regex            -- a compiled regular expression object (it must
                          provide a 'finditer' method).
      match_munger     -- called with each matched substring; must
                          return a string.
      non_match_munger -- called with each non-matching substring; must
                          return a string.

      Returns the processed string.
  """
  pieces = []
  cursor = 0
  # finditer always moves on after a zero-length match, so a pattern that
  # can match the empty string (such as 'x*' or a lookahead) terminates
  # instead of being found again at the same position forever.
  for match in regex.finditer(text):
    pieces.append(non_match_munger(text[cursor:match.start()]))
    pieces.append(match_munger(match.group()))
    cursor = match.end()
  # Whatever follows the last match (possibly nothing) is non-matching.
  pieces.append(non_match_munger(text[cursor:]))
  # Join once at the end rather than growing a string piece by piece.
  return ''.join(pieces)
def mungeToFile(file, text, regex, match_munger, non_match_munger):
  """ Write 'text' to 'file' with matching and non-matching parts transformed.

      Like 'munge', but each transformed piece is written straight to
      'file' instead of being collected into a return value.  Substrings
      that match 'regex' are run through 'match_munger'; the text between
      matches, before the first match and after the last match is run
      through 'non_match_munger', even when it is empty.

      file             -- an object with a 'write' method that accepts
                          the strings returned by the munger functions.
      text             -- the string to process.
      regex            -- a compiled regular expression object (it must
                          provide a 'finditer' method).
      match_munger     -- called with each matched substring.
      non_match_munger -- called with each non-matching substring.

      Returns None.
  """
  cursor = 0
  # finditer always moves on after a zero-length match, so a pattern that
  # can match the empty string (such as 'x*' or a lookahead) terminates
  # instead of being found again at the same position forever.
  for match in regex.finditer(text):
    file.write(non_match_munger(text[cursor:match.start()]))
    file.write(match_munger(match.group()))
    cursor = match.end()
  # Whatever follows the last match (possibly nothing) is non-matching.
  file.write(non_match_munger(text[cursor:]))
# Self-test: run this file as a script to exercise both functions.
if __name__ == "__main__":
  import re
  import sys

  sample = 'yxyyxxyyyxxxyyyy'
  x_runs = re.compile(r'x+')

  def bracket_match(s):
    """ Wrap matched text in angle brackets. """
    return '<%s>' % s

  def bracket_other(s):
    """ Wrap non-matching text in curly braces. """
    return '{%s}' % s

  # First build the whole result as a string and print it ...
  sys.stdout.write(munge(sample, x_runs, bracket_match, bracket_other) + '\n')
  # ... then stream the same result directly to stdout.
  mungeToFile(sys.stdout, sample, x_runs, bracket_match, bracket_other)
  sys.stdout.write('\n')
  # Expected output (the same line twice):
  #   {y}<x>{yy}<xx>{yyy}<xxx>{yyyy}
  #   {y}<x>{yy}<xx>{yyy}<xxx>{yyyy}