Entry
Looking for URLs in HTML text
Jul 5th, 2000 09:59
Nathan Wallace, unknown unknown, Hans Nowak, Snippet 87, Glyn Webster
"""
Packages: networking.internet;text.html
"""
""" Function that looks for URLs in HTML text.
Glyn Webster <glyn@ninz.org.nz> 2021-04-27
"""
import re
# Regex that matches HTML tag attributes that contain URLs.
# Only double-quoted values of the src, background and href attributes are
# recognised.  re.S lets a value span line breaks; re.I makes the attribute
# name case-insensitive.  The URL itself is captured in the named group 'URL'.
_link_attr = re.compile(r'(src|background|href)="(?P<URL>.*?)"', re.S | re.I)


def findurls(s):
    """Return a list of all the URLs referenced in a string of HTML.

    The string is scanned with the module-level ``_link_attr`` pattern, so
    only double-quoted ``src``, ``background`` and ``href`` attribute values
    are picked up.  Each distinct URL appears once in the result, in the
    order of its first occurrence in ``s``.  URLs are returned exactly as
    written in the HTML (no unescaping or normalisation is done).

    Args:
        s: The HTML text to scan.

    Returns:
        A list of unique URL strings; empty if no matching attribute is found.
    """
    urls = []
    # Track URLs already emitted in a set so the duplicate check stays O(1)
    # while the list keeps first-seen order.
    seen = set()
    for match in _link_attr.finditer(s):
        url = match.group('URL')
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls