I am testing some code for web crawling:
def getExternalLinks(bs, excludeUrl):
    """Collect the distinct external links found in a parsed page.

    A link counts as external when its ``href`` starts with ``http`` or
    ``www`` and does not contain ``excludeUrl`` anywhere.

    Args:
        bs: Parsed document exposing ``find_all``; it is called as
            ``bs.find_all('a', href=<compiled pattern>)``.
        excludeUrl: Text that marks a link as internal. It is matched
            literally. An empty string occurs in every href, so in that
            case no links are returned.

    Returns:
        List of href strings without duplicates, in the order ``find_all``
        yielded them.
    """
    # How the pattern works:
    #   ^(?=http|www)      the href must begin with "http" or "www"
    #   (?:(?!EXCL).)*     consume one character at a time, but only while
    #                      EXCL does not start at the current position
    #   $                  ...all the way to the end of the href
    # Together: a string starting with http/www in which EXCL never occurs.
    # re.escape keeps characters such as "." in the URL from acting as regex
    # metacharacters, and using a lookahead for the prefix lets the exclusion
    # also apply when EXCL starts at the very beginning of the href
    # (e.g. excludeUrl "www.example.com").
    pattern = re.compile(
        '^(?=http|www)(?:(?!' + re.escape(excludeUrl) + ').)*$'
    )

    externalLinks = []
    seen = set()  # O(1) duplicate check; the list preserves order
    for link in bs.find_all('a', href=pattern):
        href = link.attrs.get('href')
        if href is None or href in seen:
            continue
        seen.add(href)
        externalLinks.append(href)
    return externalLinks
I cannot work out what the sub-expression ((?!'+excludeUrl+').) does in the regular expression re.compile('^(http|www)((?!'+excludeUrl+').)*$').