"""Print a two-level "influenced" graph scraped from Wikipedia pages.

Starting from one article, the script fetches the page, looks for the
"Influenced by" / "Influenced" labels, collects the links on the line that
follows each label, and prints ``page -> link;`` edges for two levels.
"""
import re

try:  # Python 2
    import urllib2
except ImportError:  # Python 3 exposes the same API as urllib.request
    import urllib.request as urllib2

# Second-level targets that have already been printed (href -> True).
been_there = {}

# Captures the target of every ``a href="..."`` attribute on a line.
_HREF_RE = re.compile(r'a href="(.*?)"')


def _to_text(line):
    """Return *line* as ``str``, decoding it only when it is not one already."""
    if isinstance(line, str):
        return line
    return line.decode('utf-8', 'replace')


def get(page):
    """Fetch a Wikipedia page and return its lines, stripped of whitespace.

    Args:
        page: Path part of the URL, e.g. ``"/wiki/Immanuel_Kant"``.

    Returns:
        A list of stripped text lines, or an empty list when the page could
        not be fetched (a message is printed in that case).
    """
    url = 'http://en.wikipedia.org' + page
    header = {'User-Agent': 'InfluenceTree Bot'}
    try:
        # data must be None: any other value turns the request into a POST.
        response = urllib2.urlopen(urllib2.Request(url, None, header))
        try:
            raw_lines = response.readlines()
        finally:
            response.close()
    except (IOError, ValueError):
        # IOError covers URLError/HTTPError and socket failures; ValueError
        # is raised for a malformed URL.
        print("Could not find requested page: %s" % url)
        return []
    return [_to_text(line).strip() for line in raw_lines]


def _links_after(doc, index):
    """Return the hrefs on the line following *index* ([] if there is none)."""
    if index is None or index + 1 >= len(doc):
        return []
    return _HREF_RE.findall(doc[index + 1])


def influences(doc):
    """Extract the "Influenced by" and "Influenced" links from page lines.

    The links are taken from the line directly after the last line that
    carries the respective label. Each section is looked up independently,
    so a missing section yields an empty list without discarding the other.

    Args:
        doc: Sequence of text lines of the page.

    Returns:
        A tuple ``(influenced_by, influenced)`` of href lists.
    """
    by_line = None
    to_line = None
    for i, line in enumerate(doc):
        if "Influenced by" in line:
            by_line = i
        elif "Influenced" in line:
            # ``elif``: an "Influenced by" label also contains "Influenced"
            # and must not be mistaken for the "Influenced" label.
            to_line = i
    return _links_after(doc, by_line), _links_after(doc, to_line)


def get_influences(page):
    """Fetch *page* and return its ``(influenced_by, influenced)`` links."""
    return influences(get(page))


# Article the crawl starts from.
page = "/wiki/Immanuel_Kant"


def main(start_page=None):
    """Print the "influenced" edges for two levels below *start_page*.

    First-level edges are always printed. A second-level edge is printed
    only the first time its target is seen; targets are recorded in
    ``been_there``.
    """
    if start_page is None:
        start_page = page
    for inf in get_influences(start_page)[1]:
        print("%s -> %s;" % (start_page, inf))
        for inf2 in get_influences(inf)[1]:
            if inf2 not in been_there:
                been_there[inf2] = True
                print("  %s -> %s;" % (inf, inf2))


if __name__ == "__main__":
    main()