|
代码是从国外网站上找的，原来的一些参数不能用了，我调整了一下。
如果觉得有用，给点鼓励。谢谢。
- #!/usr/bin/python
- # Copyright (C) 2010 <[email protected]>
- import urllib2, sys, getopt, time
- import socket
- from urllib import quote
- from xml.etree.ElementTree import XMLParser
- from threading import Thread
# Startup banner, printed once when the script is loaded.
for banner_line in (
    "\n***************************",
    "* Google Suggest Scrapper *",
    "* Coded by ____________ *",
    "* [email protected] *",
    "***************************\n",
):
    print(banner_line)

# Module-wide settings; run() overwrites both from the command line.
# (The worker counter `i` is created later, inside startThreads().)
recursiveLevel = 0  # value of -r: extra suggestion rounds per seed keyword
outputFile = ''     # value of -o: empty string means "do not write a file"
def usage():
    """Print the command-line help text, then terminate the program.

    This function never returns: it always finishes with ``sys.exit()``.
    """
    help_lines = (
        "Usage: GoogleSuggest.py [options] \n",
        " -k: Keywords: Keywords to use (separated by #)",
        " -f: File: File to read keywords from (overrides -k)",
        " -r: Recursive level (0-5): Use suggested keywords to get more keywords up to -r times [Default: 0]",
        " -t: Threads: Number of threads (default 5)",
        " -o: Output file: Save keywords found to file\n\n",
        "Examples:\n",
        " GoogleSuggest.py -k keyword1",
        " GoogleSuggest.py -k keyword1#keyword2#keyword3 -r 1",
        " GoogleSuggest.py -f keyword_file.txt -o keywords_found.txt -t 10",
    )
    for text in help_lines:
        print(text)
    sys.exit()
class do(Thread):
    """Worker thread that collects Google Suggest completions for one keyword.

    After the thread finishes, ``self.result`` holds one entry per suggestion
    received, in the form ``[[queried_keyword], [suggestion]]``.
    """

    def __init__( self, keyword ):
        Thread.__init__(self)
        self.keyword = keyword  # raw seed text handed over by the launcher
        self.result = []        # filled by getKeywords()

    def run(self):
        """Query the seed keyword, then re-query its suggestions.

        Level 0 queries the seed itself. Each further level (up to the
        module-level ``recursiveLevel``) queries every suggestion returned by
        the previous level, skipping the seed so it is not fetched twice.

        The module-level counter ``i`` is decremented when the thread ends,
        including when a request raises, so the launcher's bookkeeping is
        not left permanently too high.
        """
        global i
        try:
            words = self.keyword.split()
            if not words:
                # Blank keyword (e.g. an empty line in the keywords file).
                return
            # NOTE(review): only the first whitespace-separated word is used
            # as the seed, as in the original code -- confirm that dropping
            # the rest of a multi-word keyword is intended.
            seed = words[0]
            pending = []
            for level in range(recursiveLevel + 1):
                if level == 0:
                    pending = self.getKeywords(seed)
                else:
                    # Accumulate the suggestions of *every* keyword of this
                    # level; the next level (if any) iterates over them.
                    found = []
                    for keyword in pending:
                        if keyword == seed:
                            continue
                        found.extend(self.getKeywords(keyword))
                    pending = found
                time.sleep(0)  # yield to the other worker threads
        finally:
            i = i - 1

    def getKeywords(self, keyword):
        """Fetch the suggestions for ``keyword`` and record them.

        Every suggestion is appended to ``self.result`` as
        ``[[keyword], [suggestion]]``.

        Returns:
            The list of suggestion strings, in response order.

        Raises:
            Whatever ``urllib2.urlopen`` or the XML parser raises on a
            network or parse failure; nothing is swallowed here.
        """
        url = 'http://clients1.google.com/complete/search?output=toolbar&q='+quote(keyword)
        print(url)
        response = urllib2.urlopen(url)
        try:
            cont = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        suggestions = self._parseSuggestions(cont)
        for suggestion in suggestions:
            self.result.append([[keyword], [suggestion]])
        return suggestions

    @staticmethod
    def _parseSuggestions(cont):
        """Return the ``data`` attribute of every suggestion in an XML reply.

        ``cont`` is the XML document text. Only ``CompleteSuggestion``
        children of the root element are inspected; entries without a
        ``suggestion`` child or without a ``data`` attribute are skipped.
        An earlier version also read the ``num_queries`` element; only the
        suggestion text is read now.
        """
        parser = XMLParser()
        parser.feed(cont)
        tree = parser.close()
        found = []
        for entry in tree.findall('CompleteSuggestion'):
            node = entry.find('suggestion')
            if node is None:
                continue
            data = node.get('data')
            if data is not None:
                found.append(data)
        return found
def startThreads(keywords):
    """Run one ``do`` worker per keyword, then hand all results to output().

    At most ``th`` workers (module-level setting) run at the same time.
    ``keywords`` is consumed: entries are popped from the front as workers
    are started, so the list is empty when this function returns.

    Pressing Ctrl+C while workers are still being launched prints a message
    and exits the program.
    """
    global i
    i = 0
    # `th` may arrive as a string straight from the command line; make sure
    # the limit is a positive integer so the comparison below is meaningful.
    limit = max(1, int(th))
    threads = []
    ret = []
    while keywords:
        try:
            # Count live threads instead of trusting the shared counter `i`:
            # a worker that dies from an exception may never decrement it.
            running = sum(1 for t in threads if t.is_alive())
            if running < limit:
                keyword = keywords.pop(0)
                i = i + 1  # kept because do.run() decrements it
                thread = do(keyword)
                thread.start()
                threads.append(thread)
            else:
                # All slots busy: wait briefly instead of spinning at 100% CPU.
                time.sleep(0.05)
        except KeyboardInterrupt:
            print('Suspended by user...\n')
            sys.exit()
    for t in threads:
        t.join()
        ret.extend(t.result)
    output(ret)
def output(ret, path=None):
    """Print every collected suggestion and optionally save them to a file.

    Args:
        ret: list of ``[[keyword], [suggestion]]`` entries; only the
            suggestion (``entry[1][0]``) is used. The list is left unmodified.
        path: file to write the suggestions to, one per line, encoded as
            UTF-8. Defaults to the module-level ``outputFile``; an empty
            value means nothing is written.

    If the file cannot be opened, a message is printed and the program exits.
    """
    if path is None:
        path = outputFile
    lines = []
    for entry in ret:
        data = entry[1][0]
        lines.append(data + '\n')
        print(data)
    if path:
        # codecs.open encodes on write, so non-ASCII suggestions do not raise
        # UnicodeEncodeError the way a plain Python 2 file object would.
        import codecs
        try:
            f = codecs.open(path, 'w', 'utf-8')
        except IOError:
            print('Can\'t open output file\n')
            sys.exit()
        try:
            f.writelines(lines)
        finally:
            # Close the file even when writing fails.
            f.close()
def run(argv):
    """Parse the command line and start the scraping threads.

    Args:
        argv: the option list without the program name
            (``sys.argv[1:]``).

    Sets the module-level ``th``, ``recursiveLevel`` and ``outputFile`` from
    ``-t``, ``-r`` and ``-o``. Keywords come from ``-f`` (one per line) if
    given, otherwise from ``-k`` (separated by ``#``). Invalid or missing
    options show the usage text, which exits the program.
    """
    global th
    global recursiveLevel
    global outputFile
    th = 5
    # An option plus its value is the minimum useful command line.
    if len(argv) < 2:
        usage()
    try:
        opts, args = getopt.getopt(argv, 'k:f:r:t:o:')
    except getopt.GetoptError:
        usage()
    keywordArg = None
    fileKeywords = None
    for opt, arg in opts:
        if opt == '-k':
            keywordArg = arg
        elif opt == '-f':
            try:
                inputFile = open(arg, "r")
            except IOError:
                print('Can\'t open keywords file\n')
                sys.exit()
            try:
                # Drop line endings and blank lines: a blank keyword cannot
                # be queried.
                fileKeywords = [line.strip() for line in inputFile if line.strip()]
            finally:
                inputFile.close()
        elif opt == '-r':
            try:
                recursiveLevel = int(arg)
            except ValueError:
                usage()
            # The usage text documents the range 0-5.
            if not 0 <= recursiveLevel <= 5:
                usage()
        elif opt == '-t':
            # Must be an integer: the launcher compares it with a counter.
            try:
                th = int(arg)
            except ValueError:
                usage()
            if th < 1:
                usage()
        elif opt == '-o':
            outputFile = arg
    # -f overrides -k, as stated in the usage text.
    if fileKeywords is not None:
        inputKeywords = fileKeywords
    elif keywordArg is not None:
        inputKeywords = [k for k in keywordArg.split('#') if k.strip()]
    else:
        # Neither -k nor -f was supplied: nothing to look up.
        usage()
    startThreads(inputKeywords)
if __name__ == "__main__":
    # Script entry point: pass everything after the program name to run()
    # and turn Ctrl+C into a short message instead of a traceback.
    cli_args = sys.argv[1:]
    try:
        run(cli_args)
    except KeyboardInterrupt:
        print("Ctrl+C Exit By USER...\n")
        sys.exit()
复制代码 |
评分
-
查看全部评分
|