本帖最后由 阿百川 于 2013-4-2 14:41 编辑
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import urllib
import urllib2
- def sunWeb(proxy='http://g.cn:80'):
- ph = urllib2.ProxyHandler({"http":proxy,"https":proxy})
- opener = urllib2.build_opener(ph)
- opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20100101 Firefox/15.0'),
- ('Accept-Language','en-us,en;q=0.5')]
- return opener
# Captures the count (digits and commas) printed before "result"/"results"
# inside the resultStats div, e.g. 'About 1,234 results'. Case-insensitive.
GETNUM = re.compile(r'<div id=.?resultStats.?>[^\d<]*([\d,]+) results?<', re.I)
def ggNum(key):
    """Return the result count Google reports for a search query.

    Requests the English (``hl=en``) results page for *key* through the
    module-level opener ``br`` and extracts the number matched by the
    module-level ``GETNUM`` pattern.

    Args:
        key: Search query, e.g. 'site:google.com'. It is URL-encoded with
            ``urllib.quote_plus`` before being placed in the request URL.

    Returns:
        A string: the count as printed on the page (digits and commas,
        e.g. '1,234'); '0' when the page says the search matched nothing
        or no count is found; 'IP Block' when the response lacks the
        marker expected on a normal results page.
    """
    baseurl = 'http://www.google.com/search?hl=en&pws=0&query=%s&source=ig&sky=rndy'
    ggurl = baseurl % urllib.quote_plus(key)
    response = br.open(ggurl)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    # A page without this marker is treated as a block/captcha page. The
    # original had a bare 'IP Block' expression here, which did nothing.
    if 'schema.org/WebPage' not in html:
        return 'IP Block'
    if 'Your search -' in html:
        return '0'
    match = GETNUM.search(html)
    return match.group(1) if match else '0'
# Shared opener that ggNum() reads as a module-level global; built with the
# default proxy.
br = sunWeb()
# Example queries; each prints the string returned by ggNum().
print(ggNum('site:google.com'))
print(ggNum('intitle:keyword'))
print(ggNum('"advertcn.com"'))
代码很简单,支持代理;默认代理是 g.cn:80,可以自行设置。
直接复制的代码带有干扰码,干净的版本在这里:http://pastebin.com/2JP7QpDY
|