模拟点击 验证码识别 全后台 youtube订阅
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
""" 你的 APPID AK SK """
APP_ID = 'xxx'
API_KEY = 'xxx'
# Fixed: the opening quote was missing here, which made the whole file a
# syntax error.
SECRET_KEY = 'xxx'
# Baidu OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, GPU disabled, incognito, and certificate
# errors ignored.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')
# Windows: optional flags to silence Chrome's console logging.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
""" 读取图片 """
def get_file_content(filePath):
    """Read a file in binary mode and return its full contents.

    Args:
        filePath: Path of the file to read (the script passes the cropped
            captcha image here).

    Returns:
        The raw bytes of the file.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome through the chromedriver binary in the user's home directory.
# The "~" must be expanded explicitly: it is a shell convention and is not
# resolved when the path is handed to the driver launcher as-is.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
# checkcaptcha() crops the captcha out of a screenshot rescaled to 1269x727,
# so the window size is pinned here.
driver.set_window_size(1280, 727)

# Log in on the landing page. The fixed sleeps give the pages time to load.
driver.get("https://youlikehits.com/")
time.sleep(5)
# Placeholder credentials: replace with a real account before running.
driver.find_element_by_id("username").send_keys('11111') #user
driver.find_element_by_id("password").send_keys('111111') #password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
# "Try Again": click the element with id "loadmore" when the page has one.
def checkRefresh(driver):
    """Click the ``loadmore`` element if present and re-apply the window size.

    Args:
        driver: Selenium WebDriver showing the page to inspect.

    Returns:
        None. Any failure (typically: no ``loadmore`` element on the page) is
        ignored on purpose, so the caller can invoke this unconditionally.
    """
    try:
        load_more = driver.find_element_by_id('loadmore')
        load_more.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort: a missing or unclickable element is not an error here.
        pass
def _evaluate_captcha(text):
    """Turn the OCR text of an arithmetic captcha into its numeric answer."""
    # Map the multiplication sign, the letters x/X and the division sign to
    # Python operators.
    expression = re.sub(r'[\xd7xX]', '*', text)
    expression = re.sub(r'\xf7', '/', expression)
    # SECURITY NOTE: the text comes from an external OCR service and is passed
    # to eval(). Anything other than plain arithmetic is rejected first; the
    # resulting ValueError is handled by the caller like any other failure.
    if not re.match(r'^[0-9\s+\-*/().]+$', expression):
        raise ValueError('unexpected captcha text: %r' % (text,))
    answer = eval(expression)
    # On Python 3 "/" always yields a float; submit "4" rather than "4.0".
    if isinstance(answer, float) and answer.is_integer():
        answer = int(answer)
    return answer


def checkcaptcha(driver):
    """Detect the arithmetic captcha on the current page and try to answer it.

    Takes a screenshot, crops the captcha image out of it, sends the crop to
    the Baidu OCR ``client``, evaluates the recognised expression and submits
    the result through the ``answer`` field.

    Args:
        driver: Selenium WebDriver showing the page to inspect.

    Returns:
        1 if an answer was submitted, 0 if any step raised. A page without a
        ``captcha`` element takes the 0 path.
    """
    # NOTE(review): the predicate ['#captcha'] is a non-empty string literal,
    # so it does not filter the div at all; it may have been meant as
    # [@id='captcha']. Kept unchanged because the page markup is not visible.
    captcha_xpath = "//div['#captcha']//img[@src='captchayt.php']"
    try:
        # Presence probe only: raises when the page has no captcha.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used with headless mode enabled; the original author used
        # (1269, 610) with headless mode disabled.
        scaled = screenshot.resize((1269, 727), Image.ANTIALIAS)

        element = driver.find_element_by_xpath(captcha_xpath)
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        scaled.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)
        # 'words_result' is a list of {'words': ...} entries; indexing the
        # list with 'words' directly always raised. The first recognised line
        # is used as the expression.
        yzm = _evaluate_captcha(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # Covers "no captcha on this page" as well as OCR or parsing failures.
        return 0
def followbutton(driver):
    """Click the first link on the page and report the points seen beforehand.

    Args:
        driver: Selenium WebDriver on the listing page.

    Returns:
        The text of the ``currentpoints`` element read before clicking, or 0
        if any step raised.
    """
    try:
        # switch_to_window() takes a single handle; passing the whole
        # window_handles list always failed.
        # NOTE(review): index 0 is assumed to be the listing window - confirm.
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a")').click()
        # NOTE(review): assumes the click opens a window that shows up as the
        # last handle - confirm against the site's behaviour.
        driver.switch_to_window(driver.window_handles[-1])
        # Strip all whitespace so the position check below is layout-independent.
        VideoSource = ''.join(driver.page_source.split())
        marker_at = VideoSource.find("This")
        # NOTE(review): the paste lost its indentation, so the extent of this
        # branch is reconstructed: when "This" appears within the first 100
        # characters, dump the page, go back and reload the listing.
        if 0 <= marker_at < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        return 0
# Main loop: answer a captcha if one is shown, load more entries if offered,
# open the next link, wait, then compare the point balance before and after.
# driver.quit() sits in a finally block so the browser process is released
# even when the run is interrupted (e.g. Ctrl-C).
try:
    for i in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            # NOTE(review): presumably the time the opened page must stay
            # open before points are credited - confirm.
            time.sleep(65)
            # switch_to_window() takes one handle, not the whole list.
            # NOTE(review): index 0 is assumed to be the listing window.
            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Unchanged balance: reload the listing before the next attempt.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Recover by reloading the listing and carry on with the next round.
            driver.get("https://youlikehits.com/youtubenew2.php")
            print('error: ' + str(e))
finally:
    driver.quit()
因为是headless模式,可以挂在VPS上跑。百度识别验证码的正确率还是蛮高的。
高手啊,跟我写的几乎一样,我也是用百度做验证码识别的。建议可以增加cookies保存和读取功能,省得每次都得输入用户名和密码。他家是可以用cookies保持登录状态的。
自动在 https://youlikehits.com/ 点击么?
大佬太哇塞,自学了几天selenium,能看懂一小部分了。
用的是selenium库啊,我最近也写了一个小爬虫,也用了selenium+pywin32模拟点击。
小白龙 发表于 2018-10-18 15:11
用的是selenium库啊,我最近也写了一个小爬虫也用了selenium+pywin32模拟点击
小批量的还可以,多了实在是舍不得cpu。 并发也是个问题。卡卡的 娱乐还可以。个人观点。。哈哈
页:
[1]