|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
3 q; }7 N6 s+ a. u) N$ X; R+ z3 {6 L6 L$ F
( j8 }/ s2 X M# d9 t& T( A
# -*- coding: utf-8 -*-
# NOTE: Python only honours the encoding declaration above when it sits on
# the first or second line of the file.

import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Your Baidu OCR credentials: APPID, API key (AK) and secret key (SK).
# The 'xxx' values are placeholders and must be replaced before running.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client; checkcaptcha() sends the cropped captcha image through it.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# Optional proxy for Chrome; enable together with the commented-out
# '--proxy-server' argument below.
#PROXY = "127.0.0.1:8118"

# Command-line switches passed to the Chrome instance driven by Selenium.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win (presumably Windows-only switches to quieten Chrome logging; left
# disabled by the original author).
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw content.

    :param filePath: path of the file to read.
    :return: the complete content of the file as a byte string.
    :raises IOError: if the file cannot be opened or read.
    """
    # Binary mode: the content is handed to the OCR client unchanged.
    with open(filePath, 'rb') as fp:
        return fp.read()
import os

# '~' is a shell convention and is not expanded when a program is launched
# directly, so resolve the chromedriver path explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),chrome_options=chrome_options)
# Fixed window size; checkcaptcha() resizes its screenshot to 1269x727
# before cropping — presumably tuned to this size (TODO confirm).
driver.set_window_size(1280, 727)

# Open the site and give the login form time to load.
driver.get("https://youlikehits.com/")
time.sleep(5)

# Placeholder credentials: replace with a real account before running.
driver.find_element_by_id("username").send_keys('11111') #user
driver.find_element_by_id("password").send_keys('111111') #password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Go to the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
def checkRefresh(driver):
    """Click the page's 'loadmore' element when it is present.

    The original author labelled this step "Try Again"; presumably the
    element is the site's retry control (TODO confirm against the page).
    After the click the window is set back to 1280x727.

    :param driver: Selenium WebDriver showing the page.
    :return: None. Every exception is swallowed on purpose.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort: the lookup raises whenever the element is not on the
        # page, and that must not interrupt the caller's loop.
        return
def _solve_captcha_text(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    :param text: expression text returned by OCR, e.g. u'3\xd74' or '6x2'.
    :return: the numeric value of the expression.
    :raises ValueError: if the text is not a plain arithmetic expression.
    """
    # Map the multiplication/division glyphs OCR may return onto operators.
    for pattern, operator in ((r'\xd7', r'*'), (r'\xf7', r'/'), (r'x', r'*'), (r'X', r'*')):
        text = re.sub(pattern, operator, text)
    # SECURITY: the text originates from a remote image and is passed to
    # eval(). Only digits, whitespace and basic arithmetic characters are
    # let through, and '**' is refused so a misread cannot trigger a huge
    # exponentiation.
    if '**' in text or not re.match(r'^[\d\s+\-*/().]+$', text):
        raise ValueError('unexpected captcha text: %r' % (text,))
    result = eval(text)
    # On Python 3 '/' yields a float; report whole results as integers.
    if isinstance(result, float) and result.is_integer():
        result = int(result)
    return result


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if there is one.

    Takes a screenshot, crops it to the captcha image, sends the crop to
    the OCR client, evaluates the recognised expression and submits it.

    :param driver: Selenium WebDriver showing the page.
    :return: 1 when an answer was submitted; 0 when no element with id
        'captcha' was found or any step failed.
    """
    try:
        driver.find_element_by_id('captcha')
    except Exception:
        # The lookup raises when the page shows no captcha.
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used in headless mode. The original author used
        # (1269, 610) when running with headless mode switched off.
        resized = screenshot.resize((1269, 727),Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a non-empty string
        # literal, so it matches every div; [@id='captcha'] may have been
        # intended. Kept unchanged — confirm against the page.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)
        answer = _solve_captcha_text(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(answer))

        driver.find_element_by_name('answer').send_keys(str(answer))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # A captcha was present but could not be solved; report why instead
        # of failing silently, and let the caller carry on.
        print('captcha error: %r' % (e,))
        return 0
def followbutton(driver):
    """Click the page's follow button and inspect the window it opens.

    NOTE(review): the source's indentation was lost; the nesting of the
    ``if`` body below is reconstructed from the logic — confirm.

    :param driver: Selenium WebDriver; window 0 is expected to be the
        main page and window 1 the one opened by the click.
    :return: the text of the 'currentpoints' element as read before the
        click, or 0 if any step raised.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        # Strip all whitespace so the position check is layout independent.
        video_source = ''.join(driver.page_source.split())
        # "This" within the first 100 characters presumably marks an error
        # page (e.g. "This video is unavailable") — TODO confirm.
        if 0 <= video_source.find("This") < 100:
            print(video_source)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Best effort: the caller treats 0 as "no points value available".
        return 0
# Main loop: up to 5000 rounds of "solve captcha, click follow, wait, check
# points". The finally clause guarantees the browser is shut down even if
# the recovery reload itself fails.
# NOTE(review): the source's indentation was lost; the nesting below is
# reconstructed from the logic — confirm, in particular time.sleep(5).
try:
    for _ in range(0,5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)

            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            # Unchanged points after the wait: reload the page and retry.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the error is visible even if the reload fails.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码 3 s O6 f" p5 a* r w: T6 ^# H& b
$ Q7 `0 e+ l* n0 W& f
+ f% t% y# [! c& l* `: A7 Z' `! O4 c
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
0 Y. s+ E$ v \5 w
; K. V* A- l& H( R2 | a# P |
评分
-
查看全部评分
|