|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
( ~6 n" Y3 A% J+ w3 F" {. h4 S- t3 e9 ]- W% @
- + _5 @9 U/ e) I/ c! ]) v& T3 a
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Baidu OCR credentials: fill in your own APP_ID, API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'  # the opening quote was missing, which was a syntax error

# OCR client used by checkcaptcha() to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server option below.
# PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Options the original author labelled "Win" (left disabled):
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")

# chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete contents of the file, read in binary mode.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # Binary mode: the bytes are handed to the OCR client unchanged.
    with open(filePath, 'rb') as fp:
        return fp.read()
* w" @' C* T. H( L$ V- Y! E3 u2 G' z
# The driver path is handed to the OS as-is (no shell is involved), so "~"
# must be expanded explicitly or the executable is not found.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Log in through the form on the landing page.
driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed wait before touching the login form
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the rest of the script operates on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
' C$ K/ i0 d5 ^' I* q% y, p- E2 H' Z
; j# k: u) A3 N4 [2 A- #Try Again
5 `9 e6 |6 _' `# i9 { - & r" f4 K" A. P: B! R) m: r' v
def checkRefresh(driver):
    """Click the ``loadmore`` element if it exists, then reset the window size.

    Args:
        driver: Selenium WebDriver positioned on the page to check.

    Returns:
        None. Any failure (typically: no ``loadmore`` element on the page)
        is ignored on purpose.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        # Re-apply the size used at start-up; checkcaptcha() crops the
        # screenshot using coordinates that depend on this geometry.
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort: the button is only present some of the time, and a
        # missing button must not interrupt the main loop.
        pass
3 u" Y8 t5 k8 V
def _solve_captcha_text(text):
    """Evaluate OCR'd arithmetic such as ``3 x 4`` and return the number."""
    # Map the multiplication sign (U+00D7) and the letters x/X to "*",
    # and the division sign (U+00F7) to "/".
    expression = re.sub(r'[\xd7xX]', '*', text)
    expression = re.sub(r'\xf7', '/', expression)
    # SECURITY: ``text`` is OCR output from a remote image, i.e. untrusted.
    # It is still evaluated with eval(), so only plain arithmetic is let
    # through ("**" is rejected to avoid huge exponentiations) and builtins
    # are removed from the evaluation namespace.
    if '**' in expression or not re.match(r'^[0-9+\-*/(). ]+$', expression):
        raise ValueError('unexpected captcha text: %r' % (text,))
    answer = eval(expression, {'__builtins__': {}}, {})
    # On Python 3 "/" yields a float; type "4" rather than "4.0".
    if isinstance(answer, float) and answer.is_integer():
        answer = int(answer)
    return answer


def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha if the page shows one.

    The page is screenshotted, the captcha image is cropped out using the
    element's location and size, the crop is sent to the OCR client, and the
    recognised expression is evaluated and typed into the ``answer`` field.

    Args:
        driver: Selenium WebDriver positioned on the page to check.

    Returns:
        1 if a captcha was found, solved and submitted; 0 if there was no
        captcha or any step failed.
    """
    try:
        driver.find_element_by_id('captcha')
    except Exception:
        # No captcha on this page: nothing to do, and not worth logging.
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used with headless mode enabled; the original author used
        # (1269, 610) when headless mode was turned off.
        scaled = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the ['#captcha'] predicate is a constant non-empty
        # string, which XPath treats as always true, so this matches the
        # image under any div. Confirm whether [@id='captcha'] was intended.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        scaled.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)
        yzm = _solve_captcha_text(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))
        # Always send a string so the typed text does not depend on the
        # numeric type of the result.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # A captcha was present but could not be solved; report it instead
        # of failing silently, then let the caller carry on.
        print('captcha error: ' + repr(e))
        return 0
/ W7 L$ A. F) W4 U) l - 4 u, s8 L1 A7 C( I$ Q
def followbutton(driver):
    """Click the first follow button and return the points shown beforehand.

    Switches to the first window, reads the ``currentpoints`` text, clicks
    the first ``a[class=followbutton]`` link, then switches to the second
    window and inspects its page source.

    Args:
        driver: Selenium WebDriver with the listing page in its first window.

    Returns:
        The ``currentpoints`` text read before the click, or 0 if any step
        raised (for example when no second window was opened).
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()
        driver.switch_to_window(driver.window_handles[1])

        # Whitespace is stripped so the position check below is independent
        # of how the page is formatted.
        VideoSource = ''.join(driver.page_source.split())
        # "This" within the first 100 characters is treated as a failed
        # page; presumably an error message -- TODO confirm.
        # NOTE(review): indentation was lost in the source; the five
        # statements below are assumed to form the body of this branch.
        if 0 <= VideoSource.find("This") < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Callers compare the result with the points text, so 0 never
        # compares equal to it.
        return 0
- 2 Z3 c1 R: V7 n, i j
# Main loop: handle a captcha if one is shown, click a follow button, wait,
# then compare the points before and after.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)
            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Unchanged points: reload the listing page.
            # NOTE(review): indentation was lost in the source; the sleep is
            # assumed to belong to this branch.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first, so the error is visible even if the reload
            # below fails as well.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    # Always shut the browser down, including on Ctrl-C or when the reload
    # in the handler above raises.
    driver.quit()
: Q5 t) Q# L; B. p4 W# O9 e
复制代码
% ~6 W( A& F- z9 s1 U" h5 R5 f2 ~* ~9 O, M0 g/ I
因为使用了 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
1 I0 c# M" z9 ^9 l s6 f: w8 ~4 [7 q, u: W. w& m3 t
|
评分
-
查看全部评分
|