|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
* K5 M W3 Y$ [6 M' l; [6 k: I( p1 ]3 r# n+ ]0 _7 ^
' s/ n9 k* [. q2 ~7 R0 `& b" U" ?- 4 [+ r# w" Q2 V6 n9 O, }
# -*- coding: utf-8 -*-
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Baidu AIP OCR credentials: replace the placeholders with your own
# APP_ID, API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server option below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, incognito, certificate errors ignored.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Extra switches the author left disabled (labelled "Win" in the original).
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')

# Helper below reads an image file from disk.
def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*.

    The file is opened in binary mode and closed again before returning.
    Any error raised by ``open`` or ``read`` propagates to the caller.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
- . g# H$ U; ]& i: W r% Q- @
import os

# Start Chrome with the options configured above. The chromedriver path is
# handed to the driver launcher as-is, and '~' is only expanded by a shell,
# so resolve the home directory explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Open the site and give the login form time to load.
driver.get("https://youlikehits.com/")
time.sleep(5)

# Placeholder credentials: replace with a real user name and password.
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Go to the page the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
- V8 z" a1 Q- v9 x6 ]# ~& |0 \
def checkRefresh(driver):
    """Click the element with id 'loadmore' if the page has one.

    The original comment labels this "Try Again"; presumably that is the
    caption of the button -- confirm against the page. After a successful
    click the window is reset to 1280x727.

    This is deliberately best-effort: if the element is missing or the
    click fails, the error is swallowed and nothing happens.

    Returns None.
    """
    try:
        refresh_button = driver.find_element_by_id('loadmore')
        refresh_button.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # No button (or it could not be clicked): nothing to refresh.
        pass
- D$ |+ i1 i7 T! U: r
def checkcaptcha(driver):
    """Answer the captcha on the current page, if there is one.

    Takes a screenshot, crops it to the captcha image, sends the crop to
    the Baidu OCR ``client``, evaluates the recognised text as an
    arithmetic expression and submits the result through the 'answer'
    field.

    Returns 1 when an answer was submitted. Returns 0 when any step
    raised, which includes the case where no element with id 'captcha'
    exists.
    """
    try:
        # Presence check only: raises (and we return 0) without a captcha.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # The resize target depends on the browser mode. The author's value
        # for non-headless mode was (1269, 610); (1269, 727) is for headless.
        page = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a constant non-empty
        # string, so it is always true and this matches an <img> under any
        # <div>. Probably [@id='captcha'] was intended -- confirm against
        # the page before changing it.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']

        page.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        # Normalise the operators OCR may return: U+00D7 (multiplication
        # sign), 'x' and 'X' become '*', U+00F7 (division sign) becomes '/'.
        yzm = re.sub(r'\xd7', r'*', ocr_result['words_result'][0]['words'])
        yzm = re.sub(r'\xf7', r'/', yzm)
        yzm = re.sub(r'x', r'*', yzm)
        yzm = re.sub(r'X', r'*', yzm)

        # SECURITY: eval() executes whatever text the OCR service returned.
        # That text comes from a remote response built from a web page, so
        # it is untrusted input. Replace this with a restricted arithmetic
        # parser before running the script anywhere that matters.
        yzm = eval(yzm)

        print('验证码: ' + str(yzm))
        driver.find_element_by_name('answer').send_keys(yzm)
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # Any failure (no captcha, OCR miss, unparsable text) means "not solved".
        return 0
* [: B4 [4 X$ X* ]' s; N$ f& a
def followbutton(driver):
    """Click the page's follow button and report the points shown before it.

    Works on the first window: reads the text of the 'currentpoints'
    element, clicks the ``a[class=followbutton]`` link, then switches to
    the second window and inspects its page source. If "This" occurs
    within the first 100 characters of the whitespace-stripped source
    (presumably an error page -- confirm), the source is printed and the
    first window is reloaded.

    Returns the 'currentpoints' text read before the click, or 0 if any
    step raised.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        marker_at = VideoSource.find("This")
        # NOTE(review): indentation was lost in the source; the extent of
        # this branch is reconstructed -- confirm.
        if 0 <= marker_at < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Missing element, missing second window, etc.: report failure as 0.
        return 0
( t: n+ [6 d$ { - # z: \4 O c8 c+ z! {# x6 K
# Main loop: up to 5000 rounds of captcha check, refresh check and follow
# click, followed by a 65-second wait. The try/finally makes sure the
# browser is shut down even when the loop is interrupted or an error
# escapes it.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Points unchanged since before the click: reload the page.
            # NOTE(review): indentation was lost in the source; the sleep
            # is assumed to belong to this branch -- confirm.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the message is not lost if the reload fails too.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码
# d* m! T: Y. e ~! D8 K
! G# M# a, U: B# Z9 Q) S
因为使用了 headless 模式,可以挂在 VPS 上跑。百度 OCR 识别验证码的正确率还是蛮高的。
1 `/ F( J& S% a1 u k2 s |
评分
-
查看全部评分
|