|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑

# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Baidu OCR credentials: application id, API key (AK) and secret key (SK).
# NOTE(review): these are placeholders -- fill in real values before running,
# and keep real secrets out of version control.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used by checkcaptcha() to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server option below.
#PROXY = "127.0.0.1:8118"

# Chrome runs headless so the script can be left running on a server.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Windows: optional switches, presumably to quieten Chrome's console logging.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")

#chrome_options.add_argument('--no-sandbox')

# Read the image file.
def get_file_content(filePath):
    """Return the complete contents of the file at *filePath* as bytes.

    The file is opened in binary mode, so the data is returned exactly as
    stored on disk (no decoding or newline translation).

    Raises IOError/OSError if the file cannot be opened or read.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()

# Start Chrome with the options configured above.  The driver path goes through
# expanduser() because "~" is only expanded by a shell, and the chromedriver
# executable is launched without one.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
# Fixed window size; the same size is re-applied in checkRefresh().
driver.set_window_size(1280, 727)

# Log in through the site's login form.
driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed wait for the page to finish loading
driver.find_element_by_id("username").send_keys('11111')  # user (placeholder)
driver.find_element_by_id("password").send_keys('111111')  # password (placeholder)
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)

def checkRefresh(driver):
    """Click the element with id ``loadmore`` if the page has one.

    The original author labels this control "Try Again".  After a successful
    click the window is set back to 1280x727.

    This is deliberately best-effort: when the element is missing (or the
    click fails) the function does nothing and returns None.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # No "loadmore" control on the page -- nothing to refresh.
        pass

# Only digits, whitespace, parentheses, a decimal point and + - * / may reach
# eval() in _solve_captcha_text().
_CAPTCHA_EXPR_RE = re.compile(r'^[0-9\s()+\-*/.]+$')


def _solve_captcha_text(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    The multiplication sign (U+00D7) and the letters ``x``/``X`` are treated
    as ``*``; the division sign (U+00F7) is treated as ``/``.

    Returns the numeric result; a float with an integral value is returned as
    an int so that it is typed as e.g. "4" rather than "4.0".

    Raises ValueError if the text contains anything other than a plain
    arithmetic expression, and whatever eval() raises for malformed input
    (SyntaxError, ZeroDivisionError, ...).
    """
    expr = re.sub(r'[\xd7xX]', '*', text)
    expr = re.sub(r'\xf7', '/', expr)
    # "**" is rejected so that a misread such as "9xx9" cannot trigger a huge
    # exponentiation.
    if '**' in expr or not _CAPTCHA_EXPR_RE.match(expr):
        raise ValueError('unexpected captcha text: %r' % (text,))
    # SECURITY: the text comes from OCR of a remote page, i.e. it is untrusted.
    # eval() is only reached after the whitelist check above and runs without
    # builtins or names.
    result = eval(expr, {'__builtins__': {}}, {})
    if isinstance(result, float) and result.is_integer():
        result = int(result)
    return result


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if there is one.

    Steps: take a screenshot, crop it to the captcha image, send the crop to
    the Baidu OCR client, evaluate the recognised expression and submit the
    result through the ``answer`` / ``submit`` form fields.

    Returns 1 when an answer was submitted, 0 when the page has no element
    with id ``captcha`` or when any step fails.
    """
    try:
        driver.find_element_by_id('captcha')
    except Exception:
        # No captcha on this page: nothing to do.
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')

        # Rescale the screenshot before cropping with the element's page
        # coordinates.  (1269, 727) is the size used in headless mode; the
        # original author used (1269, 610) with a visible browser window.
        # LANCZOS is the same filter as the former Image.ANTIALIAS alias.
        page = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the predicate ['#captcha'] is a non-empty string
        # literal and therefore always true -- it does not restrict the match
        # to the captcha container.  Kept unchanged; confirm before tightening.
        img = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        location = img.location
        size = img.size
        box = (
            location['x'],
            location['y'],
            location['x'] + size['width'],
            location['y'] + size['height'],
        )
        page.crop(box).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)

        answer = _solve_captcha_text(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(answer))

        driver.find_element_by_name('answer').send_keys(str(answer))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # Report instead of failing silently; the caller still just sees 0.
        print('captcha error: %r' % (e,))
        return 0

def followbutton(driver):
    """Click the first ``a.followbutton`` link and return the points shown.

    The points are read from the ``currentpoints`` element in the first
    window *before* the link is clicked.  After the click the function
    switches to the second window; if the text "This" occurs within the first
    100 characters of that page's whitespace-stripped source, the page source
    is printed and the first window is reloaded.

    Returns the ``currentpoints`` text, or 0 if any step fails.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        # NOTE(review): assumes the click opened (or reuses) the window at
        # index 1; windows are never closed here -- confirm they do not pile up.
        driver.switch_to_window(driver.window_handles[1])

        video_source = ''.join(driver.page_source.split())
        marker_at = video_source.find("This")
        if 0 <= marker_at < 100:
            # "This" near the very top -- presumably an error page rather
            # than a playable video; go back and reload the task list.
            print(video_source)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception as e:
        # Report instead of failing silently; the caller still just sees 0.
        print('followbutton error: %r' % (e,))
        return 0

# Main loop: up to 5000 rounds of "solve captcha, refresh, follow, wait".
# The outer try/finally guarantees that Chrome is shut down even when the
# recovery reload fails or the script is interrupted.
try:
    for _ in range(5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)  # fixed wait before re-reading the points

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            if points == tmpp:
                # Points did not change during this round: reload the page.
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Print first, so the error is not lost if the reload also fails.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()

复制代码

因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。

|
评分
-
查看全部评分
|