|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
( C& J$ E3 j: h; I) b) N4 q. \" x/ C& n
* |' j& _* r$ W
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Baidu AIP OCR credentials: APP_ID, API key (AK) and secret key (SK).
# Replace the 'xxx' placeholders with your own values.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client built from the credentials above.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# Optional proxy for the browser; enable it together with the commented-out
# --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up options (headless, incognito, certificate errors ignored).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win (presumably Windows-specific): optional flags that reduce Chrome's
# console logging.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")

#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete contents of the file, read in binary mode.

    Raises:
        IOError: If the file cannot be opened or read (``OSError`` on
            Python 3).
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome through the chromedriver binary in the user's home directory.
# The path is expanded explicitly: '~' is only expanded by a shell, not when
# an executable path is handed straight to the driver.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Log in. The user name and password below are placeholders.
driver.get("https://youlikehits.com/")
time.sleep(5)  # wait for the login page to load
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
# "Try Again" handling.
def checkRefresh(driver):
    """Click the element with id ``loadmore`` if it can be found (best effort).

    After a successful click the browser window is reset to 1280x727.  Any
    failure (for example the element not being on the page) is deliberately
    ignored so that the caller can simply carry on.

    Args:
        driver: Selenium WebDriver showing the page to check.

    Returns:
        None.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort only: nothing to click, or the click itself failed.
        pass
# Only digits, whitespace, parentheses, '.' and the four arithmetic operators
# may reach eval() in _solve_captcha_expression.
_CAPTCHA_EXPRESSION = re.compile(r'^[0-9\s.()+\-*/]+$')


def _normalize_captcha_expression(text):
    """Rewrite the OCR text of the captcha as a Python arithmetic expression."""
    # U+00D7 (multiplication sign) and the letters x/X all stand for '*'.
    text = re.sub(r'[\xd7xX]', '*', text)
    # U+00F7 (division sign) stands for '/'.
    return re.sub(r'\xf7', '/', text)


def _solve_captcha_expression(text):
    """Evaluate the arithmetic captcha ``text`` and return the result.

    Raises:
        ValueError: If the normalised text contains anything other than
            digits, whitespace, parentheses, '.' and ``+ - * /``.
    """
    expression = _normalize_captcha_expression(text)
    if not _CAPTCHA_EXPRESSION.match(expression):
        raise ValueError('unexpected captcha text: %r' % (text,))
    # SECURITY: the text comes from OCR of a remote page, i.e. it is
    # untrusted.  eval() is kept, but only after the character check above
    # and with no builtins or names available to the expression.
    return eval(expression, {'__builtins__': {}}, {})


def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha on the current page, if any.

    The page is screenshotted, the captcha image is cropped out and sent to
    the OCR ``client``, the recognised expression is evaluated, and the
    result is typed into the ``answer`` field and submitted.

    Args:
        driver: Selenium WebDriver showing the page to check.

    Returns:
        1 if a captcha was found and an answer was submitted; 0 if there is
        no element with id ``captcha`` or any step failed.
    """
    try:
        # Raises when the page has no captcha, which leads to ``return 0``.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Headless mode: resize the screenshot to 1269x727 (presumably so
        # that its pixels line up with the element coordinates used below).
        # With headless mode disabled the original used (1269, 610) instead.
        resized = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a non-empty string
        # literal, which XPath treats as always true, so this matches the
        # image under any <div>.  Probably meant [@id='captcha'] -- left
        # unchanged, confirm against the page.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(result)

        # Only the first recognised line is used.
        answer = _solve_captcha_expression(result['words_result'][0]['words'])
        print('验证码: ' + str(answer))

        driver.find_element_by_name('answer').send_keys(str(answer))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # No captcha on the page, OCR failure, unreadable expression, ...
        return 0
def followbutton(driver):
    """Click the follow link in the first window and check the second one.

    Reads the text of the ``currentpoints`` element in the first window,
    clicks the link matched by ``a[class=followbutton]`` and switches to the
    second window.  If "This" occurs within the first 100 characters of that
    window's whitespace-stripped page source, the source is printed and the
    first window is reloaded.

    Args:
        driver: Selenium WebDriver with the task page open.

    Returns:
        The points text read before clicking, or 0 if any step failed.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        # The link is fetched through JavaScript and then clicked.
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        page_text = ''.join(driver.page_source.split())
        # NOTE(review): the extent of this branch was reconstructed from a
        # listing that had lost its indentation -- confirm.
        if 0 <= page_text.find("This") < 100:
            print(page_text)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        return 0
# Main loop: handle a captcha if one is shown, click the follow button, wait,
# then compare the points before and after.  The try/finally makes sure the
# browser is closed even if the loop is interrupted or the recovery reload
# in the except branch fails.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Points unchanged after the wait: reload the task page.
            # NOTE(review): whether the 5 second sleep belongs inside this
            # branch was reconstructed from a listing that had lost its
            # indentation -- confirm.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first, so the message is not lost if the reload fails.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码
4 Z5 W; k& R* a/ O% i) e7 N: W R+ X' J# F* ?+ o2 x- U1 d
, Q3 w& m# B4 T% U2 C5 y) F/ g& h3 K
因为使用了 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
* R! \! [, J# P9 S- t' F T5 a, z( a- Q' L" t# r
|
评分
-
查看全部评分
|