|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Baidu AIP OCR credentials (APP_ID / API key / secret key).
# Replace the 'xxx' placeholders with your own values.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, no GPU, incognito, ignore TLS errors.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')

def get_file_content(filePath):
    """Read a file (here: the captcha image) and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete content of the file, read in binary mode.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()

# Start Chrome with the options configured above.  The driver binary is
# launched directly (no shell), so '~' has to be expanded explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Open the site, wait for the page, then fill in and submit the login form.
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Go to the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)

# Try Again
def checkRefresh(driver):
    """Click the element with id 'loadmore' if it can be found.

    After a successful click the window is resized back to 1280x727.  This is
    a best-effort step: any exception (for example the element not being
    present) is swallowed and the function simply returns.

    Args:
        driver: Selenium WebDriver showing the current page.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately ignored: the element is not expected on every page.
        pass

def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha on the current page.

    Steps: look up the element with id 'captcha', take a screenshot, crop it
    to the captcha image, send the crop to the OCR client, turn the recognised
    text into an arithmetic expression, evaluate it, type the result into the
    'answer' field and click 'submit'.

    Args:
        driver: Selenium WebDriver showing the current page.

    Returns:
        1 if an answer was submitted, 0 if any step raised (including the
        captcha element not being found).
    """
    try:
        # Raises when no captcha is shown, which ends up as "return 0".
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        im = Image.open('/tmp/screenshot.png')

        # With headless mode disabled this size was used instead:
        #a = im.resize((1269, 610),Image.ANTIALIAS)
        # With headless mode enabled:
        resized = im.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the ['#captcha'] predicate is a plain string literal,
        # so it does not restrict the div -- possibly [@id='captcha'] was
        # intended; confirm against the page before changing it.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        result = client.basicGeneral(image)
        print(result)

        # Map the multiplication/division signs (and the letters OCR tends to
        # return for them) onto Python operators.
        yzm = re.sub(r'\xd7', r'*', result['words_result'][0]['words'])
        yzm = re.sub(r'\xf7', r'/', yzm)
        yzm = re.sub(r'x', r'*', yzm)
        yzm = re.sub(r'X', r'*', yzm)

        # SECURITY: the text comes from an external OCR service and is passed
        # to eval().  Only plain arithmetic is allowed through; anything else
        # is rejected and reported as a failure (return 0) like other errors.
        if not re.match(r'^[0-9+\-*/(). ]+$', yzm):
            raise ValueError('unexpected captcha text: %r' % (yzm,))
        yzm = eval(yzm)
        print('验证码: ' + str(yzm))

        # Sent as text so that a non-integer result can still be typed.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        return 0

def followbutton(driver):
    """Click the follow button and return the points shown beforehand.

    Switches to the first window, reads the text of the 'currentpoints'
    element, clicks the ``a[class=followbutton]`` link and switches to the
    second window.  If the whitespace-stripped page source of that window
    contains "This" within its first 100 characters, the source is printed and
    the first window is reloaded.

    Args:
        driver: Selenium WebDriver with the main page in its first window.

    Returns:
        The 'currentpoints' text read before clicking, or 0 if any step
        raised.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        marker = VideoSource.find("This")
        # NOTE(review): the original indentation was lost; the five statements
        # below are assumed to all belong to this branch -- confirm.
        if 0 <= marker < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        return 0

# Main loop: up to 5000 rounds of captcha check -> load more -> follow, then
# compare the points before and after; reload the page when they are equal.
# The finally clause makes sure the browser is closed even when the error
# handler itself fails or the script is interrupted.
try:
    for i in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # NOTE(review): the original indentation was lost; the reload and
            # the 5 s pause are assumed to both belong to this branch.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the message is not lost if the reload fails too.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()

复制代码

因为使用了 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。

|
评分
-
查看全部评分
|