求一个英文转化语音的网站
用了一下这个网站:http://www.readthewords.com/ ,虽然可以把英文转化为语音,但是只能转换三次,而且每次只有30秒,之后就需要付费了。有没有其他可以把英文转化为语音的网站?google一下,好多。免费的,不限次... naturalreaders
# -*- coding: UTF-8 -*-
import requests
import os
import subprocess
import sys
from tqdm import tqdm
import time,random
from HTMLParser import HTMLParser
import sys
reload(sys)
sys.setdefaultencoding('utf8')
class WebRequest(object):
    """Thin wrapper around ``requests.get`` with a random User-Agent and bounded retries."""

    def __init__(self, *args, **kwargs):
        # No state is kept; arguments are accepted and ignored.
        pass

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return: one of the hard-coded browser User-Agent strings
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return: a fresh dict on every access, so callers may mutate it freely
        """
        return {'User-Agent': self.user_agent,
                'Accept': '*/*',
                'Connection': 'keep-alive',
                'Accept-Language': 'zh-CN,zh;q=0.8'}

    def get(self, url, header=None, retry_time=5, timeout=30,
            retry_flag=None, retry_interval=5, *args, **kwargs):
        """
        get method
        :param url: target url
        :param header: extra headers (dict) merged over the basic header
        :param retry_time: maximum number of attempts before giving up
        :param timeout: network timeout
        :param retry_flag: iterable of markers; if any is found in the content, retry
        :param retry_interval: retry interval(second)
        :param args: accepted for compatibility, unused
        :param kwargs: accepted for compatibility, unused
        :return: the ``requests`` response of the first successful attempt
        :raises: the last ``requests.RequestException`` or ``ValueError`` once
                 ``retry_time`` attempts have failed
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        # None stands in for "no markers"; avoids a shared mutable default.
        retry_flag = retry_flag or ()
        attempts_left = retry_time
        while True:
            try:
                html = requests.get(url, headers=headers, timeout=timeout, stream=True)
                # Reading .content pulls the whole body even though stream=True.
                content = html.content
                print('content size: %d' % len(content))
                if any(f in content for f in retry_flag):
                    raise ValueError('retry flag found in response: ' + url)
                if not content:
                    print('content is Null,retry...~' + url)
                    raise ValueError('empty response body: ' + url)
                return html
            except (requests.RequestException, ValueError) as e:
                print(e)
                attempts_left -= 1
                if attempts_left <= 0:
                    # Give up instead of looping forever on a dead URL.
                    raise
                time.sleep(retry_interval)
def DownloadAudio(url, path, refer=None):
    """Download *url* with ``WebRequest`` and write the body to *path*.

    :param url: address of the resource to fetch
    :param path: destination file; a leading ``~`` is expanded to the home directory
    :param refer: optional value for the ``Referer`` header; when given, only
                  ``User-Agent`` and ``Referer`` are sent as extra headers
    """
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
    if refer:
        hdr = {
            'User-Agent': user_agent,
            'Referer': refer}
    else:
        hdr = {
            'User-Agent': user_agent,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'}
    # Fetch first so a failed request does not leave an empty file behind.
    response = WebRequest().get(url, header=hdr)
    # open() does not expand "~"; resolve it explicitly.
    target = os.path.expanduser(path)
    with open(target, 'wb') as handle:
        # An explicit chunk size avoids iter_content()'s one-byte default.
        for block in tqdm(response.iter_content(chunk_size=8192), ascii=True, desc='ImageDownload'):
            if block:
                handle.write(block)
def translate(to_translate, to_langage="auto", langage="auto"):
    """Fetch the Google Translate mobile page for *to_translate*.

    Languages are given by their short code (French = fr, English = en,
    Spanish = es, etc...); both default to "auto".

    :param to_translate: text to send as the ``q`` query parameter
    :param to_langage: sent as the ``hl`` query parameter
    :param langage: sent as the ``sl`` query parameter
    :return: the response body split on "<", i.e. a list of page fragments,
             not the extracted translation

    NOTE(review): the earlier code defined an unused marker 'class="t0">',
    presumably the tag that precedes the translated text; no extraction
    is performed here, so callers must pick the fragment they need.
    """
    agents = {'User-Agent':"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)"}
    # Let requests percent-encode the values so "&", "#", "?" etc. in the
    # text cannot break the query string.
    params = [
        ('ie', 'UTF-8'),
        ('hl', to_langage),
        ('sl', langage),
        ('q', to_translate),
    ]
    request = requests.get("http://translate.google.com/m", params=params, headers=agents)
    return request.content.split("<")
# Module-level list (spelled `tsxt_sen`); nothing in the visible code appends to it.
tsxt_sen = []
from nltk.tokenize import sent_tokenize
def sentence_split(sentence):  # sentence-splitting helper
    """Split *sentence* on ASCII commas and return the pieces in order.

    :param sentence: text to split
    :return: list of the comma-separated pieces (whitespace is not trimmed);
             a string without commas yields a one-element list
    """
    # Return the pieces that were actually collected; the previous version
    # returned an unrelated, always-empty module-level list.
    return sentence.split(',')
# Sample passage to be converted to speech. NOTE: the name shadows the
# builtin `str`, but later statements in this script read it by that name.
str = '''
It has not quite been greeted with the enthusiasm of the bottles of Scotch in the novel Whisky Galore, but the arrival of a cargo of pineapples on the shores of northern Scotland has sent scavengers rushing to the beach.
The fruit, which has appeared on Shetland and on beaches across the Western Isles, is thought to have come from several containers lost overboard in the Atlantic last October by cargo ship MV Lombok Strait.
Shetland islanders have now shared photos of their fruitful haul, in a story that echoes the plot of Whisky Galore, the 1947 novel by Sir Compton Mackenzie, later made and remade for the big screen.
The story tells of how the locals find their island awash with whisky after a ship loaded with liquor is wrecked off a fictional Scottish island.
The plot was based on the real-life salvage operations carried out by locals when the SS Politician, carrying 280,000 bottles of malt whisky, ran aground on Eriskay in 1943.
The arrival of the pineapples will be far less lucrative for beachcombers, and not as profitable, either, as the grounding of the cargo ship MSC Napoli off Branscombe beach in Devon in 2007.
Then scavengers ignored police advice and arrived in their droves to explore shipping containers, which contained motorbikes, pet food, wine barrels and anti-wrinkle cream.
'''
# Single-line copy of the passage: outer whitespace trimmed, newlines
# turned into spaces.
text = str.strip().replace('\n', ' ')
# open() does not expand "~", so resolve the home directory explicitly.
with open(os.path.expanduser('~/text.txt'), 'wb') as handle:
    handle.write(text)
# Replace curly double quotes with plain ASCII ones.
str = str.replace('”', '"').replace('“', '"')
def splitStr(str):
    """Split *str* into two halves at a word boundary.

    The text is split on single spaces; the first ``len(words) // 2`` words
    form the first half and the remaining words the second half.

    :param str: text to split (the parameter name shadows the builtin and is
                kept only for interface compatibility)
    :return: two-element list ``[first_half, second_half]``
    """
    arr = str.split(' ')
    # Floor division keeps the index an int on both Python 2 and Python 3.
    half = len(arr) // 2
    # The first element must be the leading half, not the whole string,
    # otherwise the trailing half is emitted twice.
    return [' '.join(arr[:half]), ' '.join(arr[half:])]
#print splitStr('with wording from North Korea about getting rid of its nuclear weapons and a guarantee from the United States that it would not interfere with the North’s regime or demand redress for human rights abuses.')
# Build the list of text chunks to synthesise: whole sentences when they are
# at most 190 characters after whitespace normalisation, otherwise their
# comma-separated fragments (halved by splitStr when a fragment is still
# longer than 190 characters).
keyArr = []
sent_tokenize_list = sent_tokenize(str)
for sentence in sent_tokenize_list:
    compact = ' '.join(sentence.split())
    if len(compact) <= 190:
        keyArr.append(compact)
        continue
    for fragment in sentence.split(','):
        if len(fragment) > 190:
            keyArr.extend(splitStr(fragment))
        else:
            keyArr.append(' '.join(fragment.split()))
# Base endpoint of the Google Translate text-to-speech service.
API_URL = "http://translate.google.com/translate_tts?ie=UTF-8&tl=en-us"

try:
    from urllib import quote  # Python 2
except ImportError:
    from urllib.parse import quote  # Python 3

# open() does not expand "~", so work with the resolved directory throughout.
audio_dir = os.path.expanduser('~/tmp/audio')
if not os.path.isdir(audio_dir):
    os.makedirs(audio_dir)
# Remove clips left over from a previous run.
for stale in os.listdir(audio_dir):
    if stale.endswith('.mp3'):
        os.remove(os.path.join(audio_dir, stale))

# Normalise whitespace and drop empty chunks so no empty query is sent.
sentences = [' '.join(i.split()) for i in keyArr]
sentences = [juzi for juzi in sentences if juzi]

for namei, juzi in enumerate(sentences):
    # Percent-encode the text so characters such as "&" or "#" cannot
    # truncate or corrupt the query string.
    link = API_URL + "&q=" + quote(juzi) + "&client=tw-ob"
    print(link)
    path = os.path.join(audio_dir, "%d.mp3" % namei)
    DownloadAudio(link, path)
    # Report the detected file type of the downloaded clip.
    subprocess.call(['file', path])
    # Pause between requests.
    time.sleep(1)
https://azure.microsoft.com/en-us/services/cognitive-services/text-to-speech/
微软的,应该不错吧!~
页:
[1]