位置:海鸟网 > IT > ASP.NET >

远程抓取GOOGLE的自动翻译结果

' Strips markup tags from an HTML fragment and returns the remaining text.
'
' strhtml - the HTML text to clean; may be Nothing or empty.
' Returns the input with every "<...>" tag removed and every space character
' deleted. Always returns a String: "" when the input is Nothing/empty or when
' nothing is left after cleaning (the original returned Nothing in those cases,
' which made callers fail on the next string method call).
Function RemoveHTML(strhtml)
    ' The parameter is an untyped Object; coerce it once and work with a String.
    Dim html As String = CStr(strhtml)
    If html Is Nothing OrElse html.Length = 0 Then
        Return ""
    End If

    ' Drop anything that looks like a tag: "<", one or more non-">" characters, ">".
    Dim s_str As String = Regex.Replace(html, "<[^>]+>", "")

    ' String.Replace is used instead of Microsoft.VisualBasic.Replace because the
    ' latter returns Nothing when its input is empty (e.g. the input was only tags).
    ' NOTE(review): this deletes every space, which also glues the words of an
    ' English result together; the literal may have been "&nbsp;" before the
    ' page was rendered to text - confirm against the original article.
    s_str = s_str.Replace(" ", "")
    Return s_str
End Function
 9       
' Posts a piece of text to the remote translation page and returns the
' translation scraped out of the HTML reply.
'
' texts     - the text to translate. Pass it raw; it is URL-encoded here.
' languages - the language-pair code sent as "langpair", e.g. "zh-CN|en".
' Returns the extracted translation, or "" when the reply did not contain
' enough text to extract one.
' Network failures (timeout, HTTP error status, bad URL) surface as the
' exceptions thrown by WebRequest.Create / GetRequestStream / GetResponse.
Function china_to_english(texts, languages)
    Dim utf8 As Encoding = Encoding.GetEncoding("UTF-8")

    ' Form values must be percent-encoded; otherwise "&", "+", "%" or "=" inside
    ' the text being translated would corrupt the POST body.
    Dim payload As String = "hl=zh-CN&ie=UTF8&text=" & HttpUtility.UrlEncode(CStr(texts), utf8) & _
                            "&langpair=" & HttpUtility.UrlEncode(CStr(languages), utf8)
    Dim bytes As Byte() = utf8.GetBytes(payload)

    ' NOTE(review): the service URL is blank in this listing (it appears to have
    ' been stripped when the article was published). WebRequest.Create("") throws,
    ' so the real endpoint has to be filled in before this can run.
    Dim req As WebRequest = WebRequest.Create("")
    req.Credentials = CredentialCache.DefaultCredentials
    req.Method = "POST"
    req.ContentType = "application/x-www-form-urlencoded"   ' use multipart/form-data for file uploads
    req.Timeout = 10000                                     ' milliseconds
    req.ContentLength = bytes.Length                        ' byte count of the encoded body, not the character count

    ' Send the request body; Finally guarantees the stream is released even if Write fails.
    Dim requestStream As Stream = req.GetRequestStream()
    Try
        requestStream.Write(bytes, 0, bytes.Length)
    Finally
        requestStream.Close()
    End Try

    ' Read the whole reply.
    Dim responseFromServer As String = ""
    Dim res As HttpWebResponse = CType(req.GetResponse(), HttpWebResponse)
    Try
        ' The original tested StatusDescription = "ok", which can never detect a
        ' failure (wrong operator and wrong letter case). Check the status code instead.
        If res.StatusCode <> HttpStatusCode.OK Then
            Current.Response.Write("暂时无法连接到网站,请换用另一个程序")
            Current.Response.End()
        End If

        Dim reader As New StreamReader(res.GetResponseStream(), Encoding.GetEncoding("gb2312"))
        Try
            responseFromServer = reader.ReadToEnd()
        Finally
            reader.Close()   ' closing the reader also closes the underlying response stream
        End Try
    Finally
        res.Close()
    End Try

    ' Keep only the text captured between a <div ...> and its </div>, then strip any remaining tags.
    Dim ss As String = Regex.Replace(responseFromServer, "(?i:(.+)(<div)(.+)(>)(.+)(</div>)(.+))", "$5")
    ss = CStr(RemoveHTML(ss))

    ' The first three characters are dropped to remove the leading label
    ' (the original comment describes it as the word for "translation").
    ' NOTE(review): presumably the label plus one separator character - confirm.
    ' Guarded so that a short or empty scrape result returns "" instead of throwing.
    If ss Is Nothing OrElse ss.Length <= 3 Then
        Return ""
    End If
    Return ss.Substring(3)
End Function
47       
   调用 china_to_english(texts, languages) 时需要传入两个参数:第一个是要翻译的文字,第二个是翻译方向的语种代码(格式为"源语种|目标语种")。
   例:中译英时,第二个参数为:zh-CN|en
   我导入的命名空间如下:
Imports System
Imports System.Web
Imports System.IO
Imports Microsoft.VisualBasic
Imports System.Web.HttpContext
Imports System.Web.UI
Imports System.Web.UI.WebControls
Imports System.Text
Imports System.Text.RegularExpressions
Imports System.Net