urllib.request.urlopen(url [, data])
其中,参数 url 是一个 URL 字符串;参数 data 是可选的,用来指定随请求发送的数据:省略 data 时发送的是 GET 请求,提供 data(bytes 类型)时发送的是 POST 请求。
import urllib from urllib import request htmlpage = urllib.request.urlopen("http://www.baidu.com") htmlpage.read()
urlopen() 方法返回的响应对象有两个常用属性,即 url 与 headers。url 属性是实际获取到的资源的 URL 字符串(如果发生了重定向,它可能与传入的 URL 不同);headers 属性是一个类似字典的 http.client.HTTPMessage 对象,包含服务器响应的表头。
htmlpage.url 'http://www.baidu.com'
下面的示例显示刚才打开的 htmlpage 对象的 headers 属性。
for key, value in htmlpage.headers.items(): print (key, " = ", value) Server = Apache-Coyote/1.1 Cache-Control = Content-Type = text/html;charset=UTF-8 Content-Encoding = gzip Content-Length = 1284 Set-Cookie = ucloud=1;domain=.baidu.com;path=/;max-age=300 Pragma = no-cache
import urllib.request urllib.request.urlretrieve("http://www.python.org", "copy.html") ('copy.html', <http.client.HTTPMessage object at 0x02DE28B0>)
import urllib.request urllib.request.quote("This & that are all books\n") 'This%20%26%20that%20are%20all%20books%0A'
import urllib.request urllib.request.unquote("This%20%26%20that%20are%20all%20books%0A") 'This & that are all books\n'
import urllib.request response = urllib.request.urlopen("http://www.python.org") html = response.read()
也可以使用以下代码实现上述功能:
import urllib.request req = urllib.request.Request("http://www.python.org") response = urllib.request.urlopen(req) the_page = response.read()
import urllib.request #打开网页文件 htmlhandler = urllib.request.urlopen("http://www.python.org") #在本机上创建一个新文件 file = open("13.2.html", "wb") #将网页文件存储到本机文件上,每次读取512字节 while 1: data = htmlhandler.read(512) if not data: break file.write(data) #关闭本机文件 file.close() #关闭网页文件 htmlhandler.close()
保存并运行程序,即可将 http://www.python.org 网页存储到本机的 13.2.html 文件中。
import urllib.parse url = "http://home.netscape.com/assist/extensions.html#topic1?x= 7&y= 2" urllib.parse.urlparse(url) ('http', 'home.netscape.com', '/assist/extensions.html', '', '', 'topic1?x= 7&y= 2') ParseResult(scheme='http', netloc='home.netscape.com', path='/assist/extensions.html', params='', query='', fragment='topic1?x= 7&y= 2')
import urllib.parse t = ("http", "www.python.org", "/News.html", "", "", "") urllib.parse.urlunparse(t) 'http://www.python.org/News.html'
import urllib.parse urllib.parse.urljoin("http://www.python.org", "/News.html") 'http://www.python.org/News.html'
本文链接:http://task.lmcjl.com/news/16831.html