URLError异常处理

SRE实战 互联网时代守护先锋,助力企业售后服务体系运筹帷幄!一键直达领取阿里云限量特价优惠。

例:

         from urllib import request as sa

         from urllib import error as er

         try:

             sa.urlopen('http://blog.csdn.net')

except er.HTTPError as ee:

             print(ee.code)

             print(e.reason)

    except er.URLError as e:

             if hasattr(e,'code'):

                 print(e.code)

             if hasattr(e,'reason'):

                 print(e.reason)

search() 正则表达式

例:

         import re

         p1 = 'py.*n'

         p2 = 'cd{2}'

         p3 = 'cd{3}'

         p4 = 'cd{2,}'

         s  = 'abcddddefphp345python_py'

         r1 = re.search(p1,s)

         r2 = re.search(p2,s)

         r3 = re.search(p3,s)

         r4 = re.search(p4,s)

         print(r1)

         print(r2)

         print(r3)

         print(r4)

urlretrieve() 下载文件

例:

         import urllib.request as res

         import urllib.error as er

         import re

         headers = ('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6776.400 QQBrowser/10.3.2601.400')

         def sd(u,y):

             dds = res.build_opener()

             dds.addheaders = [headers]

             p = '<img width="220" height="220" data-img="1" src="//(.+?\.jpg)">'

             h = res.urlopen(u).read()

             o = re.compile(p).findall(str(h))

             x = 1

for i in o:

                 im = './'+str(y)+str(x)+'.jpg'

                 ig = 'http://'+i

                 print(i)

                 try:

                     res.urlretrieve(ig,filename=im)

                 except er.URLError as e:

                     if hasattr(e,'code'):

                         x+=1

                     if hasattr(e,'reason'):

                         x+=1

                 x+=1

         for y in range(1,50):

             u = 'https://list.jd.com/list.html?cat=9987,653,655&page='+str(y);

             sd(u, y)

链接获取

例:

         import urllib.request as res

         import urllib.error as er

         import re

         def getlink(url):

             #模拟浏览器

             headers = ('User-Agent',

               'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6776.400 QQBrowser/10.3.2601.400')

             op = res.build_opener()

             op.addheaders = [headers]

             #将opener安装为全局

             res.install_opener(op)

             f = res.urlopen(url)

             d = str(f.read())

             print(d)

             #根据需求构建好链接表达式

             p = '(https?://[^s)";]+\.(\w|/)*)'

             l = re.compile(p).findall(d)

             #去除重复元素

             l = list(set(l))

             return l

         url = "https://www.landi.com/";

         l = getlink(url)

         print(l)

         for ll in l:

             print(ll[0]) 

扫码关注我们
微信号:SRE实战
拒绝背锅 运筹帷幄