Get_Web_banner (batch-grab website banners)
This is the first flag I've actually finished: batch-grab website banners and write them to a CSV file. The info collected is the status code, title, Server header, and Content-Type. The input URIs are deduplicated, and a new_urls.txt with the deduplicated URLs is created in the current directory.
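For a single URL, the "banner" collected here is just the status code, the <title> tag, and the Server / Content-Type response headers. A minimal single-URL sketch (example.com and the variable names are mine, not part of the script):

# single-URL sketch of the info the full script collects for each target
import re
import requests

resp = requests.get('http://example.com', timeout=(5, 20), allow_redirects=False)
title_match = re.search(r'<title>(.*?)</title>', resp.text, re.S)
print(resp.status_code,
      title_match.group(1).strip() if title_match else '',
      resp.headers.get('Server', ''),
      resp.headers.get('Content-Type', ''))

The complete script: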
# -*- coding: utf-8 -*-
import requests
import re
import csv
import chardet
import threading

requests.packages.urllib3.disable_warnings()  # silence the warnings caused by verify=False

targets = []
csv_file = 'jd.csv'

def get_banner_export_csv():
    # read urls.txt and deduplicate the targets
    with open('urls.txt', 'r') as a:
        for target in a:
            target = target.strip()
            if target and target not in targets:
                targets.append(target)
    # save the deduplicated list to new_urls.txt
    with open('new_urls.txt', 'a+') as j:
        for urls in targets:
            j.write(urls + "\n")
    for url in targets:
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        try:
            # one request per URL, 302 redirects disabled
            req = requests.get(url, timeout=(5, 20), verify=False, allow_redirects=False)
            # fix the page encoding when the response header gives no charset
            if 'charset' not in req.headers.get('Content-Type', ' '):
                req.encoding = chardet.detect(req.content).get('encoding')
            pattern = re.compile(r'<title>(.*?)</title>', re.S)
            titles = pattern.findall(req.text)
            title = titles[0].strip() if titles else ''
            stat_code = str(req.status_code)
            location = req.headers.get('Location', '') if stat_code.startswith('30') else ''
            server = str(req.headers.get('Server', ''))
            content_type = str(req.headers.get('Content-Type', ''))
            if location:
                print("{} {} {} {} {} {}".format(stat_code, url, title, location, content_type, server))
            else:
                print("{} {} {} {} {}".format(stat_code, url, title, content_type, server))
            # append one row of banner info per URL
            with open(csv_file, 'a+', encoding='utf-8', newline='') as f:
                writer = csv.writer(f)
                writer.writerow([stat_code, url, title, location, content_type, server])
        except Exception as e:
            print(url + ' ' + str(e))
            with open(csv_file, 'a+', encoding='utf-8', newline='') as o:
                writer_error = csv.writer(o)
                writer_error.writerow([url, 'error'])

def main():
    # write the CSV header once, then hand the whole job to a single background thread
    with open(csv_file, 'a+', encoding='utf-8', newline='') as g:
        writer = csv.writer(g)
        writer.writerow(['stat_code', 'url', 'title', 'location', 'type', 'server'])
    thread = threading.Thread(target=get_banner_export_csv)
    thread.start()

if __name__ == '__main__':
    main()
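The script imports threading but starts only one worker thread, so the requests still run one after another. A minimal sketch of splitting the deduplicated targets across several threads (run_threaded, worker, fetch_banner and the chunk size are my own names, not part of the original script):

# -*- coding: utf-8 -*-
# Sketch only: fan the target list out to several worker threads.
import threading

def fetch_banner(url):
    # placeholder for the per-URL request + CSV write done in get_banner_export_csv()
    print('would fetch', url)

def run_threaded(targets, num_threads=5):
    chunk_size = max(1, len(targets) // num_threads)
    chunks = [targets[i:i + chunk_size] for i in range(0, len(targets), chunk_size)]

    def worker(chunk):
        for u in chunk:
            fetch_banner(u)

    threads = [threading.Thread(target=worker, args=(c,)) for c in chunks]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # wait for every worker so the CSV is complete before exiting

if __name__ == '__main__':
    run_threaded(['example.com', 'example.org'])

If several workers append to the same CSV file, wrap the writerow() call in a threading.Lock() so rows do not interleave.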
Usage:
1. Put the URLs you want to grab banners for into urls.txt in the current directory (with or without the http:// prefix); see the sample below.
2. Change the CSV filename/path (csv_file) to whatever you need.
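For reference, a minimal run might look like this (the script filename get_web_banner.py is my assumption, not something given above):

urls.txt:
    example.com
    http://example.org
    https://example.net

python get_web_banner.py

Each target then becomes one row in jd.csv: stat_code, url, title, location, type, server.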
