爬IP代码
import requests
import re
import dauk
from bs4 import BeautifulSoup
import time

# NOTE: `dauk` and `time` are not referenced below; the imports are kept so
# that nothing relying on them being loaded from here changes.

# One dotted-quad octet (0-255). It stays a capturing group so that
# findall() yields one 4-tuple of octets per address.
_OCTET = r'(25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
# The digit look-arounds keep the pattern from matching inside a longer run
# of digits (e.g. "999.1.1.1" must not produce "99.1.1.1").
_IP_RE = re.compile(r'(?<!\d)' + r'\.'.join([_OCTET] * 4) + r'(?!\d)')

_IP_FILE = '采集到的IP.txt'
_URL_TEMPLATE = "http://www.xicidaili.com/{}/{}"
_FIREFOX_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:58.0) Gecko/20100101 Firefox/48.0'}
_CHROME_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}


def _find_ips(text):
    """Return every IPv4 address found in *text*, in order of appearance."""
    return [".".join(octets) for octets in _IP_RE.findall(text)]


def _save_ips(ips):
    """Print each address and append it, one per line, to the output file."""
    if not ips:
        # Nothing found: do not create or touch the output file.
        return
    with open(_IP_FILE, 'a') as out:
        for ip in ips:
            print(ip)
            out.write(ip + '\n')


def _crawl_raw(section, pages):
    """Scan the page text of listing pages 1..pages of *section* for addresses."""
    for page in range(1, pages + 1):
        response = requests.get(_URL_TEMPLATE.format(section, page),
                                headers=_CHROME_HEADERS)
        # Use the decoded text: a str pattern cannot be applied to the raw
        # bytes in response.content.
        _save_ips(_find_ips(response.text))


def daili(pages=99):
    """Collect addresses from the "/nt/" listing pages.

    Each page is parsed with BeautifulSoup and only the text of its <td>
    cells is searched. Matches are printed and appended to 采集到的IP.txt.

    Args:
        pages: Number of listing pages to fetch, starting at page 1. The
            banner text always says 99, whatever value is passed.
    """
    print('[+]极速爬取代理IP,默认为99页')
    for page in range(1, pages + 1):
        response = requests.get(_URL_TEMPLATE.format('nt', page),
                                headers=_FIREFOX_HEADERS)
        soup = BeautifulSoup(response.content, 'html.parser')
        found = []
        for cell in soup.find_all('td'):
            found.extend(_find_ips(cell.get_text()))
        _save_ips(found)


def dailigaoni(pages=99):
    """Collect addresses from the "/nn/" listing pages.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
    """
    print('[+]极速爬取代理IP,默认为99页')
    _crawl_raw('nn', pages)


def dailihtp(pages=99):
    """Collect addresses from the "/wn/" listing pages.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
    """
    # NOTE(review): which protocol the "wn" listing holds cannot be
    # confirmed from this file; verify against the site.
    print('[+]极速爬取代理IP,默认为99页')
    _crawl_raw('wn', pages)


def dailihttps(pages=99):
    """Collect addresses from the "/wt/" listing pages.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
    """
    # NOTE(review): which protocol the "wt" listing holds cannot be
    # confirmed from this file; verify against the site.
    print('[+]极速爬代理IP,默认为99页')
    _crawl_raw('wt', pages)


# Run every crawler only when executed as a script, so that importing this
# module to call a single crawler does not start all of them.
if __name__ == '__main__':
    daili()
    dailigaoni()
    dailihtp()
    dailihttps()
端口代码
import requests
import re
from bs4 import BeautifulSoup

# A port number 0-65535. The 6xxxx range is spelled out branch by branch so
# that every value up to 65535 is accepted and nothing above it.
_PORT = (r'([0-9]|[1-9]\d{1,3}|[1-5]\d{4}'
         r'|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])')
# For raw HTML: the port must be the entire content of a <td> cell.
_PORT_CELL_RE = re.compile('<td>' + _PORT + '</td>')
# For text already extracted from a cell (tags stripped): use with fullmatch().
_PORT_TEXT_RE = re.compile(_PORT)

# The original wrote to '采集到的端口.txt.txt' in three functions and to
# '采集到的端口.txt' in the fourth; a single name is used everywhere now.
_PORT_FILE = '采集到的端口.txt'
_URL_TEMPLATE = "http://www.xicidaili.com/{}/{}"
_FIREFOX_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:58.0) Gecko/20100101 Firefox/48.0'}
_CHROME_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}


def _save_ports(ports):
    """Print each port and append it, one per line, to the output file."""
    if not ports:
        # Nothing found: do not create or touch the output file.
        return
    with open(_PORT_FILE, 'a') as out:
        for port in ports:
            print(port)
            out.write(port + '\n')


def _crawl_raw(section, pages):
    """Scan the page text of listing pages 1..pages of *section* for port cells."""
    for page in range(1, pages + 1):
        response = requests.get(_URL_TEMPLATE.format(section, page),
                                headers=_CHROME_HEADERS)
        # The pattern has a single group, so findall() already returns the
        # port strings themselves; they must not be joined character-wise.
        _save_ports(_PORT_CELL_RE.findall(response.text))


def daili(pages=99):
    """Collect port numbers from the "/nt/" listing pages.

    Each page is parsed with BeautifulSoup; a <td> cell counts as a port
    when its whole text is a number in 0-65535. Matches are printed and
    appended to 采集到的端口.txt.

    Args:
        pages: Number of listing pages to fetch, starting at page 1. The
            banner text always says 99, whatever value is passed.
    """
    print('[+]极速爬取代理IP端口,默认为99页')
    for page in range(1, pages + 1):
        response = requests.get(_URL_TEMPLATE.format('nt', page),
                                headers=_FIREFOX_HEADERS)
        soup = BeautifulSoup(response.content, 'html.parser')
        found = []
        for cell in soup.find_all('td'):
            # get_text() has no tags left, so match the bare cell text
            # instead of looking for "<td>...</td>" in it.
            text = cell.get_text()
            if _PORT_TEXT_RE.fullmatch(text):
                found.append(text)
        _save_ports(found)


def dailigaoni(pages=99):
    """Collect port numbers from the "/nn/" listing pages.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
    """
    print('[+]极速爬取代理IP的端口,默认为99页')
    _crawl_raw('nn', pages)


def dailihtp(pages=99):
    """Collect port numbers from the "/wn/" listing pages.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
    """
    print('[+]极速爬取代理IP,默认为99页')
    _crawl_raw('wn', pages)


def dailihttps(pages=99):
    """Collect port numbers from the "/wt/" listing pages.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
    """
    print('[+]极速爬代理IP的端口,默认为99页')
    _crawl_raw('wt', pages)


# Run every crawler only when executed as a script, so that importing this
# module to call a single crawler does not start all of them.
if __name__ == '__main__':
    daili()
    dailigaoni()
    dailihtp()
    dailihttps()
调用代码
# Start-up banner. The text is reproduced exactly as it appears in this copy
# of the file; NOTE(review): the line breaks of the original ASCII art appear
# to have been lost and cannot be restored with certainty from here.
print(''' _ooOoo_ o8888888o 88" . "88 (| -_- |) O\ = /O ____/`---'\____ .' \\| |// `. / \\||| : |||// \ / _||||| -:- |||||- \ | | \\\ - /// | | | \_| ''\---/'' | | \ .-\__ `-` ___/-. / ___`. .' /--.--\ `. . __ ."" '< `.___\_<|>_/___.' >'"". | | : `- \`.;`\ _ /`;.`/ - ` : | | \ \ `-. \_ __\ /__ _/ .-` / / ======`-.____`-.___\_____/___.-`____.-'====== `=---=' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 佛祖保佑 永无BUG ''')

# Warning line followed by the menu, printed one entry per line.
for menu_line in (
    '[!]爬虫速度过快,导致IP被封请更换IP',
    '[*]极速爬取代理IP',
    '1.普通代理IP',
    '2.高匿代理IP',
    '3.http代理IP',
    '4.https代理IP',
):
    print(menu_line)

bk = input('请选择:')


def xs():
    """Run the pair of crawlers selected by the module-level choice ``bk``.

    Choices '1'-'4' call the function of the matching name on
    ``代理.daili.daili`` and then on ``代理.dauk``, and exit afterwards.
    'q' exits without running anything. Any other input prints an error
    message and returns.
    """
    import 代理.daili
    import 代理.dauk

    if bk == 'q':
        exit()

    # Menu choice -> name of the crawler function to call in both places.
    crawler_names = {
        '1': 'daili',
        '2': 'dailigaoni',
        '3': 'dailihtp',
        '4': 'dailihttps',
    }
    chosen = crawler_names.get(bk)
    if chosen is None:
        print('[-]没有找到你要的选项')
        return

    # NOTE(review): the attribute path 代理.daili.daili.<name> is kept as
    # written; confirm it matches the actual package layout.
    getattr(代理.daili.daili, chosen)()
    getattr(代理.dauk, chosen)()
    exit()


xs()
2018-02-17
来源:https://www.cnblogs.com/haq5201314/p/8451683.html