import random
import urllib.request

# Pool of User-Agent strings; UA() picks one of these at random on each call.
uapools = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0",
]


def UA():
    """Install a global urllib opener that sends a randomly chosen User-Agent.

    Picks one entry from ``uapools``, builds a fresh opener whose only
    explicit header is that ``user-agent`` value, installs it as the default
    opener for ``urllib.request.urlopen``, and prints the chosen value.
    """
    opener = urllib.request.build_opener()
    newua = random.choice(uapools)
    # Assigning (rather than appending) replaces the opener's header list.
    opener.addheaders = [("user-agent", newua)]
    urllib.request.install_opener(opener)
    # The original printed the undefined name `nowua`, which raised NameError.
    print("当前正在使用UA:" + str(newua))


# Alternative strategy: call UA() unconditionally at the top of the loop body
# below to switch the User-Agent on every request instead of every third one.

# Switch the User-Agent every third request (i = 0, 3, 6, 9) and fetch the
# page on every iteration, printing the length of the decoded body.
# NOTE(review): `url` is not defined in the visible part of this file; it must
# be bound before this loop runs -- confirm where it is supposed to come from.
for i in range(0, 10):
    if i % 3 == 0:
        UA()
    # Undecodable bytes are dropped ("ignore") rather than raising.
    data = urllib.request.urlopen(url).read().decode("utf-8", "ignore")
    print(len(data))
class AliFirstPipeline(object):
    """Pipeline stage that prints every title carried by an item.

    NOTE(review): the ``process_item(self, item, spider)`` signature looks
    like a Scrapy item pipeline -- confirm against the project settings.
    """

    def process_item(self, item, spider):
        """Print each entry of ``item["title"]`` and return the item.

        Args:
            item: Mapping-like object whose ``"title"`` entry is a sequence
                of printable values.
            spider: Unused by this method; kept for interface compatibility.

        Returns:
            The same ``item`` object, unmodified.
        """
        # Iterate the titles directly instead of indexing via range(len(...)).
        for title in item["title"]:
            print("-------")
            print(title)
        return item