
import urllib2
import urllib
import re
import thread
import time
class SpiderModel:
def __init_(self):
self.page = 1
self.pages = []
self.enable = False
def GetPage(self,page): myUrl = "http://qiushibaike.com/hot/page/" + page #'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'下面这个是我在网页源码看的 user_agent = 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/43.0.2357.134 Safari/537.36' headers = {'User-Agent': user_agent} req = urllib2.Request(myUrl,headers = headers) myRespOnse= urllib2.urlopen(req) myPage = myResponse.read() unicodePage = myPage.decode('utf-8') myItems = re.findall('<div class="content">(.*?)</div>',unicodePage,re.S) items = [] for item in myItems: items.appen([item[0].replace("\n","")]) return items def LoadPage(self): while self.enable: if len(self.pages) < 2: try: myPage = self.GetPage(str(self.pagg)) self.page += 1 self.pages.append(myPage) except: print '无法加载' else: time.sleep(1) def ShowPage(self,nowPage,page): for items in nowPage: print u'第%d页' %page, items[0], items[1] myInput = raw_input() if myInput =="quit": self.enable = False break def Start(self): self.enable = True page = self.page print u'正在加载。。' thread.start_new_thread(self.LoadPage()) while self.enable: if self.pages: nowPage = self.pages[0] del self.pages[0] self.ShowPage(nowPage, page) page += 1 糗事百科 """
print u'按下回车键浏览内容:'
raw_input(' ')
myModel = Spider_Model()
myModel.Start()
代码如上。编译器没有报错。然后按enter键显示无法加载,不知道错误出在哪了
新人刚刚开始学python,在这请教各位了。麻烦了~~~
1 rrrkren 2015-07-22 04:03:51 +08:00 |
2 WKPlus 2015-07-22 12:47:30 +08:00 self.GetPage(str(self.pagg)) 这里应该是self.page吧 另外,遇到问题可以把try except去掉,看看异常提示 |