1.

How to fix: TypeError: 'tuple' object does not support item assignment

A tuple is immutable, so its items cannot be reassigned. Convert it to a list with list(tuple), modify the list, and convert it back with tuple(list) if a tuple is still needed.

2.

UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 20: ordinal not in range(128)

BeautifulSoup(html_text,from_encoding="utf-8")
# encoding=utf8
import sys
reload(sys)
sys.setdefaultencoding('utf8')

Python: Check if a File or Directory Exists

os.path.isfile('./file.txt')

os.path.isdir('./file.txt')

os.path.exists('./file.txt')

Find current directory and file's directory

import os
cwd = os.getcwd()
import re
import time
import urllib
import os
import urllib2
import sys
# Python 2 only: make UTF-8 the interpreter-wide default encoding so implicit
# unicode <-> str conversions (e.g. writing decoded page text to a file) do
# not fail with the 'ascii' codec. sys.setdefaultencoding is removed from the
# sys namespace at startup, so the module has to be reloaded to get it back.
reload(sys)
sys.setdefaultencoding('utf-8')

class MaoYan:
    """Scraper for a Maoyan film board.

    For every film found on a board page it appends the rank, poster URL,
    title, cast and score to ``result.txt`` (in the current working
    directory) and downloads the poster into a directory named after the
    film.
    """

    # One match per <dd> film entry; groups are
    # (rank, poster url, title, cast, score integer part, score fraction part).
    _FILM_PATTERN = re.compile(
        r'<dd>.*?board-index.*?>(\d+)</i>.*?<img data-src="(.*?)".*?name"><a.*?>(.*?)</a>.*?star">(.*?)</p>.*?integer">(.*?)</i>.*?fraction">(.*?)</i>',
        re.S)

    def __init__(self, baseurl):
        """Store ``baseurl``, the board URL to which a numeric offset is appended."""
        self.baseurl = baseurl

    def getPageUrl(self, PageNum):
        """Download board page ``PageNum`` (1-based) and return its text.

        The page offset is ``10 * (PageNum - 1)``. Returns the body decoded
        as UTF-8, or None when the request fails with ``urllib2.URLError``.
        """
        url = self.baseurl + str(10 * (PageNum - 1))
        try:
            response = urllib2.urlopen(urllib2.Request(url))
            try:
                return response.read().decode('utf-8')
            finally:
                # Release the connection even if reading/decoding fails.
                response.close()
        except urllib2.URLError as e:
            if hasattr(e, 'reason'):
                print('link lost')
            return None

    def makedir(self, path):
        """Create directory ``path`` (surrounding whitespace stripped).

        Returns True if the directory was created, False if the path
        already existed.
        """
        path = path.strip()
        if os.path.exists(path):
            print('already have the path')
            return False
        print('create new file %s' % path)
        os.makedirs(path)
        return True

    def writefile(self, content, listflag):
        """Append ``content`` to ``result.txt``.

        When ``listflag`` is true, ``content`` is a six-item film record
        (rank, poster url, title, cast, score integer, score fraction) and
        is written as five labelled lines. Otherwise ``content`` is written
        verbatim.
        """
        if listflag:
            fields = list(content)
            print(fields[0])
            # The fraction is folded into the score line only; it is not
            # repeated on a line of its own.
            lines = [
                'rank is : ' + fields[0],
                'image url is : ' + fields[1],
                'title is : ' + fields[2],
                'actor are : ' + fields[3],
                'the score is  ' + fields[4] + fields[5],
            ]
            text = ''.join(line + '\n' for line in lines)
        else:
            text = content
        # The with-statement closes the file; no explicit close() is needed.
        with open('result.txt', 'a') as f:
            f.write(text)

    def getPageInfo(self, PageNum):
        """Return the film records found on board page ``PageNum``.

        Each record is a 6-tuple of strings as captured by
        ``_FILM_PATTERN``. Returns an empty list when the page could not
        be downloaded.
        """
        page = self.getPageUrl(PageNum)
        if page is None:
            return []
        return self._FILM_PATTERN.findall(page)

    def parsePage(self, PageNum):
        """Record every film on page ``PageNum`` and download its poster.

        The poster is saved as ``<title>/<title>.jpg`` relative to the
        current working directory.
        """
        for item in self.getPageInfo(PageNum):
            self.writefile(item, True)
            self.writefile('\n----------------------------------------------------------------------------\n', False)
            # Use the same stripped title for the directory and the file so
            # they cannot disagree, and build the path instead of chdir-ing
            # so a failed download cannot leave the process in another cwd.
            title = item[2].strip()
            self.makedir(title)
            filename = title + '.jpg'
            urllib.urlretrieve(item[1], os.path.join(title, filename))
            print('saving picture %s ' % filename)

    def loadPages(self, start, end, delay=0):
        """Process board pages ``start`` through ``end`` inclusive.

        ``delay`` is an optional pause in seconds after each page; the
        default of 0 means no pause.
        """
        for i in range(start, end + 1):
            print('downloading the %sth page film' % i)
            self.parsePage(i)
            if delay:
                time.sleep(delay)

# Board URL; the scraper appends the numeric page offset to it.
baseurl = 'http://maoyan.com/board/4?offset='
maoyan = MaoYan(baseurl)
# Scrape board pages 1 through 3 inclusive.
maoyan.loadPages(start=1, end=3)

results matching ""

    No results matching ""