The image downloader is written
(2019-07-30 19:36:22)
The Python program is done. Put in your blog address, run it, and it will automatically download all of your blog's articles and images. The program is mainly aimed at tinypic.com, since that image-hosting site is shutting down this year, so I'm saving all the images while I can.
If you know how to use Python, please give it a try; I'd love to hear your suggestions for improvement.
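The whole script is below. It runs under Python 2 and needs nothing beyond the standard library (re, os, urllib2): save it to a file, fill in your own blog address at the bottom, and run it.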
import os
import re
import urllib2

# Save one article: fetch its page, download every image it references,
# and rewrite the image links so the saved copy points at local files.
# The function header and the two regex patterns were eaten when this post
# was rendered as HTML, so the patterns below are reconstructed guesses.
def saveHtml(page, _link, time):
    s1 = urllib2.urlopen(_link).read()
    # Assumed pattern for the article body (the original one was lost).
    content = re.search('<div id="articleText".+?</div>', s1, re.DOTALL).group().strip()
    #print(content.decode("utf8"))
    # Assumed pattern: collect every <img ...> tag in the body.
    img = re.findall('<img[^>]*>', content, re.IGNORECASE)
    cnt = 1
    for x in img:
        src = re.findall('http.+jpg', x, re.IGNORECASE)
        if len(src) == 0:
            continue
        # Local name: article number plus running image number.
        des = "images/P%04dI%03d.jpg" % (page, cnt)
        try:
            f1 = urllib2.urlopen(src[0])
            f2 = open(des, "wb")
            f2.write(f1.read())
            f2.close()
        except:
            pass  # a dead image link should not abort the whole backup
        content = content.replace(src[0], des)
        cnt += 1
    # Write the article out with its URL and timestamp on the first line.
    f2 = open("P%04d.htm" % page, "wb")
    f2.write(_link + " " + time + "\n" + content + "\n")
    f2.close()
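# A worked example of the naming scheme above (illustration only): the
# 61st article overall is written to P0061.htm, and its third downloaded
# image to images/P0061I003.jpg, which is also the path patched into
# P0061.htm in place of the original tinypic link.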
# process pages
# Fetch one listing page, collect every article title link on it,
# save each article, and return how many articles the page held.
def getPage(_page, url1):
    id = re.findall("[0-9]+", url1)[0]  # numeric blog id taken from the URL
    url2 = ("http://blog.wenxuecity.com/blog/frontend.php"
            "?page=%d&act=articleList&blogId=%s") % (_page, id)
    f1 = urllib2.urlopen(url2)
    s1 = f1.read().split("\n")
    count = 0
    for i in range(len(s1)):
        if s1[i].find('class="atc_title"') > 0:
            # The <a> tag is wrapped across the next three source lines.
            link = s1[i+1].strip() + s1[i+2].strip() + s1[i+3].strip()
            count = count + 1
            print _page*60 + count, link.decode("utf8")
            # The post timestamp sits ten source lines below the title.
            time = re.search('(?<=>).+(?=<)', s1[i+10]).group(0)
            link = link.replace("/my", "http://blog.wenxuecity.com/my")
            saveHtml(_page*60 + count, link, time)
    return count
#Main loop
# Each listing page shows 60 articles, so keep fetching pages until one
# comes back with fewer than 60.
def getBlog(url1):
    count = 60
    _page = 0
    try:
        os.makedirs("images")
    except OSError:
        pass  # the directory already exists
    while count == 60:
        count = getPage(_page, url1)
        _page = _page + 1

# Put your own blog address here; the numeric id in it is what the script
# extracts. The value below is only a placeholder.
url1 = "http://blog.wenxuecity.com/myblog/12345/all.html"
getBlog(url1)
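
If you only have Python 3, urllib2 no longer exists; the same fetches can be done with urllib.request. A minimal sketch of the substitution, assuming the pages are UTF-8:

import urllib.request

def fetch(url):
    # Python 3 stand-in for urllib2.urlopen(url).read()
    return urllib.request.urlopen(url).read().decode("utf8", "replace")

The print statements would likewise need to become print() calls.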