Python抓取3D打印笔天猫评论(3)

2023-02-11 271 0

现在天猫的详情页又改版了,以前那套不行了

刚开始摸索,暂时只能下载一页评论。这次改进了一下,能够获取买家的会员等级,还能下载晒图和评论内容。

import json
import os
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup


# Review-list endpoint for one Tmall item, copied verbatim from the browser.
# The `data` query parameter URL-decodes to:
#   {"itemId":"623333281518","bizCode":"ali.china.tmall","channel":"pc_detail","pageSize":20,"pageNum":1}
# so this fetches page 1 with 20 reviews per page.
# The query also carries a timestamp (`t`) and a signature (`sign`); the article
# notes that the url and the cookie have to be replaced every few hours.
# NOTE(review): `sign` is presumably derived from `t` and the session token in
# the cookie - TODO confirm before trying to change `pageNum` by hand.
url = "https://h5api.m.tmall.com/h5/mtop.alibaba.review.list.for.new.pc.detail/1.0/?jsv=2.7.0&appKey=12574478&t=1676094291598&sign=e62a7992325ce571cdbc1e8fada34de7&api=mtop.alibaba.review.list.for.new.pc.detail&v=1.0&isSec=0&ecode=0&timeout=10000&dataType=json&valueType=string&ttid=2022%40taobao_litepc_9.17.0&AntiFlood=true&AntiCreep=true&preventFallback=true&type=json&data=%7B%22itemId%22%3A%22623333281518%22%2C%22bizCode%22%3A%22ali.china.tmall%22%2C%22channel%22%3A%22pc_detail%22%2C%22pageSize%22%3A20%2C%22pageNum%22%3A1%7D"
# Request headers sent with the review request: session cookie, referer and a
# desktop Chrome user-agent.
# NOTE(review): the cookie is a hard-coded login session; treat it as a secret
# and load it from the environment or a local file instead of publishing it.
headers = {
    'cookie': 'enc=L1%2BEWKfqEhWH1WILeWEF1KOiuDf2Cajd%2F0eZYzQgcI3e%2FsTc5rVan3hyj4mSQDEslXHbyj4chZunVGKjZ4fTTheXwGRUVwKZANtPTzFrMBg%3D; cna=Vuo7F5s2vysCAXFoyUirZNF7; lid=%E7%88%B1%E4%BC%98%E8%96%87%E5%B0%8F%E7%AB%99; sgcookie=E100sFv1Jq8zyR2%2BV6PYX8twvM2K5OxKDZd16zUNYGMIrhJEcnqejo0k59FzAn0xncspSIvgbhPa%2F8ziRd%2B24flTiZmUn7vUDwzU3ffxHZsNNuA%3D; uc1=cookie14=UoezSc9mLXvypQ%3D%3D; t=323ff6f09758b92c2bb98b49aa8e32af; uc3=nk2=0%2B51cgTWYmdThg%3D%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&vt3=F8dCvj0rqNMmz9KawrQ%3D&id2=UUpgRKg%2BihSZXdQWgw%3D%3D; tracknick=%5Cu7231%5Cu4F18%5Cu8587%5Cu5C0F%5Cu7AD9; uc4=nk4=0%400VVcJRa17G%2BtuRi%2FBWumX11VBdTt&id4=0%40U2gqy1kEXQj8TJ1In4Gzs4imDCzYrdU5; lgc=%5Cu7231%5Cu4F18%5Cu8587%5Cu5C0F%5Cu7AD9; _tb_token_=e53eedf5e5755; cookie2=1a9379cedd260e3b12144e85c2ad3bfa; _m_h5_tk=7e1761ca7fbec2bfaf3c98bc9673d1c9_1676103048455; _m_h5_tk_enc=3da53fe03306dcd0d188d7c32dc6e52e; xlly_s=1; l=fBgzLE7VLtIL9PufBO5CPurza779gIRbzsPzaNbMiIEGa6OR_eMjANCeqBTk8dtjgTfxVetz_ANlDdhH8P438EMWXM-5KXIpBqv6-bpU-L5..; tfstk=c2YFBPiHc23EuTA-HNby00V_B5wdZTOHZP5fxYTbqML8VGShil2Rsy8h__NaEMf..; isg=BFpa9lZN7bKqBWAJ49dcKlq8qwB8i95lZ1xmhmTTl-231_gRTBredD1hp6PLB1b9',
    'referer': 'https://detail.tmall.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}
# Fetch the review page. Fail fast on an HTTP error status so an error page is
# never mistaken for the review payload further down.
response = requests.get(url, headers=headers, timeout=23)
response.raise_for_status()
html = response.text

# Locate the JSON envelope inside the response body: it is taken to start at
# '{"api"' and to end with '"1.0"}'.
# NOTE(review): the slicing suggests the body may carry extra text around the
# JSON (e.g. a JSONP wrapper) - TODO confirm against a live response.
start = html.find('{"api"')
envelope_tail = '"1.0"}'
# Search from the right: the closing marker is the END of the envelope, so an
# identical fragment earlier in the payload must not truncate the slice.
tail_pos = html.rfind(envelope_tail)
if start == -1 or tail_pos < start:
    # str.find/rfind return -1 when a marker is absent; without this check the
    # slice would silently produce garbage and json.loads would fail obscurely.
    raise ValueError(
        'review JSON envelope not found in response (expired url/cookie?): '
        '{!r}'.format(html[:200])
    )
ends = tail_pos + len(envelope_tail)

# Running index used to name the downloaded review pictures.
count = 0

# Parse the JSON payload and process every review on the page: download its
# pictures, print the buyer's credit level and print the review text.

# Buyer credit level keyed by the badge image URL found in 'buyCreditLevelPic'.
# NOTE(review): the original if/elif chain listed the first URL below a second
# time for '红心5'; that branch could never run because '皇冠2' matched first.
# The real badge URL for '红心5' still has to be collected and added here.
BUYER_LEVEL_BY_BADGE = {
    "https://img.alicdn.com/imgextra/i1/O1CN01Qym7UU1XAv0yfDV6O_!!6000000002884-2-tps-92-45.png": '皇冠2',
    "https://img.alicdn.com/imgextra/i4/O1CN01ZlCYMx1nbVriw6u0S_!!6000000005108-2-tps-92-45.png": '蓝钻4',
    "https://img.alicdn.com/imgextra/i3/O1CN01QZTjHW1F4oegDvuEX_!!6000000000434-2-tps-92-45.png": '蓝钻3',
    "https://img.alicdn.com/imgextra/i3/O1CN01PIYxrZ22FGrmiDphN_!!6000000007090-2-tps-92-45.png": '皇冠1',
    "https://img.alicdn.com/imgextra/i4/O1CN019QZnaG1U1LtUAPn6e_!!6000000002457-2-tps-92-45.png": '蓝钻5',
    "https://img.alicdn.com/imgextra/i2/O1CN01vy8zTE1Zeo5lC6lTQ_!!6000000003220-2-tps-92-45.png": '蓝钻2',
    "https://img.alicdn.com/imgextra/i2/O1CN01CNstjx25mCBct4Rc6_!!6000000007568-2-tps-92-45.png": '红心4',
    "https://img.alicdn.com/imgextra/i1/O1CN01Bm9gkz1yASEqBM8xQ_!!6000000006538-2-tps-92-45.png": '蓝钻1',
}

# Create the output directory up front; previously a missing directory made
# every open() fail and the bare except hid it, so nothing was saved.
os.makedirs('pinglun2', exist_ok=True)

for review in json.loads(html[start:ends])["data"]["module"]["reviewVOList"]:
    # .get() keeps one incomplete review from aborting the whole run.
    badge_url = review.get('buyCreditLevelPic')
    # Some reviews have few pictures and some have none at all.
    pic_paths = review.get('reviewPicPathList') or []
    review_text = review.get('reviewWordContent', '')

    # Same cap as before: at most the first five pictures of each review.
    for pic_path in pic_paths[:5]:
        # The original code prepends 'https:' unconditionally (protocol-relative
        # paths); leave a value alone if it already carries a scheme.
        if pic_path.startswith('http'):
            pic_url = pic_path
        else:
            pic_url = 'https:' + pic_path
        print(pic_url)
        try:
            # NOTE(review): verify=False disables TLS certificate checking; it
            # is kept from the original but should be removed if not required.
            picture = requests.get(pic_url, timeout=23, verify=False)
            # Never write an HTTP error body to disk as a ".jpg".
            picture.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: report the failure and go on with the next picture.
            print('图片下载失败,已跳过: {} ({})'.format(pic_url, exc))
            continue
        with open('pinglun2/{}.jpg'.format(count), 'wb') as f:
            f.write(picture.content)
        count += 1

    # Report the buyer's credit level.
    level_name = BUYER_LEVEL_BY_BADGE.get(str(badge_url))
    if level_name is None:
        print('其他未收录等级')
    else:
        print('用户会员等级:' + level_name)
    print(review_text)

几个小时就要换一下url和cookie。

先找到对应的json文件并解析
结果如下,可以看到会员等级,最后一步就是把晒图的图片批量下载。
图片下载下来了,但是有小部分图片显示已损坏。

之前有一篇自动爬取全景网图片,下载的图片都是正常的,初步怀疑是天猫的问题。以后学精了再回来分析

相关文章

python 免费下载歌曲和破解VIP视频
Python抓取淘宝评论(1)
利用python对电脑文件进行分类整理
Python抓取3D打印笔天猫评论(1)
python抓取唯品会3D打印笔信息
python爬取新浪财经新闻内容

发布评论