获取抖音用户作品列表信息-进阶3
# 概况
经过前两轮的优化,基本需求已经可以满足了,但又面临一个新问题:错误处理。
如何在出错的时候自动更正,做到无需人工处理?
# 分析
- 出错的情形基本上是:因操作频繁而被登出,或者接口报错,导致得不到数据
- 操作频繁主要发生在两处:获取列表过于频繁,以及调用接口过于频繁
# 解决
- 针对列表请求过于频繁的措施
  - 思路:不要一次性获取很多页列表,而是每获取完一页列表,就先获取该页中每一位达人的相关数据;
    全部获取完成之后,再根据是否有下一页去获取下一页里每一项的内容,以此类推,这样就不会出现列表刷新过快的问题了
  - 措施:使用递归方式,有下一页就调用自身,没有则返回结果;每次调用自身之前,先执行获取当前页每一项数据的任务
- 错误处理
  - 思路:即使出错了,程序也能继续执行
  - 措施:等待执行结果,如果没有得到相应的结果,多半是 cookie 出了问题,此时更换 cookie,循环退回上一步,继续执行
# 结论
通过错误处理及深入循环,能模拟人工行为,避免触发防刷机制
# 相关
# 代码
// 获取抖音用户信息~
import { logger } from 'fd-framework';
import * as saveSvr from '../services/savestar';
import {IAuthorListArgs, fetchSingle} from '../services/star'
const puppeteer = require('puppeteer');
// Logger obtained from the framework under the 'douyin' name.
const log = logger.get('douyin');
// Running count of authors seen so far; loopList adds each page's author count to it.
let len = 0
// Current list page number; advanced by loopList and also read by doSaveUserInfoTask for its log messages.
let page = 1;
// Shared scratch object for data captured from the author detail page.
// loopDetailList writes `param`, `dailyFans` and `itemData` into it; `param` is set to the
// string 'error' when the author_page response cannot be processed, and doSaveUserInfoTask
// checks for that marker after each fetch.
let requestDataList = {} as Record<string, any>;
// Index into the cookie list of the cookie currently in use; advanced (wrapping to 0) by doSaveUserInfoTask when a fetch reports an error.
let cookieIndex = 0
/**
 * Walks the author list page by page.
 *
 * For every page it fetches the list with `fetchSingle`, hands the author ids
 * of that page to `doSaveUserInfoTask`, and only then moves on to the next
 * page, so list requests are never issued back-to-back.
 *
 * The module-level `page` / `len` counters are used while running (they are
 * also read by `doSaveUserInfoTask` for logging) and are always reset when the
 * run ends, whether it finished normally or threw, so the next call starts
 * from page 1 with a zero count.
 *
 * @param args list query arguments; `args.cookieList` is forwarded to `doSaveUserInfoTask`
 * @returns the total number of authors seen across all pages
 */
export const loopList = async (args: IAuthorListArgs): Promise<any> => {
  const { cookieList } = args;
  try {
    // Iterate instead of recursing: one pass per list page.
    for (;;) {
      const fetchedData: Record<string, any> = await fetchSingle({ ...args, page });
      const authors: Array<Record<string, any>> = fetchedData.authors;
      len += authors.length;
      log.info(`第${page}页, 共${len}个达人用户,开始获取`);

      const ids = authors.map((author) => author.id);
      // Finish every author on this page before asking for the next page.
      await doSaveUserInfoTask(ids, cookieList);

      if (!fetchedData.pagination.has_more) {
        log.info(`共${page}页,共获取${len}条数据`);
        // The return value is captured before `finally` resets the counter.
        return len;
      }
      page++;
    }
  } finally {
    // Reset shared paging state even on failure so a later run is not
    // started from a stale page number / count.
    page = 1;
    len = 0;
  }
};
// XHR endpoints of the author detail page whose JSON responses are captured.
const AUTHOR_PAGE_API = 'https://star.toutiao.com/v/api/user/author_page';
const AUTHOR_DAILY_FANS_API = 'https://star.toutiao.com/v/api/demand/author_daily_fans';
const AUTHOR_ITEM_DATA_API = 'https://star.toutiao.com/v/api/demand/author_item_data';
// Time given to the detail page to fire its XHR requests before saving.
const DETAIL_PAGE_SETTLE_MS = 4000;

/**
 * Turns a `name=value; name2=value2` cookie string into puppeteer cookie objects.
 * Empty segments and segments without a name are skipped.
 */
const parseCookieHeader = (cookieHeader: string) =>
  cookieHeader
    .split(';')
    .map((segment) => segment.trim())
    .filter((segment) => segment.indexOf('=') > 0)
    .map((segment) => {
      const eq = segment.indexOf('=');
      return {
        name: segment.slice(0, eq),
        value: segment.slice(eq + 1),
        domain: 'star.toutiao.com',
      };
    });

/** Reads the `data` field of the JSON body of a finished request. */
const readResponseData = async (request: any): Promise<any> => {
  const response = await request.response();
  const body = await response.json();
  return body.data;
};

/** Maps the raw author_page payload to the argument passed to `saveSvr.saveAuthor`. */
const toAuthorParam = (author: Record<string, any>) => {
  const tags: Array<string> = JSON.parse(author.tags);
  const styleTags: Array<string> = JSON.parse(author.tags_author_style);
  return {
    avatarUri: author.avatar_uri,
    city: author.city,
    follower: author.follower,
    gender: author.gender,
    id: author.id,
    nickName: author.nick_name,
    priceInfo: JSON.stringify(author.price_info) + '',
    province: author.province,
    shortId: author.short_id,
    tags: JSON.stringify(tags.concat(styleTags)) + '',
    coreUserId: author.core_user_id,
  };
};

/**
 * Opens one author's detail page in a headless browser, captures the JSON of
 * the author_page / author_daily_fans / author_item_data XHR responses and
 * saves whatever was captured through `saveSvr`.
 *
 * Results are published through the module-level `requestDataList`, which is
 * replaced with a fresh object on every call so data from a previous author
 * can never be saved for (or mistaken as the outcome of) the current one.
 * `requestDataList.param` is set to the string 'error' when the author_page
 * response cannot be processed; the caller uses that marker to switch cookies.
 *
 * Nothing is fetched when `checkAuthorCoreUserId` returns a truthy value.
 * The returned promise always resolves; failures are logged instead of thrown.
 *
 * @param authorId 作者id (author id)
 * @param cookies_str cookie string in `name=value; name2=value2` form
 */
const loopDetailList = async (
  authorId: string,
  cookies_str: string
): Promise<any> => {
  // Fresh container per call; handlers write to this object only, so a late
  // response from this page cannot leak into a later author's data.
  const captured: Record<string, any> = {};
  requestDataList = captured;

  let browser: any;
  try {
    const existing = await checkAuthorCoreUserId(authorId);
    log.info(`checkAuthorCoreUserId-${authorId}`, existing);
    if (existing) {
      return;
    }

    browser = await puppeteer.launch();
    const tab = await browser.newPage();
    await tab.setCookie(...parseCookieHeader(cookies_str));

    tab.on('requestfinished', (request: any) => {
      if (request.resourceType() !== 'xhr') {
        return;
      }
      const url: string = request.url();

      if (url.includes(AUTHOR_PAGE_API)) {
        void (async () => {
          try {
            captured.param = toAuthorParam(await readResponseData(request));
          } catch (err) {
            log.error(`saveAuthor-${authorId}`, err);
            // Marker read by the caller to trigger a cookie switch.
            captured.param = 'error';
          }
        })();
      }
      if (url.includes(AUTHOR_DAILY_FANS_API)) {
        void (async () => {
          try {
            captured.dailyFans = await readResponseData(request);
          } catch (err) {
            log.error(`saveDailyFans-${authorId}`, err);
          }
        })();
      }
      if (url.includes(AUTHOR_ITEM_DATA_API)) {
        void (async () => {
          try {
            captured.itemData = await readResponseData(request);
          } catch (err) {
            log.error(`saveItemData-${authorId}`, err);
          }
        })();
      }
    });

    // 进入页面 (open the author detail page)
    await tab.goto(
      `https://star.toutiao.com/ad#/author/douyin/${authorId}/?recommend=false`
    );
    // Plain timer instead of the deprecated page.waitFor(ms).
    await new Promise((done) => setTimeout(done, DETAIL_PAGE_SETTLE_MS));

    // Never persist the 'error' marker as if it were author data.
    if (captured.param && captured.param !== 'error') {
      try {
        await saveSvr.saveAuthor(captured.param);
      } catch (err) {
        log.error(`${authorId}-saveAuthor`, captured.param);
      }
    }
    if (captured.dailyFans) {
      try {
        await saveSvr.saveDailyFans(authorId, captured.dailyFans.daily);
      } catch (err) {
        log.error(`${authorId}-saveDailyFans`, captured.dailyFans);
      }
    }
    if (captured.itemData) {
      try {
        await saveSvr.saveItemData(authorId, captured.itemData.description);
      } catch (err) {
        log.error(`${authorId}-saveItemData`, captured.itemData);
      }
    }
    log.info(`--------------${authorId}-执行完毕`);
  } catch (e) {
    log.error('doPuppeteer-error', e);
  } finally {
    // Always release the browser, including when navigation or saving threw.
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        log.error('doPuppeteer-error', closeErr);
      }
    }
  }
};
/**
 * Asks the save service whether a coreUserId is already stored for the author.
 * Thin pass-through to `saveSvr.checkAuthorCoreUserId`; the awaited result is
 * returned unchanged.
 * @param authorId author id to look up
 */
const checkAuthorCoreUserId = async (authorId: string) => {
  const lookupResult = await saveSvr.checkAuthorCoreUserId(authorId);
  return lookupResult;
};
/**
 * Fetches and saves the detail data of every author on one list page, one
 * author at a time.
 *
 * After each `loopDetailList` call the shared `requestDataList.param` is
 * inspected: the value 'error' means the fetch failed, in which case the next
 * cookie in `cookieList` is selected (wrapping around) and the SAME author is
 * retried. An author is retried at most once per available cookie; if every
 * attempt fails the author is skipped so the run cannot spin forever.
 *
 * @param arr author ids of the current list page
 * @param cookieList cookie strings to rotate through on failure
 * @returns a summary message once every author on the page has been handled
 */
export const doSaveUserInfoTask = async (
  arr: string[],
  cookieList: string[]
): Promise<any> => {
  // One attempt per cookie (at least one attempt when no cookie is supplied).
  const maxAttempts = Math.max(cookieList.length, 1);
  let index = 0;
  let attempts = 0;

  while (index < arr.length) {
    log.info(
      `当前位置:------------------第${page}页第${index+1}条---${arr[index]}, 共${arr.length}条记录`
    );
    await loopDetailList(arr[index], cookieList[cookieIndex]);

    if (requestDataList.param !== 'error') {
      index++;
      attempts = 0;
      continue;
    }

    attempts++;
    // Rotate to the next cookie; guard against an empty list (modulo by zero).
    if (cookieList.length > 0) {
      cookieIndex = (cookieIndex + 1) % cookieList.length;
    }

    if (attempts >= maxAttempts) {
      log.error(
        `当前位置:------------------第${page}页第${index+1}条---${arr[index]},所有cookie均已尝试仍失败,跳过该达人`
      );
      index++;
      attempts = 0;
      continue;
    }

    // `index` is left untouched so the same author is retried with the new cookie.
    log.error(
      `当前位置:------------------第${page}页第${index+1}条---${arr[index]},发生错误,更换cookie`
    );
  }

  return `第${page}页${arr.length}条数据已经保存完毕`;
};
上次更新: 2021/12/19, 18:05:42