扇贝单词逆向
...大约 6 分钟
扇贝单词逆向
前言:
由于想要获取一些单词信息,并记录自己的单词学习情况,于是对扇贝单词网页版的接口做了一次逆向分析。
1、分析网站
请求头分析
"authority": "apiv3.shanbay.com",
"accept": "application/json, text/plain, */*",
"accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
"origin": "https://web.shanbay.com",
"referer": "https://web.shanbay.com/",
"sec-ch-ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"x-csrftoken": "4a08ae479d17443e5e68ef71613ff1be"
观察所有的header,其中比较有可能是加密请求参数的只有X-Csrftoken。
通过定位和搜索可以找关键位置
r.isStandardBrowserEnv()) {
var y = n(987)
, g = (e.withCredentials || c(m)) && e.xsrfCookieName ? y.read(e.xsrfCookieName) : void 0;
g && (d[e.xsrfHeaderName] = g)
}
其中g就是我们想要的参数,通过分析可以看到g = y.read(e.xsrfCookieName)
其中y.read为
// Looks up the cookie named `e` in document.cookie.
// Returns its URI-decoded value, or null when no such cookie is present.
read: function(e) {
// Capture group 3 holds the raw cookie value (everything up to the next ';').
var t = document.cookie.match(new RegExp("(^|;\\s*)(" + e + ")=([^;]*)"));
return t ? decodeURIComponent(t[3]) : null
},
通过分析可知,该参数是从cookie中读取的,并不是加密参数,在程序运行前直接从浏览器的cookie中复制即可。
import requests
# Request headers captured from the browser session against apiv3.shanbay.com.
# Values that contain double quotes are single-quoted so no escaping is needed.
headers = {
    "authority": "apiv3.shanbay.com",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "origin": "https://web.shanbay.com",
    "referer": "https://web.shanbay.com/",
    "sec-ch-ua": '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
    # Same value as the "csrftoken" cookie; the site copies it from the cookie.
    "x-csrftoken": "4a08ae479d17443e5e68ef71613ff1be",
}
# Cookies captured from the logged-in browser session.
# NOTE: auth_token and csrftoken are session-specific; substitute your own.
cookies = {
    "sajssdk_2015_cross_new_user": "1",
    "__utma": "183787513.474190524.1686565691.1686565691.1686565691.1",
    "__utmc": "183787513",
    "__utmz": "183787513.1686565691.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)",
    "__utmb": "183787513.1.10.1686565691",
    "_ga": "GA1.2.474190524.1686565691",
    # Three dot-separated segments, written as adjacent literals for readability.
    "auth_token": (
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
        ".eyJpZCI6OTMyNDI5MjYsImV4cCI6MTY4NzQzMDAyMCwiZXhwX3YyIjoxNjg3NDMwMDIwLCJkZXZpY2UiOiIiLCJ1c2VybmFtZSI6Im1vYmlsZV9jMzI4MjNiYWI4IiwiaXNfc3RhZmYiOjAsInNlc3Npb25faWQiOiJlNjYwZDUzZTA5MGIxMWVlYjVjMWZlMmJkMjY3MTFmNSJ9"
        ".xL7QJ-wzW7K-SC3grzoOsBwKQvBuAjx1W6uwDPCmzww"
    ),
    # The x-csrftoken request header carries this same value.
    "csrftoken": "4a08ae479d17443e5e68ef71613ff1be",
    "sensorsdata2015jssdkcross": "%7B%22distinct_id%22%3A%22vzudct%22%2C%22%24device_id%22%3A%22188af261ddceaf-05ff5ebf672ef7-26031d51-3686400-188af261ddd5c1%22%2C%22props%22%3A%7B%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.google.com%2F%22%2C%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%2C%22first_id%22%3A%22188af261ddceaf-05ff5ebf672ef7-26031d51-3686400-188af261ddd5c1%22%7D",
}
# Request the first page of the "unlearned_items" endpoint using the captured
# browser headers and cookies.
url = "https://apiv3.shanbay.com/wordsapp/user_material_books/jjbtq/learning/words/unlearned_items"
params = {
    "ipp": "10",  # presumably items per page -- confirm against the API
    "page": "1",
}
# requests has no default timeout; without one a stalled connection blocks forever.
response = requests.get(url, headers=headers, cookies=cookies, params=params, timeout=10)
# Print the status object first and the raw (still encoded) body second.
print(response)
print(response.text)
然后尝试直接发起请求,请求成功。
<Response [200]>
{"data":"NZIZHRKXMLHW3B5H..."}
此时我们可以看到返回的结果为加密的,下一步解密。
2、解析返回结果data
观察返回值,比较像base64编码,于是先直接用base64解码,不出所料失败了,应该是在标准base64之外又做了一层自定义处理。
于是根据该请求的启动器(Initiator)调用栈打断点调试,找到解密位置
// Passes the string `e` through window.bays4.d and parses the returned text as JSON.
Na = function(e) {
return JSON.parse(window.bays4.d(e))
}
其中window.bays4.d(e)为解密方法。
单步执行进去可以看到
// Method "d": turns the encoded string `t` into plain text.
// Returns "" when _checkVersion(t) rejects the input.
key: "d",
value: function(t) {
if (!this._checkVersion(t))
return "";
var e = new a.default;
// The decoder is initialised from the first four characters of the input.
e.init(t.substr(0, 4));
// Stage 1: decode with the a.default instance; stage 2: Base64-decode that result.
var r = e.decode(t);
return u.Base64.decode(r)
}
确认解密位置无疑了。
接下来的步骤就是抠代码了:把解密相关的JS抠出来保存为shanbei.js,并暴露一个shanbeiDecode函数供Python调用。该网站比较单纯,不需要补环境,抠完就能跑。
完整代码
import json
import requests
import execjs
# Request headers captured from the browser session against apiv3.shanbay.com.
# Values that contain double quotes are single-quoted so no escaping is needed.
headers = {
    "authority": "apiv3.shanbay.com",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "origin": "https://web.shanbay.com",
    "referer": "https://web.shanbay.com/",
    "sec-ch-ua": '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
    # Same value as the "csrftoken" cookie; the site copies it from the cookie.
    "x-csrftoken": "4a08ae479d17443e5e68ef71613ff1be",
}
# Cookies captured from the logged-in browser session.
# NOTE: auth_token and csrftoken are session-specific; substitute your own.
cookies = {
    "sajssdk_2015_cross_new_user": "1",
    "__utma": "183787513.474190524.1686565691.1686565691.1686565691.1",
    "__utmc": "183787513",
    "__utmz": "183787513.1686565691.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)",
    "__utmb": "183787513.1.10.1686565691",
    "_ga": "GA1.2.474190524.1686565691",
    # Three dot-separated segments, written as adjacent literals for readability.
    "auth_token": (
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
        ".eyJpZCI6OTMyNDI5MjYsImV4cCI6MTY4NzQzMDAyMCwiZXhwX3YyIjoxNjg3NDMwMDIwLCJkZXZpY2UiOiIiLCJ1c2VybmFtZSI6Im1vYmlsZV9jMzI4MjNiYWI4IiwiaXNfc3RhZmYiOjAsInNlc3Npb25faWQiOiJlNjYwZDUzZTA5MGIxMWVlYjVjMWZlMmJkMjY3MTFmNSJ9"
        ".xL7QJ-wzW7K-SC3grzoOsBwKQvBuAjx1W6uwDPCmzww"
    ),
    # The x-csrftoken request header carries this same value.
    "csrftoken": "4a08ae479d17443e5e68ef71613ff1be",
    "sensorsdata2015jssdkcross": "%7B%22distinct_id%22%3A%22vzudct%22%2C%22%24device_id%22%3A%22188af261ddceaf-05ff5ebf672ef7-26031d51-3686400-188af261ddd5c1%22%2C%22props%22%3A%7B%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.google.com%2F%22%2C%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%2C%22first_id%22%3A%22188af261ddceaf-05ff5ebf672ef7-26031d51-3686400-188af261ddd5c1%22%7D",
}
def start():
    """Fetch one page of unlearned words and print the decoded payload.

    Compiles the extracted decoder from ``shanbei.js``, requests the endpoint
    with the module-level ``headers`` and ``cookies``, and prints the response
    object and its raw body. On HTTP 200 the ``data`` field is passed to the
    ``shanbeiDecode`` JavaScript function and the resulting JSON text is
    pretty-printed; on any other status nothing further is printed.
    """
    # Read the script inside a context manager so the file handle is closed.
    with open("shanbei.js", "r", encoding="utf-8") as js_file:
        context = execjs.compile(js_file.read())

    url = "https://apiv3.shanbay.com/wordsapp/user_material_books/jjbtq/learning/words/unlearned_items"
    params = {
        "ipp": "10",  # presumably items per page -- confirm against the API
        "page": "1",
    }
    # requests has no default timeout; without one a stalled connection blocks forever.
    response = requests.get(url, headers=headers, cookies=cookies, params=params, timeout=10)
    print(response)
    print(response.text)

    if response.status_code != 200:
        return

    # The body is {"data": "<encoded string>"}; the JS helper returns JSON text.
    decoded = context.call("shanbeiDecode", response.json()["data"])
    # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX escapes.
    print(json.dumps(json.loads(decoded), sort_keys=True, indent=4,
                     separators=(",", ":"), ensure_ascii=False))


if __name__ == "__main__":
    start()
至此逆向完毕。
Powered by Waline v2.15.5