fix yandex web
This commit is contained in:
parent
3e1487e614
commit
f73b5f0298
|
@ -14,7 +14,7 @@ class yandex{
|
||||||
// backend included in the scraper functions
|
// backend included in the scraper functions
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = [], $nsfw){
|
private function get($proxy, $url, $get = [], $nsfw, $get_cookie = 1){
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
|
@ -25,19 +25,55 @@ class yandex{
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
curl_setopt($curlproc, CURLOPT_URL, $url);
|
||||||
|
|
||||||
|
// extract "i" cookie
|
||||||
|
if($get_cookie === 0){
|
||||||
|
|
||||||
|
$cookies_tmp = [];
|
||||||
|
curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
|
||||||
|
|
||||||
|
$length = strlen($header);
|
||||||
|
|
||||||
|
$header = explode(":", $header, 2);
|
||||||
|
|
||||||
|
if(trim(strtolower($header[0])) == "set-cookie"){
|
||||||
|
|
||||||
|
$cookie_tmp = explode("=", trim($header[1]), 2);
|
||||||
|
|
||||||
|
$cookies_tmp[trim($cookie_tmp[0])] =
|
||||||
|
explode(";", $cookie_tmp[1], 2)[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
return $length;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
switch($nsfw){
|
switch($nsfw){
|
||||||
case "yes": $nsfw = "0"; break;
|
case "yes": $nsfw = "0"; break;
|
||||||
case "maybe": $nsfw = "1"; break;
|
case "maybe": $nsfw = "1"; break;
|
||||||
case "no": $nsfw = "2"; break;
|
case "no": $nsfw = "2"; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch($get_cookie){
|
||||||
|
|
||||||
|
case 0:
|
||||||
|
$cookie = "";
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
$cookie = "Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
$cookie = "Cookie: i=" . $get_cookie;
|
||||||
|
}
|
||||||
|
|
||||||
$headers =
|
$headers =
|
||||||
["User-Agent: " . config::USER_AGENT,
|
["User-Agent: " . config::USER_AGENT,
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
"Accept-Encoding: gzip",
|
"Accept-Encoding: gzip",
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"DNT: 1",
|
"DNT: 1",
|
||||||
"Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw,
|
$cookie,
|
||||||
"Referer: https://yandex.com/images/search",
|
"Referer: https://yandex.com/images/search",
|
||||||
"Connection: keep-alive",
|
"Connection: keep-alive",
|
||||||
"Upgrade-Insecure-Requests: 1",
|
"Upgrade-Insecure-Requests: 1",
|
||||||
|
@ -59,6 +95,17 @@ class yandex{
|
||||||
|
|
||||||
$data = curl_exec($curlproc);
|
$data = curl_exec($curlproc);
|
||||||
|
|
||||||
|
if($get_cookie === 0){
|
||||||
|
|
||||||
|
if(isset($cookies_tmp["i"])){
|
||||||
|
|
||||||
|
return $cookies_tmp["i"];
|
||||||
|
}else{
|
||||||
|
|
||||||
|
throw new Exception("Failed to get Yandex clearance cookie");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(curl_errno($curlproc)){
|
if(curl_errno($curlproc)){
|
||||||
|
|
||||||
throw new Exception(curl_error($curlproc));
|
throw new Exception(curl_error($curlproc));
|
||||||
|
@ -217,6 +264,23 @@ class yandex{
|
||||||
// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
|
// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
|
||||||
// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
|
// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
|
||||||
|
|
||||||
|
// get clearance cookie
|
||||||
|
if(($cookie = apcu_fetch("yandexweb_cookie")) === false){
|
||||||
|
|
||||||
|
$proxy = $this->backend->get_ip();
|
||||||
|
|
||||||
|
$cookie =
|
||||||
|
$this->get(
|
||||||
|
$proxy,
|
||||||
|
"https://yandex.ru/support2/smart-captcha/ru/",
|
||||||
|
[],
|
||||||
|
false,
|
||||||
|
0
|
||||||
|
);
|
||||||
|
|
||||||
|
apcu_store("yandexweb_cookie", $cookie);
|
||||||
|
}
|
||||||
|
|
||||||
if($get["npt"]){
|
if($get["npt"]){
|
||||||
|
|
||||||
[$npt, $proxy] = $this->backend->get($get["npt"], "web");
|
[$npt, $proxy] = $this->backend->get($get["npt"], "web");
|
||||||
|
@ -226,7 +290,8 @@ class yandex{
|
||||||
$proxy,
|
$proxy,
|
||||||
"https://yandex.com" . $npt,
|
"https://yandex.com" . $npt,
|
||||||
[],
|
[],
|
||||||
"yes"
|
"yes",
|
||||||
|
$cookie
|
||||||
);
|
);
|
||||||
}else{
|
}else{
|
||||||
|
|
||||||
|
@ -236,7 +301,7 @@ class yandex{
|
||||||
throw new Exception("Search term is empty!");
|
throw new Exception("Search term is empty!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$proxy = $this->backend->get_ip();
|
$proxy = !isset($proxy) ? $this->backend->get_ip() : $proxy;
|
||||||
$lang = $get["lang"];
|
$lang = $get["lang"];
|
||||||
$older = $get["older"];
|
$older = $get["older"];
|
||||||
$newer = $get["newer"];
|
$newer = $get["newer"];
|
||||||
|
@ -283,7 +348,8 @@ class yandex{
|
||||||
$proxy,
|
$proxy,
|
||||||
"https://yandex.com/search/site/",
|
"https://yandex.com/search/site/",
|
||||||
$params,
|
$params,
|
||||||
"yes"
|
"yes",
|
||||||
|
$cookie
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
@ -314,6 +380,19 @@ class yandex{
|
||||||
|
|
||||||
$this->fuckhtml->load($html);
|
$this->fuckhtml->load($html);
|
||||||
|
|
||||||
|
// Scrape page blocked error
|
||||||
|
$title =
|
||||||
|
$this->fuckhtml
|
||||||
|
->getElementsByTagName("title");
|
||||||
|
|
||||||
|
if(
|
||||||
|
count($title) !== 0 &&
|
||||||
|
$title[0]["innerHTML"] == "403"
|
||||||
|
){
|
||||||
|
|
||||||
|
throw new Exception("Yandex blocked this proxy or 4get instance.");
|
||||||
|
}
|
||||||
|
|
||||||
// get nextpage
|
// get nextpage
|
||||||
$npt =
|
$npt =
|
||||||
$this->fuckhtml
|
$this->fuckhtml
|
||||||
|
|
Loading…
Reference in New Issue