Files
4get/scraper/yandex.php
2026-05-02 17:44:53 -04:00

1108 lines
29 KiB
PHP

<?php
class yandex{
/*
curl functions
*/
/*
	Load the HTML parser and the backend helper used by the scraper methods.
	
	Only the parser is instantiated here; each of web(), image() and video()
	creates its own backend object.
*/
public function __construct(){
	
	// include_once instead of include: both files declare classes, and
	// declaring a class a second time (second instantiation, or a file
	// that was already loaded elsewhere) is a fatal error
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
	
	// backend is instantiated in the scraper functions
	include_once "lib/backend.php";
}
/*
	Perform a GET request against Yandex.
	
	$proxy       proxy descriptor, handed to backend::assign_proxy() untouched
	$url         target URL without a query string
	$get         query parameters, appended to $url when the array is not empty.
	             The former "= []" default could never be used because a
	             required parameter follows it, so it has been dropped.
	$nsfw        "yes", "maybe" or "no"; translated to the family-filter level
	             written into the "yp" cookie. Other values are used as-is.
	$get_cookie  0 (int)  send no cookie and return the value of the "i" cookie
	                      set by the response instead of the response body
	             1 (int)  send the "yp" settings cookie (default)
	             other    value is sent verbatim as the "i" cookie
	
	Returns the response body, or the "i" cookie value in mode 0.
	Throws Exception on a cURL error, or when mode 0 yields no "i" cookie.
*/
private function get($proxy, $url, $get, $nsfw, $get_cookie = 1){
	
	$curlproc = curl_init();
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	
	// extract "i" cookie: collect every Set-Cookie name/value pair
	$cookies_tmp = [];
	
	if($get_cookie === 0){
		
		curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
			
			// cURL requires the callback to return the number of bytes handled
			$length = strlen($header);
			
			$header = explode(":", $header, 2);
			
			if(
				count($header) === 2 &&
				trim(strtolower($header[0])) == "set-cookie"
			){
				
				$cookie_tmp = explode("=", trim($header[1]), 2);
				
				// ignore malformed cookies that carry no "=" separator
				if(count($cookie_tmp) === 2){
					
					$cookies_tmp[trim($cookie_tmp[0])] =
						explode(";", $cookie_tmp[1], 2)[0];
				}
			}
			
			return $length;
		});
	}
	
	// translate the filter value to the level expected in the "yp" cookie
	$family = [
		"yes" => "0",
		"maybe" => "1",
		"no" => "2"
	];
	
	if(
		is_string($nsfw) &&
		isset($family[$nsfw])
	){
		
		$nsfw = $family[$nsfw];
	}
	
	// strict comparisons: the mode must not depend on what a cookie string
	// happens to look like under loose string/number comparison
	if($get_cookie === 0){
		
		$cookie = "";
	}elseif($get_cookie === 1){
		
		$cookie = "Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw;
	}else{
		
		$cookie = "Cookie: i=" . $get_cookie;
	}
	
	$headers =
		["User-Agent: " . config::USER_AGENT,
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
		"Accept-Encoding: gzip",
		"Accept-Language: en-US,en;q=0.5",
		"DNT: 1"];
	
	// do not hand an empty string to cURL as a header line
	if($cookie !== ""){
		
		$headers[] = $cookie;
	}
	
	$headers[] = "Referer: https://yandex.com/images/search";
	$headers[] = "Connection: keep-alive";
	$headers[] = "Upgrade-Insecure-Requests: 1";
	$headers[] = "Sec-Fetch-Dest: document";
	$headers[] = "Sec-Fetch-Mode: navigate";
	$headers[] = "Sec-Fetch-Site: cross-site";
	
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$data = curl_exec($curlproc);
	
	// read the error state first so the handle can be released on every path
	$errno = curl_errno($curlproc);
	$error = curl_error($curlproc);
	
	curl_close($curlproc);
	
	if($get_cookie === 0){
		
		// a cookie received before a late transfer error is still usable
		if(isset($cookies_tmp["i"])){
			
			return $cookies_tmp["i"];
		}
		
		// report the transport error rather than a misleading cookie error
		if($errno){
			
			throw new Exception($error);
		}
		
		throw new Exception("Failed to get Yandex clearance cookie");
	}
	
	if($errno){
		
		throw new Exception($error);
	}
	
	return $data;
}
/*
	Describe the search filters available for a page type.
	
	$pagetype is "web", "images" or "videos". The result maps each filter
	name to its display label and its options (either a value => label
	list or the "_DATE" marker). Nothing is returned for any other type.
*/
public function getfilters($pagetype){
	
	// image and video searches share the same NSFW selector
	$nsfw_filter = [
		"display" => "NSFW",
		"option" => [
			"yes" => "Yes",
			"maybe" => "Maybe",
			"no" => "No"
		]
	];
	
	if($pagetype == "web"){
		
		$languages = [
			"any" => "Any language",
			"en" => "English",
			"ru" => "Russian",
			"be" => "Belorussian",
			"fr" => "French",
			"de" => "German",
			"id" => "Indonesian",
			"kk" => "Kazakh",
			"tt" => "Tatar",
			"tr" => "Turkish",
			"uk" => "Ukrainian"
		];
		
		return [
			"lang" => ["display" => "Language", "option" => $languages],
			"newer" => ["display" => "Newer than", "option" => "_DATE"],
			"older" => ["display" => "Older than", "option" => "_DATE"]
		];
	}
	
	if($pagetype == "images"){
		
		$filters = [];
		
		$filters["nsfw"] = $nsfw_filter;
		
		$filters["time"] = [
			"display" => "Time posted",
			"option" => [
				"any" => "Any time",
				"week" => "Last week"
			]
		];
		
		$filters["size"] = [
			"display" => "Size",
			"option" => [
				"any" => "Any size",
				"small" => "Small",
				"medium" => "Medium",
				"large" => "Large",
				"wallpaper" => "Wallpaper"
			]
		];
		
		$filters["color"] = [
			"display" => "Colors",
			"option" => [
				"any" => "All colors",
				"color" => "Color images only",
				"gray" => "Black and white",
				"red" => "Red",
				"orange" => "Orange",
				"yellow" => "Yellow",
				"cyan" => "Cyan",
				"green" => "Green",
				"blue" => "Blue",
				"violet" => "Purple",
				"white" => "White",
				"black" => "Black"
			]
		];
		
		$filters["type"] = [
			"display" => "Type",
			"option" => [
				"any" => "All types",
				"photo" => "Photos",
				"clipart" => "White background",
				"lineart" => "Drawings and sketches",
				"face" => "People",
				"demotivator" => "Demotivators"
			]
		];
		
		$filters["layout"] = [
			"display" => "Layout",
			"option" => [
				"any" => "All layouts",
				"horizontal" => "Horizontal",
				"vertical" => "Vertical",
				"square" => "Square"
			]
		];
		
		$filters["format"] = [
			"display" => "Format",
			"option" => [
				"any" => "Any format",
				"jpeg" => "JPEG",
				"png" => "PNG",
				"gif" => "GIF"
			]
		];
		
		return $filters;
	}
	
	if($pagetype == "videos"){
		
		return [
			"nsfw" => $nsfw_filter,
			"time" => [
				"display" => "Time posted",
				"option" => [
					"any" => "Any time",
					"9" => "Recently"
				]
			],
			"duration" => [
				"display" => "Duration",
				"option" => [
					"any" => "Any duration",
					"short" => "Short"
				]
			]
		];
	}
}
/*
	Scrape Yandex web results through the embeddable "site search" page.
	
	$get keys read here:
		"npt"    next page token; when set, the other keys are ignored
		"s"      search term
		"lang"   language code, or "any"
		"newer"  timestamp lower bound, or false
		"older"  timestamp upper bound, or false
	
	Returns the result array ("status", "spelling", "npt", "answer", "web",
	"image", "video", "news", "related"); only "web" and "npt" are filled.
	
	Throws Exception on an empty search term, a failed request or when
	Yandex answers with its 403 page.
*/
public function web($get){
	
	$this->backend = new backend("yandex_w");
	
	// has captcha
	// https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
	
	// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
	// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
	
	// get clearance cookie (cached in APCu without an expiry)
	if(($cookie = apcu_fetch("yandexweb_cookie")) === false){
		
		$proxy = $this->backend->get_ip();
		
		$cookie =
			$this->get(
				$proxy,
				"https://yandex.ru/support2/smart-captcha/ru/",
				[],
				false,
				0
			);
		
		apcu_store("yandexweb_cookie", $cookie);
	}
	
	if($get["npt"]){
		
		// the token stores the relative URL of the next page
		[$npt, $proxy] = $this->backend->get($get["npt"], "web");
		
		$html =
			$this->get(
				$proxy,
				"https://yandex.com" . $npt,
				[],
				"yes",
				$cookie
			);
	}else{
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		// reuse the proxy that fetched the clearance cookie, if any
		$proxy = !isset($proxy) ? $this->backend->get_ip() : $proxy;
		$lang = $get["lang"];
		$older = $get["older"];
		$newer = $get["newer"];
		
		$params = [
			"text" => $search,
			"web" => "1",
			"frame" => "1",
			"searchid" => "3131712"
		];
		
		if($lang != "any"){
			
			$params["lang"] = $lang;
		}
		
		// an upper bound alone still needs a lower bound: start at the epoch
		if(
			$newer === false &&
			$older !== false
		){
			
			$newer = 0;
		}
		
		if($newer !== false){
			
			$params["from_day"] = date("j", $newer);
			$params["from_month"] = date("n", $newer);
			$params["from_year"] = date("Y", $newer);
			
			// a lower bound alone ends today
			if($older === false){
				
				$older = time();
			}
			
			$params["to_day"] = date("j", $older);
			$params["to_month"] = date("n", $older);
			$params["to_year"] = date("Y", $older);
		}
		
		try{
			$html =
				$this->get(
					$proxy,
					"https://yandex.com/search/site/",
					$params,
					"yes",
					$cookie
				);
		}catch(Exception $error){
			
			// keep the underlying error attached for debugging
			throw new Exception("Could not get search page", 0, $error);
		}
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	$this->fuckhtml->load($html);
	
	// Scrape page blocked error
	$title =
		$this->fuckhtml
		->getElementsByTagName("title");
	
	if(
		count($title) !== 0 &&
		$title[0]["innerHTML"] == "403"
	){
		
		throw new Exception("Yandex blocked this proxy or 4get instance.");
	}
	
	// get nextpage
	$npt =
		$this->fuckhtml
		->getElementsByClassName(
			"b-pager__next",
			"a"
		);
	
	if(
		count($npt) !== 0 &&
		isset($npt[0]["attributes"]["href"])
	){
		
		$out["npt"] =
			$this->backend->store(
				$this->fuckhtml
				->getTextContent(
					$npt[0]["attributes"]["href"]
				),
				"web",
				$proxy
			);
	}
	
	// get items
	$items =
		$this->fuckhtml
		->getElementsByClassName(
			"b-serp-item",
			"li"
		);
	
	foreach($items as $item){
		
		$this->fuckhtml->load($item);
		
		$links =
			$this->fuckhtml
			->getElementsByClassName(
				"b-serp-item__title-link",
				"a"
			);
		
		// a result without a linked title cannot be presented: skip it
		// instead of dereferencing a missing element
		if(
			count($links) === 0 ||
			!isset($links[0]["attributes"]["href"])
		){
			
			continue;
		}
		
		$link = $links[0];
		
		$texts =
			$this->fuckhtml
			->getElementsByClassName(
				"b-serp-item__text",
				"div"
			);
		
		// the snippet text is optional
		$description = null;
		
		if(count($texts) !== 0){
			
			$description =
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$texts[0]
					)
				);
		}
		
		$out["web"][] = [
			"title" =>
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$link
					)
				),
			"description" => $description,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$link["attributes"]["href"]
				),
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
/*
	Scrape Yandex image search through its JSON (ajax) endpoint.
	
	$get keys read here:
		"npt"     next page token; when set, the other keys are ignored
		"s"       search term
		"nsfw"    "yes", "maybe" or "no"
		"time", "size", "color", "type", "layout", "format"
		          filter values as listed by getfilters("images")
	
	Returns ["status" => "ok", "npt" => token or null, "image" => [...]],
	each image holding "title", "url" and a "source" list ordered as
	preview, duplicates, thumbnail.
	
	Throws Exception on an empty search term, a failed request, undecodable
	JSON, a captcha response or when no result data is found in the page.
*/
public function image($get){
	
	$this->backend = new backend("yandex_i");
	
	if($get["npt"]){
		
		// the token stores the previous request plus the nsfw setting
		[$request, $proxy] =
			$this->backend->get(
				$get["npt"],
				"images"
			);
		
		$request = json_decode($request, true);
		
		$nsfw = $request["nsfw"];
		unset($request["nsfw"]);
	}else{
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$proxy = $this->backend->get_ip();
		$nsfw = $get["nsfw"];
		$time = $get["time"];
		$size = $get["size"];
		$color = $get["color"];
		$type = $get["type"];
		$layout = $get["layout"];
		$format = $get["format"];
		
		// Query parameters observed on yandex.com/images/search per filter:
		//   size         isize=small|medium|large
		//   wallpaper    isize=wallpaper&wp=wh16x9_1920x1080
		//   orientation  iorient=horizontal|vertical|square
		//   type         type=photo|clipart|lineart|face|demotivator
		//   color        icolor=color|gray|red|orange|yellow|cyan|green|blue|violet|white|black
		//   format       itype=jpg|png|gifan
		//   recent       recent=7D
		
		// every requested block uses empty params and version 2
		$blocks = [];
		
		foreach(
			[
				"extra-content",
				"i-global__params:ajax",
				"search2:ajax",
				"preview__isWallpaper",
				"content_type_search",
				"serp-controller",
				"cookies_ajax",
				"advanced-search-block"
			] as $block_name
		){
			
			$blocks[] = [
				"block" => $block_name,
				"params" => (object)[],
				"version" => 2
			];
		}
		
		$request = [
			"format" => "json",
			"request" => [
				"blocks" => $blocks,
				"metadata" => [
					"bundles" => [
						"lb" => "AS?(E<X120"
					],
					"assets" => [
						// las base
						"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;"
						// las default
						//"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;227.0=1;203.0=1;76fe94.0=1;215f96.0=1;75.0=1"
					],
					"extraContent" => [
						"names" => [
							"i-react-ajax-adapter"
						]
					]
				]
			]
		];
		
		/*
			Apply filters
		*/
		if($time == "week"){
			
			$request["recent"] = "7D";
		}
		
		if($size != "any"){
			
			$request["isize"] = $size;
			
			// the wallpaper size is requested together with a resolution
			if($size == "wallpaper"){
				
				$request["wp"] = "wh16x9_1920x1080";
			}
		}
		
		if($type != "any"){
			
			$request["type"] = $type;
		}
		
		if($color != "any"){
			
			$request["icolor"] = $color;
		}
		
		if($layout != "any"){
			
			$request["iorient"] = $layout;
		}
		
		if($format != "any"){
			
			// the endpoint names two formats differently from the filter values
			$itype = [
				"jpeg" => "jpg",
				"gif" => "gifan"
			];
			
			$request["itype"] = $itype[$format] ?? $format;
		}
		
		$request["text"] = $search;
		$request["uinfo"] = "sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080";
		
		$request["request"] = json_encode($request["request"]);
	}
	
	try{
		// no 5th argument: get() then sends the "yp" cookie, which is the
		// only place the nsfw setting is transmitted
		$json = $this->get(
			$proxy,
			"https://yandex.com/images/search",
			$request,
			$nsfw
		);
	}catch(Exception $err){
		
		throw new Exception("Failed to get JSON", 0, $err);
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(
		isset($json["type"]) &&
		$json["type"] == "captcha"
	){
		
		throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	// get html
	$html = "";
	$has_next_page = false;
	
	$blocks =
		(isset($json["blocks"]) && is_array($json["blocks"])) ?
		$json["blocks"] : [];
	
	foreach($blocks as $block){
		
		if(isset($block["html"])){
			
			$html .= $block["html"];
		}
		
		if(!empty($block["params"]["nextPageUrl"])){
			
			$has_next_page = true;
		}
	}
	
	// get next page: advance the page counter once, however many blocks
	// advertise a next page
	if($has_next_page){
		
		$request["nsfw"] = $nsfw;
		$request["p"] = isset($request["p"]) ? $request["p"] + 1 : 1;
		
		$out["npt"] =
			$this->backend->store(
				json_encode($request),
				"images",
				$proxy
			);
	}
	
	$this->fuckhtml->load($html);
	
	// get search results: the state is embedded as JSON in a data attribute
	$data = null;
	
	foreach(
		$this->fuckhtml
		->getElementsByClassName(
			"Root",
			"div"
		) as $div
	){
		
		if(isset($div["attributes"]["data-state"])){
			
			$tmp = json_decode(
				$this->fuckhtml
				->getTextContent(
					$div["attributes"]["data-state"]
				),
				true
			);
			
			if(isset($tmp["initialState"]["serpList"])){
				
				$data = $tmp;
				break;
			}
		}
	}
	
	if($data === null){
		
		throw new Exception("Failed to extract JSON");
	}
	
	$entities =
		(
			isset($data["initialState"]["serpList"]["items"]["entities"]) &&
			is_array($data["initialState"]["serpList"]["items"]["entities"])
		) ?
		$data["initialState"]["serpList"]["items"]["entities"] : [];
	
	foreach($entities as $image){
		
		// an entry without a page URL or preview cannot be presented
		if(
			!isset($image["snippet"]["url"]) ||
			!isset($image["viewerData"]["preview"][0]["url"])
		){
			
			continue;
		}
		
		$title = [];
		
		if(isset($image["snippet"]["title"])){
			
			$title[] = html_entity_decode($image["snippet"]["title"], ENT_QUOTES | ENT_HTML5);
		}
		
		if(isset($image["snippet"]["text"])){
			
			$title[] = html_entity_decode($image["snippet"]["text"], ENT_QUOTES | ENT_HTML5);
		}
		
		$tmp = [
			"title" =>
				$this->fuckhtml
				->getTextContent(
					$this->titledots(
						implode(": ", $title)
					)
				),
			"source" => [],
			"url" => htmlspecialchars_decode($image["snippet"]["url"])
		];
		
		// add preview URL
		$preview = $image["viewerData"]["preview"][0];
		
		$tmp["source"][] = [
			"url" => htmlspecialchars_decode($preview["url"]),
			"width" => (int)($preview["w"] ?? 0),
			"height" => (int)($preview["h"] ?? 0),
		];
		
		// add duplicates, when the entry lists any
		if(
			isset($image["viewerData"]["dups"]) &&
			is_array($image["viewerData"]["dups"])
		){
			
			foreach($image["viewerData"]["dups"] as $dup){
				
				if(!isset($dup["url"])){
					
					continue;
				}
				
				$tmp["source"][] = [
					"url" => htmlspecialchars_decode($dup["url"]),
					"width" => (int)($dup["w"] ?? 0),
					"height" => (int)($dup["h"] ?? 0),
				];
			}
		}
		
		// add thumbnail last; its URL may be protocol-relative
		if(isset($image["viewerData"]["thumb"]["url"])){
			
			$thumb = $image["viewerData"]["thumb"];
			
			$tmp["source"][] = [
				"url" =>
					preg_replace(
						'/^\/\//',
						"https://",
						htmlspecialchars_decode($thumb["url"])
					),
				"width" => (int)($thumb["w"] ?? 0),
				"height" => (int)($thumb["h"] ?? 0)
			];
		}
		
		$out["image"][] = $tmp;
	}
	
	return $out;
}
/*
	Scrape Yandex video search through its JSON endpoint.
	
	$get keys read here:
		"npt"       next page token; when set, the stored request replaces $get
		"s"         search term
		"nsfw"      "yes", "maybe" or "no"
		"time"      "any" or the value sent as "within"
		"duration"  "any" or the value sent as "duration"
		"p"         page number, only present in a stored request
	
	Returns ["status", "npt", "video", "author", "livestream", "playlist",
	"reel"]; only "video" and "npt" are filled.
	
	Throws Exception on an empty search term, an unusable page token, a
	failed request, undecodable JSON or a response without clip items.
*/
public function video($get){
	
	$this->backend = new backend("yandex_v");
	
	if($get["npt"]){
		
		// the token stores the whole previous $get plus the page number
		[$get, $proxy] =
			$this->backend->get(
				$get["npt"],
				"video"
			);
		
		$get = json_decode($get, true);
		
		if(!is_array($get)){
			
			throw new Exception("Could not decode next page token");
		}
	}else{
		
		if(strlen($get["s"]) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$proxy = $this->backend->get_ip();
	}
	
	// https://yandex.com/video/search?text=skycamefalling&from=tabbar&format=json&ncrnd=7271&p=0&parent-reqid=&request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]}&serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL&yu=3091577281773194415&tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
	// the "request" parameter decodes to {"blocks":[{"block":"video-app","params":{}}]}
	
	$params = [
		"text" => $get["s"],
		"from" => "tabbar",
		"format" => "json",
		"ncrnd" => 7271,
		"p" => 0,
		"parent-reqid" => "",
		"request" => json_encode((object)[
			"blocks" => [
				(object)[
					"block" => "video-app",
					"params" => (object)[]
				]
			]
		]),
		"serpid" => "1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL",
		// kept as a string: the value does not fit a 32-bit integer
		"yu" => "3091577281773194415",
		"tmpl_version" => "releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a"
	];
	
	if(isset($get["p"])){
		
		$params["p"] = $get["p"];
	}
	
	if($get["duration"] != "any"){
		
		$params["duration"] = $get["duration"];
	}
	
	if($get["time"] != "any"){
		
		$params["within"] = $get["time"];
	}
	
	try{
		// no 5th argument: get() then sends the "yp" cookie, which is the
		// only place the nsfw setting is transmitted
		$json =
			$this->get(
				$proxy,
				"https://yandex.com/video/search",
				$params,
				$get["nsfw"]
			);
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON", 0, $error);
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	if(!isset($json["results"]["clips"]["items"])){
		
		throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	foreach($json["results"]["clips"]["items"] as $k => $data){
		
		// a clip without a target URL cannot be presented
		if(!isset($data["relatedParams"]["related_url"])){
			
			continue;
		}
		
		if(isset($data["preview"]["posterSrc"])){
			
			$poster = $data["preview"]["posterSrc"];
			
			// protocol-relative URL
			if(strncmp($poster, "//", 2) === 0){
				
				$poster = "https:" . $poster;
			}
			
			$thumb = [
				"ratio" => "16:9",
				"url" => $poster
			];
		}else{
			
			$thumb = [
				"ratio" => null,
				"url" => null
			];
		}
		
		// author, date, duration and views live in the "dups" entry that
		// shares the clip's key
		$dup = $json["results"]["clips"]["dups"][$k] ?? [];
		
		$date = null;
		
		if(isset($dup["date"])){
			
			// strtotime() returns false for text it cannot parse
			$date = strtotime($dup["date"]);
			
			if($date === false){
				
				$date = null;
			}
		}
		
		$out["video"][] = [
			"title" => $data["relatedParams"]["text"] ?? null,
			"description" =>
				isset($data["description"]) ?
				$this->titledots($data["description"]) : null,
			"author" => [
				"name" => $dup["host"]["secondPart"]["name"] ?? null,
				"url" => $dup["host"]["secondPart"]["origUrl"] ?? null,
				"avatar" => null
			],
			"date" => $date,
			"duration" =>
				isset($dup["duration"]["value"]) ?
				(int)$dup["duration"]["value"] : null,
			"views" =>
				isset($dup["views"]["text"]) ?
				$this->parseviews($dup["views"]["text"]) : null,
			"thumb" => $thumb,
			"url" =>
				preg_replace(
					'/^http:\/\//',
					"https://",
					$data["relatedParams"]["related_url"]
				)
		];
	}
	
	// get npt
	if(!empty($json["results"]["search"]["hasNextPage"])){
		
		$current_page =
			isset($json["results"]["search"]["currentPage"]) ?
			(int)$json["results"]["search"]["currentPage"] : 0;
		
		$get["p"] = $current_page + 1;
		
		$out["npt"] =
			$this->backend->store(
				json_encode($get),
				"video",
				$proxy
			);
	}
	
	return $out;
}
/*
	Convert a view counter such as "1.2K views" into an integer.
	
	Only the first space-delimited token is read. Its trailing letter
	selects the multiplier (k, m or b, case-insensitive); any other
	trailing character means no multiplier. Decimals of any length are
	supported, so "1.25M" yields 1250000.
	
	Returns 0 for an empty value.
*/
private function parseviews($number){
	
	$token = explode(" ", trim((string)$number), 2)[0];
	
	if($token === ""){
		
		return 0;
	}
	
	switch(strtolower(substr($token, -1))){
		
		case "k":
			$multiplier = 1000;
			break;
		
		case "m":
			$multiplier = 1000000;
			break;
		
		case "b":
			$multiplier = 1000000000;
			break;
		
		default:
			$multiplier = 1;
			break;
	}
	
	// the float cast reads the leading numeric part and ignores the unit;
	// rounding removes floating point noise before the integer cast
	return (int)round((float)$token * $multiplier);
}
/*
	Trim a text and drop a trailing truncation marker.
	
	Two markers are recognised, both exactly 3 bytes long: three ASCII
	dots, and the single ellipsis character U+2026 encoded as UTF-8.
	Returns the trimmed text without the marker.
*/
private function titledots($title){
	
	// callers may hand over a missing value
	$title = (string)$title;
	
	$tail = substr($title, -3);
	
	if(
		$tail === "..." ||
		$tail === "\u{2026}"
	){
		
		return trim(substr($title, 0, -3));
	}
	
	return trim($title);
}
}