forked from lolcat/4get
still missing things on google scraper
This commit is contained in:
2287
scraper/brave.php
Normal file
2287
scraper/brave.php
Normal file
File diff suppressed because it is too large
Load Diff
2722
scraper/ddg.php
Normal file
2722
scraper/ddg.php
Normal file
File diff suppressed because it is too large
Load Diff
1562
scraper/google.php
Normal file
1562
scraper/google.php
Normal file
File diff suppressed because it is too large
Load Diff
242
scraper/marginalia.php
Normal file
242
scraper/marginalia.php
Normal file
@@ -0,0 +1,242 @@
|
||||
<?php
|
||||
|
||||
/**
 * Scraper for the Marginalia search API (api.marginalia.nu).
 *
 * Exposes the filter description consumed by the frontend (getfilters) and
 * a web search (web) that returns results in the common result layout used
 * by the other scrapers in this directory.
 */
class marginalia{
	
	/**
	 * API key placed in the request path by web().
	 * Declared explicitly: creating it dynamically from the constructor is
	 * deprecated since PHP 8.2. Kept public so outside access still works.
	 */
	public $key;
	
	/**
	 * Sets the API key to the literal "public".
	 */
	public function __construct(){
		
		$this->key = "public";
	}
	
	/**
	 * Describes the filters available for a page type.
	 *
	 * @param string $page Page type; only "web" is handled.
	 * @return array|null Map of filter name => ["display" => label,
	 *                    "option" => [value => label]]. Any other page type
	 *                    falls out of the switch and yields null.
	 */
	public function getfilters($page){
		
		switch($page){
			
			case "web":
				return [
					"profile" => [
						"display" => "Profile",
						"option" => [
							"any" => "Default",
							"modern" => "Modern"
						]
					],
					"format" => [
						"display" => "Format",
						"option" => [
							"any" => "Any",
							"html5" => "html5",
							"xhtml" => "xhtml",
							"html123" => "html123"
						]
					],
					"file" => [
						"display" => "File",
						"option" => [
							"any" => "Any",
							"nomedia" => "Deny media",
							"media" => "Contains media",
							"audio" => "Contains audio",
							"video" => "Contains video",
							"archive" => "Contains archive",
							"document" => "Contains document"
						]
					],
					"javascript" => [
						"display" => "Javascript",
						"option" => [
							"any" => "Allow JS",
							"deny" => "Deny JS",
							"require" => "Require JS"
						]
					],
					"trackers" => [
						"display" => "Trackers",
						"option" => [
							"any" => "Allow trackers",
							"deny" => "Deny trackers",
							"require" => "Require trackers"
						]
					],
					"cookies" => [
						"display" => "Cookies",
						"option" => [
							"any" => "Allow cookies",
							"deny" => "Deny cookies",
							"require" => "Require cookies"
						]
					],
					"affiliate" => [
						"display" => "Affiliate links in body",
						"option" => [
							"any" => "Allow affiliate links",
							"deny" => "Deny affiliate links",
							"require" => "Require affiliate links"
						]
					]
				];
		}
	}
	
	/**
	 * Performs an HTTP GET with browser-like headers and returns the body.
	 *
	 * @param string $url Base URL.
	 * @param array  $get Query parameters appended to $url when non-empty.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the curl error message when the transfer fails.
	 */
	private function get($url, $get = []){
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		try{
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on the error path as well
			curl_close($curlproc);
		}
	}
	
	/**
	 * Runs a web search.
	 *
	 * Filters are translated into search operators appended to the query
	 * text; the "modern" profile is sent as index=1.
	 *
	 * @param array $get Request values: "s" (query) plus one entry per
	 *                   filter returned by getfilters("web").
	 * @return array Result set in the common layout; only "web" is filled.
	 * @throws Exception When the request fails, the API answers "Slow down",
	 *                   or the response body is not a JSON object/array.
	 */
	public function web($get){
		
		$search = [$get["s"]];
		$profile = $get["profile"];
		$format = $get["format"];
		$file = $get["file"];
		
		// tri-state filters: "any" adds nothing, "require" adds the
		// operator, "deny" adds the operator prefixed with "-"
		$operators = [
			"javascript" => "js:true",
			"trackers" => "special:tracking",
			"cookies" => "special:cookies",
			"affiliate" => "special:affiliate"
		];
		
		foreach($operators as $key => $operator){
			
			$value = $get[$key];
			
			if($value == "any"){ continue; }
			
			$search[] = $value == "deny" ? "-" . $operator : $operator;
		}
		
		if($format != "any"){
			
			$search[] = "format:$format";
		}
		
		switch($file){
			
			case "any": break;
			case "nomedia": $search[] = "-special:media"; break;
			case "media": $search[] = "special:media"; break;
			
			default:
				$search[] = "file:$file";
		}
		
		$search = implode(" ", $search);
		
		$params = [
			"count" => 20
		];
		
		if($profile == "modern"){
			
			$params["index"] = 1;
		}
		
		try{
			$json =
				$this->get(
					"https://api.marginalia.nu/{$this->key}/search/" . urlencode($search),
					$params
				);
		}catch(Exception $error){
			
			throw new Exception("Failed to get JSON");
		}
		
		if($json == "Slow down"){
			
			throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
		}
		
		$json = json_decode($json, true);
		
		if(!is_array($json)){
			
			// body was not JSON (or not an object/array): do not iterate it
			throw new Exception("Failed to decode JSON");
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		foreach(($json["results"] ?? []) as $result){
			
			$out["web"][] = [
				"title" => $result["title"],
				"description" => str_replace("\n", " ", $result["description"]),
				"url" => $result["url"],
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		return $out;
	}
}
|
||||
|
1182
scraper/mojeek.php
Normal file
1182
scraper/mojeek.php
Normal file
File diff suppressed because it is too large
Load Diff
244
scraper/wiby.php
Normal file
244
scraper/wiby.php
Normal file
@@ -0,0 +1,244 @@
|
||||
<?php
|
||||
|
||||
/**
 * Scraper for wiby.me web search.
 *
 * Results are extracted from the HTML result page with regular expressions;
 * pagination state is kept through the nextpage helper from lib/nextpage.php.
 */
class wiby{
	
	/**
	 * Next-page token store (instance of nextpage from lib/nextpage.php).
	 * Declared explicitly: dynamic property creation is deprecated since
	 * PHP 8.2. Kept public so outside access still works.
	 */
	public $nextpage;
	
	/**
	 * Loads the nextpage library and creates the token store for "wiby".
	 */
	public function __construct(){
		
		// include_once: several scrapers pull in this file, and a second
		// plain include would redeclare the class and abort the request
		include_once "lib/nextpage.php";
		$this->nextpage = new nextpage("wiby");
	}
	
	/**
	 * Describes the filters available for a page type.
	 *
	 * @param string $page Page type; only "web" has filters.
	 * @return array Map of filter name => ["display" => label,
	 *               "option" => [value => label]], or [] for other pages.
	 */
	public function getfilters($page){
		
		if($page != "web"){
			
			return [];
		}
		
		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"no" => "No"
				]
			],
			"date" => [
				"display" => "Time posted",
				"option" => [
					"any" => "Any time",
					"day" => "Past day",
					"week" => "Past week",
					"month" => "Past month",
					"year" => "Past year",
				]
			]
		];
	}
	
	/**
	 * Performs an HTTP GET with browser-like headers and returns the body.
	 *
	 * $get had a default of [] while being followed by the required $nsfw;
	 * such a default can never be used and the combination is deprecated
	 * since PHP 8.0, so the default was dropped.
	 *
	 * @param string $url  Base URL.
	 * @param array  $get  Query parameters appended to $url when non-empty.
	 * @param string $nsfw Value sent in the "ws" cookie.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the curl error message when the transfer fails.
	 */
	private function get($url, $get, $nsfw){
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Cookie: ws={$nsfw}",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		try{
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on the error path as well
			curl_close($curlproc);
		}
	}
	
	/**
	 * Removes the "!x" / "&x" command tokens from a user query.
	 *
	 * str_replace() processes the search list in order over the whole
	 * string, so three-character tokens must come before their
	 * two-character prefixes; otherwise "!gi" is reduced to "i" by the
	 * "!g" rule before the "!gi" rule gets a chance to match.
	 *
	 * @param string $search Raw query text.
	 * @return string Query with every listed token removed.
	 */
	private function stripbangs($search){
		
		return
			str_replace(
				[
					"!gi", "!gv", "!gm",
					"!bi", "!bv", "!bm",
					"!td", "!tw", "!tm", "!ty",
					"&gi", "&gv", "&gm",
					"&bi", "&bv", "&bm",
					"&td", "&tw", "&tm", "&ty",
					"!g", "!b",
					"&g", "&b"
				],
				"",
				$search
			);
	}
	
	/**
	 * Runs a web search, or continues one from a next-page token.
	 *
	 * @param array $get Request values: "npt" (next-page token, may be
	 *                   empty), "s" (query), "date" and "nsfw" filters.
	 * @return array Result set in the common layout; "web" holds the hits
	 *               and "npt" the token for the following page, if any.
	 * @throws Exception On an empty query, an undecodable token or a failed
	 *                   request.
	 */
	public function web($get){
		
		if($get["npt"]){
			
			$q =
				json_decode(
					$this->nextpage->get($get["npt"], "web"),
					true
				);
			
			if(!is_array($q) || !isset($q["q"], $q["nsfw"])){
				
				// a malformed token would otherwise reach http_build_query()
				throw new Exception("Failed to decode next page token");
			}
			
			$nsfw = $q["nsfw"];
			unset($q["nsfw"]);
			
		}else{
			
			$search = $get["s"];
			
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$date = $get["date"];
			$nsfw = $get["nsfw"] == "yes" ? "0" : "1";
			
			$search = $this->stripbangs($search);
			
			// the date filter is expressed as a leading token in the query
			switch($date){
				
				case "day": $search = "!td " . $search; break;
				case "week": $search = "!tw " . $search; break;
				case "month": $search = "!tm " . $search; break;
				case "year": $search = "!ty " . $search; break;
			}
			
			$q = [
				"q" => $search
			];
		}
		
		try{
			$html = $this->get(
				"https://wiby.me/",
				$q,
				$nsfw
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		// the "Find more..." link carries the number of the next page
		preg_match(
			'/<p class="pin"><blockquote>(?:<\/p>)?<br><a class="more" href="\/\?q=[^"]+&p=([0-9]+)">Find more\.\.\.<\/a><\/blockquote>/',
			$html,
			$nextpage
		);
		
		if(count($nextpage) === 0){
			
			$nextpage = null;
		}else{
			
			$nextpage =
				$this->nextpage->store(
					json_encode([
						"q" => $q["q"],
						"p" => (int)$nextpage[1],
						"nsfw" => $nsfw
					]),
					"web"
				);
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => $nextpage,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// capture groups: 1 = href, 2 = link text, 3 = description paragraph
		preg_match_all(
			'/<blockquote>[\s]*<a .* href="(.*)">(.*)<\/a>.*<p>(.*)<\/p>[\s]*<\/blockquote>/Ui',
			$html,
			$links
		);
		
		foreach(array_keys($links[0]) as $i){
			
			$out["web"][] = [
				"title" => $this->unescapehtml(trim($links[2][$i])),
				"description" => $this->unescapehtml(trim(strip_tags($links[3][$i]))),
				"url" => trim($links[1][$i]),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		return $out;
	}
	
	/**
	 * Turns <br> variants into newlines and decodes XML entities.
	 *
	 * @param string $str HTML fragment.
	 * @return string Decoded text.
	 */
	private function unescapehtml($str){
		
		$breaks = [
			"<br>",
			"<br/>",
			"</br>",
			"<BR>",
			"<BR/>",
			"</BR>",
		];
		
		return html_entity_decode(
			str_replace($breaks, "\n", $str),
			ENT_QUOTES | ENT_XML1, 'UTF-8'
		);
	}
}
|
530
scraper/yandex.php
Normal file
530
scraper/yandex.php
Normal file
@@ -0,0 +1,530 @@
|
||||
<?php
|
||||
|
||||
/**
 * Scraper for Yandex image search.
 *
 * Requests the JSON variant of the image search page, concatenates the HTML
 * of the returned blocks and reads each result from the "data-bem"
 * attribute of the result elements.
 */
class yandex{
	
	/**
	 * HTML parser (instance of fuckhtml from lib/fuckhtml.php).
	 * Declared explicitly: dynamic property creation is deprecated since
	 * PHP 8.2. Kept public so outside access still works.
	 */
	public $fuckhtml;
	
	/**
	 * Next-page token store (instance of nextpage from lib/nextpage.php).
	 */
	public $nextpage;
	
	/**
	 * Loads the helper libraries and instantiates them.
	 */
	public function __construct(){
		
		// include_once: several scrapers pull in these files, and a second
		// plain include would redeclare the class and abort the request
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
		
		include_once "lib/nextpage.php";
		$this->nextpage = new nextpage("yandex");
	}
	
	/**
	 * Performs an HTTP GET with browser-like headers and returns the body.
	 *
	 * $get had a default of [] while being followed by the required $nsfw;
	 * such a default can never be used and the combination is deprecated
	 * since PHP 8.0, so the default was dropped.
	 *
	 * @param string $url  Base URL.
	 * @param array  $get  Query parameters; "text" is reused for the Referer.
	 * @param string $nsfw "yes", "maybe" or "no"; mapped to "0", "1", "2"
	 *                     and placed in the family setting of the yp cookie.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the curl error message when the transfer fails.
	 */
	private function get($url, $get, $nsfw){
		
		// the search text is user input: encode it before it goes into a
		// header line, otherwise spaces or CR/LF end up in the raw header
		$search = urlencode((string)($get["text"] ?? ""));
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		// NOTE(review): any other value is passed into the cookie as-is;
		// confirm callers only ever supply the three filter options
		switch($nsfw){
			case "yes": $nsfw = "0"; break;
			case "maybe": $nsfw = "1"; break;
			case "no": $nsfw = "2"; break;
		}
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Encoding: gzip",
			"Accept-Language: en-US,en;q=0.5",
			"DNT: 1",
			"Cookie: yp=1716337604.sp.family%3A{$nsfw}#1685406411.szm.1:1920x1080:1920x999",
			"Referer: https://yandex.com/images/search?text={$search}",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: cross-site",
			// NOTE(review): duplicate of the header above; kept so the
			// request on the wire stays as it was
			"Upgrade-Insecure-Requests: 1"
		];
		
		$curlproc = curl_init();
		
		try{
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on the error path as well
			curl_close($curlproc);
		}
	}
	
	/**
	 * Describes the filters available for a page type.
	 *
	 * @param string $pagetype Page type; only "images" has filters.
	 * @return array Map of filter name => ["display" => label,
	 *               "option" => [value => label]], or [] for other pages.
	 */
	public function getfilters($pagetype){
		
		if($pagetype != "images"){
			
			return [];
		}
		
		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"maybe" => "Maybe",
					"no" => "No"
				]
			],
			"time" => [
				"display" => "Time posted",
				"option" => [
					"any" => "Any time",
					"week" => "Last week"
				]
			],
			"size" => [
				"display" => "Size",
				"option" => [
					"any" => "Any size",
					"small" => "Small",
					"medium" => "Medium",
					"large" => "Large",
					"wallpaper" => "Wallpaper"
				]
			],
			"color" => [
				"display" => "Colors",
				"option" => [
					"any" => "All colors",
					"color" => "Color images only",
					"gray" => "Black and white",
					"red" => "Red",
					"orange" => "Orange",
					"yellow" => "Yellow",
					"cyan" => "Cyan",
					"green" => "Green",
					"blue" => "Blue",
					"violet" => "Purple",
					"white" => "White",
					"black" => "Black"
				]
			],
			"type" => [
				"display" => "Type",
				"option" => [
					"any" => "All types",
					"photo" => "Photos",
					"clipart" => "White background",
					"lineart" => "Drawings and sketches",
					"face" => "People",
					"demotivator" => "Demotivators"
				]
			],
			"layout" => [
				"display" => "Layout",
				"option" => [
					"any" => "All layouts",
					"horizontal" => "Horizontal",
					"vertical" => "Vertical",
					"square" => "Square"
				]
			],
			"format" => [
				"display" => "Format",
				"option" => [
					"any" => "Any format",
					"jpeg" => "JPEG",
					"png" => "PNG",
					"gif" => "GIF"
				]
			]
		];
	}
	
	/**
	 * Builds the query parameters for the first page of an image search.
	 *
	 * Filter => query parameter mapping, taken from requests captured from
	 * the Yandex web UI:
	 *
	 *   size    isize   = large | medium | small | wallpaper
	 *                     (wallpaper is sent with wp=wh16x9_1920x1080)
	 *   layout  iorient = horizontal | vertical | square
	 *   type    type    = photo | clipart | lineart | face | demotivator
	 *   color   icolor  = color | gray | red | orange | yellow | cyan |
	 *                     green | blue | violet | white | black
	 *   format  itype   = jpg | png | gifan
	 *   time    recent  = 7D
	 *
	 * @param array $get Request values: "s" and the filters listed above.
	 * @return array Parameters for get(); "request" is already JSON-encoded.
	 */
	private function buildrequest($get){
		
		// every block is requested with empty params and version 2
		$blocks = [];
		
		foreach(
			[
				"extra-content",
				"i-global__params:ajax",
				"search2:ajax",
				"preview__isWallpaper",
				"content_type_search",
				"serp-controller",
				"cookies_ajax",
				"advanced-search-block"
			]
			as $name
		){
			
			$blocks[] = [
				"block" => $name,
				"params" => (object)[],
				"version" => 2
			];
		}
		
		$request = [
			"format" => "json",
			"request" => [
				"blocks" => $blocks,
				"metadata" => [
					"bundles" => [
						"lb" => "AS?(E<X120"
					],
					"assets" => [
						// las base
						"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;"
						
						// las default
						//"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;227.0=1;203.0=1;76fe94.0=1;215f96.0=1;75.0=1"
					],
					"extraContent" => [
						"names" => [
							"i-react-ajax-adapter"
						]
					]
				]
			]
		];
		
		/*
			Apply filters
		*/
		if($get["time"] == "week"){
			
			$request["recent"] = "7D";
		}
		
		if($get["size"] != "any"){
			
			$request["isize"] = $get["size"];
			
			if($get["size"] == "wallpaper"){
				
				// the captured wallpaper request carries this as well
				$request["wp"] = "wh16x9_1920x1080";
			}
		}
		
		if($get["type"] != "any"){
			
			$request["type"] = $get["type"];
		}
		
		if($get["color"] != "any"){
			
			$request["icolor"] = $get["color"];
		}
		
		if($get["layout"] != "any"){
			
			$request["iorient"] = $get["layout"];
		}
		
		if($get["format"] != "any"){
			
			// filter keys differ from the values seen in captured requests
			switch($get["format"]){
				
				case "jpeg": $request["itype"] = "jpg"; break;
				case "gif": $request["itype"] = "gifan"; break;
				default: $request["itype"] = $get["format"];
			}
		}
		
		$request["text"] = $get["s"];
		$request["uinfo"] = "sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080";
		
		$request["request"] = json_encode($request["request"]);
		
		return $request;
	}
	
	/**
	 * Runs an image search, or continues one from a next-page token.
	 *
	 * @param array $get Request values: "npt" (next-page token, may be
	 *                   empty), "s" (query) and the image filters.
	 * @return array ["status" => "ok", "npt" => token|null, "image" => list
	 *               of ["title", "source" => list of url/width/height, "url"]].
	 * @throws Exception On an empty query, an undecodable token, a failed
	 *                   request, a captcha response or an undecodable body.
	 */
	public function image($get){
		
		if($get["npt"]){
			
			$request =
				json_decode(
					$this->nextpage->get(
						$get["npt"],
						"images"
					),
					true
				);
			
			if(!is_array($request) || !isset($request["nsfw"])){
				
				// a malformed token would otherwise reach http_build_query()
				throw new Exception("Failed to decode next page token");
			}
			
			$nsfw = $request["nsfw"];
			unset($request["nsfw"]);
			
		}else{
			
			if(strlen($get["s"]) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$nsfw = $get["nsfw"];
			$request = $this->buildrequest($get);
		}
		
		try{
			$json = $this->get(
				"https://yandex.com/images/search",
				$request,
				$nsfw
			);
		}catch(Exception $err){
			
			throw new Exception("Failed to get JSON");
		}
		
		$json = json_decode($json, true);
		
		if(!is_array($json)){
			
			throw new Exception("Failed to decode JSON");
		}
		
		if(
			isset($json["type"]) &&
			$json["type"] == "captcha"
		){
			
			throw new Exception("Yandex blocked this 4get instance. Yandex blocks don't last very long, but the block timer gets reset everytime you make another unsuccessful request. Please try again in ~7 minutes.");
		}
		
		// get html
		$html = "";
		foreach(($json["blocks"] ?? []) as $block){
			
			$html .= $block["html"] ?? "";
		}
		
		$this->fuckhtml->load($html);
		$div = $this->fuckhtml->getElementsByTagName("div");
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		// check for next page
		$more =
			$this->fuckhtml
			->getElementsByClassName(
				"more more_direction_next",
				$div
			);
		
		if(count($more) !== 0){
			
			// the token stores the whole request plus the nsfw choice;
			// "p" is absent on the first page
			$request["nsfw"] = $nsfw;
			$request["p"] = isset($request["p"]) ? $request["p"] + 1 : 1;
			
			$out["npt"] = $this->nextpage->store(json_encode($request), "images");
		}
		
		// get search results
		$elements =
			$this->fuckhtml
			->getElementsByClassName(
				"serp-item serp-item_type_search",
				$div
			);
		
		foreach($elements as $element){
			
			$decoded =
				json_decode(
					$element["attributes"]["data-bem"],
					true
				);
			
			if(!isset($decoded["serp-item"])){
				
				// attribute did not decode to the expected structure
				continue;
			}
			
			$image = $decoded["serp-item"];
			
			$title = [html_entity_decode($image["snippet"]["title"], ENT_QUOTES | ENT_HTML5)];
			
			if(isset($image["snippet"]["text"])){
				
				$title[] = html_entity_decode($image["snippet"]["text"], ENT_QUOTES | ENT_HTML5);
			}
			
			$tmp = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$this->titledots(
							implode(": ", $title)
						)
					),
				"source" => [],
				"url" => htmlspecialchars_decode($image["snippet"]["url"])
			];
			
			foreach(($image["dups"] ?? []) as $dup){
				
				$tmp["source"][] = [
					"url" => htmlspecialchars_decode($dup["url"]),
					"width" => (int)$dup["w"],
					"height" => (int)$dup["h"],
				];
			}
			
			// the thumbnail goes last; its URL is protocol-relative
			$tmp["source"][] = [
				"url" =>
					preg_replace(
						'/^\/\//',
						"https://",
						htmlspecialchars_decode($image["thumb"]["url"])
					),
				"width" => (int)$image["thumb"]["size"]["width"],
				"height" => (int)$image["thumb"]["size"]["height"]
			];
			
			$out["image"][] = $tmp;
		}
		
		return $out;
	}
	
	/**
	 * Trims a title and removes a trailing "..." or "…".
	 *
	 * Both endings are three bytes long ("…" is three bytes in UTF-8), so a
	 * byte-wise substr() of length 3 covers either one.
	 *
	 * @param string $title Title text.
	 * @return string Trimmed title without the trailing ellipsis.
	 */
	private function titledots($title){
		
		$tail = substr($title, -3);
		
		if(
			$tail == "..." ||
			$tail == "…"
		){
			
			return trim(substr($title, 0, -3));
		}
		
		return trim($title);
	}
}
|
1723
scraper/youtube.php
Normal file
1723
scraper/youtube.php
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user