forked from lolcat/4get
1
0
Fork 0

yep scraper cloudflare error handling

This commit is contained in:
lolcat 2024-05-23 08:58:46 -04:00
parent bcb5c4d519
commit 92d0102738
1 changed file with 38 additions and 16 deletions

View File

@@ -6,6 +6,9 @@ class yep{
include "lib/backend.php";
$this->backend = new backend("yep");
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
}
public function getfilters($page){
@@ -254,8 +257,10 @@ class yep{
["User-Agent: " . config::USER_AGENT,
"Accept: */*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"Accept-Encoding: gzip, deflate, br, zstd",
"Connection: keep-alive",
"DNT: 1",
"Priority: u=1",
"Origin: https://yep.com",
"Referer: https://yep.com/",
"Connection: keep-alive",
@@ -265,6 +270,9 @@ class yep{
"TE: trailers"]
);
// http3 bypass
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, 30);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
@@ -324,7 +332,6 @@ class yep{
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
$json =
json_decode(
$this->get(
$this->backend->get_ip(),
"https://api.yep.com/fs/2/search",
@@ -337,14 +344,29 @@ class yep{
"safeSearch" => $nsfw,
"type" => "web"
]
),
true
);
}catch(Exception $error){
throw new Exception("Failed to fetch JSON");
}
// detect cloudflare page
$this->fuckhtml->load($json);
if(
count(
$this->fuckhtml
->getElementsByClassName(
"cf-wrapper",
"div"
)
) !== 0
){
throw new Exception("Blocked by Cloudflare");
}
$json = json_decode($json, true);
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
if($json === null){