startpage captcha handle

This commit is contained in:
lolcat 2024-07-29 18:25:25 -04:00
parent ff06bc1f51
commit 4e4796bb71
1 changed file with 50 additions and 0 deletions

View File

@ -408,6 +408,8 @@ class startpage{
//$html = file_get_contents("scraper/startpage.html");
}
$this->detect_captcha($html);
if(
preg_match(
'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),$/m',
@ -1057,6 +1059,8 @@ class startpage{
}
}
$this->detect_captcha($html);
$out = [
"status" => "ok",
"npt" => null,
@ -1186,6 +1190,8 @@ class startpage{
}
}
$this->detect_captcha($html);
if(
preg_match(
'/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),$/m',
@ -1326,6 +1332,8 @@ class startpage{
}
}
$this->detect_captcha($html);
if(
preg_match(
'/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),$/m',
@ -1526,4 +1534,46 @@ class startpage{
$text
);
}
/**
 * Inspect a fetched page and abort if it is a redirect stub rather
 * than a real response.
 *
 * The markup is loaded into $this->fuckhtml. When the first <title>
 * element's innerHTML is "Redirecting...", every <a> element is
 * examined: if the text content of any anchor contains the captcha
 * URL, the page is reported as a captcha; otherwise it is reported
 * as an unhandled redirect. Pages without that title pass through
 * and the method returns nothing.
 *
 * Note: calling this replaces whatever document was previously
 * loaded into $this->fuckhtml with $html.
 *
 * @param string $html Raw markup of the page to check
 * @throws Exception If the page is a captcha or another redirect page
 */
private function detect_captcha($html){
	
	$this->fuckhtml->load($html);
	
	$titles = $this->fuckhtml->getElementsByTagName("title");
	
	if(
		count($titles) === 0 ||
		$titles[0]["innerHTML"] != "Redirecting..."
	){
		
		// not a redirect stub, nothing to report
		return;
	}
	
	// we got redirected somewhere: find out if it's a captcha
	$captcha_url = "https://www.startpage.com/sp/captcha";
	$links = $this->fuckhtml->getElementsByTagName("a");
	
	foreach($links as $link){
		
		$link_text =
			$this->fuckhtml
			->getTextContent(
				$link["innerHTML"]
			);
		
		if(strpos($link_text, $captcha_url) !== false){
			
			throw new Exception("Startpage returned a captcha");
		}
	}
	
	// redirect page without any captcha link
	throw new Exception("Startpage redirected the scraper to an unhandled page");
}
}