handle mojeek block

This commit is contained in:
lolcat 2025-08-03 12:28:57 -04:00
parent 2c4dc7da84
commit f30872134f
1 changed file with 24 additions and 5 deletions

View File

@ -501,11 +501,6 @@ class mojeek{
throw new Exception("Failed to get HTML");
}
/*
$handle = fopen("scraper/mojeek.html", "r");
$html = fread($handle, filesize("scraper/mojeek.html"));
fclose($handle);*/
}
$out = [
@ -526,6 +521,8 @@ class mojeek{
$this->fuckhtml->load($html);
$this->detect_block();
$results =
$this->fuckhtml
->getElementsByClassName("results-standard", "ul");
@ -1034,6 +1031,8 @@ class mojeek{
$this->fuckhtml->load($html);
$this->detect_block();
$articles =
$this->fuckhtml->getElementsByTagName("article");
@ -1166,6 +1165,26 @@ class mojeek{
return $out;
}
/**
 * Abort the scrape when Mojeek answered with its block page.
 *
 * Looks at the document currently held by $this->fuckhtml (callers
 * invoke load($html) right before this method) and checks whether the
 * text of its first <title> element is exactly "403 - Forbidden".
 * Pages without a <title>, or with any other title, pass through.
 *
 * @throws Exception when the page title identifies a 403 block page
 */
private function detect_block(){
	
	$titles =
		$this->fuckhtml
		->getElementsByTagName(
			"title"
		);
	
	// No <title> element: nothing to inspect, treat as not blocked.
	if(count($titles) === 0){
		
		return;
	}
	
	$title_text =
		$this->fuckhtml
		->getTextContent(
			$titles[0]["innerHTML"]
		);
	
	// Strict comparison: only the exact string counts as a block page,
	// with no type juggling if the helper ever returns a non-string.
	if($title_text === "403 - Forbidden"){
		
		throw new Exception("Mojeek blocked this instance or request proxy.");
	}
}
private function titledots($title){
return trim($title, ". \t\n\r\0\x0B");