handle mojeek block
This commit is contained in:
parent
2c4dc7da84
commit
f30872134f
|
@ -501,11 +501,6 @@ class mojeek{
|
||||||
|
|
||||||
throw new Exception("Failed to get HTML");
|
throw new Exception("Failed to get HTML");
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
$handle = fopen("scraper/mojeek.html", "r");
|
|
||||||
$html = fread($handle, filesize("scraper/mojeek.html"));
|
|
||||||
fclose($handle);*/
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$out = [
|
$out = [
|
||||||
|
@ -526,6 +521,8 @@ class mojeek{
|
||||||
|
|
||||||
$this->fuckhtml->load($html);
|
$this->fuckhtml->load($html);
|
||||||
|
|
||||||
|
$this->detect_block();
|
||||||
|
|
||||||
$results =
|
$results =
|
||||||
$this->fuckhtml
|
$this->fuckhtml
|
||||||
->getElementsByClassName("results-standard", "ul");
|
->getElementsByClassName("results-standard", "ul");
|
||||||
|
@ -1034,6 +1031,8 @@ class mojeek{
|
||||||
|
|
||||||
$this->fuckhtml->load($html);
|
$this->fuckhtml->load($html);
|
||||||
|
|
||||||
|
$this->detect_block();
|
||||||
|
|
||||||
$articles =
|
$articles =
|
||||||
$this->fuckhtml->getElementsByTagName("article");
|
$this->fuckhtml->getElementsByTagName("article");
|
||||||
|
|
||||||
|
@ -1166,6 +1165,26 @@ class mojeek{
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Stop processing when the parsed page is Mojeek's "403 - Forbidden" page.
 *
 * Asks $this->fuckhtml for all <title> elements and compares the text
 * content of the first one against the literal "403 - Forbidden".
 * Returns normally when there is no <title> or when the text differs.
 *
 * NOTE(review): presumably inspects the document most recently handed
 * to $this->fuckhtml->load() -- confirm against the parser.
 *
 * @throws Exception when the first title reads "403 - Forbidden"
 */
private function detect_block(){
	
	$titles = $this->fuckhtml->getElementsByTagName("title");
	
	// No <title> element at all: nothing to inspect.
	if(count($titles) === 0){
		
		return;
	}
	
	$title_text =
		$this->fuckhtml
		->getTextContent(
			$titles[0]["innerHTML"]
		);
	
	// Any other title means the page is not the block page.
	if($title_text != "403 - Forbidden"){
		
		return;
	}
	
	throw new Exception("Mojeek blocked this instance or request proxy.");
}
|
||||||
|
|
||||||
private function titledots($title){
|
private function titledots($title){
|
||||||
|
|
||||||
return trim($title, ". \t\n\r\0\x0B");
|
return trim($title, ". \t\n\r\0\x0B");
|
||||||
|
|
Loading…
Reference in New Issue