diff --git a/scraper/mojeek.php b/scraper/mojeek.php
index b2d6ed5..c15d34c 100644
--- a/scraper/mojeek.php
+++ b/scraper/mojeek.php
@@ -501,11 +501,6 @@ class mojeek{
 				throw new Exception("Failed to get HTML");
 			}
 
-			/*
-			$handle = fopen("scraper/mojeek.html", "r");
-			$html = fread($handle, filesize("scraper/mojeek.html"));
-			fclose($handle);*/
-
 		}
 
 		$out = [
@@ -526,6 +521,8 @@ class mojeek{
 
 		$this->fuckhtml->load($html);
 
+		$this->detect_block();
+
 		$results =
 			$this->fuckhtml
 			->getElementsByClassName("results-standard", "ul");
@@ -1034,6 +1031,8 @@ class mojeek{
 
 		$this->fuckhtml->load($html);
 
+		$this->detect_block();
+
 		$articles =
 			$this->fuckhtml->getElementsByTagName("article");
 
@@ -1166,6 +1165,35 @@ class mojeek{
 		return $out;
 	}
 
+	/**
+	 * Throw if the page currently loaded into $this->fuckhtml is Mojeek's
+	 * "403 - Forbidden" page.
+	 *
+	 * Only the first <title> element is inspected; when the document has
+	 * no <title> at all the method returns without doing anything.
+	 *
+	 * @throws Exception when the title text is exactly "403 - Forbidden"
+	 */
+	private function detect_block(){
+
+		$title =
+			$this->fuckhtml
+			->getElementsByTagName(
+				"title"
+			);
+
+		if(
+			count($title) !== 0 &&
+			$this->fuckhtml
+			->getTextContent(
+				$title[0]["innerHTML"]
+			) === "403 - Forbidden"
+		){
+
+			throw new Exception("Mojeek blocked this instance or request proxy.");
+		}
+	}
+
 	private function titledots($title){
 
 		return trim($title, ". \t\n\r\0\x0B");