From e82e908ece8d1b1a33f0b09f2ac54316d1d2a134 Mon Sep 17 00:00:00 2001
From: lolcat
Date: Sun, 25 Feb 2024 09:51:18 -0500
Subject: [PATCH] google fixes

---
 data/proxies/.gitignore |  2 +-
 scraper/google.php      | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/data/proxies/.gitignore b/data/proxies/.gitignore
index 70fd2c3..68f8e48 100644
--- a/data/proxies/.gitignore
+++ b/data/proxies/.gitignore
@@ -1,3 +1,3 @@
 *
 !.gitignore
-!onion.txt
\ No newline at end of file
+!onion.txt
diff --git a/scraper/google.php b/scraper/google.php
index 3cff687..0aba310 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -977,6 +977,11 @@ class google{
 			"related" => []
 		];
 
+		if($this->detect_sorry($html)){
+
+			throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
+		}
+
 		$this->parsejavascript($html);
 
 		//
@@ -2795,7 +2800,10 @@ class google{
 			throw new Exception("Failed to get search page");
 		}
 
-		$this->fuckhtml->load($html);
+		if($this->detect_sorry($html)){
+
+			throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
+		}
 
 		$out = [
 			"status" => "ok",
@@ -3609,4 +3617,31 @@ class google{
 
 		return rtrim($title, ". \t\n\r\0\x0B");
 	}
+
+	/**
+	 * Tell whether a fetched page is the redirect stub served instead of results.
+	 *
+	 * Side effect: loads $html into $this->fuckhtml. One call site in this
+	 * patch drops its own load() call and relies on this one instead.
+	 *
+	 * @param mixed $html Page markup to inspect (handed to fuckhtml->load() as-is)
+	 * @return bool true when the first <title> element reads exactly "302 Moved"
+	 */
+	private function detect_sorry($html){
+
+		$this->fuckhtml->load($html);
+		$detect_sorry =
+			$this->fuckhtml
+			->getElementsByTagName("title");
+
+		if(
+			isset($detect_sorry[0]) &&
+			$detect_sorry[0]["innerHTML"] === "302 Moved"
+		){
+
+			return true;
+		}
+
+		return false;
+	}
 }