From 81502d4721405e8e4632b37c252d4254426b1728 Mon Sep 17 00:00:00 2001 From: lolcat Date: Fri, 22 Mar 2024 21:31:06 -0400 Subject: [PATCH] added crowdview lol --- README.md | 1 + data/config.php | 1 + lib/frontend.php | 6 ++ scraper/crowdview.php | 145 ++++++++++++++++++++++++++++++++++++++++++ settings.php | 4 ++ 5 files changed, 157 insertions(+) create mode 100644 scraper/crowdview.php diff --git a/README.md b/README.md index 0867319..3015e4d 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ https://4get.ca - Brave - Yandex - Google + - Mwmbl - Mojeek - Marginalia - wiby diff --git a/data/config.php b/data/config.php index 91b86a1..1b10d14 100644 --- a/data/config.php +++ b/data/config.php @@ -104,6 +104,7 @@ class config{ const PROXY_PINTEREST = false; const PROXY_SEZNAM = false; const PROXY_NAVER = false; + const PROXY_CROWDVIEW = false; const PROXY_MWMBL = false; const PROXY_FTM = false; // findthatmeme const PROXY_IMGUR = false; diff --git a/lib/frontend.php b/lib/frontend.php index 7e3b6fb..d82dba2 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -902,6 +902,7 @@ class frontend{ "yandex" => "Yandex", "google" => "Google", "yep" => "Yep", + "crowdview" => "Crowdview", "mwmbl" => "Mwmbl", "mojeek" => "Mojeek", "marginalia" => "Marginalia", @@ -1019,6 +1020,11 @@ class frontend{ $lib = new facebook(); break;*/ + case "crowdview": + include "scraper/crowdview.php"; + $lib = new crowdview(); + break; + case "mwmbl": include "scraper/mwmbl.php"; $lib = new mwmbl(); diff --git a/scraper/crowdview.php b/scraper/crowdview.php new file mode 100644 index 0000000..8fb267b --- /dev/null +++ b/scraper/crowdview.php @@ -0,0 +1,145 @@ +backend = new backend("crowdview"); + + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); + } + + public function getfilters($page){ + + return []; + } + + private function get($proxy, $url, $get = []){ + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + "Connection: keep-alive", + "Upgrade-Insecure-Requests: 1", + "Sec-Fetch-Dest: document", + "Sec-Fetch-Mode: navigate", + "Sec-Fetch-Site: none", + "Sec-Fetch-User: ?1"] + ); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function web($get){ + + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $proxy = $this->backend->get_ip(); + + try{ + $json = $this->get( + $proxy, + "https://crowdview-next-js.onrender.com/api/search-v3", + [ + "query" => $search + ] + ); + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } + + $out = [ + "status" => "ok", + "spelling" => [ + "type" => "no_correction", + "using" => null, + "correction" => null + ], + "npt" => null, + "answer" => [], + "web" => [], + "image" => [], + "video" => [], + "news" => [], + "related" => [] + ]; + + $json = json_decode($json, true); + + if($json === NULL){ + + throw new Exception("Failed to decode JSON"); + } + + foreach($json["results"] as $item){ + + $description = explode("", $item["snippet"], 2); + + $out["web"][] = [ + "title" => $this->sanitize($item["title"]), + "description" => $this->sanitize($description[1]), + "url" => $item["link"], + "date" => strtotime($description[0]), + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => [] + ]; + } + + return $out; + } + + private function sanitize($html){ + + return + trim( + $this->fuckhtml + ->getTextContent( + html_entity_decode( + $html + ) + ), + ". " + ); + } +} diff --git a/settings.php b/settings.php index 49ba166..e467c81 100644 --- a/settings.php +++ b/settings.php @@ -125,6 +125,10 @@ $settings = [ "value" => "yep", "text" => "Yep" ], + [ + "value" => "crowdview", + "text" => "Crowdview" + ], [ "value" => "mwmbl", "text" => "Mwmbl"