From 1a5a653be348b3e3823c7c5620cdc1f7cfc87072 Mon Sep 17 00:00:00 2001 From: lolcat Date: Sun, 1 Sep 2024 10:52:28 -0400 Subject: [PATCH] added ghostery search --- README.md | 13 +- api/v1/ac.php | 3 +- data/config.php | 1 + lib/frontend.php | 1 + scraper/ghostery.html | 2714 +++++++++++++++++++++++++++++++++++++++++ scraper/ghostery.php | 308 +++++ settings.php | 8 + 7 files changed, 3041 insertions(+), 7 deletions(-) create mode 100644 scraper/ghostery.html create mode 100644 scraper/ghostery.php diff --git a/README.md b/README.md index d81660b..ef7b001 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/W7W2OZK5H) +Donate to the project here! # 4get search **4get** is a proxy search engine that doesn't suck. @@ -37,11 +37,12 @@ tl;dr the best way to actually browse for shit. | Google | Google | Yandex | Startpage | | Google | | Startpage | Startpage | Google | Qwant | | Startpage | | Qwant | Qwant | Startpage | Mojeek | | Kagi | -| Yep | Yep | Qwant | | | Qwant | -| Greppr | Imgur | | | | Yep | -| Crowdview | FindThatMeme | | | | Marginalia | -| Mwmbl | | | | | YouTube | -| Mojeek | | | | | Soundcloud | +| Ghostery | Yep | Qwant | | | Qwant | +| Yep | Imgur | | | | Yep | +| Greppr | FindThatMeme | | | | Marginalia | +| Crowdview | | | | | YouTube | +| Mwmbl | | | | | Soundcloud | +| Mojeek | | | | | | | Marginalia | | | | | | | wiby | | | | | | | Curlie | | | | | | diff --git a/api/v1/ac.php b/api/v1/ac.php index ce9b3f2..50d3095 100644 --- a/api/v1/ac.php +++ b/api/v1/ac.php @@ -20,7 +20,8 @@ class autocomplete{ "yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}", "sc" => "", "startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english", - "kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}" + "kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}", + "ghostery" => "https://ghosterysearch.com/suggest?q={searchTerms}" ]; /* diff --git a/data/config.php b/data/config.php index 0d44c19..e4d3a0a 100644 --- a/data/config.php +++ b/data/config.php @@ -131,6 +131,7 @@ class config{ const PROXY_GOOGLE = false; const PROXY_STARTPAGE = false; const PROXY_QWANT = false; + const PROXY_GHOSTERY = false; const PROXY_MARGINALIA = false; const PROXY_MOJEEK = false; const PROXY_SC = false; // soundcloud diff --git a/lib/frontend.php b/lib/frontend.php index 10c7a8d..341c82e 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -941,6 +941,7 @@ class frontend{ "google" => "Google", "startpage" => "Startpage", "qwant" => "Qwant", + "ghostery" => "Ghostery", "yep" => "Yep", "greppr" => "Greppr", "crowdview" => "Crowdview", diff --git a/scraper/ghostery.html b/scraper/ghostery.html new file mode 100644 index 0000000..cc912b4 --- /dev/null +++ b/scraper/ghostery.html @@ -0,0 +1,2714 @@ + + + + + + + + + + + + 4chan - Ghostery Private Search + + + + + + + + + + + + + + + + + +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ +
+ + +
+ + + + +
    + + + + + + + + + +
  1. + +

    + 4chan +

    +
    +
    + +
    +
    +

    + 4chan is a simple image-based bulletin board where anyone can post comments and share images anonymously. +

    +
    +
    + https://www.4chan.org/ + + + + + + + + + 2 + + + + + 2 + + + + + 1 + + + + + 5 + + + + + 1 + + + + + + + 11 + +
    +
    +
    + + + + + +

    4chan.org

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 2 + + + + + 2 + + + + + 1 + + + + + 5 + + + + + 1 + + + + + + 11 +
    +
      + +
    • + + Advertising + 2 +
    • + +
    • + + Site Analytics + 2 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Hosting + 5 +
    • + +
    • + + Extensions + 1 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  2. + + + +
  3. + +

    + 4chan - Wikipedia +

    +
    +
    + +
    +
    +

    + 4chan is an anonymous English-language imageboard website. Launched by Christopher "moot" Poole in October 2003, the site hosts boards dedicated to a wide variety of topics, from video games and television to literature, cooking, weapons, music, history, anime, fitness, politics, and sports, ... +

    +
    +
    + https://en.wikipedia.org/wiki/4chan + + + + + + + + + 3 + + + + + 1 + + + + + 5 + + + + + 1 + + + + + + + 10 + +
    +
    +
    + + + + + +

    wikipedia.org

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 3 + + + + + 1 + + + + + 5 + + + + + 1 + + + + + + 10 +
    +
      + +
    • + + Advertising + 3 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Hosting + 5 +
    • + +
    • + + Extensions + 1 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  4. + + + +
  5. + +

    + r/4chan +

    +
    +
    + +
    +
    +

    + r/4chan: The stories and information posted here are artistic works of fiction and falsehood. Only a fool would take anything posted here as fact. +

    +
    +
    + https://www.reddit.com/r/4chan/ + + + + + + + + + 5 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 6 + + + + + 1 + + + + + + + 16 + +
    +
    +
    + + + + + +

    reddit.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 5 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 6 + + + + + 1 + + + + + + 16 +
    +
      + +
    • + + Advertising + 5 +
    • + +
    • + + Site Analytics + 1 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Misc + 1 +
    • + +
    • + + Social Media + 1 +
    • + +
    • + + Hosting + 6 +
    • + +
    • + + Customer Interaction + 1 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  6. + + + +
  7. + +

    + GitHub - 4chan/4chan-API: Documentation for 4chan's read-only JSON API. +

    +
    +
    + +
    +
    +

    + Documentation for 4chan's read-only JSON API. Contribute to 4chan/4chan-API development by creating an account on GitHub. +

    +
    +
    + https://github.com/4chan/4chan-API + + + + + + + + + 2 + + + + + 1 + + + + + 4 + + + + + 1 + + + + + + + 8 + +
    +
    +
    + + + + + +

    github.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 2 + + + + + 1 + + + + + 4 + + + + + 1 + + + + + + 8 +
    +
      + +
    • + + Advertising + 2 +
    • + +
    • + + Misc + 1 +
    • + +
    • + + Hosting + 4 +
    • + +
    • + + Customer Interaction + 1 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  8. + + + +
  9. + +

    + Absolutely everything you need to know to understand 4chan, the Internet’s own bogeyman - The Washington Post +

    +
    +
    + +
    +
    +

    + A comprehensive, no-nonsense guide to one of the Internet's most confusing and influential Web sites. +

    +
    +
    + https://www.washingtonpost.com/news/the-intersect/wp/2014/09/25/absolutely-everything-you-need-to-know-to-understand-4chan-the-internets-own-bogeyman/ + + + + + + + + + 20 + + + + + 6 + + + + + 1 + + + + + 3 + + + + + 2 + + + + + 5 + + + + + + + 37 + +
    +
    +
    + + + + + +

    washingtonpost.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 20 + + + + + 6 + + + + + 1 + + + + + 3 + + + + + 2 + + + + + 5 + + + + + + 37 +
    +
      + +
    • + + Advertising + 20 +
    • + +
    • + + Site Analytics + 6 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Misc + 3 +
    • + +
    • + + Social Media + 2 +
    • + +
    • + + Hosting + 5 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  10. + + + +
  11. + +

    + The Dark Side of 4chan: Exploring the Dangers of an Unmoderated Online Community - The Bullhorn News +

    +
    +
    + +
    + +
    +
  12. + + + +
  13. + +

    + 4chan (@actual4chan) • Instagram photos and videos +

    +
    +
    + +
    +
    +

    + 15K Followers, 11 Following, 1,299 Posts - See Instagram photos and videos from 4chan (@actual4chan) +

    +
    +
    + https://www.instagram.com/actual4chan/ + + + + + + + + + 4 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 5 + + + + + + + 13 + +
    +
    +
    + + + + + +

    instagram.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 4 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 1 + + + + + 5 + + + + + + 13 +
    +
      + +
    • + + Advertising + 4 +
    • + +
    • + + Site Analytics + 1 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Misc + 1 +
    • + +
    • + + Social Media + 1 +
    • + +
    • + + Hosting + 5 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  14. + + + +
  15. + +

    + 4chan | The Guardian +

    +
    +
    + +
    +
    +

    + Latest news, sport, business, comment, analysis and reviews from the Guardian, the world's leading liberal voice +

    +
    +
    + https://www.theguardian.com/technology/4chan + + + + + + + + + 15 + + + + + 3 + + + + + 1 + + + + + 1 + + + + + 2 + + + + + 4 + + + + + + + 26 + +
    +
    +
    + + + + + +

    theguardian.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 15 + + + + + 3 + + + + + 1 + + + + + 1 + + + + + 2 + + + + + 4 + + + + + + 26 +
    +
      + +
    • + + Advertising + 15 +
    • + +
    • + + Site Analytics + 3 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Misc + 1 +
    • + +
    • + + Social Media + 2 +
    • + +
    • + + Hosting + 4 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  16. + + + +
  17. + +

    + Epic Win for Anonymous: How 4chan's Army Conquered the Web: Stryker, Cole: 9781590207109: Amazon.com: Books +

    +
    +
    + +
    +
    +

    + Epic Win for Anonymous: How 4chan's Army Conquered the Web [Stryker, Cole] on Amazon.com. *FREE* shipping on qualifying offers. Epic Win for Anonymous: How 4chan's Army Conquered the Web +

    +
    +
    + https://www.amazon.com/Epic-Win-4chans-Army-Conquered/dp/1590207106 + + + + + + + + + 18 + + + + + 3 + + + + + 1 + + + + + 5 + + + + + + + 27 + +
    +
    +
    + + + + + +

    amazon.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 18 + + + + + 3 + + + + + 1 + + + + + 5 + + + + + + 27 +
    +
      + +
    • + + Advertising + 18 +
    • + +
    • + + Site Analytics + 3 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Hosting + 5 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  18. + + + +
  19. + +

    + 4chan +

    +
    +
    + +
    +
    +

    + 4chan +

    +
    +
    + https://twitter.com/4chan + + + + + + + + + 8 + + + + + 3 + + + + + 1 + + + + + 1 + + + + + 3 + + + + + 9 + + + + + + + 25 + +
    +
    +
    + + + + + +

    twitter.com

    + + + + + +
    + +
    +
    + All trackers seen + + + + + + + + + + + +
    + + + + + + +
    + +
    +
    + + + + + + 8 + + + + + 3 + + + + + 1 + + + + + 1 + + + + + 3 + + + + + 9 + + + + + + 25 +
    +
      + +
    • + + Advertising + 8 +
    • + +
    • + + Site Analytics + 3 +
    • + +
    • + + Audio Video Player + 1 +
    • + +
    • + + Misc + 1 +
    • + +
    • + + Social Media + 3 +
    • + +
    • + + Hosting + 9 +
    • + +
    +
    +
    +
    + + +
    +
    +
    + +
    +
    +
  20. + +
+ + + + + +
+

Alternative Search Engines

+ + +
+ +
+ + + + + + + + + + + + + + + + diff --git a/scraper/ghostery.php b/scraper/ghostery.php new file mode 100644 index 0000000..9492f4b --- /dev/null +++ b/scraper/ghostery.php @@ -0,0 +1,308 @@ +backend = new backend("ghostery"); + + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); + } + + public function getfilters($page){ + + if($page != "web"){ + + return []; + } + + return [ + "country" => [ + "display" => "Country", + "option" => [ + "any" => "All regions", + "AR" => "Argentina", + "AU" => "Australia", + "AT" => "Austria", + "BE" => "Belgium", + "BR" => "Brazil", + "CA" => "Canada", + "CL" => "Chile", + "DK" => "Denmark", + "FI" => "Finland", + "FR" => "France", + "DE" => "Germany", + "HK" => "Hong Kong", + "IN" => "India", + "ID" => "Indonesia", + "IT" => "Italy", + "JP" => "Japan", + "KR" => "Korea", + "MY" => "Malaysia", + "MX" => "Mexico", + "NL" => "Netherlands", + "NZ" => "New Zealand", + "NO" => "Norway", + "CN" => "People's Republic of China", + "PL" => "Poland", + "PT" => "Portugal", + "PH" => "Republic of the Philippines", + "RU" => "Russia", + "SA" => "Saudi Arabia", + "ZA" => "South Africa", + "ES" => "Spain", + "SE" => "Sweden", + "CH" => "Switzerland", + "TW" => "Taiwan", + "TR" => "Turkey", + "GB" => "United Kingdom", + "US" => "United States" + ] + ] + ]; + } + + private function get($proxy, $url, $get = [], $country){ + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "Referer: https://ghosterysearch.com", + "DNT: 1", + "Sec-GPC: 1", + "Connection: keep-alive", + "Cookie: ctry=" . ($country == "any" ? "--" : $country) . "; noads=true", + "Upgrade-Insecure-Requests: 1", + "Sec-Fetch-Dest: document", + "Sec-Fetch-Mode: navigate", + "Sec-Fetch-Site: same-origin", + "Sec-Fetch-User: ?1", + "Priority: u=0, i"] + ); + + // http2 bypass + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function web($get){ + + if($get["npt"]){ + + [$query, $proxy] = $this->backend->get($get["npt"], "web"); + + parse_str($query, $query); + + // country + $country = $query["c"]; + unset($query["c"]); + + $query = http_build_query($query); + + $html = + $this->get( + $proxy, + "https://ghosterysearch.com/search?" . $query, + [], + $country + ); + }else{ + + $proxy = $this->backend->get_ip(); + + $html = + $this->get( + $proxy, + "https://ghosterysearch.com/search", + [ + "q" => $get["s"] + ], + $get["country"] + ); + } + + $out = [ + "status" => "ok", + "spelling" => [ + "type" => "no_correction", + "using" => null, + "correction" => null + ], + "npt" => null, + "answer" => [], + "web" => [], + "image" => [], + "video" => [], + "news" => [], + "related" => [] + ]; + + $this->fuckhtml->load($html); + + $results_wrapper = + $this->fuckhtml + ->getElementsByClassName( + "results", + "section" + ); + + if(count($results_wrapper) === 0){ + + throw new Exception("Failed to grep result section"); + } + + $this->fuckhtml->load($results_wrapper[0]); + + // get search results + $results = + $this->fuckhtml + ->getElementsByClassName( + "result", + "li" + ); + + if(count($results) === 0){ + + return $out; + } + + foreach($results as $result){ + + $this->fuckhtml->load($result); + + $a = + $this->fuckhtml + ->getElementsByClassName( + "url", + "a" + ); + + if(count($a) === 0){ + + continue; + } + + $a = $a[0]; + + $out["web"][] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByTagName( + "h2" + )[0] + ) + ), + "description" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByTagName( + "p" + )[0] + ) + ), + "url" => + $this->fuckhtml + ->getTextContent( + $a + ["attributes"] + ["href"] + ), + "date" => null, + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => [] + ]; + } + + $this->fuckhtml->load($html); + + // get pagination token + $pagination_wrapper = + $this->fuckhtml + ->getElementsByClassName( + "pagination", + "div" + ); + + if(count($pagination_wrapper) !== 0){ + + // found next page! + $this->fuckhtml->load($pagination_wrapper[0]); + + $a = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($a) !== 0){ + + $q = + parse_url( + $this->fuckhtml + ->getTextContent( + $a[count($a) - 1] + ["attributes"] + ["href"] + ), + PHP_URL_QUERY + ); + + $out["npt"] = + $this->backend + ->store( + $q . "&c=" . $get["country"], + "web", + $proxy + ); + } + } + + return $out; + } + + private function titledots($title){ + + return trim($title, " .\t\n\r\0\x0B…"); + } +} diff --git a/settings.php b/settings.php index 046e7c7..12f607f 100644 --- a/settings.php +++ b/settings.php @@ -91,6 +91,10 @@ $settings = [ "value" => "qwant", "text" => "Qwant" ], + [ + "value" => "ghostery", + "text" => "Ghostery" + ], [ "value" => "yep", "text" => "Yep" @@ -137,6 +141,10 @@ $settings = [ "value" => "qwant", "text" => "Qwant" ], + [ + "value" => "ghostery", + "text" => "Ghostery" + ], [ "value" => "yep", "text" => "Yep"