This commit is contained in:
lolcat 2024-04-21 19:31:56 -04:00
parent 9e18327df6
commit 130358a9e0
16 changed files with 1385 additions and 457 deletions

1
.gitignore vendored
View File

@ -29,3 +29,4 @@ data/captcha/minecraft/
banner/* banner/*
!banner/*default* !banner/*default*
>>>>>>> 77293818cd213ec0ad07c573d298fff9cd5b357d >>>>>>> 77293818cd213ec0ad07c573d298fff9cd5b357d
scraper/curlie.html

View File

@ -11,63 +11,42 @@ https://4get.ca
## Totally unbiased comparison between alternatives ## Totally unbiased comparison between alternatives
| | 4get | searx(ng) | librex | araa | | | 4get | searx(ng) | librex | araa |
|----------------------------|-------------------------|-----------|-------------|----------| |----------------------------|-------------------------|-----------|-------------|-----------|
| RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | | RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ |
| Does it suck | no (debunked by snopes) | yes | yes | a little | | Does it suck | no (debunked by snopes) | yes | yes | a little |
| Does it work | ye | no | no | ye | | Does it work | ye | sometimes | no | sometimes |
| Did the dev commit suicide | not until my 30s | idk | yes | no | | Did the dev commit suicide | not until my 30s | no | allegedly | no |
## Features
1. Rotating proxies on a per-scraper basis
2. Search filters, which SearxNG lacks for the most part
3. Bot protection that *actually* filters out the bots (when configured)
4. Interface doesn't require javascript
5. Favicon fetcher with caching support & image proxy
6. Bunch of other shit
tl;dr the best way to actually browse for shit.
# Supported websites # Supported websites
1. Web
- DuckDuckGo
- Brave
- Yandex
- Google
- Mwmbl
- Mojeek
- Marginalia
- wiby
- Curlie
2. Images | Web | Images | Videos | News | Music | Autocompleter |
- DuckDuckGo |------------|--------------|------------|------------|------------|---------------|
- Yandex | DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | Soundcloud | Brave |
- Google | Brave | Brave | DuckDuckGo | Brave | | DuckDuckGo |
- Brave | Yandex | Yandex | Brave | Google | | Yandex |
- Yep | Google | Google | Yandex | Qwant | | Google |
- Imgur | Qwant | Qwant | Google | Mojeek | | Yep |
- FindThatMeme | Yep | Pinterest | Qwant | | | Marginalia |
| Crowdview | Yep | | | | YouTube |
3. Videos | Mwmbl | Imgur | | | | Soundcloud |
- YouTube | Mojeek | FindThatMeme | | | | |
- DuckDuckgo | Marginalia | | | | | |
- Brave | wiby | | | | | |
- Yandex | Curlie | | | | | |
- Google
4. News
- DuckDuckGo
- Brave
- Google
- Mojeek
5. Music
- SoundCloud
6. Autocompleter
- Brave
- DuckDuckGo
- Yandex
- Google
- Qwant
- Yep
- Marginalia
- YouTube
- SoundCloud
# Installation # Installation
Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>! Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>.
## Contact ## Contact
Shit breaks all the time but I repair it all the time too! Email me here: will (at) lolcat.ca Shit breaks all the time but I repair it all the time too... Email me here: <b>will (at) lolcat.ca</b> or create an issue.

View File

@ -18,7 +18,7 @@ class autocomplete{
"yep" => "https://api.yep.com/ac/?query={searchTerms}", "yep" => "https://api.yep.com/ac/?query={searchTerms}",
"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}", "marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}", "yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
"sc" => "https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=" . config::SC_CLIENT_TOKEN . "&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en" "sc" => ""
]; ];
/* /*
@ -39,14 +39,6 @@ class autocomplete{
$this->do404("Search(s) exceeds the 500 char length"); $this->do404("Search(s) exceeds the 500 char length");
} }
if(
isset($_GET["scraper"]) &&
is_string($_GET["scraper"]) === false
){
$_GET["scraper"] = "brave"; // default option
}
/* /*
Get $scraper Get $scraper
*/ */
@ -77,7 +69,6 @@ class autocomplete{
} }
// return results // return results
switch($scraper){ switch($scraper){
case "google": case "google":
@ -115,7 +106,16 @@ class autocomplete{
case "sc": case "sc":
// soundcloud // soundcloud
$js = $this->get($this->scrapers[$scraper], $_GET["s"]); chdir("../../");
include "scraper/sc.php";
$sc = new sc();
$token = $sc->get_token("raw_ip::::");
$js = $this->get(
"https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=" . $token . "&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en",
$_GET["s"]
);
$js = json_decode($js, true); $js = json_decode($js, true);

View File

@ -5,7 +5,7 @@ class config{
// any parameters. // any parameters.
// 4get version. Please keep this updated // 4get version. Please keep this updated
const VERSION = 7; const VERSION = 8;
// Will be shown pretty much everywhere. // Will be shown pretty much everywhere.
const SERVER_NAME = "4get"; const SERVER_NAME = "4get";
@ -63,13 +63,6 @@ class config{
"via" "via"
]; ];
// @TODO: Portscan the user for open proxies before allowing a connection, block user if any are found
// Requires the nmap package
const NMAP_PROXY_CHECK = false;
// @TODO: Make IP blacklist public under /api/v1/blacklist endpoint ?
const PUBLIC_IP_BLACKLIST = true;
// Maximal number of searches per captcha key/pass issued. Counter gets // Maximal number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day). // reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0 // Only useful when BOT_PROTECTION is NOT set to 0
@ -113,7 +106,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things. // Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0"; const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0";
// Proxy pool assignments for each scraper // Proxy pool assignments for each scraper
// false = Use server's raw IP // false = Use server's raw IP
@ -123,6 +116,7 @@ class config{
const PROXY_BRAVE = false; const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false; const PROXY_GOOGLE = false;
const PROXY_QWANT = false;
const PROXY_MARGINALIA = false; const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false; const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud const PROXY_SC = false; // soundcloud
@ -146,14 +140,8 @@ class config{
// Scraper-specific parameters // Scraper-specific parameters
// //
// SOUNDCLOUD
// Get these parameters by making a search on soundcloud with network
// tab open, then filter URLs using "search?q=". (No need to login)
const SC_USER_ID = "447501-577662-794348-352629";
const SC_CLIENT_TOKEN = "VNc62l3wxDWS0Ol62j5UYNc1gsZ3UXPv";
// MARGINALIA // MARGINALIA
// Get an API key by contacting the Marginalia.nu maintainer. The "public" key // Use "null" to default out to HTML scraping OR specify a string to
// works but is almost always rate-limited. // use the API (Eg: "public"). API has less filters.
const MARGINALIA_API_KEY = "public"; const MARGINALIA_API_KEY = null;
} }

View File

@ -93,31 +93,31 @@ class backend{
*/ */
public function store($payload, $page, $proxy){ public function store($payload, $page, $proxy){
$page = $page[0]; $key = sodium_crypto_secretbox_keygen();
$password = random_bytes(256); // 2048 bit $nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
$salt = random_bytes(16);
$key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true);
$iv =
random_bytes(
openssl_cipher_iv_length("aes-256-gcm")
);
$tag = "";
$out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16);
$requestid = apcu_inc("requestid"); $requestid = apcu_inc("requestid");
apcu_store( apcu_store(
$page . "." . $page[0] . "." . // first letter of page name
$this->scraper . $this->scraper . // scraper name
$requestid, $requestid,
gzdeflate($proxy . "," . $salt.$iv.$out.$tag), [
900 // cache information for 15 minutes blaze it $nonce,
$proxy,
// compress and encrypt
sodium_crypto_secretbox(
gzdeflate($payload),
$nonce,
$key
)
],
900 // cache information for 15 minutes
); );
return return
$this->scraper . $requestid . "." . $this->scraper . $requestid . "." .
rtrim(strtr(base64_encode($password), '+/', '-_'), '='); rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
} }
public function get($npt, $page){ public function get($npt, $page){
@ -137,7 +137,7 @@ class backend{
if($payload === false){ if($payload === false){
throw new Exception("The nextPageToken is invalid or has expired!"); throw new Exception("The next page token is invalid or has expired!");
} }
$key = $key =
@ -150,47 +150,27 @@ class backend{
) )
); );
$payload = gzinflate($payload); // decrypt and decompress data
$payload[2] =
// get proxy gzinflate(
[ sodium_crypto_secretbox_open(
$proxy, $payload[2], // data
$payload $payload[0], // nonce
] = explode(",", $payload, 2); $key
)
$key =
hash_pbkdf2(
"sha512",
$key,
substr($payload, 0, 16), // salt
20000,
32,
true
);
$ivlen = openssl_cipher_iv_length("aes-256-gcm");
$payload =
openssl_decrypt(
substr(
$payload,
16 + $ivlen,
-16
),
"aes-256-gcm",
$key,
OPENSSL_RAW_DATA,
substr($payload, 16, $ivlen),
substr($payload, -16)
); );
if($payload === false){ if($payload[2] === false){
throw new Exception("The nextPageToken is invalid or has expired!"); throw new Exception("The next page token is invalid or has expired!");
} }
// remove the key after using // remove the key after using successfully
apcu_delete($apcu); apcu_delete($apcu);
return [$payload, $proxy]; return [
$payload[2], // data
$payload[1] // proxy
];
} }
} }

View File

@ -290,30 +290,24 @@ class proxy{
if(isset($headers["content-type"])){ if(isset($headers["content-type"])){
if($headers["content-type"] == "text/html"){ if(stripos($headers["content-type"], "text/html") !== false){
throw new Exception("Server returned an html document instead of image"); throw new Exception("Server returned html");
} }
$tmp = explode(";", $headers["content-type"]); if(
preg_match(
for($i=0; $i<count($tmp); $i++){ '/image\/([^ ]+)/i',
$headers["content-type"],
$match
)
){
if( $format = strtolower($match[1]);
preg_match(
'/^image\/([^ ]+)/i', if(substr(strtolower($format), 0, 2) == "x-"){
$tmp[$i],
$match
)
){
$format = strtolower($match[1]); $format = substr($format, 2);
if(substr($format, 0, 2) == "x-"){
$format = substr($format, 2);
}
break;
} }
} }
} }
@ -351,6 +345,8 @@ class proxy{
private function stream($url, $referer, $format){ private function stream($url, $referer, $format){
$this->clientcache();
$this->url = $url; $this->url = $url;
$this->format = $format; $this->format = $format;
@ -360,8 +356,6 @@ class proxy{
throw new Exception("Invalid URL"); throw new Exception("Invalid URL");
} }
$this->clientcache();
$curl = curl_init(); $curl = curl_init();
// set headers // set headers
@ -490,11 +484,14 @@ class proxy{
// get content type // get content type
if(isset($this->headers["content-type"])){ if(isset($this->headers["content-type"])){
$filetype = explode("/", $this->headers["content-type"]); $octet_check = stripos($this->headers["content-type"], "octet-stream");
if(strtolower($filetype[0]) != $this->format){ if(
stripos($this->headers["content-type"], $this->format) === false &&
$octet_check === false
){
throw new Exception("Resource is not an {$this->format} (Found {$filetype[0]} instead)"); throw new Exception("Resource reported invalid Content-Type");
} }
}else{ }else{
@ -502,6 +499,18 @@ class proxy{
throw new Exception("Resource is not an {$this->format} (no Content-Type)"); throw new Exception("Resource is not an {$this->format} (no Content-Type)");
} }
$filetype = explode("/", $this->headers["content-type"]);
if(!isset($filetype[1])){
throw new Exception("Malformed Content-Type header");
}
if($octet_check !== false){
$filetype[1] = "jpeg";
}
header("Content-Type: {$this->format}/{$filetype[1]}"); header("Content-Type: {$this->format}/{$filetype[1]}");
// give payload size // give payload size
@ -541,7 +550,7 @@ class proxy{
if(isset($filename[1])){ if(isset($filename[1])){
header("Content-Disposition: filename=" . $filename[1] . "." . $filetype); header("Content-Disposition: filename=\"" . trim($filename[1], "\"'") . "." . $filetype . "\"");
return; return;
} }
} }
@ -552,7 +561,7 @@ class proxy{
if($filename === null){ if($filename === null){
// everything failed! rename file to domain name // everything failed! rename file to domain name
header("Content-Disposition: filename=" . parse_url($url, PHP_URL_HOST) . "." . $filetype); header("Content-Disposition: filename=\"" . parse_url($url, PHP_URL_HOST) . "." . $filetype . "\"");
return; return;
} }
@ -569,7 +578,7 @@ class proxy{
$filename = implode(".", $filename); $filename = implode(".", $filename);
header("Content-Disposition: inline; filename=" . $filename . "." . $filetype); header("Content-Disposition: inline; filename=\"" . $filename . "." . $filetype . "\"");
return; return;
} }

View File

@ -923,6 +923,7 @@ class frontend{
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex", "yandex" => "Yandex",
"google" => "Google", "google" => "Google",
"qwant" => "Qwant",
"yep" => "Yep", "yep" => "Yep",
"crowdview" => "Crowdview", "crowdview" => "Crowdview",
"mwmbl" => "Mwmbl", "mwmbl" => "Mwmbl",
@ -942,6 +943,7 @@ class frontend{
"yandex" => "Yandex", "yandex" => "Yandex",
"brave" => "Brave", "brave" => "Brave",
"google" => "Google", "google" => "Google",
"qwant" => "Qwant",
"yep" => "Yep", "yep" => "Yep",
//"pinterest" => "Pinterest", //"pinterest" => "Pinterest",
"imgur" => "Imgur", "imgur" => "Imgur",
@ -959,7 +961,8 @@ class frontend{
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex", "yandex" => "Yandex",
"google" => "Google" "google" => "Google",
"qwant" => "Qwant"
] ]
]; ];
break; break;
@ -971,6 +974,7 @@ class frontend{
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave", "brave" => "Brave",
"google" => "Google", "google" => "Google",
"qwant" => "Qwant",
"yep" => "Yep", "yep" => "Yep",
"mojeek" => "Mojeek" "mojeek" => "Mojeek"
] ]
@ -1010,98 +1014,8 @@ class frontend{
$scraper_out = $first; $scraper_out = $first;
} }
switch($scraper_out){ include "scraper/$scraper_out.php";
$lib = new $scraper_out();
case "ddg":
include "scraper/ddg.php";
$lib = new ddg();
break;
case "brave":
include "scraper/brave.php";
$lib = new brave();
break;
case "yt";
include "scraper/youtube.php";
$lib = new youtube();
break;
case "yandex":
include "scraper/yandex.php";
$lib = new yandex();
break;
case "google":
include "scraper/google.php";
$lib = new google();
break;
/*
case "fb":
include "scraper/facebook.php";
$lib = new facebook();
break;*/
case "crowdview":
include "scraper/crowdview.php";
$lib = new crowdview();
break;
case "mwmbl":
include "scraper/mwmbl.php";
$lib = new mwmbl();
break;
case "mojeek":
include "scraper/mojeek.php";
$lib = new mojeek();
break;
case "marginalia":
include "scraper/marginalia.php";
$lib = new marginalia();
break;
case "wiby":
include "scraper/wiby.php";
$lib = new wiby();
break;
case "curlie":
include "scraper/curlie.php";
$lib = new curlie();
break;
case "yep":
include "scraper/yep.php";
$lib = new yep();
break;
case "sc":
include "scraper/sc.php";
$lib = new sc();
break;
case "spotify":
include "scraper/spotify.php";
$lib = new spotify();
break;
case "pinterest":
include "scraper/pinterest.php";
$lib = new pinterest();
break;
case "imgur":
include "scraper/imgur.php";
$lib = new imgur();
break;
case "ftm":
include "scraper/ftm.php";
$lib = new ftm();
break;
}
// set scraper on $_GET // set scraper on $_GET
$_GET["scraper"] = $scraper_out; $_GET["scraper"] = $scraper_out;

View File

@ -24,13 +24,36 @@ try{
} }
// bing request, ask bing to resize and stream to browser // bing request, ask bing to resize and stream to browser
$image = parse_url($_GET["i"]);
if( if(
isset($image["host"]) &&
preg_match( preg_match(
'/bing.net$/', '/^[A-z0-9.]*bing\.(net|com)$/i',
parse_url($_GET["i"], PHP_URL_HOST) $image["host"]
) )
){ ){
if(
!isset($image["query"]) ||
!isset($image["path"]) ||
$image["path"] != "/th"
){
header("X-Error: Invalid bing image path");
$proxy->do404();
die();
}
parse_str($image["query"], $str);
if(!isset($str["id"])){
header("X-Error: Missing bing ID");
$proxy->do404();
die();
}
switch($_GET["s"]){ switch($_GET["s"]){
case "portrait": $req = "&w=50&h=90&p=0&qlt=90"; break; case "portrait": $req = "&w=50&h=90&p=0&qlt=90"; break;
@ -40,7 +63,7 @@ try{
case "cover": $req = "&w=207&h=270&p=0&qlt=90"; break; case "cover": $req = "&w=207&h=270&p=0&qlt=90"; break;
} }
$proxy->stream_linear_image($_GET["i"] . $req, "https://bing.net"); $proxy->stream_linear_image("https://" . $image["host"] . "/th?id=" . urlencode($str["id"]) . $req, "https://www.bing.com");
die(); die();
} }

View File

@ -3,78 +3,103 @@
class marginalia{ class marginalia{
public function __construct(){ public function __construct(){
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
include "lib/backend.php"; include "lib/backend.php";
$this->backend = new backend("marginalia"); $this->backend = new backend("marginalia");
} }
public function getfilters($page){ public function getfilters($page){
switch($page){ if(config::MARGINALIA_API_KEY === null){
case "web": $base = [
return [ "adtech" => [
"profile" => [ "display" => "Reduce adtech",
"display" => "Profile", "option" => [
"option" => [ "no" => "No",
"any" => "Default", "yes" => "Yes"
"modern" => "Modern"
]
],
"format" => [
"display" => "Format",
"option" => [
"any" => "Any",
"html5" => "html5",
"xhtml" => "xhtml",
"html123" => "html123"
]
],
"file" => [
"display" => "File",
"option" => [
"any" => "Any",
"nomedia" => "Deny media",
"media" => "Contains media",
"audio" => "Contains audio",
"video" => "Contains video",
"archive" => "Contains archive",
"document" => "Contains document"
]
],
"javascript" => [
"display" => "Javascript",
"option" => [
"any" => "Allow JS",
"deny" => "Deny JS",
"require" => "Require JS"
]
],
"trackers" => [
"display" => "Trackers",
"option" => [
"any" => "Allow trackers",
"deny" => "Deny trackers",
"require" => "Require trackers"
]
],
"cookies" => [
"display" => "Cookies",
"option" => [
"any" => "Allow cookies",
"deny" => "Deny cookies",
"require" => "Require cookies"
]
],
"affiliate" => [
"display" => "Affiliate links in body",
"option" => [
"any" => "Allow affiliate links",
"deny" => "Deny affiliate links",
"require" => "Require affiliate links"
]
] ]
]; ],
"recent" => [
"display" => "Recent results",
"option" => [
"no" => "No",
"yes" => "Yes"
]
],
"intitle" => [
"display" => "Search in title",
"option" => [
"no" => "No",
"yes" => "Yes"
]
]
];
}else{
$base = [];
} }
return array_merge(
$base,
[
"format" => [
"display" => "Format",
"option" => [
"any" => "Any format",
"html5" => "html5",
"xhtml" => "xhtml",
"html123" => "html123"
]
],
"file" => [
"display" => "Filetype",
"option" => [
"any" => "Any filetype",
"nomedia" => "Deny media",
"media" => "Contains media",
"audio" => "Contains audio",
"video" => "Contains video",
"archive" => "Contains archive",
"document" => "Contains document"
]
],
"javascript" => [
"display" => "Javascript",
"option" => [
"any" => "Allow JS",
"deny" => "Deny JS",
"require" => "Require JS"
]
],
"trackers" => [
"display" => "Trackers",
"option" => [
"any" => "Allow trackers",
"deny" => "Deny trackers",
"require" => "Require trackers"
]
],
"cookies" => [
"display" => "Cookies",
"option" => [
"any" => "Allow cookies",
"deny" => "Deny cookies",
"require" => "Require cookies"
]
],
"affiliate" => [
"display" => "Affiliate links in body",
"option" => [
"any" => "Allow affiliate links",
"deny" => "Deny affiliate links",
"require" => "Require affiliate links"
]
]
]
);
} }
private function get($proxy, $url, $get = []){ private function get($proxy, $url, $get = []){
@ -132,7 +157,6 @@ class marginalia{
throw new Exception("Search term is empty!"); throw new Exception("Search term is empty!");
} }
$profile = $get["profile"];
$format = $get["format"]; $format = $get["format"];
$file = $get["file"]; $file = $get["file"];
@ -180,38 +204,6 @@ class marginalia{
$search = implode(" ", $search); $search = implode(" ", $search);
$params = [
"count" => 20
];
if($profile == "modern"){
$params["index"] = 1;
}
try{
$json =
$this->get(
$this->backend->get_ip(), // no nextpage
"https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
$params
);
}catch(Exception $error){
throw new Exception("Failed to get JSON");
}
if($json == "Slow down"){
throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
}
$json = json_decode($json, true);
/*
$handle = fopen("scraper/marginalia.json", "r");
$json = json_decode(fread($handle, filesize("scraper/marginalia.json")), true);
fclose($handle);*/
$out = [ $out = [
"status" => "ok", "status" => "ok",
"spelling" => [ "spelling" => [
@ -228,19 +220,169 @@ class marginalia{
"related" => [] "related" => []
]; ];
foreach($json["results"] as $result){ if(config::MARGINALIA_API_KEY !== null){
try{
$json =
$this->get(
$this->backend->get_ip(), // no nextpage
"https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
[
"count" => 20
]
);
}catch(Exception $error){
throw new Exception("Failed to get JSON");
}
if($json == "Slow down"){
throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
}
$json = json_decode($json, true);
foreach($json["results"] as $result){
$out["web"][] = [
"title" => $result["title"],
"description" => str_replace("\n", " ", $result["description"]),
"url" => $result["url"],
"date" => null,
"type" => "web",
"thumb" => [
"url" => null,
"ratio" => null
],
"sublink" => [],
"table" => []
];
}
return $out;
}
// no more cloudflare!! Parse html by default
$params = [
"query" => $search
];
// Translate the yes/no filters into the query parameters sent to
// search.marginalia.nu. Each filter is only added when set to "yes".
foreach(["adtech", "recent", "intitle"] as $v){

	if($get[$v] == "yes"){

		switch($v){

			case "adtech": $params["adtech"] = "reduce"; break;
			case "recent": $params["recent"] = "recent"; break;
			// this label used to be a second "adtech", which made the
			// branch unreachable and the "intitle" filter a no-op
			case "intitle": $params["searchTitle"] = "title"; break;
		}
	}
}
try{
$html =
$this->get(
$this->backend->get_ip(),
"https://search.marginalia.nu/search",
$params
);
}catch(Exception $error){
throw new Exception("Failed to get HTML");
}
$this->fuckhtml->load($html);
$sections =
$this->fuckhtml
->getElementsByClassName(
"card search-result",
"section"
);
foreach($sections as $section){
$this->fuckhtml->load($section);
$title =
$this->fuckhtml
->getElementsByClassName(
"title",
"a"
)[0];
$description =
$this->fuckhtml
->getElementsByClassName(
"description",
"p"
);
if(count($description) !== 0){
$description =
$this->fuckhtml
->getTextContent(
$description[0]
);
}else{
$description = null;
}
$sublinks = [];
$sublink_html =
$this->fuckhtml
->getElementsByClassName("additional-results");
if(count($sublink_html) !== 0){
$this->fuckhtml->load($sublink_html[0]);
$links =
$this->fuckhtml
->getElementsByTagName("a");
foreach($links as $link){
$sublinks[] = [
"title" =>
$this->fuckhtml
->getTextContent(
$link
),
"date" => null,
"description" => null,
"url" =>
$this->fuckhtml
->getTextContent(
$link["attributes"]["href"]
)
];
}
}
$out["web"][] = [ $out["web"][] = [
"title" => $result["title"], "title" =>
"description" => str_replace("\n", " ", $result["description"]), $this->fuckhtml
"url" => $result["url"], ->getTextContent(
$title
),
"description" => $description,
"url" =>
$this->fuckhtml
->getTextContent(
$title["attributes"]["href"]
),
"date" => null, "date" => null,
"type" => "web", "type" => "web",
"thumb" => [ "thumb" => [
"url" => null, "url" => null,
"ratio" => null "ratio" => null
], ],
"sublink" => [], "sublink" => $sublinks,
"table" => [] "table" => []
]; ];
} }

View File

@ -4,11 +4,8 @@ class pinterest{
public function __construct(){ public function __construct(){
include "lib/nextpage.php"; include "lib/backend.php";
$this->nextpage = new nextpage("pinterest"); $this->backend = new backend("pinterest");
include "lib/proxy_pool.php";
$this->proxy = new proxy_pool("pinterest");
} }
public function getfilters($page){ public function getfilters($page){

893
scraper/qwant.php Normal file
View File

@ -0,0 +1,893 @@
<?php
class qwant{
/**
 * Loads the shared backend library and attaches a backend instance
 * created for the "qwant" scraper to this object.
 */
public function __construct(){

	include "lib/backend.php";

	$backend = new backend("qwant");
	$this->backend = $backend;
}
/**
 * Returns the search filters available for a given result page.
 *
 * Every page receives the "nsfw" and "country" filters. The "web",
 * "images", "videos" and "news" pages append their own filters; any
 * other $page value yields the two shared filters only.
 *
 * @param string $page Page name ("web", "images", "videos" or "news")
 * @return array Filter name => ["display" => label, "option" => [value => label]]
 */
public function getfilters($page){

	// filters shared by every page
	$base = [
		"nsfw" => [
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes",
				"maybe" => "Maybe",
				"no" => "No"
			]
		],
		"country" => [
			"display" => "Country",
			"option" => [
				"en_US" => "United States",
				"fr_FR" => "France",
				"en_GB" => "Great Britain",
				"de_DE" => "Germany",
				"it_IT" => "Italy",
				"es_AR" => "Argentina",
				"en_AU" => "Australia",
				"es_ES" => "Spain (es)",
				"ca_ES" => "Spain (ca)",
				"cs_CZ" => "Czech Republic",
				"ro_RO" => "Romania",
				"el_GR" => "Greece",
				"zh_CN" => "China",
				"zh_HK" => "Hong Kong",
				"en_NZ" => "New Zealand",
				// "fr_FR" used to be listed a second time here; PHP keeps
				// the first position for a repeated key, so the resulting
				// list is the same without the duplicate
				"th_TH" => "Thailand",
				"ko_KR" => "South Korea",
				"sv_SE" => "Sweden",
				"nb_NO" => "Norway",
				"da_DK" => "Denmark",
				"hu_HU" => "Hungary",
				"et_EE" => "Estonia",
				"es_MX" => "Mexico",
				"es_CL" => "Chile",
				"en_CA" => "Canada (en)",
				"fr_CA" => "Canada (fr)",
				"en_MY" => "Malaysia",
				"bg_BG" => "Bulgaria",
				"fi_FI" => "Finland",
				"pl_PL" => "Poland",
				"nl_NL" => "Netherlands",
				"pt_PT" => "Portugal",
				"de_CH" => "Switzerland (de)",
				"fr_CH" => "Switzerland (fr)",
				"it_CH" => "Switzerland (it)",
				"de_AT" => "Austria",
				"fr_BE" => "Belgium (fr)",
				"nl_BE" => "Belgium (nl)",
				"en_IE" => "Ireland",
				"he_IL" => "Israel"
			]
		]
	];

	// "Time posted" filter used as-is by the web and images pages
	$time = [
		"display" => "Time posted",
		"option" => [
			"any" => "Any time",
			"day" => "Past 24 hours",
			"week" => "Past week",
			"month" => "Past month"
		]
	];

	// page-specific filters, appended after the shared ones
	$extra = [];

	switch($page){

		case "web":
			$extra = [
				"time" => $time,
				"extendedsearch" => [
					// no display, wont show in interface
					"option" => [
						"yes" => "Yes",
						"no" => "No"
					]
				]
			];
			break;

		case "images":
			$extra = [
				"time" => $time,
				"size" => [
					"display" => "Size",
					"option" => [
						"any" => "Any size",
						"large" => "Large",
						"medium" => "Medium",
						"small" => "Small"
					]
				],
				"color" => [
					"display" => "Color",
					"option" => [
						"any" => "Any color",
						"coloronly" => "Color only",
						"monochrome" => "Monochrome",
						"black" => "Black",
						"brown" => "Brown",
						"gray" => "Gray",
						"white" => "White",
						"yellow" => "Yellow",
						"orange" => "Orange",
						"red" => "Red",
						"pink" => "Pink",
						"purple" => "Purple",
						"blue" => "Blue",
						"teal" => "Teal",
						"green" => "Green"
					]
				],
				"imagetype" => [
					"display" => "Type",
					"option" => [
						"any" => "Any type",
						"animatedgif" => "Animated GIF",
						"photo" => "Photograph",
						"transparent" => "Transparent"
					]
				],
				"license" => [
					"display" => "License",
					"option" => [
						"any" => "Any license",
						"share" => "Non-commercial reproduction and sharing",
						"sharecommercially" => "Reproduction and sharing",
						"modify" => "Non-commercial reproduction, sharing and modification",
						"modifycommercially" => "Reproduction, sharing and modification",
						"public" => "Public domain"
					]
				]
			];
			break;

		case "videos":
			$extra = [
				"order" => [
					"display" => "Order by",
					"option" => [
						"relevance" => "Relevance",
						"views" => "Views",
						"date" => "Most recent"
					]
				],
				"source" => [
					"display" => "Source",
					"option" => [
						"any" => "Any source",
						"youtube" => "YouTube",
						"dailymotion" => "Dailymotion"
					]
				]
			];
			break;

		case "news":
			$extra = [
				// news has its own time list: it adds the "hour" option
				"time" => [
					"display" => "Time posted",
					"option" => [
						"any" => "Any time",
						"hour" => "Less than 1 hour ago",
						"day" => "Past 24 hours",
						"week" => "Past week",
						"month" => "Past month"
					]
				],
				"order" => [
					"display" => "Order by",
					"option" => [
						"relevance" => "Relevance",
						"date" => "Most recent"
					]
				]
			];
			break;
	}

	return array_merge($base, $extra);
}
/**
 * Sends a GET request with Qwant-specific headers and returns the
 * value produced by curl_exec().
 *
 * @param mixed $proxy Proxy descriptor handed to the backend's assign_proxy()
 * @param string $url Endpoint URL without query string
 * @param array $get Query parameters; appended to $url when not empty
 * @return mixed Whatever curl_exec() returned for the transfer
 * @throws Exception With the cURL error message when the transfer fails
 */
private function get($proxy, $url, $get = []){

	// append the query string only when parameters were supplied
	if($get !== []){

		$url .= "?" . http_build_query($get);
	}

	$handle = curl_init();

	$options = [
		CURLOPT_URL => $url,
		CURLOPT_ENCODING => "", // default encoding
		CURLOPT_HTTPHEADER => [
			"User-Agent: " . config::USER_AGENT,
			"Accept: application/json, text/plain, */*",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Origin: https://www.qwant.com",
			"Referer: https://www.qwant.com/",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"TE: trailers"
		],
		// Bypass HTTP/2 check
		CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
		CURLOPT_RETURNTRANSFER => true,
		CURLOPT_SSL_VERIFYHOST => 2,
		CURLOPT_SSL_VERIFYPEER => true,
		CURLOPT_CONNECTTIMEOUT => 30,
		CURLOPT_TIMEOUT => 30
	];

	// apply the options one by one, in the order listed above
	foreach($options as $option => $value){

		curl_setopt($handle, $option, $value);
	}

	$this->backend->assign_proxy($handle, $proxy);

	$response = curl_exec($handle);

	if(curl_errno($handle)){

		throw new Exception(curl_error($handle));
	}

	curl_close($handle);

	return $response;
}
public function web($get){
if($get["npt"]){
// get next page data
[$params, $proxy] = $this->backend->get($get["npt"], "web");
$params = json_decode($params, true);
}else{
// get _GET data instead
$search = $get["s"];
if(strlen($search) === 0){
throw new Exception("Search term is empty!");
}
if(strlen($search) > 2048){
throw new Exception("Search term is too long!");
}
$proxy = $this->backend->get_ip();
$params = [
"q" => $search,
"freshness" => $get["time"],
"count" => 10,
"locale" => $get["country"],
"offset" => 0,
"device" => "desktop",
"tgp" => 3,
"safesearch" => 0,
"displayed" => "true"
];
switch($get["nsfw"]){
case "yes": $params["safesearch"] = 0; break;
case "maybe": $params["safesearch"] = 1; break;
case "no": $params["safesearch"] = 2; break;
}
}
/*
$handle = fopen("scraper/qwant_web.json", "r");
$json = fread($handle, filesize("scraper/qwant_web.json"));
fclose($handle);*/
try{
$json =
$this->get(
$proxy,
"https://fdn.qwant.com/v3/search/web",
$params
);
}catch(Exception $error){
throw new Exception("Could not fetch JSON");
}
$json = json_decode($json, true);
if($json === NULL){
throw new Exception("Failed to decode JSON");
}
if(isset($json["data"]["message"][0])){
throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]);
}
// Build the result skeleton before looking at the status: the early
// return below must hand back a well-formed, empty result set. It used
// to return $out before the variable was ever assigned.
$out = [
	"status" => "ok",
	"spelling" => [
		"type" => "no_correction",
		"using" => null,
		"correction" => null
	],
	"npt" => null,
	"answer" => [],
	"web" => [],
	"image" => [],
	"video" => [],
	"news" => [],
	"related" => []
];

if(($json["status"] ?? null) != "success"){

	$error_code = $json["data"]["error_code"] ?? "unknown";

	// NOTE(review): error code 5 is treated as "nothing to show" and
	// answered with the empty skeleton -- confirm against Qwant's API
	if($error_code === 5){

		return $out;
	}

	throw new Exception("Server returned an error code: " . $error_code);
}

if(!isset($json["data"]["result"]["items"]["mainline"])){

	throw new Exception("Server did not return a result object");
}

// data is OK, parse
// get instant answer
if(
$get["extendedsearch"] == "yes" &&
isset($json["data"]["result"]["items"]["sidebar"][0]["endpoint"])
){
try{
$answer =
$this->get(
$proxy,
"https://api.qwant.com/v3" .
$json["data"]["result"]["items"]["sidebar"][0]["endpoint"],
[]
);
$answer = json_decode($answer, true);
if(
$answer === null ||
$answer["status"] != "success" ||
$answer["data"]["result"] === null
){
throw new Exception();
}
// parse answer
$out["answer"][] = [
"title" => $answer["data"]["result"]["title"],
"description" => [
[
"type" => "text",
"value" => $this->trimdots($answer["data"]["result"]["description"])
]
],
"url" => $answer["data"]["result"]["url"],
"thumb" =>
$answer["data"]["result"]["thumbnail"]["landscape"] == null ?
null :
$this->unshitimage(
$answer["data"]["result"]["thumbnail"]["landscape"],
false
),
"table" => [],
"sublink" => []
];
}catch(Exception $error){
// do nothing in case of failure
}
}
// get word correction
if(isset($json["data"]["query"]["queryContext"]["alteredQuery"])){
$out["spelling"] = [
"type" => "including",
"using" => $json["data"]["query"]["queryContext"]["alteredQuery"],
"correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"]
];
}
// check for next page
if($json["data"]["result"]["lastPage"] === false){
$params["offset"] = $params["offset"] + 10;
$out["npt"] =
$this->backend->store(
json_encode($params),
"web",
$proxy
);
}
// parse results
foreach($json["data"]["result"]["items"]["mainline"] as $item){
switch($item["type"]){ // ignores ads
case "web":
foreach($item["items"] as $result){
if(isset($result["thumbnailUrl"])){
$thumb = [
"url" => $this->unshitimage($result["thumbnailUrl"]),
"ratio" => "16:9"
];
}else{
$thumb = [
"url" => null,
"ratio" => null
];
}
$sublinks = [];
if(isset($result["links"])){
foreach($result["links"] as $link){
$sublinks[] = [
"title" => $this->trimdots($link["title"]),
"date" => null,
"description" => isset($link["desc"]) ? $this->trimdots($link["desc"]) : null,
"url" => $link["url"]
];
}
}
$out["web"][] = [
"title" => $this->trimdots($result["title"]),
"description" => $this->trimdots($result["desc"]),
"url" => $result["url"],
"date" => null,
"type" => "web",
"thumb" => $thumb,
"sublink" => $sublinks,
"table" => []
];
}
break;
case "images":
foreach($item["items"] as $image){
$out["image"][] = [
"title" => $image["title"],
"source" => [
[
"url" => $image["media"],
"width" => (int)$image["width"],
"height" => (int)$image["height"]
],
[
"url" => $this->unshitimage($image["thumbnail"]),
"width" => $image["thumb_width"],
"height" => $image["thumb_height"]
]
],
"url" => $image["url"]
];
}
break;
case "videos":
foreach($item["items"] as $video){
$out["video"][] = [
"title" => $video["title"],
"description" => null,
"date" => (int)$video["date"],
"duration" => $video["duration"] === null ? null : $video["duration"] / 1000,
"views" => null,
"thumb" =>
$video["thumbnail"] === null ?
[
"url" => null,
"ratio" => null,
] :
[
"url" => $this->unshitimage($video["thumbnail"]),
"ratio" => "16:9",
],
"url" => $video["url"]
];
}
break;
case "related_searches":
foreach($item["items"] as $related){
$out["related"][] = $related["text"];
}
break;
}
}
return $out;
}
/**
 * Search Qwant for images.
 *
 * Either resumes a previous search from a next-page token, or builds a
 * fresh request from the supplied filters, then maps every returned item
 * to a title, a full-size source, a thumbnail source and the page URL.
 *
 * @param array $get Request parameters. Keys read here: "npt", "s",
 *                   "country", "time", "size", "color", "imagetype",
 *                   "license" and "nsfw".
 * @return array ["status" => "ok", "npt" => next-page token or null,
 *               "image" => list of parsed images]
 * @throws Exception When the search term is empty, the request fails, the
 *                   response is not valid JSON, or its status is not "success".
 */
public function image($get){
	
	if($get["npt"]){
		
		// Resume pagination: the stored payload is the JSON-encoded
		// parameter list, returned together with the proxy it was stored with.
		[$params, $proxy] =
			$this->backend->get(
				$get["npt"],
				"images"
			);
		
		$params = json_decode($params, true);
	}else{
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$proxy = $this->backend->get_ip();
		
		$params = [
			"t" => "images",
			"q" => $search,
			"count" => 125,
			"locale" => $get["country"],
			"offset" => 0, // increment by 125
			"device" => "desktop",
			"tgp" => 3
		];
		
		// Filters left on "any" are omitted from the request entirely.
		if($get["time"] != "any"){
			
			$params["freshness"] = $get["time"];
		}
		
		foreach(["size", "color", "imagetype", "license"] as $p){
			
			if($get[$p] != "any"){
				
				$params[$p] = $get[$p];
			}
		}
		
		switch($get["nsfw"]){
			
			case "yes": $params["safesearch"] = 0; break;
			case "maybe": $params["safesearch"] = 1; break;
			case "no": $params["safesearch"] = 2; break;
		}
	}
	
	try{
		$json = $this->get(
			$proxy,
			"https://api.qwant.com/v3/search/images",
			$params
		);
	}catch(Exception $err){
		
		throw new Exception("Failed to get JSON");
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	// A missing "status" key is treated like any other non-success status.
	if(($json["status"] ?? null) != "success"){
		
		throw new Exception("Qwant returned an API error");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	// Only an explicit `false` means more pages exist; a missing flag
	// is treated as "no next page" instead of raising a warning.
	if(($json["data"]["result"]["lastPage"] ?? null) === false){
		
		$params["offset"] = $params["offset"] + 125;
		
		$out["npt"] = $this->backend->store(
			json_encode($params),
			"images",
			$proxy
		);
	}
	
	// A successful response without a usable item list yields an empty
	// result set rather than a foreach-over-null warning.
	$items = $json["data"]["result"]["items"] ?? [];
	if(!is_array($items)){
		
		$items = [];
	}
	
	foreach($items as $image){
		
		$out["image"][] = [
			"title" => $this->trimdots($image["title"]),
			"source" => [
				[
					"url" => $image["media"],
					"width" => $image["width"],
					"height" => $image["height"]
				],
				[
					"url" => $this->unshitimage($image["thumbnail"]),
					"width" => $image["thumb_width"],
					"height" => $image["thumb_height"]
				]
			],
			"url" => $image["url"]
		];
	}
	
	return $out;
}
/**
 * Search Qwant for videos.
 *
 * Fetches a single page of up to 50 results (pagination is deliberately
 * not implemented, so "npt" is always null) and maps each item to the
 * video result structure.
 *
 * @param array $get Request parameters. Keys read here: "s", "country"
 *                   and "nsfw".
 * @return array ["status" => "ok", "npt" => null, "video" => list of
 *               parsed videos, plus empty "author", "livestream",
 *               "playlist" and "reel" lists]
 * @throws Exception When the search term is empty, the request fails, the
 *                   response is not valid JSON, or its status is not "success".
 */
public function video($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$params = [
		"t" => "videos",
		"q" => $search,
		"count" => 50,
		"locale" => $get["country"],
		"offset" => 0, // dont implement pagination
		"device" => "desktop",
		"tgp" => 3
	];
	
	switch($get["nsfw"]){
		
		case "yes": $params["safesearch"] = 0; break;
		case "maybe": $params["safesearch"] = 1; break;
		case "no": $params["safesearch"] = 2; break;
	}
	
	try{
		$json =
			$this->get(
				$this->backend->get_ip(),
				"https://api.qwant.com/v3/search/videos",
				$params
			);
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	// A missing "status" key is treated like any other non-success status.
	if(($json["status"] ?? null) != "success"){
		
		throw new Exception("Qwant returned an API error");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	// A successful response without a usable item list yields an empty
	// result set rather than a foreach-over-null warning.
	$items = $json["data"]["result"]["items"] ?? [];
	if(!is_array($items)){
		
		$items = [];
	}
	
	foreach($items as $video){
		
		if(empty($video["thumbnail"])){
			
			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}else{
			
			// false: return the proxied upstream URL as-is instead of
			// rebuilding it as a bing thumbnail link
			$thumb = [
				"url" => $this->unshitimage($video["thumbnail"], false),
				"ratio" => "16:9"
			];
		}
		
		// A zero (or non-numeric) duration is reported as unknown.
		$duration = (int)$video["duration"];
		
		$out["video"][] = [
			"title" => $video["title"],
			// "?? ''" keeps the result identical (empty string) while
			// avoiding passing null into wordwrap() (deprecated in PHP 8.1)
			"description" => $this->limitstrlen($video["desc"] ?? ""),
			"author" => [
				"name" => $video["channel"],
				"url" => null,
				"avatar" => null
			],
			"date" => (int)$video["date"],
			"duration" => $duration === 0 ? null : $duration,
			"views" => null,
			"thumb" => $thumb,
			// drop the "?syndication=..." suffix from the video link
			"url" => preg_replace("/\?syndication=.+/", "", $video["url"])
		];
	}
	
	return $out;
}
/**
 * Search Qwant for news articles.
 *
 * Fetches a single page of up to 50 results (pagination is deliberately
 * not implemented, so "npt" is always null) and maps each item to the
 * news result structure.
 *
 * @param array $get Request parameters. Keys read here: "s", "country"
 *                   and "nsfw".
 * @return array ["status" => "ok", "npt" => null, "news" => list of
 *               parsed articles]
 * @throws Exception When the search term is empty, the request fails, the
 *                   response is not valid JSON, or its status is not "success".
 */
public function news($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$params = [
		"t" => "news",
		"q" => $search,
		"count" => 50,
		"locale" => $get["country"],
		"offset" => 0, // dont implement pagination
		"device" => "desktop",
		"tgp" => 3
	];
	
	switch($get["nsfw"]){
		
		case "yes": $params["safesearch"] = 0; break;
		case "maybe": $params["safesearch"] = 1; break;
		case "no": $params["safesearch"] = 2; break;
	}
	
	try{
		$json =
			$this->get(
				$this->backend->get_ip(),
				"https://api.qwant.com/v3/search/news",
				$params
			);
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	// A missing "status" key is treated like any other non-success status.
	if(($json["status"] ?? null) != "success"){
		
		throw new Exception("Qwant returned an API error");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];
	
	// A successful response without a usable item list yields an empty
	// result set rather than a foreach-over-null warning.
	$items = $json["data"]["result"]["items"] ?? [];
	if(!is_array($items)){
		
		$items = [];
	}
	
	foreach($items as $news){
		
		if(empty($news["media"][0]["pict_big"]["url"])){
			
			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}else{
			
			// false: return the proxied upstream URL as-is instead of
			// rebuilding it as a bing thumbnail link
			$thumb = [
				"url" => $this->unshitimage($news["media"][0]["pict_big"]["url"], false),
				"ratio" => "16:9"
			];
		}
		
		$out["news"][] = [
			"title" => $news["title"],
			"author" => $news["press_name"],
			// "?? ''" keeps the result identical (empty string) while
			// avoiding passing null into trim() (deprecated in PHP 8.1)
			"description" => $this->trimdots($news["desc"] ?? ""),
			"date" => (int)$news["date"],
			"thumb" => $thumb,
			"url" => $news["url"]
		];
	}
	
	return $out;
}
/**
 * Shorten a text to its first wrapped line.
 *
 * The text is word-wrapped at 300 characters (words are not split) and
 * everything after the first line break is discarded. A line break that
 * is already present in $text also ends the returned string.
 *
 * @param string $text Text to shorten
 * @return string First line of the wrapped text
 */
private function limitstrlen($text){
	
	$wrapped = wordwrap($text, 300, "\n");
	$lines = explode("\n", $wrapped);
	
	return $lines[0];
}
/**
 * Strip leading and trailing dots and spaces from a text.
 *
 * @param string $text Text to clean up
 * @return string $text without surrounding "." and " " characters
 */
private function trimdots($text){
	
	// characters removed from both ends of the string
	$strip = ". ";
	
	return trim($text, $strip);
}
/**
 * Extract the upstream image URL from a Qwant thumbnail proxy link.
 *
 * Qwant proxy links carry the upstream URL in their "u" query parameter:
 * https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0
 *
 * @param string $url     Qwant thumbnail proxy URL
 * @param bool   $is_bing When true, the upstream URL is rebuilt as
 *                        "https://<host>/th?id=<id>", dropping every other
 *                        query parameter. When false, the upstream URL is
 *                        returned untouched.
 * @return string The cleaned URL. If $url carries no "u" parameter it is
 *                returned unchanged; if the upstream URL has no host,
 *                query string or "id" parameter, the upstream URL itself
 *                is returned instead of a malformed link.
 */
private function unshitimage($url, $is_bing = true){
	
	$query = parse_url((string)$url, PHP_URL_QUERY);
	
	if(!is_string($query)){
		
		// no query string (or unparsable URL): nothing to unwrap
		return $url;
	}
	
	parse_str($query, $parts);
	
	if(
		!isset($parts["u"]) ||
		!is_string($parts["u"]) ||
		$parts["u"] === ""
	){
		
		// not a proxy link
		return $url;
	}
	
	$upstream = $parts["u"];
	
	if(!$is_bing){
		
		return $upstream;
	}
	
	$parse = parse_url($upstream);
	
	if(
		$parse === false ||
		!isset($parse["host"], $parse["query"])
	){
		
		// upstream URL does not have the expected shape, keep it as-is
		return $upstream;
	}
	
	parse_str($parse["query"], $upstream_parts);
	
	if(
		!isset($upstream_parts["id"]) ||
		!is_string($upstream_parts["id"])
	){
		
		return $upstream;
	}
	
	return "https://" . $parse["host"] . "/th?id=" . urlencode($upstream_parts["id"]);
}
}

View File

@ -70,7 +70,7 @@ class sc{
return $data; return $data;
} }
public function music($get){ public function music($get, $last_attempt = false){
if($get["npt"]){ if($get["npt"]){
@ -108,6 +108,7 @@ class sc{
$type = $get["type"]; $type = $get["type"];
$proxy = $this->backend->get_ip(); $proxy = $this->backend->get_ip();
$token = $this->get_token($proxy);
switch($type){ switch($type){
@ -117,12 +118,11 @@ class sc{
"q" => $search, "q" => $search,
"variant_ids" => "", "variant_ids" => "",
"facet" => "model", "facet" => "model",
"user_id" => config::SC_USER_ID, "client_id" => $token,
"client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20, "limit" => 20,
"offset" => 0, "offset" => 0,
"linked_partitioning" => 1, "linked_partitioning" => 1,
"app_version" => 1696577813, "app_version" => 1713542117,
"app_locale" => "en" "app_locale" => "en"
]; ];
break; break;
@ -133,12 +133,11 @@ class sc{
"q" => $search, "q" => $search,
"variant_ids" => "", "variant_ids" => "",
"facet_genre" => "", "facet_genre" => "",
"user_id" => config::SC_USER_ID, "client_id" => $token,
"client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20, "limit" => 20,
"offset" => 0, "offset" => 0,
"linked_partitioning" => 1, "linked_partitioning" => 1,
"app_version" => 1696577813, "app_version" => 1713542117,
"app_locale" => "en" "app_locale" => "en"
]; ];
break; break;
@ -149,12 +148,11 @@ class sc{
"q" => $search, "q" => $search,
"variant_ids" => "", "variant_ids" => "",
"facet" => "place", "facet" => "place",
"user_id" => config::SC_USER_ID, "client_id" => $token,
"client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20, "limit" => 20,
"offset" => 0, "offset" => 0,
"linked_partitioning" => 1, "linked_partitioning" => 1,
"app_version" => 1696577813, "app_version" => 1713542117,
"app_locale" => "en" "app_locale" => "en"
]; ];
break; break;
@ -165,12 +163,11 @@ class sc{
"q" => $search, "q" => $search,
"variant_ids" => "", "variant_ids" => "",
"facet" => "genre", "facet" => "genre",
"user_id" => config::SC_USER_ID, "client_id" => $token,
"client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20, "limit" => 20,
"offset" => 0, "offset" => 0,
"linked_partitioning" => 1, "linked_partitioning" => 1,
"app_version" => 1696577813, "app_version" => 1713542117,
"app_locale" => "en" "app_locale" => "en"
]; ];
break; break;
@ -181,12 +178,11 @@ class sc{
"q" => $search, "q" => $search,
"variant_ids" => "", "variant_ids" => "",
"facet" => "genre", "facet" => "genre",
"user_id" => config::SC_USER_ID, "client_id" => $token,
"client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20, "limit" => 20,
"offset" => 0, "offset" => 0,
"linked_partitioning" => 1, "linked_partitioning" => 1,
"app_version" => 1696577813, "app_version" => 1713542117,
"app_locale" => "en" "app_locale" => "en"
]; ];
break; break;
@ -198,12 +194,11 @@ class sc{
"variant_ids" => "", "variant_ids" => "",
"filter.content_tier" => "SUB_HIGH_TIER", "filter.content_tier" => "SUB_HIGH_TIER",
"facet" => "genre", "facet" => "genre",
"user_id" => config::SC_USER_ID, "client_id" => $token,
"client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20, "limit" => 20,
"offset" => 0, "offset" => 0,
"linked_partitioning" => 1, "linked_partitioning" => 1,
"app_version" => 1696577813, "app_version" => 1713542117,
"app_locale" => "en" "app_locale" => "en"
]; ];
break; break;
@ -229,7 +224,14 @@ class sc{
if($json === null){ if($json === null){
throw new Exception("Failed to decode JSON. Did the keys set in data/config.php expire?"); if($last_attempt === true){
throw new Exception("Fetched an invalid token (please report!!)");
}
// token might've expired, get a new one and re-try search
get_token($proxy);
return $this->music($get, true);
} }
$out = [ $out = [
@ -352,7 +354,7 @@ class sc{
"endpoint" => "sc", "endpoint" => "sc",
"url" => "url" =>
$item["media"]["transcodings"][0]["url"] . $item["media"]["transcodings"][0]["url"] .
"?client_id=" . config::SC_CLIENT_TOKEN . "?client_id=" . $token .
"&track_authorization=" . "&track_authorization=" .
$item["track_authorization"] $item["track_authorization"]
]; ];
@ -390,6 +392,37 @@ class sc{
return $out; return $out;
} }
/**
 * Return the client_id token used for search requests.
 *
 * The token is read from the APCu entry "sc_token" when present.
 * Otherwise a script asset is downloaded through $proxy, the first
 * "client_id=..." value found in it is stored under "sc_token" and
 * returned.
 *
 * @param mixed $proxy Proxy handed through to $this->get()
 * @return string The client_id token
 * @throws Exception When no client_id can be found in the downloaded script
 */
public function get_token($proxy){
	
	$cached = apcu_fetch("sc_token");
	
	if($cached !== false){
		
		// cache hit, no network request needed
		return $cached;
	}
	
	$script =
		$this->get(
			$proxy,
			"https://a-v2.sndcdn.com/assets/1-c3e4038d.js",
			[]
		);
	
	// capture everything after "client_id=" up to the next double quote
	preg_match(
		'/client_id=([^"]+)/',
		$script,
		$matches
	);
	
	if(!isset($matches[1])){
		
		throw new Exception("Failed to get search token");
	}
	
	apcu_store("sc_token", $matches[1]);
	
	return $matches[1];
}
private function limitstrlen($text){ private function limitstrlen($text){
return return

View File

@ -209,7 +209,7 @@ class wiby{
$out["web"][] = [ $out["web"][] = [
"title" => $this->unescapehtml(trim($links[2][$i])), "title" => $this->unescapehtml(trim($links[2][$i])),
"description" => $this->unescapehtml(trim(strip_tags($links[3][$i]))), "description" => $this->unescapehtml(trim(strip_tags($links[3][$i]), ".\n\r ")),
"url" => trim($links[1][$i]), "url" => trim($links[1][$i]),
"date" => null, "date" => null,
"type" => "web", "type" => "web",

View File

@ -644,6 +644,11 @@ class yandex{
$json = json_decode($json, true); $json = json_decode($json, true);
if($json === null){
throw new Exception("Failed to decode JSON");
}
if( if(
isset($json["type"]) && isset($json["type"]) &&
$json["type"] == "captcha" $json["type"] == "captcha"
@ -652,11 +657,6 @@ class yandex{
throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes."); throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
} }
if($json === null){
throw new Exception("Failed to decode JSON");
}
$out = [ $out = [
"status" => "ok", "status" => "ok",
"npt" => null, "npt" => null,

View File

@ -22,10 +22,10 @@ var list = [];
var pinged_list = []; var pinged_list = [];
var reqs = 0; var reqs = 0;
var errors = 0; var errors = 0;
var sort = 0; // lower ping first var sort = 6; // highest version first
// check for instance redirect stuff // check for instance redirect stuff
var redir = ""; var redir = [];
var target = "/web?"; var target = "/web?";
new URL(window.location.href) new URL(window.location.href)
.searchParams .searchParams
@ -39,12 +39,16 @@ new URL(window.location.href)
} }
if(key == "npt"){ return; } if(key == "npt"){ return; }
redir += encodeURIComponent(key) + "=" + encodeURIComponent(value) redir.push(encodeURIComponent(key) + "=" + encodeURIComponent(value))
} }
); );
if(redir != ""){ if(redir.length !== 0){
redir = target + redir;
redir = target + redir.join("&");
}else{
redir = "";
} }
var quote = document.createElement("div"); var quote = document.createElement("div");
@ -61,14 +65,13 @@ var table = document.createElement("table");
table.innerHTML = table.innerHTML =
'<thead>' + '<thead>' +
'<tr>' + '<tr>' +
'<th><div class="arrow up"></div>Ping</th>' +
'<th class="extend">Server</th>' + '<th class="extend">Server</th>' +
'<th>Address</th>' + '<th>Address</th>' +
'<th>Bot protection</th>' + '<th>Bot protection</th>' +
'<th title="Amount of legit requests processed since the last APCU cache clear (usually happens at midnight)">Real reqs (?)</th>' + '<th title="Amount of legit requests processed since the last APCU cache clear (usually happens at midnight)">Real reqs (?)</th>' +
'<th title="Amount of filtered requests processed since the last APCU cache clear (usually happens at midnight)">Bot reqs (?)</th>' + '<th title="Amount of filtered requests processed since the last APCU cache clear (usually happens at midnight)">Bot reqs (?)</th>' +
'<th>API</th>' + '<th>API</th>' +
'<th>Version</th>' + '<th><div class="arrow up"></div>Version</th>' +
'</tr>' + '</tr>' +
'</thead>' + '</thead>' +
'<tbody></tbody>'; '<tbody></tbody>';
@ -118,14 +121,13 @@ for(var i=0; i<th.length; i++){
switch(div.textContent.toLowerCase()){ switch(div.textContent.toLowerCase()){
case "ping": sort = orientation; break; case "server": sort = 0 + orientation; break;
case "server": sort = 2 + orientation; break; case "address": sort = 2 + orientation; break;
case "address": sort = 4 + orientation; break; case "bot protection": sort = 4 + orientation; break;
case "bot protection": sort = 6 + orientation; break; case "real reqs (?)": sort = 6 + orientation; break;
case "real reqs (?)": sort = 8 + orientation; break; case "bot reqs (?)": sort = 8 + orientation; break;
case "bot reqs (?)": sort = 10 + orientation; break; case "api": sort = 10 + orientation; break;
case "api": sort = 12 + orientation; break; case "version": sort = 12 + orientation; break;
case "version": sort = 14 + orientation; break;
} }
render_list(); render_list();
@ -160,16 +162,6 @@ function number_format(int){
return new Intl.NumberFormat().format(int); return new Intl.NumberFormat().format(int);
} }
window.fetch = (function(fetch) {
return function(fn, t){
const begin = Date.now();
return fetch.apply(this, arguments).then(function(response) {
response.ping = Date.now() - begin;
return response;
});
};
})(window.fetch);
// parse initial server list // parse initial server list
fetch_server(window.location.origin); fetch_server(window.location.origin);
@ -188,7 +180,6 @@ async function fetch_server(server){
if(list[i] == server){ if(list[i] == server){
// serber was already fetched // serber was already fetched
console.info("Already checked server: " + server);
return; return;
} }
} }
@ -200,9 +191,7 @@ async function fetch_server(server){
try{ try{
var payload = await fetch( var payload = await fetch(server + "/ami4get");
server + "/ami4get"
);
if(payload.status !== 200){ if(payload.status !== 200){
@ -214,7 +203,6 @@ async function fetch_server(server){
} }
data = await payload.json(); data = await payload.json();
data.server.ping = payload.ping;
}catch(error){ }catch(error){
@ -316,41 +304,36 @@ function render_list(){
case 0: case 0:
case 1: case 1:
sorted_list = sorta(pinged_list, "ping", filter === true ? false : true); sorted_list = textsort(pinged_list, "name", filter === true ? false : true);
break; break;
case 2: case 2:
case 3: case 3:
sorted_list = textsort(pinged_list, "name", filter === true ? false : true); sorted_list = textsort(pinged_list, "ip", filter === true ? false : true);
break; break;
case 4: case 4:
case 5: case 5:
sorted_list = textsort(pinged_list, "ip", filter === true ? false : true); sorted_list = sorta(pinged_list, "bot_protection", filter === true ? false : true);
break; break;
case 6: case 6:
case 7: case 7:
sorted_list = sorta(pinged_list, "bot_protection", filter === true ? false : true); sorted_list = sorta(pinged_list, "real_requests", filter);
break; break;
case 8: case 8:
case 9: case 9:
sorted_list = sorta(pinged_list, "real_requests", filter); sorted_list = sorta(pinged_list, "bot_requests", filter);
break; break;
case 10: case 10:
case 11: case 11:
sorted_list = sorta(pinged_list, "bot_requests", filter); sorted_list = sorta(pinged_list, "api_enabled", filter);
break; break;
case 12: case 12:
case 13: case 13:
sorted_list = sorta(pinged_list, "api_enabled", filter);
break;
case 14:
case 15:
sorted_list = sorta(pinged_list, "version", filter); sorted_list = sorta(pinged_list, "version", filter);
break; break;
} }
@ -362,32 +345,16 @@ function render_list(){
html += '<tr onclick="show_server(' + sorted_list[k].index + ');">'; html += '<tr onclick="show_server(' + sorted_list[k].index + ');">';
for(var i=0; i<8; i++){ for(var i=0; i<7; i++){
html += '<td'; html += '<td';
switch(i){ switch(i){
case 0: // server ping
if(sorted_list[k].server.ping <= 100){
html += '><span style="color:var(--green);">' + sorted_list[k].server.ping + '</span>';
break;
}
if(sorted_list[k].server.ping <= 200){
html += '><span style="color:var(--yellow);">' + sorted_list[k].server.ping + '</span>';
break;
}
html += '><span style="color:var(--red);">' + number_format(sorted_list[k].server.ping) + '</span>';
break;
// server name // server name
case 1: html += ' class="extend">' + htmlspecialchars(sorted_list[k].server.name); break; case 0: html += ' class="extend">' + htmlspecialchars(sorted_list[k].server.name); break;
case 2: html += '>' + htmlspecialchars(new URL(sorted_list[k].server.ip).host); break; case 1: html += '>' + htmlspecialchars(new URL(sorted_list[k].server.ip).host); break;
case 3: // bot protection case 2: // bot protection
switch(sorted_list[k].server.bot_protection){ switch(sorted_list[k].server.bot_protection){
case 0: case 0:
@ -407,15 +374,15 @@ function render_list(){
} }
break; break;
case 4: // real reqs case 3: // real reqs
html += '>' + number_format(sorted_list[k].server.real_requests); html += '>' + number_format(sorted_list[k].server.real_requests);
break; break;
case 5: // bot reqs case 4: // bot reqs
html += '>' + number_format(sorted_list[k].server.bot_requests); html += '>' + number_format(sorted_list[k].server.bot_requests);
break; break;
case 6: // api enabled case 5: // api enabled
if(sorted_list[k].server.api_enabled){ if(sorted_list[k].server.api_enabled){
@ -427,7 +394,7 @@ function render_list(){
break; break;
// version // version
case 7: html += ">v" + sorted_list[k].server.version; break; case 6: html += ">v" + sorted_list[k].server.version; break;
} }
html += '</td>'; html += '</td>';
@ -436,6 +403,8 @@ function render_list(){
html += '</tr>'; html += '</tr>';
} }
console.log(html);
tbody.innerHTML = html; tbody.innerHTML = html;
} }

View File

@ -38,7 +38,7 @@ This is a metasearch engine that gets results from other engines, and strips awa
Provide users with a privacy oriented, extremely lightweight, ad free, free as in freedom (and free beer!) way to search for documents around the internet, with minimal, optional javascript code. My long term goal would be to build my own index (that doesn't suck) and provide users with an unbiased search engine, with no political inclinations. Provide users with a privacy oriented, extremely lightweight, ad free, free as in freedom (and free beer!) way to search for documents around the internet, with minimal, optional javascript code. My long term goal would be to build my own index (that doesn't suck) and provide users with an unbiased search engine, with no political inclinations.
<a href="#logs"><h2 id="logs">Do you keep logs?</h2></a> <a href="#logs"><h2 id="logs">Do you keep logs?</h2></a>
I store data temporarily to get the next page of results. This might include search queries, tokens and other parameters. These parameters are encrypted using <div class="code-inline">aes-256-gcm</div> on the serber, for which I give you a key (also known internally as <div class="code-inline">npt</div> token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 15 minutes, or after it's used, whichever comes first.<br><br> I store data temporarily to get the next page of results. This might include search queries, filters and tokens. These parameters are encrypted using <div class="code-inline">libsodium</div> on the serber, for which I give you a decryption key (also known internally as <div class="code-inline">npt</div> token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 15 minutes, or after it's used, whichever comes first.<br><br>
I <b>don't</b> log IP addresses, user agents, or anything else. The <div class="code-inline">npt</div> tokens are the only things that are stored (in RAM, mind you), temporarily, encrypted. I <b>don't</b> log IP addresses, user agents, or anything else. The <div class="code-inline">npt</div> tokens are the only things that are stored (in RAM, mind you), temporarily, encrypted.
@ -48,7 +48,7 @@ Your search queries and supplied filters are shared with the scraper you chose (
TL;DR assume those websites can see what you search for, but can't see who you are (unless you're really dumb). TL;DR assume those websites can see what you search for, but can't see who you are (unless you're really dumb).
<a href="#hosting"><h2 id="hosting">Where is this website hosted?</h2></a> <a href="#hosting"><h2 id="hosting">Where is this website hosted?</h2></a>
This website is hosted on a Contabo shitbox in the United States. Please head over to the <a href="/instances">4get instances</a> page, select an instance and click on "IP lookup".
<a href="#keyboard-shortcuts"><h2 id="keyboard-shortcuts">Keyboard shortcuts?</h2></a> <a href="#keyboard-shortcuts"><h2 id="keyboard-shortcuts">Keyboard shortcuts?</h2></a>
Use <div class="code-inline">/</div> to focus the search box.<br><br> Use <div class="code-inline">/</div> to focus the search box.<br><br>