1
0
forked from lolcat/4get
This commit is contained in:
2023-11-07 08:04:56 -05:00
parent 64b090ee05
commit 785452873f
59 changed files with 2592 additions and 1277 deletions

197
lib/backend.php Normal file
View File

@@ -0,0 +1,197 @@
<?php
/**
 * Per-scraper backend helper: picks an outgoing proxy and stores/retrieves
 * encrypted "next page" payloads in APCu.
 */
class backend{

    /** @var string Scraper name; selects the proxy pool and prefixes next-page tokens. */
    public $scraper;

    /** @var int|false Value returned by apcu_inc("real_requests") for this instance. */
    public $requestid;

    /**
     * @param string $scraper Scraper name (upper-cased to look up config::PROXY_<NAME>).
     */
    public function __construct($scraper){

        $this->scraper = $scraper;
        $this->requestid = apcu_inc("real_requests");
    }

    /*
        Proxy stuff
    */

    /**
     * Pick the proxy line to use for this request.
     *
     * Reads data/proxies/<pool>.txt, ignores blank lines and lines starting
     * with "#", and rotates through the remaining entries using an APCu
     * counter keyed on the scraper name.
     *
     * @return string A proxy line (type:address:port:username:password), or
     *                'raw_ip::::' when config::PROXY_<SCRAPER> is false.
     * @throws Exception When the proxy list cannot be read or has no usable entry.
     */
    public function get_ip(){

        $pool = constant("config::PROXY_" . strtoupper($this->scraper));

        if($pool === false){

            // no proxy configured for this scraper
            return 'raw_ip::::';
        }

        // rotation counter stored in APCu under "p.<scraper>"
        $proxy_index_raw = apcu_inc("p." . $this->scraper);

        $path = "data/proxies/" . $pool . ".txt";
        $raw = file_get_contents($path);

        if($raw === false){

            throw new Exception("Could not read proxy list: " . $path);
        }

        // ignore empty or commented lines; trim so that stray indentation or
        // a trailing "\r" cannot end up inside the proxy type or password
        $proxylist = [];

        foreach(explode("\n", $raw) as $entry){

            $entry = trim($entry);

            if($entry !== "" && $entry[0] != "#"){

                $proxylist[] = $entry;
            }
        }

        if(count($proxylist) === 0){

            // previously this fell through to a modulo by zero
            throw new Exception("Proxy list contains no usable entries: " . $path);
        }

        // apcu_inc() returns false on failure; treat that as index 0
        return $proxylist[(int)$proxy_index_raw % count($proxylist)];
    }

    /**
     * Apply a proxy line to a cURL handle.
     *
     * this function is also called directly on nextpage
     *
     * @param mixed  $curlproc cURL handle, passed by reference.
     * @param string $ip       Proxy line: type:address:port:username:password.
     *                         Trailing fields may be omitted.
     * @return void
     */
    public function assign_proxy(&$curlproc, $ip){

        // parse proxy line; pad so that short lines do not raise
        // "Undefined array key" warnings during destructuring
        [
            $type,
            $address,
            $port,
            $username,
            $password
        ] = array_pad(explode(":", $ip, 5), 5, "");

        switch($type){

            case "raw_ip":
                // direct connection: leave the handle untouched
                return;

            case "http":
            case "https":
                curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
                curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
                break;

            case "socks4":
                curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
                curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                break;

            case "socks5":
                curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
                curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                break;

            case "socks4a":
                curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
                curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                break;

            case "socks5_hostname":
                curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
                curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                break;

            // NOTE(review): an unrecognised type sets no proxy at all, so the
            // request would leave from the server's own address - confirm
            // whether that should be an error instead.
        }

        if($username != ""){

            curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
        }
    }

    /*
        Next page stuff
    */

    /**
     * Encrypt a next-page payload, cache it in APCu and return its token.
     *
     * A 256-byte random password is stretched with PBKDF2-SHA512 into an
     * AES-256-GCM key. The cache entry holds the proxy line, a comma, then
     * salt + IV + ciphertext + 16-byte tag, deflated, for 900 seconds.
     *
     * @param string $payload Data to protect.
     * @param mixed  $page    Only index 0 of this value is used in the cache key.
     * @param string $proxy   Proxy line to remember for the follow-up request.
     * @return string "<scraper><requestid>.<url-safe base64 password, unpadded>"
     */
    public function store($payload, $page, $proxy){

        $page = $page[0];

        $password = random_bytes(256); // 2048 bit
        $salt = random_bytes(16);
        $key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true);
        $iv =
            random_bytes(
                openssl_cipher_iv_length("aes-256-gcm")
            );

        $tag = "";
        $out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16);

        // NOTE(review): the result of this counter is not used by this class
        // (the request id names the entry); the call is kept only so the
        // shared "key" counter keeps advancing - confirm nothing else reads it.
        apcu_inc("key", 1);

        apcu_store(
            $page . "." .
            $this->scraper .
            $this->requestid,
            gzdeflate($proxy . "," . $salt.$iv.$out.$tag),
            900 // cache information for 15 minutes blaze it
        );

        return
            $this->scraper . $this->requestid . "." .
            rtrim(strtr(base64_encode($password), '+/', '-_'), '=');
    }

    /**
     * Resolve a next-page token back into its payload and proxy line.
     *
     * The cache entry is deleted after a successful decryption, so a token
     * can be redeemed once.
     *
     * @param string $npt  Token in the form "<cache id>.<url-safe base64 password>".
     * @param mixed  $page Only index 0 of this value is used in the cache key.
     * @return array [decrypted payload, proxy line]
     * @throws Exception When the token is malformed, unknown, expired or fails decryption.
     */
    public function get($npt, $page){

        $page = $page[0];
        $explode = explode(".", $npt, 2);

        if(count($explode) !== 2){

            throw new Exception("Malformed nextPageToken!");
        }

        $apcu = $page . "." . $explode[0];

        $payload = apcu_fetch($apcu);

        if($payload === false){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        // undo the url-safe alphabet and restore the "=" padding that
        // store() stripped (str_pad() takes a target length, not a count,
        // so the previous call never added anything)
        $key = strtr($explode[1], '-_', '+/');
        $key .= str_repeat('=', (4 - strlen($key) % 4) % 4);
        $key = base64_decode($key);

        $payload = is_string($payload) ? gzinflate($payload) : false;

        if($payload === false){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        // get proxy
        // NOTE(review): splits on the first comma, so a proxy line that
        // itself contains "," would be cut short - confirm this cannot happen.
        $parts = explode(",", $payload, 2);

        if(count($parts) !== 2){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        [
            $proxy,
            $payload
        ] = $parts;

        $key =
            hash_pbkdf2(
                "sha512",
                $key,
                substr($payload, 0, 16), // salt
                20000,
                32,
                true
            );

        $ivlen = openssl_cipher_iv_length("aes-256-gcm");

        $payload =
            openssl_decrypt(
                substr(
                    $payload,
                    16 + $ivlen,
                    -16
                ), // ciphertext between the IV and the tag
                "aes-256-gcm",
                $key,
                OPENSSL_RAW_DATA,
                substr($payload, 16, $ivlen), // IV
                substr($payload, -16) // GCM tag
            );

        if($payload === false){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        // remove the key after using
        apcu_delete($apcu);

        return [$payload, $proxy];
    }
}

View File

@@ -4,6 +4,19 @@ class captcha{
public function __construct($frontend, $get, $filters, $page, $output){
// check if we want captcha
if(config::BOT_PROTECTION !== 1){
if($output === true){
$frontend->loadheader(
$get,
$filters,
$page
);
}
return;
}
/*
Validate cookie, if it exists
*/
@@ -46,6 +59,7 @@ class captcha{
if($output === false){
http_response_code(429); // too many reqs
echo json_encode([
"status" => "The \"pass\" token in your cookies is missing or has expired!!"
]);
@@ -184,15 +198,6 @@ class captcha{
}
}
/*
Generate random grid data to pass to captcha.php
*/
$dataset = [
["birds", 2263],
["fumo_plushies", 1006],
["minecraft", 848]
];
// get the positions for the answers
// will return between 3 and 6 answer positions
$range = range(0, 15);
@@ -216,17 +221,18 @@ class captcha{
}
// choose a dataset
$choosen = &$dataset[random_int(0, count($dataset) - 1)];
$c = count(config::CAPTCHA_DATASET);
$choosen = config::CAPTCHA_DATASET[random_int(0, $c - 1)];
$choices = [];
for($i=0; $i<count($dataset); $i++){
for($i=0; $i<$c; $i++){
if($dataset[$i][0] == $choosen[0]){
if(config::CAPTCHA_DATASET[$i][0] == $choosen[0]){
continue;
}
$choices[] = $dataset[$i];
$choices[] = config::CAPTCHA_DATASET[$i];
}
// generate grid data

View File

@@ -152,7 +152,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate",
@@ -180,7 +180,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"User-Agent: " . config::USER_AGENT,
"Accept: image/avif,image/webp,*/*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate",
@@ -379,7 +379,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"User-Agent: " . config::USER_AGENT,
"Accept: image/avif,image/webp,*/*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",
@@ -395,7 +395,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"User-Agent: " . config::USER_AGENT,
"Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",

View File

@@ -4,6 +4,41 @@ class frontend{
public function load($template, $replacements = []){
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
$replacements["version"] = config::VERSION;
// Resolve the theme from the "theme" cookie, falling back to the configured
// default. "/" and "." (plus "\" and NUL) are stripped so the cookie cannot
// point is_file() outside static/themes/. The search list previously read
// ["/". "."], which is the single string "/." and stripped neither character.
$theme = config::DEFAULT_THEME;

if(
    isset($_COOKIE["theme"]) &&
    is_string($_COOKIE["theme"]) // "theme[]=x" would arrive as an array
){

    $candidate = str_replace(["/", "\\", ".", "\0"], "", $_COOKIE["theme"]);

    if(
        $candidate == "Dark" ||
        is_file("static/themes/" . $candidate . ".css")
    ){

        $theme = $candidate;
    }
}
if($theme != "Dark"){
$replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">';
}else{
$replacements["style"] = "";
}
if(isset($_COOKIE["scraper_ac"])){
$replacements["ac"] = '?ac=' . htmlspecialchars($_COOKIE["scraper_ac"]);
}else{
$replacements["ac"] = '';
}
$handle = fopen("template/{$template}", "r");
$data = fread($handle, filesize("template/{$template}"));
fclose($handle);
@@ -29,30 +64,6 @@ class frontend{
return trim($html);
}
/**
 * Body class for the theme chosen in the "theme" cookie.
 *
 * @param bool $raw When truthy and a class applies, return a complete
 *                  ' class="..."' attribute; otherwise return the bare
 *                  class string (with its trailing space) or "".
 * @return string
 */
public function getthemeclass($raw = true){

    // only the "cream" cookie value maps to a body class
    $is_cream = isset($_COOKIE["theme"]) && $_COOKIE["theme"] == "cream";
    $body_class = $is_cream ? "theme-white " : "";

    if(!$raw || $body_class == ""){

        return $body_class;
    }

    return ' class="' . rtrim($body_class) . '"';
}
public function loadheader(array $get, array $filters, string $page){
echo
@@ -62,8 +73,7 @@ class frontend{
"index" => "no",
"search" => htmlspecialchars($get["s"]),
"tabs" => $this->generatehtmltabs($page, $get["s"]),
"filters" => $this->generatehtmlfilters($filters, $get),
"body_class" => $this->getthemeclass()
"filters" => $this->generatehtmlfilters($filters, $get)
]);
if(
@@ -74,18 +84,17 @@ class frontend{
){
// bot detected !!
echo
$this->drawerror(
"Tshh, blocked!",
'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
);
$this->drawerror(
"Tshh, blocked!",
'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
);
die();
}
}
public function drawerror($title, $error){
return
echo
$this->load("search.html", [
"class" => "",
"right-left" => "",
@@ -96,6 +105,23 @@ class frontend{
$error .
'</div>'
]);
die();
}
/**
 * Build the "scraper returned an error" message and hand it to drawerror().
 *
 * @param string $error  Error text; HTML-escaped before being embedded.
 * @param array  $get    Request parameters, passed to buildquery() to build
 *                       the "try another instance" link.
 * @param string $target Value for the "target" query parameter of /instances.
 */
public function drawscrapererror($error, $get, $target){

    // url-encode the target so it cannot break out of the href attribute;
    // values made only of letters and digits come out unchanged
    // NOTE(review): assumes callers pass an unencoded value - confirm
    $instances_link =
        '/instances?target=' . urlencode($target) .
        "&" . $this->buildquery($get, false);

    $this->drawerror(
        "Shit",
        'This scraper returned an error:' .
        '<div class="code">' . htmlspecialchars($error) . '</div>' .
        'Things you can try:' .
        '<ul>' .
            '<li>Use a different scraper</li>' .
            '<li>Remove keywords that could cause errors</li>' .
            '<li><a href="' . $instances_link . '">Try your search on another 4get instance</a></li>' .
        '</ul><br>' .
        'If the error persists, please <a href="/about">contact the administrator</a>.'
    );
}
public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){
@@ -819,30 +845,7 @@ class frontend{
public function getscraperfilters($page){
$get_scraper = null;
switch($page){
case "web":
$get_scraper = isset($_COOKIE["scraper_web"]) ? $_COOKIE["scraper_web"] : null;
break;
case "images":
$get_scraper = isset($_COOKIE["scraper_images"]) ? $_COOKIE["scraper_images"] : null;
break;
case "videos":
$get_scraper = isset($_COOKIE["scraper_videos"]) ? $_COOKIE["scraper_videos"] : null;
break;
case "news":
$get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null;
break;
case "music":
$get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null;
break;
}
$get_scraper = isset($_COOKIE["scraper_$page"]) ? $_COOKIE["scraper_$page"] : null;
if(
isset($_GET["scraper"]) &&
@@ -1148,32 +1151,8 @@ class frontend{
break;
case "_SEARCH":
// get search string & bang
$sanitized[$parameter] = trim($sanitized[$parameter]);
$sanitized["bang"] = "";
if(
strlen($sanitized[$parameter]) !== 0 &&
$sanitized[$parameter][0] == "!"
){
$sanitized[$parameter] = explode(" ", $sanitized[$parameter], 2);
$sanitized["bang"] = trim($sanitized[$parameter][0]);
if(count($sanitized[$parameter]) === 2){
$sanitized[$parameter] = trim($sanitized[$parameter][1]);
}else{
$sanitized[$parameter] = "";
}
$sanitized["bang"] = ltrim($sanitized["bang"], "!");
}
$sanitized[$parameter] = ltrim($sanitized[$parameter], "! \n\r\t\v\x00");
// get search string
$sanitized["s"] = trim($sanitized[$parameter]);
}
}
}

View File

@@ -442,5 +442,3 @@ class fuckhtml{
return json_decode($json_out, true);
}
}
?>

View File

@@ -1,106 +0,0 @@
<?php
/**
 * Stores and retrieves encrypted "next page" payloads in APCu.
 */
class nextpage{

    /** @var string Scraper name; prefixes cache keys and tokens. */
    public $scraper;

    /**
     * @param string $scraper Scraper name.
     */
    public function __construct($scraper){

        $this->scraper = $scraper;
    }

    /**
     * Encrypt a payload, cache it in APCu for 900 seconds and return its token.
     *
     * A 256-byte random password is stretched with PBKDF2-SHA512 into an
     * AES-256-GCM key. The cache entry holds salt + IV + ciphertext +
     * 16-byte tag, deflated.
     *
     * @param string $payload Data to protect.
     * @param mixed  $page    Only index 0 of this value is used in the cache key.
     * @return string "<scraper><counter>.<url-safe base64 password, unpadded>"
     */
    public function store($payload, $page){

        $page = $page[0];

        $password = random_bytes(256); // 2048 bit
        $salt = random_bytes(16);
        $key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true);
        $iv =
            random_bytes(
                openssl_cipher_iv_length("aes-256-gcm")
            );

        $tag = "";
        $out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16);

        // from here on this is the APCu counter naming the cache entry,
        // not the encryption key
        $entry_id = apcu_inc("key", 1);

        apcu_store(
            $page . "." .
            $this->scraper .
            (string)$entry_id,
            gzdeflate($salt.$iv.$out.$tag),
            900 // cache information for 15 minutes blaze it
        );

        return
            $this->scraper . $entry_id . "." .
            rtrim(strtr(base64_encode($password), '+/', '-_'), '=');
    }

    /**
     * Resolve a token back into its payload.
     *
     * The cache entry is deleted after a successful decryption, so a token
     * can be redeemed once.
     *
     * @param string $npt  Token in the form "<cache id>.<url-safe base64 password>".
     * @param mixed  $page Only index 0 of this value is used in the cache key.
     * @return string Decrypted payload.
     * @throws Exception When the token is malformed, unknown, expired or fails decryption.
     */
    public function get($npt, $page){

        $page = $page[0];
        $explode = explode(".", $npt, 2);

        if(count($explode) !== 2){

            throw new Exception("Malformed nextPageToken!");
        }

        $apcu = $page . "." . $explode[0];

        $payload = apcu_fetch($apcu);

        if($payload === false){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        // undo the url-safe alphabet and restore the "=" padding that
        // store() stripped (str_pad() takes a target length, not a count,
        // so the previous call never added anything)
        $key = strtr($explode[1], '-_', '+/');
        $key .= str_repeat('=', (4 - strlen($key) % 4) % 4);
        $key = base64_decode($key);

        $payload = is_string($payload) ? gzinflate($payload) : false;

        if($payload === false){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        $key =
            hash_pbkdf2(
                "sha512",
                $key,
                substr($payload, 0, 16), // salt
                20000,
                32,
                true
            );

        $ivlen = openssl_cipher_iv_length("aes-256-gcm");

        $payload =
            openssl_decrypt(
                substr(
                    $payload,
                    16 + $ivlen,
                    -16
                ), // ciphertext between the IV and the tag
                "aes-256-gcm",
                $key,
                OPENSSL_RAW_DATA,
                substr($payload, 16, $ivlen), // IV
                substr($payload, -16) // GCM tag
            );

        if($payload === false){

            throw new Exception("The nextPageToken is invalid or has expired!");
        }

        // remove the key after using
        apcu_delete($apcu);

        return $payload;
    }
}