4get/lib/backend.php

178 lines
3.7 KiB
PHP
Raw Permalink Normal View History

2023-11-07 13:04:56 +00:00
<?php
/**
 * Shared helper for scrapers: picks a proxy for outgoing requests and
 * stores/retrieves encrypted "next page" state in APCu.
 */
class backend{
	
	/**
	 * Scraper name given to the constructor. Used to build the proxy
	 * config constant name and the APCu keys.
	 *
	 * Declared explicitly: assigning an undeclared property is deprecated
	 * as of PHP 8.2.
	 */
	public $scraper;
	
	/**
	 * @param string $scraper Scraper name
	 */
	public function __construct($scraper){
		
		$this->scraper = $scraper;
	}
	
	/*
		Proxy stuff
	*/
	
	/**
	 * Pick the proxy line to use for the next request.
	 *
	 * Reads the pool name from config::PROXY_<SCRAPER>. When that constant
	 * is false, no proxy is wanted and the placeholder 'raw_ip::::' is
	 * returned. Otherwise the pool file data/proxies/<pool>.txt is read and
	 * its entries are rotated through using an APCu counter.
	 *
	 * @return string Proxy line (type:address:port:username:password)
	 * @throws Exception When the pool file is unreadable or has no entries
	 */
	public function get_ip(){
		
		$pool = constant("config::PROXY_" . strtoupper($this->scraper));
		
		if($pool === false){
			
			// no proxy configured for this scraper
			return 'raw_ip::::';
		}
		
		// per-scraper counter, incremented on every call
		$proxy_index_raw = apcu_inc("p." . $this->scraper);
		
		$contents = file_get_contents("data/proxies/" . $pool . ".txt");
		
		if($contents === false){
			
			throw new Exception("Could not read proxy list \"" . $pool . "\"!");
		}
		
		// ignore empty or commented lines. Entries are trimmed so that
		// stray whitespace or a trailing "\r" never ends up in the proxy
		// type or the password
		$proxylist = [];
		
		foreach(explode("\n", $contents) as $entry){
			
			$entry = trim($entry);
			
			if($entry === "" || $entry[0] === "#"){
				
				continue;
			}
			
			$proxylist[] = $entry;
		}
		
		$total = count($proxylist);
		
		if($total === 0){
			
			// would otherwise be a modulo by zero below
			throw new Exception("Proxy list \"" . $pool . "\" does not contain any proxies!");
		}
		
		// apcu_inc() returns false on failure, which maps to index 0
		return $proxylist[(int)$proxy_index_raw % $total];
	}
	
	/**
	 * Apply a proxy line to a cURL handle.
	 *
	 * This function is also called directly on nextpage.
	 *
	 * @param mixed  $curlproc cURL handle to configure
	 * @param string $ip       Proxy line: type:address:port:username:password
	 *                         (trailing fields may be omitted)
	 */
	public function assign_proxy(&$curlproc, string $ip){
		
		// parse proxy line; pad so that short lines don't raise
		// "undefined array key" warnings
		[
			$type,
			$address,
			$port,
			$username,
			$password
		] = array_pad(explode(":", $ip, 5), 5, "");
		
		switch($type){
			
			case "raw_ip":
				// direct connection, nothing to configure
				return;
			
			case "http":
			case "https":
				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
				curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
				break;
			
			case "socks4":
				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
				break;
			
			case "socks5":
				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
				break;
			
			case "socks4a":
				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
				break;
			
			case "socks5_hostname":
			case "socks5a":
				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
				break;
			
			// NOTE(review): an unrecognised type sets no proxy at all, so
			// the request would go out without one. Kept as-is for
			// backward compatibility -- confirm this is intended.
		}
		
		if($username !== ""){
			
			curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
		}
	}
	
	/*
		Next page stuff
	*/
	
	/**
	 * Store a next-page payload in APCu and return the token that
	 * retrieves it.
	 *
	 * The payload is compressed, then encrypted with a freshly generated
	 * key. The key is only embedded in the returned token; the cache entry
	 * holds the nonce, the proxy line and the ciphertext.
	 *
	 * @param string $payload Data to keep for the next page request
	 * @param string $page    Page name; only its first character is used
	 * @param string $proxy   Proxy line to keep alongside the payload
	 * @return string Token: <scraper><requestid>.<base64url key, unpadded>
	 */
	public function store(string $payload, string $page, string $proxy){
		
		$key = sodium_crypto_secretbox_keygen();
		$nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
		
		$requestid = apcu_inc("requestid");
		
		apcu_store(
			substr($page, 0, 1) . "." . // first letter of page name
			$this->scraper . // scraper name
			$requestid,
			[
				$nonce,
				$proxy,
				// compress and encrypt
				sodium_crypto_secretbox(
					gzdeflate($payload),
					$nonce,
					$key
				)
			],
			900 // cache information for 15 minutes
		);
		
		return
			$this->scraper . $requestid . "." .
			rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
	}
	
	/**
	 * Fetch and decrypt a payload previously saved with store().
	 *
	 * The cache entry is deleted once it has been decrypted and
	 * decompressed successfully.
	 *
	 * @param string $npt  Token returned by store()
	 * @param string $page Page name; only its first character is used
	 * @return array [payload data, proxy line]
	 * @throws Exception When the token is malformed, unknown, expired, or
	 *                   does not decrypt the stored data
	 */
	public function get(string $npt, string $page){
		
		$page = substr($page, 0, 1);
		$explode = explode(".", $npt, 2);
		
		if(count($explode) !== 2){
			
			throw new Exception("Malformed nextPageToken!");
		}
		
		$apcu = $page . "." . $explode[0];
		
		$payload = apcu_fetch($apcu);
		
		if($payload === false){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// undo the URL-safe alphabet and restore the stripped "=" padding.
		// str_pad() takes the total target length, not the pad count
		$encoded = strtr($explode[1], '-_', '+/');
		$encoded =
			str_pad(
				$encoded,
				strlen($encoded) + ((4 - (strlen($encoded) % 4)) % 4),
				'=',
				STR_PAD_RIGHT
			);
		
		$key = base64_decode($encoded, true);
		
		// sodium throws on a key of the wrong length; report it as a bad
		// token instead
		if(
			$key === false ||
			strlen($key) !== SODIUM_CRYPTO_SECRETBOX_KEYBYTES
		){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// decrypt data
		$decrypted =
			sodium_crypto_secretbox_open(
				$payload[2], // data
				$payload[0], // nonce
				$key
			);
		
		if($decrypted === false){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// decompress data
		$data = gzinflate($decrypted);
		
		if($data === false){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// remove the key after using successfully
		apcu_delete($apcu);
		
		return [
			$data, // data
			$payload[1] // proxy
		];
	}
}