still missing things on google scraper

This commit is contained in:
2023-07-22 14:41:14 -04:00
commit bca265aea6
90 changed files with 17559 additions and 0 deletions

144
lib/bingcache-todo-fix.php Normal file
View File

@@ -0,0 +1,144 @@
<?php
// Reference material for the scraping done by the bingcache class below.
//
// Search URL queried to find the cache keys of a page:
// https://www.bing.com/search?q=url%3Ahttps%3A%2F%2Flolcat.ca
// Example of a cached-copy URL; its "d" and "w" parameters are the keys:
// https://cc.bingj.com/cache.aspx?q=url%3ahttps%3a%2f%2flolcat.ca&d=4769685974291356&mkt=en-CA&setlang=en-US&w=tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-
// Markup in the search results that carries those keys: the last two
// pipe-separated fields of the "u" attribute match "d" and "w" above.
// <div class="b_attribution" u="0N|5119|4769685974291356|tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-" tabindex="0">

// Instantiating the class runs the whole request (see the constructor).
new bingcache();
/**
 * Resolves the Bing cache entry for the URL given in $_GET["s"] and redirects
 * the client to the cached copy hosted on cc.bingj.com.
 *
 * All the work happens in the constructor. Every failure path renders the
 * error page through do404(), which terminates the script.
 */
class bingcache{
	
	public function __construct(){
		
		// only accept a plain string; ?s[]=x would otherwise reach parse_url()
		$url =
			(isset($_GET["s"]) && is_string($_GET["s"])) ?
			$_GET["s"] : null;
		
		if(
			$url === null ||
			$this->validate_url($url) === false
		){
			
			$this->do404("Please provide a valid URL.");
		}
		
		$curlproc = curl_init();
		
		curl_setopt(
			$curlproc,
			CURLOPT_URL,
			"https://www.bing.com/search?q=url%3A" .
			urlencode($url)
		);
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt(
			$curlproc,
			CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 5);
		
		$data = curl_exec($curlproc);
		$failed = ($data === false || curl_errno($curlproc) !== 0);
		
		// release the handle before do404() terminates the script
		curl_close($curlproc);
		
		if($failed){
			
			$this->do404("Failed to connect to bing servers. Please try again later.");
		}
		
		// [^"]* instead of .* so the capture stops at the end of the "u"
		// attribute even when the whole document sits on a single line
		$found =
			preg_match(
				'/<div class="b_attribution" u="([^"]*)" tabindex="0">/',
				$data,
				$keys
			);
		
		if($found !== 1){
			
			$this->do404("Bing has not archived this URL.");
		}
		
		// u="0N|5119|<d>|<w>": the last two fields are the cache keys
		$keys = explode("|", $keys[1]);
		$count = count($keys);
		
		if($count < 2){
			
			$this->do404("Bing has not archived this URL.");
		}
		
		header(
			"Location: https://cc.bingj.com/cache.aspx?d=" .
			rawurlencode($keys[$count - 2]) .
			"&w=" .
			rawurlencode($keys[$count - 1])
		);
	}
	
	/**
	 * Renders the error page with a 404 status and terminates the script.
	 *
	 * @param string $text Message shown to the user.
	 */
	public function do404($text){
		
		http_response_code(404);
		
		include "lib/frontend.php";
		$frontend = new frontend();
		
		echo
			$frontend->load(
				"error.html",
				[
					"title" => "Shit",
					"text" => $text
				]
			);
		
		die();
	}
	
	/**
	 * Checks that $url is an http(s) URL whose host is (or resolves to) an
	 * address outside the private and reserved ranges.
	 *
	 * @param string $url URL to check.
	 * @return string|false The validated IP address, or false when the URL
	 *                      is rejected.
	 */
	public function validate_url($url){
		
		$url_parts = parse_url($url);
		
		// check if required parts are there
		if(
			!isset($url_parts["scheme"]) ||
			!(
				$url_parts["scheme"] == "http" ||
				$url_parts["scheme"] == "https"
			) ||
			!isset($url_parts["host"])
		){
			return false;
		}
		
		// if its not an RFC-valid URL
		if(!filter_var($url, FILTER_VALIDATE_URL)){
			
			return false;
		}
		
		$ip =
			str_replace(
				["[", "]"], // handle ipv6
				"",
				$url_parts["host"]
			);
		
		// if its not an IP
		if(!filter_var($ip, FILTER_VALIDATE_IP)){
			
			// resolve domain's IP; the trailing dot makes the name absolute.
			// gethostbyname() returns its input on failure, which the
			// filter below then rejects.
			$ip = gethostbyname($url_parts["host"] . ".");
		}
		
		// reject localhost, private and reserved addresses
		return filter_var(
			$ip,
			FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
		);
	}
}

BIN
lib/classic.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

652
lib/curlproxy.php Normal file
View File

@@ -0,0 +1,652 @@
<?php
/**
 * Outbound HTTP helper used to fetch remote pages and media on behalf of the
 * client. Only http(s) URLs whose host is outside the private and reserved
 * address ranges are fetched.
 */
class proxy{
	
	public const req_web = 0;
	public const req_image = 1;
	
	// Declared explicitly: creating properties on the fly is deprecated
	// as of PHP 8.2. Kept public, as dynamic properties were.
	public $cache;
	public $url = null;
	public $format = null;
	public $empty_header = false;
	public $cont = false;
	public $headers_tmp = [];
	public $headers = [];
	
	/**
	 * @param bool $cache When not false, clientcache() sends caching headers
	 *                    and answers conditional requests with a 304.
	 */
	public function __construct($cache = true){
		
		$this->cache = $cache;
	}
	
	/**
	 * Sends lib/img404.png with a 404 status and terminates the script.
	 */
	public function do404(){
		
		http_response_code(404);
		header("Content-Type: image/png");
		
		// readfile() only emits a warning when the file is missing, where
		// fread() on a failed fopen() would throw
		readfile("lib/img404.png");
		
		die();
	}
	
	/**
	 * Resolves $path against the URL of the document it was found in.
	 *
	 * @param string $path     Absolute URL, protocol-relative URL ("//host/x"),
	 *                         root-relative path ("/x") or relative path ("x").
	 * @param string $relative URL of the referencing document.
	 * @return string Absolute URL. "." and ".." segments are not collapsed.
	 */
	public function getabsoluteurl($path, $relative){
		
		if($this->validateurl($path)){
			
			return $path;
		}
		
		// protocol-relative URLs are always upgraded to https
		if(substr($path, 0, 2) == "//"){
			
			return "https:" . $path;
		}
		
		$relative = parse_url($relative);
		$url = $relative["scheme"] . "://";
		
		if(
			isset($relative["user"]) &&
			isset($relative["pass"])
		){
			
			$url .= $relative["user"] . ":" . $relative["pass"] . "@";
		}
		
		$url .= $relative["host"];
		
		// keep a non-default port of the referencing document
		if(isset($relative["port"])){
			
			$url .= ":" . $relative["port"];
		}
		
		// root-relative paths replace the whole path of the base URL
		if(
			strlen($path) !== 0 &&
			$path[0] === "/"
		){
			
			return $url . $path;
		}
		
		if(isset($relative["path"])){
			
			// drop the last segment (the document itself) to get its directory
			$segments = explode("/", $relative["path"]);
			array_pop($segments);
			
			$url .= implode("/", $segments);
		}
		
		if(strlen($path) !== 0){
			
			$url .= "/";
		}
		
		return $url . $path;
	}
	
	/**
	 * Checks that $url is an http(s) URL whose host is (or resolves to) an
	 * address outside the private and reserved ranges.
	 *
	 * @param string $url
	 * @return bool
	 */
	public function validateurl($url){
		
		$url_parts = parse_url($url);
		
		// check if required parts are there
		if(
			!isset($url_parts["scheme"]) ||
			!(
				$url_parts["scheme"] == "http" ||
				$url_parts["scheme"] == "https"
			) ||
			!isset($url_parts["host"])
		){
			return false;
		}
		
		$ip =
			str_replace(
				["[", "]"], // handle ipv6
				"",
				$url_parts["host"]
			);
		
		// if its not an IP
		if(!filter_var($ip, FILTER_VALIDATE_IP)){
			
			// resolve domain's IP; gethostbyname() returns its input on
			// failure, which the filter below rejects
			$ip = gethostbyname($url_parts["host"] . ".");
		}
		
		// reject localhost, private and reserved addresses
		return
			filter_var(
				$ip,
				FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
			) !== false;
	}
	
	/**
	 * Fetches $url and returns the parsed response, following up to five
	 * redirects by hand so that every hop goes through validateurl().
	 *
	 * @param string      $url
	 * @param int         $reqtype        self::req_web or self::req_image.
	 * @param bool        $acceptallcodes When false, non-redirect status codes
	 *                                    above 300 raise an Exception.
	 * @param string|null $referer        Referer for image requests; defaults
	 *                                    to the origin of $url.
	 * @param int         $redirectcount  Internal redirect counter.
	 * @return array "http" (version, code), "headers" (lowercased names),
	 *               "body", plus "format" for image requests.
	 * @throws Exception On a rejected URL, transfer error, broken redirect,
	 *                   too many redirects or an unaccepted status code.
	 */
	public function get($url, $reqtype = self::req_web, $acceptallcodes = false, $referer = null, $redirectcount = 0){
		
		if($redirectcount === 5){
			
			throw new Exception("Too many redirects");
		}
		
		// sanitize URL: validateurl() reports through its return value and
		// never throws, so the result has to be checked explicitly
		if($this->validateurl($url) === false){
			
			throw new Exception("Invalid or disallowed URL");
		}
		
		$this->clientcache();
		
		$curl = curl_init();
		
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curl, CURLOPT_HEADER, 1);
		
		switch($reqtype){
			
			case self::req_web:
				curl_setopt(
					$curl,
					CURLOPT_HTTPHEADER,
					[
						"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
						"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
						"Accept-Language: en-US,en;q=0.5",
						"Accept-Encoding: gzip, deflate",
						"DNT: 1",
						"Connection: keep-alive",
						"Upgrade-Insecure-Requests: 1",
						"Sec-Fetch-Dest: document",
						"Sec-Fetch-Mode: navigate",
						"Sec-Fetch-Site: none",
						"Sec-Fetch-User: ?1"
					]
				);
				break;
			
			case self::req_image:
				
				if($referer === null){
					
					// scheme://host of the requested URL
					$referer = explode("/", $url, 4);
					array_pop($referer);
					
					$referer = implode("/", $referer);
				}
				
				curl_setopt(
					$curl,
					CURLOPT_HTTPHEADER,
					[
						"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
						"Accept: image/avif,image/webp,*/*",
						"Accept-Language: en-US,en;q=0.5",
						"Accept-Encoding: gzip, deflate",
						"DNT: 1",
						"Connection: keep-alive",
						"Referer: {$referer}"
					]
				);
				break;
		}
		
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curl, CURLOPT_TIMEOUT, 30);
		
		// limit size of payloads
		curl_setopt($curl, CURLOPT_BUFFERSIZE, 1024);
		curl_setopt($curl, CURLOPT_NOPROGRESS, false);
		curl_setopt(
			$curl,
			CURLOPT_PROGRESSFUNCTION,
			// the callback receives the cURL handle first; without that
			// argument the "downloaded" slot holds the expected total
			static function($handle, $downloadsize, $downloaded, $uploadsize, $uploaded){
				
				// abort the transfer once more than 100MB have been received
				return ($downloaded > 100000000) ? 1 : 0;
			}
		);
		
		$body = curl_exec($curl);
		
		if(curl_errno($curl)){
			
			$error = curl_error($curl);
			curl_close($curl);
			
			throw new Exception($error);
		}
		
		curl_close($curl);
		
		// split the header section (CURLOPT_HEADER) off the body
		$headers = [];
		$http = null;
		
		while(true){
			
			$header = explode("\n", $body, 2);
			// no line break left means nothing follows this line
			$body = $header[1] ?? "";
			
			if($http === null){
				
				// http/1.1 200 ok
				$header = explode("/", $header[0], 2);
				$header = explode(" ", $header[1] ?? "", 3);
				
				$http = [
					"version" => (float)$header[0],
					"code" => (int)($header[1] ?? 0)
				];
				
				continue;
			}
			
			if(trim($header[0]) == ""){
				
				// reached end of headers
				break;
			}
			
			$header = explode(":", $header[0], 2);
			
			// malformed headers
			if(count($header) !== 2){ continue; }
			
			$headers[strtolower(trim($header[0]))] = trim($header[1]);
		}
		
		// check http code
		if(
			$http["code"] >= 300 &&
			$http["code"] <= 309
		){
			
			// redirect
			if(!isset($headers["location"])){
				
				throw new Exception("Broken redirect");
			}
			
			return $this->get(
				$this->getabsoluteurl($headers["location"], $url),
				$reqtype,
				$acceptallcodes,
				$referer,
				$redirectcount + 1
			);
		}
		
		if(
			$acceptallcodes === false &&
			$http["code"] > 300
		){
			
			throw new Exception("Remote server returned an error code! ({$http["code"]})");
		}
		
		if($reqtype !== self::req_image){
			
			return [
				"http" => $http,
				"headers" => $headers,
				"body" => $body
			];
		}
		
		// image request: derive the format from the Content-Type header
		$format = false;
		
		if(isset($headers["content-type"])){
			
			if($headers["content-type"] == "text/html"){
				
				throw new Exception("Server returned an html document instead of image");
			}
			
			$tmp = explode(";", $headers["content-type"]);
			
			for($i=0; $i<count($tmp); $i++){
				
				if(
					preg_match(
						'/^image\/([^ ]+)/i',
						$tmp[$i],
						$match
					)
				){
					
					$format = strtolower($match[1]);
					
					// image/x-icon => icon
					if(substr($format, 0, 2) == "x-"){
						
						$format = substr($format, 2);
					}
					break;
				}
			}
		}
		
		return [
			"http" => $http,
			"format" => $format,
			"headers" => $headers,
			"body" => $body
		];
	}
	
	/**
	 * Streams a remote image straight to the client.
	 *
	 * @throws Exception See stream().
	 */
	public function stream_linear_image($url, $referer = null){
		
		$this->stream($url, $referer, "image");
	}
	
	/**
	 * Streams a remote audio file straight to the client.
	 *
	 * @throws Exception See stream().
	 */
	public function stream_linear_audio($url, $referer = null){
		
		$this->stream($url, $referer, "audio");
	}
	
	/**
	 * Pipes the response body of $url to the output as it is downloaded and
	 * forwards Content-Type, Content-Length and a filename to the client.
	 *
	 * @param string      $url
	 * @param string|null $referer Defaults to the origin of $url.
	 * @param string      $format  Expected top-level media type ("image" or "audio").
	 * @throws Exception On a rejected URL, transfer error, non-200 response or
	 *                   a Content-Type that does not match $format.
	 */
	private function stream($url, $referer, $format){
		
		$this->url = $url;
		$this->format = $format;
		
		// sanitize URL: validateurl() never throws, check its return value
		if($this->validateurl($url) === false){
			
			throw new Exception("Invalid or disallowed URL");
		}
		
		$this->clientcache();
		
		$curl = curl_init();
		
		// set headers
		if($referer === null){
			
			// scheme://host of the requested URL
			$referer = explode("/", $url, 4);
			array_pop($referer);
			
			$referer = implode("/", $referer);
		}
		
		switch($format){
			
			case "image":
				curl_setopt(
					$curl,
					CURLOPT_HTTPHEADER,
					[
						"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
						"Accept: image/avif,image/webp,*/*",
						"Accept-Language: en-US,en;q=0.5",
						"Accept-Encoding: gzip, deflate, br",
						"DNT: 1",
						"Connection: keep-alive",
						"Referer: {$referer}"
					]
				);
				break;
			
			case "audio":
				curl_setopt(
					$curl,
					CURLOPT_HTTPHEADER,
					[
						"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
						"Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
						"Accept-Language: en-US,en;q=0.5",
						"Accept-Encoding: gzip, deflate, br",
						"DNT: 1",
						"Connection: keep-alive",
						"Referer: {$referer}"
					]
				);
				break;
		}
		
		// follow redirects
		curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
		curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
		curl_setopt($curl, CURLOPT_AUTOREFERER, true);
		
		// set url
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
		
		// timeouts; TLS certificate verification stays enabled
		curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
		curl_setopt($curl, CURLOPT_TIMEOUT, 30);
		
		curl_setopt(
			$curl,
			CURLOPT_WRITEFUNCTION,
			function($c, $data){
				
				if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
					
					throw new Exception("Serber returned a non-200 code");
				}
				
				echo $data;
				return strlen($data);
			}
		);
		
		$this->empty_header = false;
		$this->cont = false;
		$this->headers_tmp = [];
		$this->headers = [];
		
		curl_setopt(
			$curl,
			CURLOPT_HEADERFUNCTION,
			function($c, $header){
				
				$head = trim($header);
				
				if(strlen($head) === 0){
					
					// blank line: end of one header section
					$this->empty_header = true;
					$this->headers_tmp = [];
				}else{
					
					$this->empty_header = false;
					$this->headers_tmp[] = $head;
				}
				
				foreach($this->headers_tmp as $h){
					
					// parse headers
					$h = explode(":", $h, 2);
					
					if(count($h) !== 2){
						
						// line without a colon (the status line): only keep
						// going on HTTP 200, anything else is probably a
						// redirect
						$this->cont = (curl_getinfo($c, CURLINFO_HTTP_CODE) === 200);
						continue;
					}
					
					$this->headers[strtolower(trim($h[0]))] = trim($h[1]);
				}
				
				if(
					$this->cont &&
					$this->empty_header
				){
					
					// get content type
					if(!isset($this->headers["content-type"])){
						
						throw new Exception("Resource is not an {$this->format} (no Content-Type)");
					}
					
					$filetype = explode("/", $this->headers["content-type"]);
					
					if(strtolower($filetype[0]) != $this->format){
						
						throw new Exception("Resource is not an {$this->format} (Found {$filetype[0]} instead)");
					}
					
					// "image" without a subtype cannot be forwarded
					if(!isset($filetype[1])){
						
						throw new Exception("Resource is not an {$this->format} (malformed Content-Type)");
					}
					
					header("Content-Type: {$this->format}/{$filetype[1]}");
					
					// give payload size
					if(isset($this->headers["content-length"])){
						
						header("Content-Length: {$this->headers["content-length"]}");
					}
					
					// give filename
					$this->getfilenameheader($this->headers, $this->url, $filetype[1]);
				}
				
				return strlen($header);
			}
		);
		
		curl_exec($curl);
		
		if(curl_errno($curl)){
			
			$error = curl_error($curl);
			curl_close($curl);
			
			throw new Exception($error);
		}
		
		curl_close($curl);
	}
	
	/**
	 * Sends a Content-Disposition header naming the file. The name comes from
	 * the upstream Content-Disposition header, else from the URL path, else
	 * from the URL host.
	 *
	 * @param array  $headers  Upstream headers with lowercased names.
	 * @param string $url      URL the resource was fetched from.
	 * @param string $filetype Extension appended to the name.
	 */
	public function getfilenameheader($headers, $url, $filetype = "jpg"){
		
		// get filename from content-disposition header
		if(isset($headers["content-disposition"])){
			
			preg_match(
				'/filename=([^;]+)/',
				$headers["content-disposition"],
				$filename
			);
			
			if(isset($filename[1])){
				
				// "inline;" is required: a bare "filename=..." is not a
				// valid Content-Disposition value
				header("Content-Disposition: inline; filename=" . $filename[1] . "." . $filetype);
				return;
			}
		}
		
		// get filename from URL
		$filename = parse_url($url, PHP_URL_PATH);
		
		// null: no path component, false: unparsable URL
		if(
			$filename === null ||
			$filename === false
		){
			
			// everything failed! rename file to domain name
			header("Content-Disposition: inline; filename=" . parse_url($url, PHP_URL_HOST) . "." . $filetype);
			return;
		}
		
		// remove extension from filename
		$filename =
			explode(
				".",
				basename($filename)
			);
		
		if(count($filename) > 1){
			
			array_pop($filename);
		}
		
		$filename = implode(".", $filename);
		
		header("Content-Disposition: inline; filename=" . $filename . "." . $filetype);
	}
	
	/**
	 * Determines the image format of a payload returned by get() and checks
	 * that ImageMagick knows it.
	 *
	 * @param array        $payload  Result of get() with self::req_image
	 *                               (reads "body" and "format").
	 * @param Imagick|null &$imagick Receives a fresh Imagick instance.
	 * @return string|false Lowercase format name, or false when ImageMagick
	 *                      does not list it.
	 */
	public function getimageformat($payload, &$imagick){
		
		$finfo = new finfo(FILEINFO_MIME_TYPE);
		$format = $finfo->buffer($payload["body"]);
		
		if($format === false){
			
			if($payload["format"] === false){
				
				header("X-Error: Could not parse format");
				
				// favicon404() is not defined in this class; use it only
				// when a subclass provides it, else the generic 404 image
				if(method_exists($this, "favicon404")){
					
					$this->favicon404();
				}else{
					
					$this->do404();
				}
			}
			
			// fall back to the format advertised by the server
			$format = $payload["format"];
		}else{
			
			$format_tmp = explode("/", $format, 2);
			
			if($format_tmp[0] == "image"){
				
				$format_tmp = strtolower($format_tmp[1]);
				
				if(substr($format_tmp, 0, 2) == "x-"){
					
					$format_tmp = substr($format_tmp, 2);
				}
				
				$format = $format_tmp;
			}
		}
		
		// map mime subtypes to ImageMagick format names
		// NOTE(review): "tiff" => "gif" is kept as found; confirm it is intended
		switch($format){
			
			case "tiff": $format = "gif"; break;
			case "vnd.microsoft.icon": $format = "ico"; break;
			case "icon": $format = "ico"; break;
			case "svg+xml": $format = "svg"; break;
		}
		
		$imagick = new Imagick();
		
		if(
			!in_array(
				$format,
				array_map("strtolower", $imagick->queryFormats()),
				true
			)
		){
			
			// format could not be found; callers get false and may still
			// let imagemagick try to detect it
			$format = false;
		}
		
		return $format;
	}
	
	/**
	 * Sends a fixed Last-Modified header and answers conditional requests
	 * with 304 Not Modified, terminating the script. No-op when caching was
	 * disabled in the constructor.
	 */
	public function clientcache(){
		
		if($this->cache === false){
			
			return;
		}
		
		header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");
		
		// header names are case-insensitive, so normalise before the lookup
		$headers = array_change_key_case(getallheaders(), CASE_LOWER);
		
		if(
			isset($headers["if-modified-since"]) ||
			isset($headers["if-unmodified-since"])
		){
			
			http_response_code(304); // 304: Not Modified
			die();
		}
	}
}

BIN
lib/favicon404.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 807 B

1282
lib/frontend.php Normal file

File diff suppressed because it is too large Load Diff

361
lib/fuckhtml.php Normal file
View File

@@ -0,0 +1,361 @@
<?php
/**
 * Minimal regex-based HTML scraper. Elements are plain arrays with the keys
 * "tagName", "startPos", "endPos", "attributes", "innerHTML", "level" and,
 * when a closing tag was found, "outerHTML".
 */
class fuckhtml{
	
	// Declared explicitly: creating properties on the fly is deprecated
	// as of PHP 8.2.
	public $html = "";
	public $strlen = 0;
	
	/**
	 * @param string|array|null $html   See load().
	 * @param bool              $isfile See load().
	 */
	public function __construct($html = null, $isfile = false){
		
		if($html !== null){
			
			$this->load($html, $isfile);
		}
	}
	
	/**
	 * Sets the document that the getElement* methods search in.
	 *
	 * @param string|array $html   HTML string, a file path (with $isfile), or
	 *                             an element array whose "innerHTML" is used.
	 * @param bool         $isfile Treat $html as a path and read that file.
	 * @throws Exception When an array lacks "innerHTML" or the file is unreadable.
	 */
	public function load($html, $isfile = false){
		
		if(is_array($html)){
			
			if(!isset($html["innerHTML"])){
				
				throw new Exception("(load) Supplied array doesn't contain a innerHTML index");
			}
			
			$html = $html["innerHTML"];
		}
		
		if($isfile){
			
			// file_get_contents() also copes with empty files, where
			// fread() with a zero length throws
			$fetch = file_get_contents($html);
			
			if($fetch === false){
				
				throw new Exception("(load) Could not read the supplied file");
			}
			
			$this->html = $fetch;
		}else{
			
			$this->html = $html;
		}
		
		$this->strlen = strlen($this->html);
	}
	
	/**
	 * Finds every element with the given tag name.
	 *
	 * @param string $tagname Tag name, or "*" for every tag.
	 * @return array List of elements in document order. Elements without a
	 *               matching closing tag have innerHTML null and endPos 0.
	 */
	public function getElementsByTagName(string $tagname){
		
		$out = [];
		
		/*
			Scrape start of the tag. Example
			<div class="mydiv"> ...
		*/
		if($tagname == "*"){
			
			$tagname = '[^\/<>\s]+';
		}else{
			
			// quote the delimiter too, the name goes straight into the pattern
			$tagname = preg_quote(strtolower($tagname), "/");
		}
		
		preg_match_all(
			'/<\s*(' . $tagname . ')(\s(?:[^>\'"]*|"[^"]*"|\'[^\']*\')+)?\s*>/i',
			$this->html,
			$starting_tags,
			PREG_OFFSET_CAPTURE
		);
		
		for($i=0; $i<count($starting_tags[0]); $i++){
			
			/*
				Parse attributes
			*/
			$attributes = [];
			
			preg_match_all(
				'/([^\/\s\\=]+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s]*))?/',
				$starting_tags[2][$i][0] ?? "",
				$regex_attributes
			);
			
			for($k=0; $k<count($regex_attributes[0]); $k++){
				
				// attribute without a value (eg. "disabled")
				if(trim($regex_attributes[2][$k]) == ""){
					
					$attributes[$regex_attributes[1][$k]] =
						"true";
					
					continue;
				}
				
				$attributes[$regex_attributes[1][$k]] =
					trim($regex_attributes[2][$k], "'\" \n\r\t\v\x00");
			}
			
			$out[] = [
				"tagName" => strtolower($starting_tags[1][$i][0]),
				"startPos" => $starting_tags[0][$i][1],
				"endPos" => 0,
				"startTag" => $starting_tags[0][$i][0],
				"attributes" => $attributes,
				"innerHTML" => null
			];
		}
		
		/*
			Get innerHTML
		*/
		// get closing tag positions
		preg_match_all(
			'/<\s*\/\s*(' . $tagname . ')\s*>/i',
			$this->html,
			$regex_closing_tags,
			PREG_OFFSET_CAPTURE
		);
		
		// merge opening and closing tags together
		for($i=0; $i<count($regex_closing_tags[1]); $i++){
			
			$out[] = [
				"tagName" => strtolower($regex_closing_tags[1][$i][0]),
				"endTag" => $regex_closing_tags[0][$i][0],
				"startPos" => $regex_closing_tags[0][$i][1]
			];
		}
		
		// order by position; the comparator must return an integer
		usort(
			$out,
			static function($a, $b){
				
				return $a["startPos"] <=> $b["startPos"];
			}
		);
		
		// compute the nesting level of each tag, tracked per tag name
		$level = [];
		$count = count($out);
		
		for($i=0; $i<$count; $i++){
			
			$name = $out[$i]["tagName"];
			
			if(!isset($level[$name])){
				
				$level[$name] = 0;
			}
			
			if(isset($out[$i]["startTag"])){
				
				// encountered starting tag
				$level[$name]++;
				$out[$i]["level"] = $level[$name];
			}else{
				
				// encountered closing tag
				$out[$i]["level"] = $level[$name];
				$level[$name]--;
			}
		}
		
		// the first closing tag after an opening tag with the same name
		// and the same level is _THE_ closing tag
		for($i=0; $i<$count; $i++){
			
			if(!isset($out[$i]["startTag"])){
				
				continue;
			}
			
			for($k=$i + 1; $k<$count; $k++){
				
				if(
					isset($out[$k]["endTag"]) &&
					$out[$i]["tagName"] == $out[$k]["tagName"] &&
					$out[$i]["level"] === $out[$k]["level"]
				){
					
					$startlen = strlen($out[$i]["startTag"]);
					$endlen = strlen($out[$k]["endTag"]);
					
					$out[$i]["endPos"] = $out[$k]["startPos"] + $endlen;
					
					$out[$i]["innerHTML"] =
						substr(
							$this->html,
							$out[$i]["startPos"] + $startlen,
							$out[$k]["startPos"] - ($out[$i]["startPos"] + $startlen)
						);
					
					$out[$i]["outerHTML"] =
						substr(
							$this->html,
							$out[$i]["startPos"],
							$out[$k]["startPos"] - $out[$i]["startPos"] + $endlen
						);
					
					break;
				}
			}
		}
		
		// filter out closing tags and the helper key
		for($i=0; $i<$count; $i++){
			
			if(isset($out[$i]["endTag"])){
				
				unset($out[$i]);
				continue;
			}
			
			unset($out[$i]["startTag"]);
		}
		
		return array_values($out);
	}
	
	/**
	 * Finds elements that carry the attribute $name.
	 *
	 * @param string            $name       Attribute name.
	 * @param array|string|null $collection Elements to search, a tag name to
	 *                                      search within, or null for all.
	 * @return array
	 */
	public function getElementsByAttributeName(string $name, $collection = null){
		
		if($collection === null){
			
			$collection = $this->getElementsByTagName("*");
		}elseif(is_string($collection)){
			
			$collection = $this->getElementsByTagName($collection);
		}
		
		$return = [];
		
		foreach($collection as $elem){
			
			foreach($elem["attributes"] as $attrib_name => $attrib_value){
				
				if($attrib_name == $name){
					
					$return[] = $elem;
					continue 2;
				}
			}
		}
		
		return $return;
	}
	
	/**
	 * Finds elements whose attribute $name contains every whitespace-separated
	 * token of $value, in any order (how class names match).
	 *
	 * @param string            $name
	 * @param string            $value      One or more tokens.
	 * @param array|string|null $collection See getElementsByAttributeName().
	 * @return array Empty when $value holds no token.
	 */
	public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){
		
		$elems = $this->getElementsByAttributeName($name, $collection);
		$wanted = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
		
		$return = [];
		
		if(count($wanted) === 0){
			
			return $return;
		}
		
		foreach($elems as $elem){
			
			// only look at the requested attribute, never at its siblings
			if(!isset($elem["attributes"][$name])){
				
				continue;
			}
			
			$have =
				preg_split(
					'/\s+/',
					(string)$elem["attributes"][$name],
					-1,
					PREG_SPLIT_NO_EMPTY
				);
			
			// every wanted token must be present; duplicates don't matter
			if(count(array_diff($wanted, $have)) === 0){
				
				$return[] = $elem;
			}
		}
		
		return $return;
	}
	
	/**
	 * Finds elements whose attribute $name is exactly $value.
	 *
	 * @param string            $name
	 * @param string            $value
	 * @param array|string|null $collection See getElementsByAttributeName().
	 * @return array
	 */
	public function getElementsByAttributeValue(string $name, string $value, $collection = null){
		
		$elems = $this->getElementsByAttributeName($name, $collection);
		
		$return = [];
		
		foreach($elems as $elem){
			
			// compare the requested attribute only, not every attribute
			if(
				isset($elem["attributes"][$name]) &&
				(string)$elem["attributes"][$name] === $value
			){
				
				$return[] = $elem;
			}
		}
		
		return $return;
	}
	
	/**
	 * @param string            $idname
	 * @param array|string|null $collection See getElementsByAttributeName().
	 * @return array|false First element with that id, or false.
	 */
	public function getElementById(string $idname, $collection = null){
		
		$id = $this->getElementsByAttributeValue("id", $idname, $collection);
		
		if(count($id) !== 0){
			
			return $id[0];
		}
		
		return false;
	}
	
	/**
	 * @param string            $classname  One or more space-separated classes.
	 * @param array|string|null $collection See getElementsByAttributeName().
	 * @return array Elements carrying all of the given classes.
	 */
	public function getElementsByClassName(string $classname, $collection = null){
		
		return $this->getElementsByFuzzyAttributeValue("class", $classname, $collection);
	}
	
	/**
	 * Strips the markup from $html and decodes entities.
	 *
	 * @param string|array $html       HTML string or element array.
	 * @param bool         $whitespace Join lines with "\n" instead of " ".
	 * @param bool         $trim       Trim each line and the result.
	 * @return string
	 * @throws Exception When an array lacks "innerHTML".
	 */
	public function getTextContent($html, $whitespace = false, $trim = true){
		
		if(is_array($html)){
			
			if(!isset($html["innerHTML"])){
				
				throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index");
			}
			
			$html = $html["innerHTML"];
		}
		
		// line breaks: newlines and <br>, </br>, <br/>, <br />
		$html =
			preg_split('/\n|<\/?br\s*\/?>/i', $html);
		
		$separator = ($whitespace === true) ? "\n" : " ";
		$out = "";
		
		for($i=0; $i<count($html); $i++){
			
			$tmp =
				html_entity_decode(
					strip_tags(
						$html[$i]
					),
					ENT_QUOTES | ENT_XML1, "UTF-8"
				);
			
			if($trim){
				
				$tmp = trim($tmp);
			}
			
			$out .= $tmp . $separator;
		}
		
		if($trim){
			
			return trim($out);
		}
		
		return $out;
	}
}
?>

BIN
lib/img404.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.4 KiB

106
lib/nextpage.php Normal file
View File

@@ -0,0 +1,106 @@
<?php
/**
 * Stores pagination payloads encrypted in APCu and hands out one-time
 * tokens ("nextPageToken") that carry the decryption password.
 *
 * Stored blob layout (before gzdeflate): salt(16) . iv . ciphertext . tag(16)
 * Token layout: <scraper><counter>.<base64url password, unpadded>
 */
class nextpage{
	
	// Declared explicitly: creating properties on the fly is deprecated
	// as of PHP 8.2.
	public $scraper;
	
	/**
	 * @param string $scraper Scraper name, used as part of the cache key.
	 */
	public function __construct($scraper){
		
		$this->scraper = $scraper;
	}
	
	/**
	 * Encrypts $payload, caches it for 7 minutes and returns its token.
	 *
	 * @param string $payload Data to keep for the next page request.
	 * @param string $page    Page type; only its first character is used.
	 * @return string The nextPageToken to hand to the client.
	 */
	public function store($payload, $page){
		
		$page = $page[0];
		$password = random_bytes(256); // 2048 bit
		$salt = random_bytes(16);
		$key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true);
		$iv =
			random_bytes(
				openssl_cipher_iv_length("aes-256-gcm")
			);
		
		$tag = "";
		$out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16);
		
		// monotonically increasing counter shared by all tokens
		$id = apcu_inc("key", 1);
		
		apcu_store(
			$page . "." .
			$this->scraper .
			(string)($id),
			gzdeflate($salt.$iv.$out.$tag),
			420 // cache information for 7 minutes blaze it
		);
		
		return
			$this->scraper . $id . "." .
			rtrim(strtr(base64_encode($password), '+/', '-_'), '=');
	}
	
	/**
	 * Decrypts and returns the payload a token refers to. The cache entry is
	 * deleted after a successful read, so a token works once.
	 *
	 * @param string $npt  Token returned by store().
	 * @param string $page Page type; only its first character is used.
	 * @return string The stored payload.
	 * @throws Exception When the token is malformed, unknown, expired or
	 *                   fails authentication.
	 */
	public function get($npt, $page){
		
		$page = $page[0];
		$explode = explode(".", $npt, 2);
		
		if(count($explode) !== 2){
			
			throw new Exception("Malformed nextPageToken!");
		}
		
		$apcu = $page . "." . $explode[0];
		
		$payload = apcu_fetch($apcu);
		
		if($payload === false){
			
			throw new Exception("The nextPageToken is invalid or has expired!");
		}
		
		// base64url => base64, restoring the stripped "=" padding.
		// str_pad() takes a target length, not a pad count, so the
		// missing characters are appended explicitly.
		$password = strtr($explode[1], '-_', '+/');
		$password .= str_repeat("=", (4 - strlen($password) % 4) % 4);
		$password = base64_decode($password);
		
		$payload = gzinflate($payload);
		$ivlen = openssl_cipher_iv_length("aes-256-gcm");
		
		// the blob must at least hold salt + iv + tag before it is sliced
		if(
			$payload === false ||
			strlen($payload) < 16 + $ivlen + 16
		){
			
			throw new Exception("The nextPageToken is invalid or has expired!");
		}
		
		$key =
			hash_pbkdf2(
				"sha512",
				$password,
				substr($payload, 0, 16), // salt
				20000,
				32,
				true
			);
		
		$payload =
			openssl_decrypt(
				substr(
					$payload,
					16 + $ivlen,
					-16
				),
				"aes-256-gcm",
				$key,
				OPENSSL_RAW_DATA,
				substr($payload, 16, $ivlen), // iv
				substr($payload, -16) // tag
			);
		
		if($payload === false){
			
			throw new Exception("The nextPageToken is invalid or has expired!");
		}
		
		// remove the key after using
		apcu_delete($apcu);
		
		return $payload;
	}
}

132
lib/type-todo.php Normal file
View File

@@ -0,0 +1,132 @@
/**
 * Autocomplete: returns up to 8 suggestions for the search box.
 *
 * - Empty search and empty bang: a fixed list of popular bangs.
 * - Empty search with a bang prefix: bangs from the database starting with it.
 * - Otherwise: phrases from DuckDuckGo's /ac/ endpoint, prefixed with the
 *   bang when that bang exists in the database.
 *
 * @param array $get Request parameters; reads "s" (search) and "bang".
 * @return array List of ["s" => suggestion, "n" => name, "u" => url]
 *               ("n" and "u" only where applicable).
 * @throws Exception When the database or the /ac/ endpoint is unavailable.
 */
public function type($get){
	
	// both parameters are optional in the request
	$search = $get["s"] ?? "";
	$bang = $get["bang"] ?? "";
	
	if(empty($search)){
		
		if(!empty($bang)){
			
			// !youtube
			// NOTE(review): credentials are hard-coded; move them to configuration
			$conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");
			
			if($conn === false){
				
				throw new Exception("Failed to connect to the database");
			}
			
			pg_prepare($conn, "bang_get", "SELECT bang,name FROM bangs WHERE bang LIKE $1 ORDER BY bang ASC LIMIT 8");
			
			// escape LIKE wildcards so user input only matches as a prefix
			$q = pg_execute($conn, "bang_get", [addcslashes($bang, "\\%_") . "%"]);
			
			$results = [];
			
			while($row = pg_fetch_array($q, null, PGSQL_ASSOC)){
				
				$results[] = [
					"s" => "!" . $row["bang"],
					"n" => $row["name"]
				];
			}
			
			return $results;
		}
		
		// everything is empty
		// lets just return a bang list
		return [
			[
				"s" => "!w",
				"n" => "Wikipedia",
				"u" => "https://en.wikipedia.org/wiki/Special:Search?search={%q%}"
			],
			[
				"s" => "!4ch",
				"n" => "4chan Board",
				"u" => "https://find.4chan.org/?q={%q%}"
			],
			[
				"s" => "!a",
				"n" => "Amazon",
				"u" => "https://www.amazon.com/s?k={%q%}"
			],
			[
				"s" => "!e",
				"n" => "eBay",
				"u" => "https://www.ebay.com/sch/items/?_nkw={%q%}"
			],
			[
				"s" => "!so",
				"n" => "Stack Overflow",
				"u" => "http://stackoverflow.com/search?q={%q%}"
			],
			[
				"s" => "!gh",
				"n" => "GitHub",
				"u" => "https://github.com/search?utf8=%E2%9C%93&q={%q%}"
			],
			[
				"s" => "!tw",
				"n" => "Twitter",
				"u" => "https://twitter.com/search?q={%q%}"
			],
			[
				"s" => "!r",
				"n" => "Reddit",
				"u" => "https://www.reddit.com/search?q={%q%}"
			],
		];
	}
	
	// now we know search isnt empty
	$row = false;
	
	if(!empty($bang)){
		
		// check if the bang exists
		$conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");
		
		if($conn === false){
			
			throw new Exception("Failed to connect to the database");
		}
		
		pg_prepare($conn, "bang_get_single", "SELECT bang,name FROM bangs WHERE bang = $1 LIMIT 1");
		$q = pg_execute($conn, "bang_get_single", [$bang]);
		
		$row = pg_fetch_array($q, null, PGSQL_ASSOC);
		
		if(isset($row["bang"])){
			
			$bang = "!$bang ";
		}else{
			
			// unknown bang: suggest plain phrases
			$bang = "";
		}
	}
	
	try{
		$res = $this->get(
			"https://duckduckgo.com/ac/",
			[
				"q" => strtolower($search)
			],
			ddg::req_xhr
		);
		
		$res = json_decode($res, true);
		
	}catch(Exception $e){
		
		throw new Exception("Failed to get /ac/", 0, $e);
	}
	
	// json_decode() returns null on invalid JSON instead of throwing
	if(!is_array($res)){
		
		throw new Exception("Failed to get /ac/");
	}
	
	$arr = [];
	$limit = min(count($res), 8);
	
	for($i=0; $i<$limit; $i++){
		
		// skip entries that carry no suggestion
		if(!isset($res[$i]["phrase"])){
			
			continue;
		}
		
		if(empty($bang)){
			
			$arr[] = [
				"s" => $res[$i]["phrase"]
			];
		}else{
			
			$arr[] = [
				"s" => $bang . $res[$i]["phrase"],
				"n" => $row["name"]
			];
		}
	}
	
	return $arr;
}