$value){ $html = str_replace( "{%{$key}%}", $value, $html ); } return trim($html); }
	
	/**
	 * Returns the body class selected by the "theme" cookie.
	 *
	 * @param bool $raw When true (default) and a theme class applies, the
	 *                  result is a ready-made ` class="..."` attribute;
	 *                  otherwise the bare class string is returned, which is
	 *                  "" when the cookie is missing or not "cream".
	 * @return string
	 */
	public function getthemeclass($raw = true){
		
		$body_class =
			(isset($_COOKIE["theme"]) && $_COOKIE["theme"] == "cream") ?
			"theme-white " : "";
		
		if($raw && $body_class != ""){
			
			return ' class="' . rtrim($body_class) . '"';
		}
		
		return $body_class;
	}
	
	/**
	 * Echoes the page header, then blocks known bot user agents.
	 *
	 * @param array  $get     Sanitized request parameters; "s" is the search string.
	 * @param array  $filters Filter definitions handed to generatehtmlfilters().
	 * @param string $page    Current result type (shown in title and description).
	 * @return void Terminates the script with die() when a bot is detected.
	 */
	public function loadheader(array $get, array $filters, string $page){
		
		// load() substitutes values verbatim, so escape the query once and
		// reuse it for every field, including the title
		$search = htmlspecialchars((string)$get["s"]);
		
		echo
			$this->load("header.html", [
				"title" => trim($search . " ({$page})"),
				"description" => ucfirst($page) . ' search results for "' . $search . '"',
				"index" => "no",
				"search" => $search,
				"tabs" => $this->generatehtmltabs($page, $get["s"]),
				"filters" => $this->generatehtmlfilters($filters, $get),
				"body_class" => $this->getthemeclass()
			]);
		
		// clients are not required to send a User-Agent header
		$user_agent = $_SERVER["HTTP_USER_AGENT"] ?? "";
		
		if(
			preg_match(
				'/bot|wget|curl|python-requests|scrapy|feedfetcher|go-http-client|ruby|universalfeedparser|yahoo\! slurp|spider|rss/i',
				$user_agent
			)
		){
			
			// bot detected !!
			echo
				$this->drawerror(
					"Tshh, blocked!",
					'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running your own 4get instance or using the API.',
				);
			die();
		}
	}
	
	/**
	 * Renders search.html with an error message in its "left" slot.
	 *
	 * @param string $title Heading; HTML-escaped before insertion.
	 * @param string $error Message body; inserted as-is, so it must already
	 *                      be safe to output.
	 * @return string
	 */
	public function drawerror($title, $error){ return $this->load("search.html", [ "class" => "", "right-left" => "", "right-right" => "", "left" => '
' . '

' . htmlspecialchars($title) . '

' . $error . '
' ]); } /**
	 * Builds and returns the HTML payload for one text search result.
	 *
	 * Appends, in order: the link block from drawlink($site["url"]), the
	 * escaped $duration (when not null), the upper-cased $site["type"] (when
	 * set and not "web"), the escaped title, the escaped $greentext (when not
	 * null), the description run through highlighttext() (when not null),
	 * the sublinks, and the escaped key/value pairs of $site["table"].
	 *
	 * $tabindex is not referenced in the code visible here.
	 *
	 * NOTE(review): the required $keywords parameter follows optional ones,
	 * which PHP 8.0+ reports as deprecated; callers must pass all of the
	 * first four arguments anyway.
	 */ public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true){ $payload = '
'; // add favicon, link and archive links $payload .= $this->drawlink($site["url"]); /* Draw title + description + filetype */ $payload .= ''; if($duration !== null){ $payload .= '
' . htmlspecialchars($duration) . '
'; } $payload .= '
'; } $payload .= '
'; if( isset($site["type"]) && $site["type"] != "web" ){ $payload .= '
' . strtoupper($site["type"]) . '
'; } $payload .= htmlspecialchars($site["title"]) . '
'; if($greentext !== null){ $payload .= '
' . htmlspecialchars($greentext) . '
'; } if($site["description"] !== null){ $payload .= '
' . $this->highlighttext($keywords, $site["description"]) . '
'; } $payload .= '
'; /* Sublinks */ if( isset($site["sublink"]) && !empty($site["sublink"]) ){ /* NOTE(review): this comparator returns a bool; usort() expects an int (<0, 0, >0) and PHP 8 deprecates bool returns -- strlen(a) <=> strlen(b) keeps the ascending order */ usort($site["sublink"], function($a, $b){ return strlen($a["description"]) > strlen($b["description"]); }); $payload .= ''; } if( isset($site["table"]) && !empty($site["table"]) ){ $payload .= ''; foreach($site["table"] as $title => $value){ $payload .= '' . '' . '' . ''; } $payload .= '
' . htmlspecialchars($title) . '' . htmlspecialchars($value) . '
'; } return $payload . ''; }
	
	/**
	 * HTML-escapes $text and applies the highlight replacement to every
	 * whole-word, case-insensitive occurrence of a keyword.
	 *
	 * @param string $keywords Space separated search terms.
	 * @param string $text     Raw (unescaped) text.
	 * @return string Escaped text; returned without replacement when there
	 *                is no usable keyword or the regex engine fails.
	 */
	public function highlighttext($keywords, $text){
		
		$text = htmlspecialchars($text);
		
		$regex = [];
		
		foreach(explode(" ", (string)$keywords) as $word){
			
			if($word === ""){
				
				// blank terms (empty query, repeated spaces) would yield
				// "\b\b", which matches at every word boundary
				continue;
			}
			
			// escape the term like $text so characters such as & line up
			$regex[] = "\b" . preg_quote(htmlspecialchars($word), "/") . "\b";
		}
		
		if($regex === []){
			
			return $text;
		}
		
		$highlighted =
			preg_replace(
				"/" . implode("|", $regex) . "/i",
				'${0}',
				$text
			);
		
		// preg_replace() returns null on PCRE errors; keep the escaped text
		return $highlighted === null ? $text : $highlighted;
	}
	
	function highlightcode($text){
		
		// https://www.php.net/highlight_string
		ini_set("highlight.comment", "c-comment");
		ini_set("highlight.default", "c-default");
		ini_set("highlight.html", "c-default");
		ini_set("highlight.keyword", "c-keyword");
		ini_set("highlight.string", "c-string");
		
		$text = trim( preg_replace( '/<\/span>$/', "", // remove stray ending span because of the ', ' ' ], [ "\n", // replace
with newlines " " // replace html entity to space ], str_replace( [ // leading \n<?php ", "", "" ], "", highlight_string("', '', $text); } return $text; } /** Builds the HTML for a result's link area and returns it as a string: parses $link with parse_url(), derives a two-character label from the host (a leading "www" label is skipped), collects archive hosts and an archive path for boards.4chan.org / boards.4channel.org URLs (archived.moe is added for every recognised board), then splits $link on "/" and appends each urldecoded, HTML-escaped segment. */ public function drawlink($link){ /* Add favicon */ $host = parse_url($link); $esc = explode( ".", $host["host"], 2 ); if( count($esc) === 2 && $esc[0] == "www" ){ $esc = $esc[1]; }else{ $esc = $esc[0]; } $esc = substr($esc, 0, 2); $urlencode = urlencode($link); $payload = '
' . '' . '
'; /* Add archive links */ if( $host["host"] == "boards.4chan.org" || $host["host"] == "boards.4channel.org" ){ $archives = []; /* NOTE(review): parse_url() only returns components present in the URL, so "path" (and "host" above) may be unset; explode() never returns an empty array, so the $count !== 0 guard below is always true and $path[1] is read without an isset() check -- confirm callers always pass absolute URLs with a path */ $path = explode("/", $host["path"]); $count = count($path); // /pol/thread/417568063/post-shitty-memes-if-you-want-to if($count !== 0){ $isboard = true; switch($path[1]){ case "con": break; case "q": $archives[] = "desuarchive.org"; break; case "qa": $archives[] = "desuarchive.org"; break; case "qb": $archives[] = "arch.b4k.co"; break; case "trash": $archives[] = "desuarchive.org"; break; case "a": $archives[] = "desuarchive.org"; break; case "c": $archives[] = "desuarchive.org"; break; case "w": break; case "m": $archives[] = "desuarchive.org"; break; case "cgl": $archives[] = "desuarchive.org"; $archives[] = "warosu.org"; break; case "cm": $archives[] = "boards.fireden.net"; break; case "f": $archives[] = "archive.4plebs.org"; break; case "n": break; case "jp": $archives[] = "warosu.org"; break; case "vt": $archives[] = "warosu.org"; break; case "v": $archives[] = "boards.fireden.net"; $archives[] = "arch.b4k.co"; break; case "vg": $archives[] = "boards.fireden.net"; $archives[] = "arch.b4k.co"; break; case "vm": $archives[] = "arch.b4k.co"; break; case "vmg": $archives[] = "arch.b4k.co"; break; case "vp": $archives[] = "arch.b4k.co"; break; case "vr": $archives[] = "desuarchive.org"; $archives[] = "warosu.org"; break; case "vrpg": $archives[] = "arch.b4k.co"; break; case "vst": $archives[] = "arch.b4k.co"; break; case "co": $archives[] = "desuarchive.org"; break; case "g": $archives[] = "desuarchive.org"; $archives[] = "arch.b4k.co"; break; case "tv": $archives[] = "archive.4plebs.org"; break; case "k": $archives[] = "desuarchive.org"; break; case "o": $archives[] = "archive.4plebs.org"; break; case "an": $archives[] = "desuarchive.org"; break; case "tg": $archives[] = "desuarchive.org"; $archives[] = "archive.4plebs.org"; break; case "sp": $archives[] = "archive.4plebs.org"; break; case "xs": $archives[] = "eientei.xyz"; break; case 
"pw": break; case "sci": $archives[] = "boards.fireden.net"; $archives[] = "warosu.org"; $archives[] = "eientei.xyz"; break; case "his": $archives[] = "desuarchive.org"; break; case "int": $archives[] = "desuarchive.org"; break; case "out": break; case "toy": break; case "i": $archives[] = "archiveofsins.com"; $archives[] = "eientei.xyz"; break; case "po": break; case "p": break; case "ck": $archives[] = "warosu.org"; break; case "ic": $archives[] = "boards.fireden.net"; $archives[] = "warosu.org"; break; case "wg": break; case "lit": $archives[] = "warosu.org"; break; case "mu": $archives[] = "desuarchive.org"; break; case "fa": $archives[] = "warosu.org"; break; case "3": $archives[] = "warosu.org"; $archives[] = "eientei.xyz"; break; case "gd": break; case "diy": $archives[] = "warosu.org"; break; case "wsg": $archives[] = "desuarchive.org"; break; case "qst": break; case "biz": $archives[] = "warosu.org"; break; case "trv": $archives[] = "archive.4plebs.org"; break; case "fit": $archives[] = "desuarchive.org"; break; case "x": $archives[] = "archive.4plebs.org"; break; case "adv": $archives[] = "archive.4plebs.org"; break; case "lgbt": $archives[] = "archiveofsins.com"; break; case "mlp": $archives[] = "desuarchive.org"; $archives[] = "arch.b4k.co"; break; case "news": break; case "wsr": break; case "vip": break; case "b": $archives[] = "thebarchive.com"; break; case "r9k": $archives[] = "desuarchive.org"; break; case "pol": $archives[] = "archive.4plebs.org"; break; case "bant": $archives[] = "thebarchive.com"; break; case "soc": $archives[] = "archiveofsins.com"; break; case "s4s": $archives[] = "archive.4plebs.org"; break; case "s": $archives[] = "archiveofsins.com"; break; case "hc": $archives[] = "archiveofsins.com"; break; case "hm": $archives[] = "archiveofsins.com"; break; case "h": $archives[] = "archiveofsins.com"; break; case "e": break; case "u": $archives[] = "archiveofsins.com"; break; case "d": $archives[] = "desuarchive.org"; break; case "y": 
$archives[] = "boards.fireden.net"; break; case "t": $archives[] = "archiveofsins.com"; break; case "hr": $archives[] = "archive.4plebs.org"; break; case "gif": break; case "aco": $archives[] = "desuarchive.org"; break; case "r": $archives[] = "archiveofsins.com"; break; default: $isboard = false; break; } if($isboard === true){ $archives[] = "archived.moe"; } $trail = ""; if( isset($path[2]) && isset($path[3]) && $path[2] == "thread" ){ $trail .= "/" . $path[1] . "/thread/" . $path[3]; }elseif($isboard){ $trail = "/" . $path[1] . "/"; } for($i=0; $i' . '' . $archives[$i][0] . $archives[$i][1] . '' . $archives[$i] . ''; } } } $payload .= 'goGoogle cache' . 'arArchive.org' . 'arArchive.is' . 'biBing cache' . 'meMegalodon' . '
'; /* Draw link */ $parts = explode("/", $link); $clickurl = ""; // remove trailing / $c = count($parts) - 1; if($parts[$c] == ""){ $parts[$c - 1] = $parts[$c - 1] . "/"; unset($parts[$c]); } // merge https://site together $parts = [ $parts[0] . $parts[1] . '//' . $parts[2], ...array_slice($parts, 3, count($parts) - 1) ]; $c = count($parts); for($i=0; $i<$c; $i++){ if($i !== 0){ $clickurl .= "/"; } $clickurl .= $parts[$i]; if($i === $c - 1){ $parts[$i] = rtrim($parts[$i], "/"); } $payload .= '' . htmlspecialchars(urldecode($parts[$i])) . ''; if($i !== $c - 1){ $payload .= ''; } } return $payload . '
'; }
	
	/**
	 * Picks the scraper for $page, loads it and returns it together with
	 * the filter definitions for the page.
	 *
	 * Selection order: the "scraper_<page>" cookie, overridden by a string
	 * $_GET["scraper"], or else by the prefix of $_GET["npt"] (text before
	 * the first "." with trailing digits removed). A name that is not
	 * offered for $page falls back to the first scraper listed for it.
	 *
	 * Side effects: writes the chosen name to $_GET["scraper"] and copies
	 * the "nsfw" cookie to $_GET["nsfw"] when the latter is not set.
	 *
	 * @param string $page One of "web", "images", "videos", "news". Any
	 *                     other value gets no "scraper" filter and the
	 *                     "ddg" scraper.
	 * @return array [scraper instance, merged filter definitions]
	 */
	public function getscraperfilters($page){
		
		// scrapers offered per page; the first entry is the default
		$available = [
			"web" => [
				"ddg" => "DuckDuckGo",
				"brave" => "Brave",
				"google" => "Google",
				"mojeek" => "Mojeek",
				"marginalia" => "Marginalia",
				"wiby" => "wiby"
			],
			"images" => [
				"ddg" => "DuckDuckGo",
				"yandex" => "Yandex",
				"google" => "Google"
			],
			"videos" => [
				"yt" => "YouTube",
				"ddg" => "DuckDuckGo",
				"google" => "Google"
			],
			"news" => [
				"ddg" => "DuckDuckGo",
				"brave" => "Brave",
				"google" => "Google",
				"mojeek" => "Mojeek"
			]
		];
		
		// scraper name => [file to include, class to instantiate]
		$scrapers = [
			"ddg" => ["scraper/ddg.php", "ddg"],
			"brave" => ["scraper/brave.php", "brave"],
			"yt" => ["scraper/youtube.php", "youtube"],
			"yandex" => ["scraper/yandex.php", "yandex"],
			"google" => ["scraper/google.php", "google"],
			"mojeek" => ["scraper/mojeek.php", "mojeek"],
			"marginalia" => ["scraper/marginalia.php", "marginalia"],
			"wiby" => ["scraper/wiby.php", "wiby"]
		];
		
		$known_page = is_string($page) && isset($available[$page]);
		$options = $known_page ? $available[$page] : [];
		
		// preferred scraper from the per-page cookie
		$get_scraper = $known_page ? ($_COOKIE["scraper_" . $page] ?? null) : null;
		
		if(
			isset($_GET["scraper"]) &&
			is_string($_GET["scraper"])
		){
			
			$get_scraper = $_GET["scraper"];
		}elseif(
			isset($_GET["npt"]) &&
			is_string($_GET["npt"])
		){
			
			// next-page tokens start with the scraper name plus digits
			$get_scraper =
				preg_replace(
					'/[0-9]+$/',
					"",
					explode(".", $_GET["npt"], 2)[0]
				);
		}
		
		// add search field
		$filters = [
			"s" => [
				"option" => "_SEARCH"
			]
		];
		
		if($known_page){
			
			$filters["scraper"] = [
				"display" => "Scraper",
				"option" => $options
			];
		}
		
		// get scraper name from user input, or default out to preferred scraper
		if(
			is_string($get_scraper) &&
			isset($options[$get_scraper])
		){
			
			$scraper_out = $get_scraper;
		}else{
			
			// unknown pages have no option list: keep the historical "ddg" fallback
			$scraper_out = array_key_first($options) ?? "ddg";
		}
		
		[$file, $class] = $scrapers[$scraper_out];
		
		// include_once: a second call must not redeclare the scraper class
		include_once $file;
		$lib = new $class();
		
		// set scraper on $_GET
		$_GET["scraper"] = $scraper_out;
		
		// set nsfw on $_GET
		if(
			isset($_COOKIE["nsfw"]) &&
			!isset($_GET["nsfw"])
		){
			
			$_GET["nsfw"] = $_COOKIE["nsfw"];
		}
		
		return
			[
				$lib,
				array_merge_recursive(
					$filters,
					$lib->getfilters($page)
				)
			];
	}
	
	/**
	 * Builds the sanitized parameter set for a request.
	 *
	 * The result always carries "npt" (the string token or false) and one
	 * entry per key of $whitelist; unlisted request parameters are dropped.
	 * A non-string value for a whitelisted key is stored as null and not
	 * processed further. List options must match one of the option keys,
	 * otherwise the first key is used. "_DATE" options become a timestamp
	 * or false; "_SEARCH" options are trimmed and a leading "!bang" is
	 * split off into "bang". Finally "older"/"newer" are swapped when
	 * "newer" is the later of the two.
	 *
	 * @param array $parameters Raw request parameters (e.g. $_GET).
	 * @param array $whitelist  Filter definitions keyed by parameter name,
	 *                          each with an "option" entry.
	 * @return array
	 */
	public function parsegetfilters($parameters, $whitelist){
		
		$sanitized = [];
		
		// add npt token
		if(
			isset($parameters["npt"]) &&
			is_string($parameters["npt"])
		){
			
			$sanitized["npt"] = $parameters["npt"];
		}else{
			
			$sanitized["npt"] = false;
		}
		
		// we're iterating over $whitelist, so
		// you can't pollute $sanitized with useless
		// parameters
		foreach($whitelist as $parameter => $value){
			
			if(isset($parameters[$parameter])){
				
				if(!is_string($parameters[$parameter])){
					
					$sanitized[$parameter] = null;
					continue;
				}
				
				// parameter is already set, use that value
				$sanitized[$parameter] = $parameters[$parameter];
			}else{
				
				// parameter is not set, add it
				if(is_string($value["option"])){
					
					// special field: set default value manually
					switch($value["option"]){
						
						case "_DATE":
							// no date set
							$sanitized[$parameter] = false;
							break;
						
						case "_SEARCH":
							// no search set
							$sanitized[$parameter] = "";
							break;
					}
					
				}else{
					
					// set a default value
					$sanitized[$parameter] = array_keys($value["option"])[0];
				}
			}
			
			// sanitize input
			if(is_array($value["option"])){
				if(
					!in_array(
						$sanitized[$parameter],
						$keys = array_keys($value["option"])
					)
				){
					
					$sanitized[$parameter] = $keys[0];
				}
			}else{
				
				// sanitize search & string
				switch($value["option"]){
					
					case "_DATE":
if($sanitized[$parameter] !== false){
							
							$sanitized[$parameter] = strtotime($sanitized[$parameter]);
							if($sanitized[$parameter] <= 0){
								
								$sanitized[$parameter] = false;
							}
						}
						break;
					
					case "_SEARCH":
						
						// get search string & bang
						$sanitized[$parameter] = trim($sanitized[$parameter]);
						$sanitized["bang"] = "";
						
						if(
							strlen($sanitized[$parameter]) !== 0 &&
							$sanitized[$parameter][0] == "!"
						){
							
							$sanitized[$parameter] = explode(" ", $sanitized[$parameter], 2);
							
							$sanitized["bang"] = trim($sanitized[$parameter][0]);
							
							if(count($sanitized[$parameter]) === 2){
								
								$sanitized[$parameter] = trim($sanitized[$parameter][1]);
							}else{
								
								$sanitized[$parameter] = "";
							}
							
							$sanitized["bang"] = ltrim($sanitized["bang"], "!");
						}
						
						$sanitized[$parameter] = ltrim($sanitized[$parameter], "! \n\r\t\v\x00");
				}
			}
		}
		
		// invert dates if needed
		if(
			isset($sanitized["older"]) &&
			isset($sanitized["newer"]) &&
			$sanitized["newer"] !== false &&
			$sanitized["older"] !== false &&
			$sanitized["newer"] > $sanitized["older"]
		){
			
			// invert
			[
				$sanitized["older"],
				$sanitized["newer"]
			] = [
				$sanitized["newer"],
				$sanitized["older"]
			];
		}
		
		return $sanitized;
	}
	
	/**
	 * Formats a duration in seconds as "M:SS" or "H:MM:SS".
	 *
	 * Hours are not wrapped at 24 (86400 gives "24:00:00"). Null and
	 * negative values are treated as 0.
	 *
	 * @param int|float|string|null $seconds Duration; any string value is
	 *                                       taken to mean a live stream.
	 * @return string "LIVE" for string input, otherwise the formatted time.
	 */
	public function s_to_timestamp($seconds){
		
		if(is_string($seconds)){
			
			return "LIVE";
		}
		
		$seconds = max(0, (int)$seconds);
		
		$hours = intdiv($seconds, 3600);
		$minutes = intdiv($seconds % 3600, 60);
		$secs = $seconds % 60;
		
		if($hours > 0){
			
			return sprintf("%d:%02d:%02d", $hours, $minutes, $secs);
		}
		
		return sprintf("%d:%02d", $minutes, $secs);
	}
	
	/**
	 * Concatenates one capitalised entry per result type (web, images,
	 * videos, news) and returns the resulting string.
	 *
	 * $page and $query are not referenced in the code visible here.
	 */
	public function generatehtmltabs($page, $query){
		
		$html = null;
		
		foreach(["web", "images", "videos", "news"] as $type){
			
			$html .= '' . ucfirst($type) . '';
		}
		
		return $html;
	}
	
	public function generatehtmlfilters($filters, $params){ $html = null; foreach($filters as $filter_name => $filter_values){ if(!isset($filter_values["display"])){ continue; } $output = true; $tmp = '
' . '
' . htmlspecialchars($filter_values["display"]) . '
'; if(is_array($filter_values["option"])){ $tmp .= ''; }else{ switch($filter_values["option"]){ case "_DATE": $tmp .= ' $value){ if( $value == null || $value == false || $key == "npt" || $key == "extendedsearch" || $value == "any" || $value == "all" || ( $ommit === true && $key == "s" ) ){ continue; } $out[$key] = $value; } return http_build_query($out); }
	
	/**
	 * Returns the URL of the next result page: $page, the query string
	 * produced by buildquery($gets), and the token appended as "&npt=".
	 *
	 * @param array  $gets Current request parameters.
	 * @param string $npt  Next-page token; appended as given.
	 * @param string $page Path of the page to link to.
	 * @return string
	 */
	public function htmlnextpage($gets, $npt, $page){
		
		return $page . "?" . $this->buildquery($gets) . "&npt=" . $npt;
	}
}