'; }else{ $replacements["style"] = ""; } if(isset($_COOKIE["scraper_ac"])){ $replacements["ac"] = '?ac=' . htmlspecialchars($_COOKIE["scraper_ac"]); }else{ $replacements["ac"] = ''; } if( isset($replacements["timetaken"]) && $replacements["timetaken"] !== null ){ $replacements["timetaken"] = '
Took ' . number_format(microtime(true) - $replacements["timetaken"], 2) . 's
'; } $handle = fopen("template/{$template}", "r"); $data = fread($handle, filesize("template/{$template}")); fclose($handle); $data = explode("\n", $data); $html = ""; for($i=0; $i $value){ $html = str_replace( "{%{$key}%}", $value, $html ); } return trim($html); } public function loadheader(array $get, array $filters, string $page){ echo $this->load("header.html", [ "title" => trim(htmlspecialchars($get["s"]) . " ({$page})"), "description" => ucfirst($page) . ' search results for "' . htmlspecialchars($get["s"]) . '"', "index" => "no", "search" => htmlspecialchars($get["s"]), "tabs" => $this->generatehtmltabs($page, $get["s"]), "filters" => $this->generatehtmlfilters($filters, $get) ]); $headers_raw = getallheaders(); $header_keys = []; $user_agent = ""; $bad_header = false; // block bots that present X-Forwarded-For, Via, etc foreach($headers_raw as $headerkey => $headervalue){ $headerkey = strtolower($headerkey); if($headerkey == "user-agent"){ $user_agent = $headervalue; continue; } // check header key if(in_array($headerkey, config::FILTERED_HEADER_KEYS)){ $bad_header = true; break; } } // SSL check $bad_ssl = false; if( isset($_SERVER["https"]) && $_SERVER["https"] == "on" && isset($_SERVER["SSL_CIPHER"]) && in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS) ){ $bad_ssl = true; } if( $bad_header === true || $bad_ssl === true || $user_agent == "" || // user agent check preg_match( config::HEADER_REGEX, $user_agent ) ){ // bot detected !! apcu_inc("captcha_gen"); $this->drawerror( "Tshh, blocked!", 'Your browser, IP or IP range has been blocked from this 4get instance. If this is an error, please contact the administrator.' ); die(); } } public function drawerror($title, $error, $timetaken = null){ if($timetaken === null){ $timetaken = microtime(true); } echo $this->load("search.html", [ "timetaken" => $timetaken, "class" => "", "right-left" => "", "right-right" => "", "left" => '
' . '

' . htmlspecialchars($title) . '

' . $error . '
' ]); die(); } public function drawscrapererror($error, $get, $target, $timetaken = null){ if($timetaken === null){ $timetaken = microtime(true); } $this->drawerror( "Shit", 'This scraper returned an error:' . '
' . htmlspecialchars($error) . '
' . 'Things you can try:' . '
' . 'If the error persists, please contact the administrator.', $timetaken ); } public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){ $payload = '
'; // add favicon, link and archive links $payload .= $this->drawlink($site["url"]); /* Draw title + description + filetype */ $payload .= ''; if($duration !== null){ $payload .= '
' . htmlspecialchars($duration) . '
'; } $payload .= '
'; } $payload .= '
'; if( isset($site["type"]) && $site["type"] != "web" ){ $payload .= '
' . strtoupper($site["type"]) . '
'; } $payload .= $this->highlighttext($keywords, $site["title"]) . '
'; if($greentext !== null){ $payload .= '
' . htmlspecialchars($greentext) . '
'; } if($site["description"] !== null){ $payload .= '
' . $this->highlighttext($keywords, $site["description"]) . '
'; } $payload .= $customhtml; $payload .= '
'; /* Sublinks */ if( isset($site["sublink"]) && !empty($site["sublink"]) ){ usort($site["sublink"], function($a, $b){ return strlen($a["description"]) > strlen($b["description"]); }); $payload .= ''; } if( isset($site["table"]) && !empty($site["table"]) ){ $payload .= ''; foreach($site["table"] as $title => $value){ $payload .= '' . '' . '' . ''; } $payload .= '
' . htmlspecialchars($title) . '' . htmlspecialchars($value) . '
'; } return $payload . ''; } public function highlighttext($keywords, $text){ $text = htmlspecialchars($text); $keywords = explode(" ", $keywords); $regex = []; foreach($keywords as $word){ $regex[] = "\b" . preg_quote($word, "/") . "\b"; } $regex = "/" . implode("|", $regex) . "/i"; return preg_replace( $regex, '${0}', $text ); } function highlightcode($text){ // https://www.php.net/highlight_string ini_set("highlight.comment", "c-comment"); ini_set("highlight.default", "c-default"); ini_set("highlight.html", "c-default"); ini_set("highlight.keyword", "c-keyword"); ini_set("highlight.string", "c-string"); $text = trim( preg_replace( '/<\/span>$/', "", // remove stray ending span because of the ', ' ' ], [ "\n", // replace
with newlines " " // replace html entity to space ], str_replace( [ // leading \n<?php ", "", "" ], "", highlight_string("', '', $text); } return $text; } public function drawlink($link){ /* Add favicon */ $host = parse_url($link); $esc = explode( ".", $host["host"], 2 ); if( count($esc) === 2 && $esc[0] == "www" ){ $esc = $esc[1]; }else{ $esc = $esc[0]; } $esc = substr($esc, 0, 2); $urlencode = urlencode($link); $payload = '
' . '' . '
'; /* Add archive links */ if( $host["host"] == "boards.4chan.org" || $host["host"] == "boards.4channel.org" ){ $archives = []; $path = explode("/", $host["path"]); $count = count($path); // /pol/thread/417568063/post-shitty-memes-if-you-want-to if($count !== 0){ $isboard = true; switch($path[1]){ case "con": break; case "q": $archives[] = "desuarchive.org"; break; case "qa": $archives[] = "desuarchive.org"; break; case "qb": $archives[] = "arch.b4k.co"; break; case "trash": $archives[] = "desuarchive.org"; break; case "a": $archives[] = "desuarchive.org"; break; case "c": $archives[] = "desuarchive.org"; break; case "w": break; case "m": $archives[] = "desuarchive.org"; break; case "cgl": $archives[] = "desuarchive.org"; $archives[] = "warosu.org"; break; case "f": $archives[] = "archive.4plebs.org"; break; case "n": break; case "jp": $archives[] = "warosu.org"; break; case "vt": $archives[] = "warosu.org"; break; case "v": $archives[] = "arch.b4k.co"; break; case "vg": $archives[] = "arch.b4k.co"; break; case "vm": $archives[] = "arch.b4k.co"; break; case "vmg": $archives[] = "arch.b4k.co"; break; case "vp": $archives[] = "arch.b4k.co"; break; case "vr": $archives[] = "desuarchive.org"; $archives[] = "warosu.org"; break; case "vrpg": $archives[] = "arch.b4k.co"; break; case "vst": $archives[] = "arch.b4k.co"; break; case "co": $archives[] = "desuarchive.org"; break; case "g": $archives[] = "desuarchive.org"; $archives[] = "arch.b4k.co"; break; case "tv": $archives[] = "archive.4plebs.org"; break; case "k": $archives[] = "desuarchive.org"; break; case "o": $archives[] = "archive.4plebs.org"; break; case "an": $archives[] = "desuarchive.org"; break; case "tg": $archives[] = "desuarchive.org"; $archives[] = "archive.4plebs.org"; break; case "sp": $archives[] = "archive.4plebs.org"; break; case "xs": $archives[] = "eientei.xyz"; break; case "pw": break; case "sci": $archives[] = "warosu.org"; $archives[] = "eientei.xyz"; break; case "his": $archives[] = "desuarchive.org"; break; case "int": $archives[] = "desuarchive.org"; break; case "out": break; case "toy": break; case "i": $archives[] = "archiveofsins.com"; $archives[] = "eientei.xyz"; break; case "po": break; case "p": break; case "ck": $archives[] = "warosu.org"; break; case "ic": $archives[] = "warosu.org"; break; case "wg": break; case "lit": $archives[] = "warosu.org"; break; case "mu": $archives[] = "desuarchive.org"; break; case "fa": $archives[] = "warosu.org"; break; case "3": $archives[] = "warosu.org"; $archives[] = "eientei.xyz"; break; case "gd": break; case "diy": $archives[] = "warosu.org"; break; case "wsg": $archives[] = "desuarchive.org"; break; case "qst": break; case "biz": $archives[] = "warosu.org"; break; case "trv": $archives[] = "archive.4plebs.org"; break; case "fit": $archives[] = "desuarchive.org"; break; case "x": $archives[] = "archive.4plebs.org"; break; case "adv": $archives[] = "archive.4plebs.org"; break; case "lgbt": $archives[] = "archiveofsins.com"; break; case "mlp": $archives[] = "desuarchive.org"; $archives[] = "arch.b4k.co"; break; case "news": break; case "wsr": break; case "vip": break; case "b": $archives[] = "thebarchive.com"; break; case "r9k": $archives[] = "desuarchive.org"; break; case "pol": $archives[] = "archive.4plebs.org"; break; case "bant": $archives[] = "thebarchive.com"; break; case "soc": $archives[] = "archiveofsins.com"; break; case "s4s": $archives[] = "archive.4plebs.org"; break; case "s": $archives[] = "archiveofsins.com"; break; case "hc": $archives[] = "archiveofsins.com"; break; case "hm": $archives[] = "archiveofsins.com"; break; case "h": $archives[] = "archiveofsins.com"; break; case "e": break; case "u": $archives[] = "archiveofsins.com"; break; case "d": $archives[] = "desuarchive.org"; break; case "t": $archives[] = "archiveofsins.com"; break; case "hr": $archives[] = "archive.4plebs.org"; break; case "gif": break; case "aco": $archives[] = "desuarchive.org"; break; case "r": $archives[] = "archiveofsins.com"; break; default: $isboard = false; break; } if($isboard === true){ $archives[] = "archived.moe"; } $trail = ""; if( isset($path[2]) && isset($path[3]) && $path[2] == "thread" ){ $trail .= "/" . $path[1] . "/thread/" . $path[3]; }elseif($isboard){ $trail = "/" . $path[1] . "/"; } for($i=0; $i' . '' . $archives[$i][0] . $archives[$i][1] . '' . $archives[$i] . ''; } } } $payload .= 'goGoogle cache' . 'arArchive.org' . 'arArchive.is' . 'ghGhostarchive' . 'biBing cache' . 'meMegalodon' . '
'; /* Draw link */ $parts = explode("/", $link); $clickurl = ""; // remove trailing / $c = count($parts) - 1; if($parts[$c] == ""){ $parts[$c - 1] = $parts[$c - 1] . "/"; unset($parts[$c]); } // merge https://site together $parts = [ $parts[0] . $parts[1] . '//' . $parts[2], ...array_slice($parts, 3, count($parts) - 1) ]; $c = count($parts); for($i=0; $i<$c; $i++){ if($i !== 0){ $clickurl .= "/"; } $clickurl .= $parts[$i]; if($i === $c - 1){ $parts[$i] = rtrim($parts[$i], "/"); } $payload .= '' . htmlspecialchars(urldecode($parts[$i])) . ''; if($i !== $c - 1){ $payload .= ''; } } return $payload . '
'; } public function getscraperfilters($page){ $get_scraper = isset($_COOKIE["scraper_$page"]) ? $_COOKIE["scraper_$page"] : null; if( isset($_GET["scraper"]) && is_string($_GET["scraper"]) ){ $get_scraper = $_GET["scraper"]; }else{ if( isset($_GET["npt"]) && is_string($_GET["npt"]) ){ $get_scraper = explode(".", $_GET["npt"], 2)[0]; $get_scraper = preg_replace( '/[0-9]+$/', "", $get_scraper ); } } // add search field $filters = [ "s" => [ "option" => "_SEARCH" ] ]; // define default scrapers switch($page){ case "web": $filters["scraper"] = [ "display" => "Scraper", "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", "yandex" => "Yandex", "google" => "Google", "google_cse" => "Google CSE", "startpage" => "Startpage", "qwant" => "Qwant", "ghostery" => "Ghostery", "yep" => "Yep", "greppr" => "Greppr", "crowdview" => "Crowdview", "mwmbl" => "Mwmbl", "mojeek" => "Mojeek", "solofield" => "Solofield", "marginalia" => "Marginalia", "wiby" => "wiby", "curlie" => "Curlie" ] ]; break; case "images": $filters["scraper"] = [ "display" => "Scraper", "option" => [ "ddg" => "DuckDuckGo", "yandex" => "Yandex", "brave" => "Brave", "google" => "Google", "google_cse" => "Google CSE", "startpage" => "Startpage", "qwant" => "Qwant", "yep" => "Yep", "solofield" => "Solofield", //"pinterest" => "Pinterest", "imgur" => "Imgur", "ftm" => "FindThatMeme" ] ]; break; case "videos": $filters["scraper"] = [ "display" => "Scraper", "option" => [ "yt" => "YouTube", //"fb" => "Facebook videos", "ddg" => "DuckDuckGo", "brave" => "Brave", "yandex" => "Yandex", "google" => "Google", "startpage" => "Startpage", "qwant" => "Qwant", "solofield" => "Solofield" ] ]; break; case "news": $filters["scraper"] = [ "display" => "Scraper", "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", "google" => "Google", "startpage" => "Startpage", "qwant" => "Qwant", "yep" => "Yep", "mojeek" => "Mojeek" ] ]; break; case "music": $filters["scraper"] = [ "display" => "Scraper", "option" => [ "sc" => "SoundCloud" //"spotify" => "Spotify" ] ]; break; } // get scraper name from user input, or default out to preferred scraper $scraper_out = null; $first = true; foreach($filters["scraper"]["option"] as $scraper_name => $scraper_pretty){ if($first === true){ $first = $scraper_name; } if($scraper_name == $get_scraper){ $scraper_out = $scraper_name; } } if($scraper_out === null){ $scraper_out = $first; } include "scraper/$scraper_out.php"; $lib = new $scraper_out(); // set scraper on $_GET $_GET["scraper"] = $scraper_out; // set nsfw on $_GET if( isset($_COOKIE["nsfw"]) && !isset($_GET["nsfw"]) ){ $_GET["nsfw"] = $_COOKIE["nsfw"]; } return [ $lib, array_merge_recursive( $filters, $lib->getfilters($page) ) ]; } public function parsegetfilters($parameters, $whitelist){ $sanitized = []; // add npt token if( isset($parameters["npt"]) && is_string($parameters["npt"]) ){ $sanitized["npt"] = $parameters["npt"]; }else{ $sanitized["npt"] = false; } // we're iterating over $whitelist, so // you can't polluate $sanitized with useless // parameters foreach($whitelist as $parameter => $value){ if(isset($parameters[$parameter])){ if(!is_string($parameters[$parameter])){ $sanitized[$parameter] = null; continue; } // parameter is already set, use that value $sanitized[$parameter] = $parameters[$parameter]; }else{ // parameter is not set, add it if(is_string($value["option"])){ // special field: set default value manually switch($value["option"]){ case "_DATE": // no date set $sanitized[$parameter] = false; break; case "_SEARCH": // no search set $sanitized[$parameter] = ""; break; } }else{ // set a default value $sanitized[$parameter] = array_keys($value["option"])[0]; } } // sanitize input if(is_array($value["option"])){ if( !in_array( $sanitized[$parameter], $keys = array_keys($value["option"]) ) ){ $sanitized[$parameter] = $keys[0]; } }else{ // sanitize search & string switch($value["option"]){ case "_DATE": if($sanitized[$parameter] !== false){ $sanitized[$parameter] = strtotime($sanitized[$parameter]); if($sanitized[$parameter] <= 0){ $sanitized[$parameter] = false; } } break; case "_SEARCH": // get search string $sanitized["s"] = trim($sanitized[$parameter]); } } } // invert dates if needed if( isset($sanitized["older"]) && isset($sanitized["newer"]) && $sanitized["newer"] !== false && $sanitized["older"] !== false && $sanitized["newer"] > $sanitized["older"] ){ // invert [ $sanitized["older"], $sanitized["newer"] ] = [ $sanitized["newer"], $sanitized["older"] ]; } return $sanitized; } public function s_to_timestamp($seconds){ if(is_string($seconds)){ return "LIVE"; } return ($seconds >= 60) ? ltrim(gmdate("H:i:s", $seconds), ":0") : gmdate("0:s", $seconds); } public function generatehtmltabs($page, $query){ $html = null; foreach(["web", "images", "videos", "news", "music"] as $type){ $html .= '' . ucfirst($type) . ''; } return $html; } public function generatehtmlfilters($filters, $params){ $html = null; foreach($filters as $filter_name => $filter_values){ if(!isset($filter_values["display"])){ continue; } $output = true; $tmp = '
' . '
' . htmlspecialchars($filter_values["display"]) . '
'; if(is_array($filter_values["option"])){ $tmp .= ''; }else{ switch($filter_values["option"]){ case "_DATE": $tmp .= ' $value){ if( $value == null || $value == false || $key == "npt" || $key == "extendedsearch" || $value == "any" || $value == "all" || $key == "spellcheck" || ( $ommit === true && $key == "s" ) ){ continue; } if( $key == "older" || $key == "newer" ){ $value = date("Y-m-d", (int)$value); } $out[$key] = $value; } return http_build_query($out); } public function htmlimage($image, $format){ if( preg_match( '/^data:/', $image ) ){ return htmlspecialchars($image); } return "/proxy?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ $query = $this->buildquery($gets); return $page . "?" . $query . "&npt=" . $npt; } }