diff --git a/data/config.php b/data/config.php index e0f061c..161458f 100644 --- a/data/config.php +++ b/data/config.php @@ -135,8 +135,6 @@ class config{ const PROXY_GOOGLE = false; const PROXY_GOOGLE_API = false; const PROXY_GOOGLE_CSE = false; - const PROXY_MULLVAD_GOOGLE = false; - const PROXY_MULLVAD_BRAVE = false; const PROXY_STARTPAGE = false; const PROXY_QWANT = false; const PROXY_BAIDU = false; @@ -160,6 +158,8 @@ class config{ const PROXY_SANKAKUCOMPLEX = false; const PROXY_FLICKR = false; const PROXY_PIXABAY = false; + const PROXY_UNSPLASH = false; + const PROXY_PEXELS = false; const PROXY_FIVEHPX = false; const PROXY_VSCO = false; const PROXY_SEZNAM = false; diff --git a/lib/frontend.php b/lib/frontend.php index da3b626..7cf4e24 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -487,368 +487,6 @@ class frontend{ '' . '
'; - /* - Add archive links - */ - if( - $host["host"] == "boards.4chan.org" || - $host["host"] == "boards.4channel.org" - ){ - - $archives = []; - $path = explode("/", $host["path"]); - $count = count($path); - // /pol/thread/417568063/post-shitty-memes-if-you-want-to - - if($count !== 0){ - - $isboard = true; - - switch($path[1]){ - - case "con": - break; - - case "q": - $archives[] = "desuarchive.org"; - break; - - case "qa": - $archives[] = "desuarchive.org"; - break; - - case "qb": - $archives[] = "arch.b4k.co"; - break; - - case "trash": - $archives[] = "desuarchive.org"; - break; - - case "a": - $archives[] = "desuarchive.org"; - break; - - case "c": - $archives[] = "desuarchive.org"; - break; - - case "w": - break; - - case "m": - $archives[] = "desuarchive.org"; - break; - - case "cgl": - $archives[] = "desuarchive.org"; - $archives[] = "warosu.org"; - break; - - case "f": - $archives[] = "archive.4plebs.org"; - break; - - case "n": - break; - - case "jp": - $archives[] = "warosu.org"; - break; - - case "vt": - $archives[] = "warosu.org"; - break; - - case "v": - $archives[] = "arch.b4k.co"; - break; - - case "vg": - $archives[] = "arch.b4k.co"; - break; - - case "vm": - $archives[] = "arch.b4k.co"; - break; - - case "vmg": - $archives[] = "arch.b4k.co"; - break; - - case "vp": - $archives[] = "arch.b4k.co"; - break; - - case "vr": - $archives[] = "desuarchive.org"; - $archives[] = "warosu.org"; - break; - - case "vrpg": - $archives[] = "arch.b4k.co"; - break; - - case "vst": - $archives[] = "arch.b4k.co"; - break; - - case "co": - $archives[] = "desuarchive.org"; - break; - - case "g": - $archives[] = "desuarchive.org"; - $archives[] = "arch.b4k.co"; - break; - - case "tv": - $archives[] = "archive.4plebs.org"; - break; - - case "k": - $archives[] = "desuarchive.org"; - break; - - case "o": - $archives[] = "archive.4plebs.org"; - break; - - case "an": - $archives[] = "desuarchive.org"; - break; - - case "tg": - $archives[] = "desuarchive.org"; - $archives[] = "archive.4plebs.org"; - break; - - case "sp": - $archives[] = "archive.4plebs.org"; - break; - - case "xs": - $archives[] = "eientei.xyz"; - break; - - case "pw": - break; - - case "sci": - $archives[] = "warosu.org"; - $archives[] = "eientei.xyz"; - break; - - case "his": - $archives[] = "desuarchive.org"; - break; - - case "int": - $archives[] = "desuarchive.org"; - break; - - case "out": - break; - - case "toy": - break; - - case "i": - $archives[] = "archiveofsins.com"; - $archives[] = "eientei.xyz"; - break; - - case "po": - break; - - case "p": - break; - - case "ck": - $archives[] = "warosu.org"; - break; - - case "ic": - $archives[] = "warosu.org"; - break; - - case "wg": - break; - - case "lit": - $archives[] = "warosu.org"; - break; - - case "mu": - $archives[] = "desuarchive.org"; - break; - - case "fa": - $archives[] = "warosu.org"; - break; - - case "3": - $archives[] = "warosu.org"; - $archives[] = "eientei.xyz"; - break; - - case "gd": - break; - - case "diy": - $archives[] = "warosu.org"; - break; - - case "wsg": - $archives[] = "desuarchive.org"; - break; - - case "qst": - break; - - case "biz": - $archives[] = "warosu.org"; - break; - - case "trv": - $archives[] = "archive.4plebs.org"; - break; - - case "fit": - $archives[] = "desuarchive.org"; - break; - - case "x": - $archives[] = "archive.4plebs.org"; - break; - - case "adv": - $archives[] = "archive.4plebs.org"; - break; - - case "lgbt": - $archives[] = "archiveofsins.com"; - break; - - case "mlp": - $archives[] = "desuarchive.org"; - $archives[] = "arch.b4k.co"; - break; - - case "news": - break; - - case "wsr": - break; - - case "vip": - break; - - case "b": - $archives[] = "thebarchive.com"; - break; - - case "r9k": - $archives[] = "desuarchive.org"; - break; - - case "pol": - $archives[] = "archive.4plebs.org"; - break; - - case "bant": - $archives[] = "thebarchive.com"; - break; - - case "soc": - $archives[] = "archiveofsins.com"; - break; - - case "s4s": - $archives[] = "archive.4plebs.org"; - break; - - case "s": - $archives[] = "archiveofsins.com"; - break; - - case "hc": - $archives[] = "archiveofsins.com"; - break; - - case "hm": - $archives[] = "archiveofsins.com"; - break; - - case "h": - $archives[] = "archiveofsins.com"; - break; - - case "e": - break; - - case "u": - $archives[] = "archiveofsins.com"; - break; - - case "d": - $archives[] = "desuarchive.org"; - break; - - case "t": - $archives[] = "archiveofsins.com"; - break; - - case "hr": - $archives[] = "archive.4plebs.org"; - break; - - case "gif": - break; - - case "aco": - $archives[] = "desuarchive.org"; - break; - - case "r": - $archives[] = "archiveofsins.com"; - break; - - default: - $isboard = false; - break; - } - - if($isboard === true){ - - $archives[] = "archived.moe"; - } - - $trail = ""; - - if( - isset($path[2]) && - isset($path[3]) && - $path[2] == "thread" - ){ - - $trail .= "/" . $path[1] . "/thread/" . $path[3]; - }elseif($isboard){ - - $trail = "/" . $path[1] . "/"; - } - - for($i=0; $i' . - '' . $archives[$i][0] . $archives[$i][1] . '' . - $archives[$i] . - ''; - } - } - } - $payload .= 'arArchive.org' . 'arArchive.is' . @@ -994,7 +632,9 @@ class frontend{ "pinterest" => "Pinterest", "cara" => "Cara", "flickr" => "Flickr", + "pexels" => "Pexels", "pixabay" => "Pixabay", + "unsplash" => "Unsplash", "fivehpx" => "500px", "vsco" => "VSCO", "imgur" => "Imgur", diff --git a/scraper/pexels.php b/scraper/pexels.php new file mode 100644 index 0000000..13af6ae --- /dev/null +++ b/scraper/pexels.php @@ -0,0 +1,285 @@ +backend = new backend("pexels"); + } + + public function getfilters($page){ + + return [ + "time" => [ // date_from + "display" => "Time posted", + "option" => [ + "any" => "Any time", + "last_24_hours" => "Last 24 hours", + "last_week" => "Last week", + "last_month" => "Last month", + "last_year" => "Last year" + ] + ], + "orientation" => [ + "display" => "Orientation", + "option" => [ + "any" => "Any orientation", + "landscape" => "Horizontal", + "portrait" => "Vertical", + "square" => "Square" + ] + ], + "color" => [ + "display" => "Color", + "option" => [ + "any" => "Any color", + "795548" => "Brown", + "F44336" => "Red", + "E91E63" => "Hot pink", + "9C27B0" => "Magenta", + "673AB7" => "Purple", + "3F51B5" => "Indigo", + "2196F3" => "Blue", + "03A9F4" => "Light blue", + "00BCD4" => "Cyan", + "009688" => "Forest green", + "4CAF50" => "Green", + "8BC34A" => "Lime", + "CDDC39" => "Pear", + "FFEB3B" => "Yellow", + "FFC107" => "Gold", + "FF9800" => "Orange", + "FF5722" => "Tomato", + "9E9E9E" => "Gray", + "607D8B" => "Teal", + "000000" => "Black", + "FFFFFF" => "White" + ] + ], + "people_count" => [ + "display" => "Head count", + "option" => [ + "any" => "Any number", + "0" => "0", + "1" => "1", + "2" => "2", + "3_plus" => "3+", + ] + ], + "people_age" => [ + "display" => "People's age", + "option" => [ + "any" => "Any age", + "baby" => "Baby", + "child" => "Child", + "teenager" => "Teenager", + "adult" => "Adult", + "senior_adult" => "Senior adult" + ] + ] + ]; + } + + private function get($proxy, $url, $get = []){ + + $curlproc = curl_init(); + + $search = $get["query"]; + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: */*", + "Accept-Language: en-US,en;q=0.9", + "Accept-Encoding: gzip, deflate, br, zstd", + "Referer: https://www.pexels.com/search/" . rawurlencode($search) . "/", + "Content-Type: application/json", + "secret-key: H2jk9uKnhRmL6WPwh89zBezWvr", // hardcoded but like, people on github have been using this shit since 23' + "X-Client-Type: react", + "X-Next-Forwarded-CF-Connecting-IP: ", + "X-Next-Forwarded-CF-IPCountry: ", + "X-Next-Forwarded-CF-IPRegionCode: ", + "X-Active-Experiment: ", + "DNT: 1", + "Sec-GPC: 1", + "Alt-Used: www.pexels.com", + "Connection: keep-alive", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-origin", + "Priority: u=4", + "TE: trailers"] + ); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function image($get){ + + if($get["npt"]){ + + [$filter, $proxy] = + $this->backend->get( + $get["npt"], + "images" + ); + + $filter = json_decode($filter, true); + + }else{ + + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $proxy = $this->backend->get_ip(); + + // ?query=blue%20footed%20booby&page=1&per_page=24&seo_tags=true + $filter = [ + "query" => $search, + "page" => 1, + "per_page" => 24, + "seo_tags" => "true" + ]; + + // add filters + if($get["time"] != "any"){ + + $filter["date_from"] = $get["time"]; + } + + if($get["orientation"] != "any"){ + + $filter["orientation"] = $get["orientation"]; + } + + if($get["color"] != "any"){ + + $filter["color"] = $get["color"]; + } + + if($get["people_count"] != "any"){ + + $filter["people_count"] = $get["people_count"]; + } + + if($get["people_age"] != "any"){ + + $filter["people_age"] = $get["people_age"]; + } + } + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; + + try{ + $html = + $this->get( + $proxy, + "https://www.pexels.com/en-us/api/v3/search/photos", + $filter + ); + + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } + + $json = json_decode($html, true); + + if(!isset($json["data"])){ + + throw new Exception("Pexels did not return a data object"); + } + + foreach($json["data"] as $image){ + + $thumb_size = + $this->imgratio( + (int)$image["attributes"]["width"], + (int)$image["attributes"]["height"], + 350 + ); + + $out["image"][] = [ + "title" => $image["attributes"]["title"] . ": " . $image["attributes"]["description"], + "source" => [ + [ + "url" => $image["attributes"]["image"]["download_link"], + "width" => (int)$image["attributes"]["width"], + "height" => (int)$image["attributes"]["height"] + ], + [ + "url" => + preg_replace( + '/(?:w|h)=[0-9]+$/', + "w=350", + $image["attributes"]["image"]["small"] + ), + "width" => $thumb_size[0], + "height" => $thumb_size[1] + ] + ], + "url" => + "https://pexels.com/photo/" . + $image["attributes"]["slug"] . "-" . + $image["attributes"]["id"] . "/" + ]; + } + + // get next page + if((int)$json["pagination"]["current_page"] < (int)$json["pagination"]["total_pages"]){ + + $filter["page"]++; + + $out["npt"] = + $this->backend->store( + json_encode($filter), + "images", + $proxy + ); + } + + return $out; + } + + private function imgratio($width, $height, $max_width){ + + $ratio = $max_width / $width; + $new_height = floor($height * $ratio); + + return [ + $max_width, + $new_height + ]; + } +} diff --git a/scraper/unsplash.php b/scraper/unsplash.php new file mode 100644 index 0000000..dc57686 --- /dev/null +++ b/scraper/unsplash.php @@ -0,0 +1,267 @@ +fuckhtml = new fuckhtml(); + + include "lib/backend.php"; + $this->backend = new backend("unsplash"); + } + + public function getfilters($page){ + + return [ + "order_by" => [ + "display" => "Order by", + "option" => [ + "relevance" => "Relevance", + "latest" => "Newest", + "editorial" => "Curated" + ] + ], + "orientation" => [ + "display" => "Order by", + "option" => [ + "any" => "Any orientation", + "landscape" => "Landscape", + "portrait" => "Portrait", + "squarish" => "Square" + ] + ], + "license" => [ + "display" => "License", + "option" => [ + "any" => "Any license", + "only" => "Unsplash+", + "none" => "Free" + ] + ] + ]; + } + + private function get($proxy, $url, $get = [], $referer){ + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: */*", + "Accept-Language: en-US", + "Accept-Encoding: gzip, deflate, br, zstd", + "Referer: {$referer}", + "client-geo-region: global", + "x-client-version: 8999df28be3f138bf2c646df5d656e4dc6970ba0", + "DNT: 1", + "Sec-GPC: 1", + "Connection: keep-alive", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-origin", + "Priority: u=0", + "TE: trailers"] + ); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function image($get){ + + if($get["npt"]){ + + [$filter, $proxy] = + $this->backend->get( + $get["npt"], + "images" + ); + + $filter = json_decode($filter, true); + + }else{ + + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $proxy = $this->backend->get_ip(); + + $filter = [ + "page" => 1, + "per_page" => 20, + "query" => $search + ]; + + // add filters + if($get["order_by"] != "relevance"){ + + $filter["order_by"] = $get["order_by"]; + } + + if($get["orientation"] != "any"){ + + $filter["orientation"] = $get["orientation"]; + } + + if($get["license"] != "any"){ + + $filter["plus"] = $get["license"]; + } + } + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; + + // https://unsplash.com/s/photos/shibuya-night?order_by=latest&orientation=landscape&license=free + // https://unsplash.com/s/photos/%3Ctest-%3F!-haha == get( + $proxy, + "https://unsplash.com/napi/search/photos", + $filter, + "https://unsplash.com/s/photos/" . rawurlencode(str_replace(" ", "-", $filter["query"])), + ); + + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } + + $json = json_decode($json, true); + + if($json === null){ + + throw new Exception("Could not decode the JSON payload"); + } + + foreach($json["results"] as $image){ + + $base = explode("?", $image["urls"]["raw"])[0]; + + if( + (bool)$image["premium"] || + (bool)$image["plus"] + ){ + + // when we get "premium" images, give + // 1. highest resolution with watermarks + // 2. highest resolution without watermarks + // (if width of image is above 900px, it has watermarks) + // 3. thumbnail without watermark + $x900 = $this->imgratio((int)$image["width"], (int)$image["height"], 900); + $x500 = $this->imgratio((int)$image["width"], (int)$image["height"], 500); + + $source = [ + [ + "url" => $base, + "width" => (int)$image["width"], + "height" => (int)$image["height"] + ], + [ + "url" => $base . "?w=900", + "width" => $x900[0], + "height" => $x900[1] + ], + [ + "url" => $base . "?w=500", + "width" => $x500[0], + "height" => $x500[1] + ] + ]; + }else{ + + $x500 = $this->imgratio((int)$image["width"], (int)$image["height"], 500); + + // image is free as in freedom(tm) + $source = [ + [ + "url" => $base, + "width" => (int)$image["width"], + "height" => (int)$image["height"] + ], + [ + "url" => $base . "?w=500", + "width" => $x500[0], + "height" => $x500[1] + ] + ]; + } + + $title = []; + + $image["description"] = trim($image["description"]); + $image["alt_description"] = trim($image["alt_description"]); + + if(!empty($image["description"])){ $title[] = $image["description"]; } + if(!empty($image["alt_description"])){ $title[] = $image["alt_description"]; } + + $title = implode(": ", $title); + + $out["image"][] = [ + "title" => $title, + "source" => $source, + "url" => "https://unsplash.com/photos/" . $image["slug"] + ]; + } + + // next page stuff + if($filter["page"] < (int)$json["total_pages"]){ + + $filter["page"]++; + + $out["npt"] = + $this->backend->store( + json_encode($filter), + "images", + $proxy + ); + } + + return $out; + } + + private function imgratio($width, $height, $max_width){ + + $ratio = $max_width / $width; + $new_height = floor($height * $ratio); + + return [ + $max_width, + $new_height + ]; + } +} diff --git a/settings.php b/settings.php index f7f8b16..e1b9dbe 100644 --- a/settings.php +++ b/settings.php @@ -264,13 +264,21 @@ $settings = [ "text" => "Flickr" ], [ - "value" => "fivehpx", - "text" => "500px" + "value" => "pexels", + "text" => "Pexels" ], [ "value" => "pixabay", "text" => "Pixabay" ], + [ + "value" => "unsplash", + "text" => "Unsplash" + ], + [ + "value" => "fivehpx", + "text" => "500px" + ], [ "value" => "vsco", "text" => "VSCO"