diff --git a/data/api_keys/yep.txt b/data/api_keys/yep.txt
new file mode 100644
index 0000000..80f6458
--- /dev/null
+++ b/data/api_keys/yep.txt
@@ -0,0 +1 @@
+# Paste Yep API keys here
diff --git a/data/config.php b/data/config.php
index 161458f..c9b9fb3 100644
--- a/data/config.php
+++ b/data/config.php
@@ -23,6 +23,16 @@ class config{
 	// Enable the API?
 	const API_ENABLED = true;
 
+	//
+	// 4play (session provider)
+	//
+	// Enable 4play API?
+	const FPLAY_ENABLE_API = true;
+
+	// 4play password. Please set this to something secure if you enable the 4play API.
+	// This password is used to POST sessions to /api/v2/provide_sesh
+	const FPLAY_PASSWORD = "1234";
+
 	//
 	// BOT PROTECTION
 	//
@@ -118,10 +128,10 @@ class config{
 
 	// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
 	// Changing this might break things.
-	const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0";
+	const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
 
 	// User agent to use with 4get-friendly APIs
-	const USER_AGENT_FRIENDLY = "4get-scrapist";
+	const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
 
 	// Proxy pool assignments for each scraper
 	// false = Use server's raw IP
@@ -131,7 +141,6 @@ class config{
 	const PROXY_YAHOO = false;
 	const PROXY_YAHOO_JAPAN = false;
 	const PROXY_BRAVE = false;
-	const PROXY_FB = false; // facebook
 	const PROXY_GOOGLE = false;
 	const PROXY_GOOGLE_API = false;
 	const PROXY_GOOGLE_CSE = false;
@@ -155,7 +164,6 @@ class config{
 	const PROXY_VIMEO = false;
 	const PROXY_YEP = false;
 	const PROXY_PINTEREST = false;
-	const PROXY_SANKAKUCOMPLEX = false;
 	const PROXY_FLICKR = false;
 	const PROXY_PIXABAY = false;
 	const PROXY_UNSPLASH = false;
@@ -164,8 +172,6 @@ class config{
 	const PROXY_VSCO = false;
 	const PROXY_SEZNAM = false;
 	const PROXY_NAVER = false;
-	const PROXY_GREPPR = false;
-	const PROXY_CROWDVIEW = false;
 	const PROXY_MWMBL = false;
 	const PROXY_FTM = false; // findthatmeme
 	const PROXY_IMGUR = false;
@@ -173,6 +179,11 @@ class config{
 	const PROXY_YANDEX_W = false; // yandex web
 	const PROXY_YANDEX_I = false; // yandex images
 	const PROXY_YANDEX_V = false; // yandex videos
+	const PROXY_SAFEBOORU = false;
+	const PROXY_KONACHAN = false;
+	const PROXY_YANDERE = false;
+	const PROXY_TBIB = false;
+	const PROXY_GELBOORU = false;
 
 	//
 	// Scraper-specific parameters
@@ -185,4 +196,7 @@ class config{
 	// Use "null" to default out to HTML scraping OR specify a string to
 	// use the API (Eg: "public"). API has less filters.
 	const MARGINALIA_API_KEY = null;
+
+	// Yep
+	const YEP_USE_API = false;
 }
diff --git a/scraper/yep.php b/scraper/yep.php
index ad6a4b0..c1855fb 100644
--- a/scraper/yep.php
+++ b/scraper/yep.php
@@ -216,7 +216,7 @@ class yep{
 		];
 	}
 
-	private function get($proxy, $url, $get = []){
+	private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
 
 		$curlproc = curl_init();
 
@@ -231,21 +231,37 @@ class yep{
 		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
 
 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
-		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
-			["User-Agent: " . config::USER_AGENT,
-			"Accept: */*",
-			"Accept-Language: en-US,en;q=0.5",
-			"Accept-Encoding: gzip, deflate, br, zstd",
-			"Referer: https://yep.com/",
-			"Origin: https://yep.com",
-			"DNT: 1",
-			"Connection: keep-alive",
-			"Sec-Fetch-Dest: empty",
-			"Sec-Fetch-Mode: cors",
-			"Sec-Fetch-Site: same-site",
-			"Priority: u=4",
-			"TE: trailers"]
-		);
+
+		if($use_api){
+
+			$post_data = json_encode($post_data);
+
+			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+				["Content-Type: application/json",
+				"Authorization: Bearer $bearer",
+				"Content-Length: " . strlen($post_data)]
+			);
+
+			curl_setopt($curlproc, CURLOPT_POST, true);
+			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
+		}else{
+
+			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+				["User-Agent: " . config::USER_AGENT,
+				"Accept: */*",
+				"Accept-Language: en-US,en;q=0.5",
+				"Accept-Encoding: gzip, deflate, br, zstd",
+				"Referer: https://yep.com/",
+				"Origin: https://yep.com",
+				"DNT: 1",
+				"Connection: keep-alive",
+				"Sec-Fetch-Dest: empty",
+				"Sec-Fetch-Mode: cors",
+				"Sec-Fetch-Site: same-site",
+				"Priority: u=4",
+				"TE: trailers"]
+			);
+		}
 
 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
@@ -270,6 +286,11 @@ class yep{
 
 	public function web($get){
 
+		if(config::YEP_USE_API){
+
+			return $this->web_api($get);
+		}
+
 		$search = $get["s"];
 		if(strlen($search) === 0){
 
@@ -392,6 +413,126 @@ class yep{
 	}
 
 
+	/**
+	 * Run a web search through the Yep platform API instead of scraping.
+	 * web() hands off to this method when config::YEP_USE_API is enabled.
+	 *
+	 * @param array $get Search parameters; reads "s", "nsfw" and "lang".
+	 * @return array Result structure with one "web" entry per API result.
+	 * @throws Exception If the search term is empty, the request fails,
+	 *                   or the API response is malformed or reports errors.
+	 */
+	private function web_api($get){
+
+		$search = $get["s"];
+		if(strlen($search) === 0){
+
+			throw new Exception("Search term is empty!");
+		}
+
+		$out = [
+			"status" => "ok",
+			"spelling" => [
+				"type" => "no_correction",
+				"using" => null,
+				"correction" => null
+			],
+			"npt" => null,
+			"answer" => [],
+			"web" => [],
+			"image" => [],
+			"video" => [],
+			"news" => [],
+			"related" => []
+		];
+
+		// parse filters
+		$filters = [
+			"query" => $search,
+			"limit" => 100
+		];
+
+		if($get["nsfw"] == "no"){ $filters["safe_search"] = true; }
+		if($get["lang"] != "any"){ $filters["language"] = [ $get["lang"] ]; }
+
+		// add api key
+		$key_data = $this->backend->get_key();
+
+		try{
+
+			$json =
+				$this->get(
+					$this->backend->get_ip($key_data["increment"]),
+					"https://platform.yep.com/api/search",
+					[],
+					true,
+					$filters,
+					$key_data["key"]
+				);
+
+		}catch(Exception $error){
+
+			throw new Exception("Failed to fetch JSON");
+		}
+
+		// should never happen
+		//$this->detect_cf($json);
+
+		$json = json_decode($json, true);
+		//$json = json_decode(file_get_contents("scraper/yep.json"), true);
+
+		if($json === null){
+
+			throw new Exception("Failed to decode JSON");
+		}
+
+		if(isset($json["error"])){
+
+			throw new Exception("Yep API returned an error: " . $json["error"]);
+		}
+
+		if(isset($json["errors"])){
+
+			// only "errors" is known to be set here; fall back to it when "message" is absent
+			$message = isset($json["message"]) ? $json["message"] : json_encode($json["errors"]);
+
+			throw new Exception("Yep API returned the following errors: " . $message);
+		}
+
+		if(
+			isset($json["success"]) &&
+			$json["success"] !== true
+		){
+
+			throw new Exception("Yep API returned a false-y success value");
+		}
+
+		if(!isset($json["results"])){
+
+			throw new Exception("Yep API did not return a results object");
+		}
+
+		foreach($json["results"] as $item){
+
+			$out["web"][] = [
+				"title" => $item["title"],
+				"description" => $item["description"],
+				"url" => $item["url"],
+				"date" => null,
+				"type" => "web",
+				"thumb" => [
+					"url" => null,
+					"ratio" => null
+				],
+				"sublink" => [],
+				"table" => []
+			];
+		}
+
+		return $out;
+	}
+
+
 	private function detect_cf($payload){
 
 		// detect cloudflare page