Compare commits
2 Commits
394f401921
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 95819bfe52 | |||
| e1e92d715e |
1
data/api_keys/yep.txt
Normal file
1
data/api_keys/yep.txt
Normal file
@@ -0,0 +1 @@
|
||||
# Paste Yep API keys here
|
||||
@@ -23,6 +23,16 @@ class config{
|
||||
// Enable the API?
|
||||
const API_ENABLED = true;
|
||||
|
||||
//
|
||||
// 4play (session provider)
|
||||
//
|
||||
// Enable 4play API?
|
||||
const FPLAY_ENABLE_API = true;
|
||||
|
||||
// 4play password. Please set this to something secure if you enable the 4play API.
|
||||
// This password is used to POST sessions to /api/v2/provide_sesh
|
||||
const FPLAY_PASSWORD = "1234";
|
||||
|
||||
//
|
||||
// BOT PROTECTION
|
||||
//
|
||||
@@ -118,10 +128,10 @@ class config{
|
||||
|
||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||
// Changing this might break things.
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0";
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
|
||||
|
||||
// User agent to use with 4get-friendly APIs
|
||||
const USER_AGENT_FRIENDLY = "4get-scrapist";
|
||||
const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
|
||||
|
||||
// Proxy pool assignments for each scraper
|
||||
// false = Use server's raw IP
|
||||
@@ -131,7 +141,6 @@ class config{
|
||||
const PROXY_YAHOO = false;
|
||||
const PROXY_YAHOO_JAPAN = false;
|
||||
const PROXY_BRAVE = false;
|
||||
const PROXY_FB = false; // facebook
|
||||
const PROXY_GOOGLE = false;
|
||||
const PROXY_GOOGLE_API = false;
|
||||
const PROXY_GOOGLE_CSE = false;
|
||||
@@ -155,7 +164,6 @@ class config{
|
||||
const PROXY_VIMEO = false;
|
||||
const PROXY_YEP = false;
|
||||
const PROXY_PINTEREST = false;
|
||||
const PROXY_SANKAKUCOMPLEX = false;
|
||||
const PROXY_FLICKR = false;
|
||||
const PROXY_PIXABAY = false;
|
||||
const PROXY_UNSPLASH = false;
|
||||
@@ -164,8 +172,6 @@ class config{
|
||||
const PROXY_VSCO = false;
|
||||
const PROXY_SEZNAM = false;
|
||||
const PROXY_NAVER = false;
|
||||
const PROXY_GREPPR = false;
|
||||
const PROXY_CROWDVIEW = false;
|
||||
const PROXY_MWMBL = false;
|
||||
const PROXY_FTM = false; // findthatmeme
|
||||
const PROXY_IMGUR = false;
|
||||
@@ -173,6 +179,11 @@ class config{
|
||||
const PROXY_YANDEX_W = false; // yandex web
|
||||
const PROXY_YANDEX_I = false; // yandex images
|
||||
const PROXY_YANDEX_V = false; // yandex videos
|
||||
const PROXY_SAFEBOORU = false;
|
||||
const PROXY_KONACHAN = false;
|
||||
const PROXY_YANDERE = false;
|
||||
const PROXY_TBIB = false;
|
||||
const PROXY_GELBOORU = false;
|
||||
|
||||
//
|
||||
// Scraper-specific parameters
|
||||
@@ -185,4 +196,7 @@ class config{
|
||||
// Use "null" to default out to HTML scraping OR specify a string to
|
||||
// use the API (e.g. "public"). The API has fewer filters.
|
||||
const MARGINALIA_API_KEY = null;
|
||||
|
||||
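// Yep: set to true to make the Yep web scraper use the Yep API. API keys are pasted into data/api_keys/yep.txt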
|
||||
const YEP_USE_API = false;
|
||||
}
|
||||
|
||||
170
scraper/yep.php
170
scraper/yep.php
@@ -216,7 +216,7 @@ class yep{
|
||||
];
|
||||
}
|
||||
|
||||
private function get($proxy, $url, $get = []){
|
||||
private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
|
||||
|
||||
$curlproc = curl_init();
|
||||
|
||||
@@ -231,21 +231,37 @@ class yep{
|
||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||
["User-Agent: " . config::USER_AGENT,
|
||||
"Accept: */*",
|
||||
"Accept-Language: en-US,en;q=0.5",
|
||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||
"Referer: https://yep.com/",
|
||||
"Origin: https://yep.com",
|
||||
"DNT: 1",
|
||||
"Connection: keep-alive",
|
||||
"Sec-Fetch-Dest: empty",
|
||||
"Sec-Fetch-Mode: cors",
|
||||
"Sec-Fetch-Site: same-site",
|
||||
"Priority: u=4",
|
||||
"TE: trailers"]
|
||||
);
|
||||
|
||||
if($use_api){
|
||||
|
||||
$post_data = json_encode($post_data);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||
["Content-Type: application/json",
|
||||
"Authorization: Bearer $bearer",
|
||||
"Content-Length: " . strlen($post_data)]
|
||||
);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_POST, true);
|
||||
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
|
||||
}else{
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||
["User-Agent: " . config::USER_AGENT,
|
||||
"Accept: */*",
|
||||
"Accept-Language: en-US,en;q=0.5",
|
||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||
"Referer: https://yep.com/",
|
||||
"Origin: https://yep.com",
|
||||
"DNT: 1",
|
||||
"Connection: keep-alive",
|
||||
"Sec-Fetch-Dest: empty",
|
||||
"Sec-Fetch-Mode: cors",
|
||||
"Sec-Fetch-Site: same-site",
|
||||
"Priority: u=4",
|
||||
"TE: trailers"]
|
||||
);
|
||||
}
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||
@@ -270,6 +286,11 @@ class yep{
|
||||
|
||||
public function web($get){
|
||||
|
||||
if(config::YEP_USE_API){
|
||||
|
||||
return $this->web_api($get);
|
||||
}
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
|
||||
@@ -392,6 +413,123 @@ class yep{
|
||||
}
|
||||
|
||||
|
||||
/**
 * Fetch web results through the Yep platform API and convert them to the
 * scraper's result structure.
 *
 * @param array $get Search parameters. Reads "s" (search term), "nsfw"
 *                   and "lang".
 * @return array Result structure with the keys "status", "spelling", "npt",
 *               "answer", "web", "image", "video", "news" and "related".
 *               Only "web" is populated by this method.
 * @throws Exception When the search term is empty, the request fails, the
 *                   response is not valid JSON, the API reports an error,
 *                   or the response carries no usable "results" list.
 */
private function web_api($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	// request body sent to the API (JSON-encoded by get())
	$filters = [
		"query" => $search,
		"limit" => 100
	];
	
	// safe search is only requested when NSFW results are turned off
	if($get["nsfw"] == "no"){ $filters["safe_search"] = true; }
	
	// the language is sent as a single-element list unless "any" is selected
	if($get["lang"] != "any"){ $filters["language"] = [ $get["lang"] ]; }
	
	// pick an API key; the key's "increment" value selects the proxy handed to get()
	$key_data = $this->backend->get_key();
	
	try{
		
		$json =
			$this->get(
				$this->backend->get_ip($key_data["increment"]),
				"https://platform.yep.com/api/search",
				[],
				true,
				$filters,
				$key_data["key"]
			);
		
	}catch(Exception $error){
		
		// same message as before, but keep the underlying cause reachable
		// through getPrevious() for debugging
		throw new Exception("Failed to fetch JSON", 0, $error);
	}
	
	// should never happen
	//$this->detect_cf($json);
	
	$json = json_decode($json, true);
	//$json = json_decode(file_get_contents("scraper/yep.json"), true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(isset($json["error"])){
		
		// the error may be a structure rather than a plain string; encode
		// it instead of triggering an "Array to string conversion"
		$api_error =
			is_scalar($json["error"]) ?
			(string)$json["error"] :
			json_encode($json["error"]);
		
		throw new Exception("Yep API returned an error: " . $api_error);
	}
	
	if(isset($json["errors"])){
		
		// prefer the human readable message; fall back to the raw error
		// list when the response does not carry one
		$api_message =
			(
				isset($json["message"]) &&
				is_scalar($json["message"])
			) ?
			(string)$json["message"] :
			json_encode($json["errors"]);
		
		throw new Exception("Yep API returned the following errors: {$api_message}");
	}
	
	if(
		isset($json["success"]) &&
		$json["success"] !== true
	){
		
		throw new Exception("Yep API returned a false-y success value");
	}
	
	if(
		!isset($json["results"]) ||
		!is_array($json["results"])
	){
		
		throw new Exception("Yep API did not return a results object");
	}
	
	foreach($json["results"] as $item){
		
		if(
			!is_array($item) ||
			!isset($item["url"]) ||
			$item["url"] == ""
		){
			
			// the API occasionally returns entries without a usable URL
			continue;
		}
		
		$out["web"][] = [
			"title" => isset($item["title"]) ? $item["title"] : null,
			"description" => isset($item["description"]) ? $item["description"] : null,
			"url" => $item["url"],
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
|
||||
|
||||
|
||||
private function detect_cf($payload){
|
||||
|
||||
// detect cloudflare page
|
||||
|
||||
Reference in New Issue
Block a user