Compare commits

..

4 Commits

Author SHA1 Message Date
95819bfe52 yep api fix 2026-05-20 11:05:41 -04:00
e1e92d715e add support for yep api 2026-05-20 11:01:18 -04:00
394f401921 Merge pull request 'add lwthiker/curl-impersonate' (#94) from docker-curl-impersonate into master
Reviewed-on: #94
2026-05-20 05:02:30 +00:00
cf3c77ed04 add lwthiker/curl-impersonate 2026-05-19 17:08:10 -07:00
4 changed files with 186 additions and 24 deletions

View File

@@ -1,8 +1,17 @@
FROM lwthiker/curl-impersonate:0.6.1-ff-alpine AS curl-impersonate
FROM alpine:3.21 FROM alpine:3.21
WORKDIR /var/www/html/4get WORKDIR /var/www/html/4get
RUN apk update && apk upgrade RUN apk update && apk upgrade
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg nss ca-certificates
COPY --from=curl-impersonate /usr/local/bin /usr/local/bin
COPY --from=curl-impersonate /usr/local/lib /usr/local/lib
ENV LD_PRELOAD=/usr/local/lib/libcurl-impersonate-ff.so
ENV CURL_IMPERSONATE=ff117
ENV CURL_IMPERSONATE_HEADERS=no
COPY . . COPY . .

1
data/api_keys/yep.txt Normal file
View File

@@ -0,0 +1 @@
# Paste Yep API keys here

View File

@@ -23,6 +23,16 @@ class config{
// Enable the API? // Enable the API?
const API_ENABLED = true; const API_ENABLED = true;
//
// 4play (session provider)
//
// Enable 4play API?
const FPLAY_ENABLE_API = true;
// 4play password. Please set this to something secure if you enable the 4play API.
// This password is used to POST sessions to /api/v2/provide_sesh
const FPLAY_PASSWORD = "1234";
// //
// BOT PROTECTION // BOT PROTECTION
// //
@@ -118,10 +128,10 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things. // Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0"; const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
// User agent to use with 4get-friendly APIs // User agent to use with 4get-friendly APIs
const USER_AGENT_FRIENDLY = "4get-scrapist"; const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
// Proxy pool assignments for each scraper // Proxy pool assignments for each scraper
// false = Use server's raw IP // false = Use server's raw IP
@@ -131,7 +141,6 @@ class config{
const PROXY_YAHOO = false; const PROXY_YAHOO = false;
const PROXY_YAHOO_JAPAN = false; const PROXY_YAHOO_JAPAN = false;
const PROXY_BRAVE = false; const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false; const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false; const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false; const PROXY_GOOGLE_CSE = false;
@@ -155,7 +164,6 @@ class config{
const PROXY_VIMEO = false; const PROXY_VIMEO = false;
const PROXY_YEP = false; const PROXY_YEP = false;
const PROXY_PINTEREST = false; const PROXY_PINTEREST = false;
const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false; const PROXY_FLICKR = false;
const PROXY_PIXABAY = false; const PROXY_PIXABAY = false;
const PROXY_UNSPLASH = false; const PROXY_UNSPLASH = false;
@@ -164,8 +172,6 @@ class config{
const PROXY_VSCO = false; const PROXY_VSCO = false;
const PROXY_SEZNAM = false; const PROXY_SEZNAM = false;
const PROXY_NAVER = false; const PROXY_NAVER = false;
const PROXY_GREPPR = false;
const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false; const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false; const PROXY_IMGUR = false;
@@ -173,6 +179,11 @@ class config{
const PROXY_YANDEX_W = false; // yandex web const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos const PROXY_YANDEX_V = false; // yandex videos
const PROXY_SAFEBOORU = false;
const PROXY_KONACHAN = false;
const PROXY_YANDERE = false;
const PROXY_TBIB = false;
const PROXY_GELBOORU = false;
// //
// Scraper-specific parameters // Scraper-specific parameters
@@ -185,4 +196,7 @@ class config{
// Use "null" to default out to HTML scraping OR specify a string to // Use "null" to default out to HTML scraping OR specify a string to
// use the API (Eg: "public"). API has less filters. // use the API (Eg: "public"). API has less filters.
const MARGINALIA_API_KEY = null; const MARGINALIA_API_KEY = null;
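// Yep: set to true to make web searches use the Yep API (platform.yep.com) instead of the default scraper.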
const YEP_USE_API = false;
} }

View File

@@ -216,7 +216,7 @@ class yep{
]; ];
} }
private function get($proxy, $url, $get = []){ private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
$curlproc = curl_init(); $curlproc = curl_init();
@@ -231,6 +231,21 @@ class yep{
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
if($use_api){
$post_data = json_encode($post_data);
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["Content-Type: application/json",
"Authorization: Bearer $bearer",
"Content-Length: " . strlen($post_data)]
);
curl_setopt($curlproc, CURLOPT_POST, true);
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
}else{
curl_setopt($curlproc, CURLOPT_HTTPHEADER, curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT, ["User-Agent: " . config::USER_AGENT,
"Accept: */*", "Accept: */*",
@@ -246,6 +261,7 @@ class yep{
"Priority: u=4", "Priority: u=4",
"TE: trailers"] "TE: trailers"]
); );
}
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
@@ -270,6 +286,11 @@ class yep{
public function web($get){ public function web($get){
if(config::YEP_USE_API){
return $this->web_api($get);
}
$search = $get["s"]; $search = $get["s"];
if(strlen($search) === 0){ if(strlen($search) === 0){
@@ -392,6 +413,123 @@ class yep{
} }
/**
 * Run a web search through the Yep API instead of the regular scraper.
 *
 * Sends the query (plus optional safe-search and language filters) to
 * https://platform.yep.com/api/search using a key obtained from the
 * backend, then maps every usable result onto the "web" list of the
 * standard response array.
 *
 * @param array $get Search parameters. Reads "s" (search term), and
 *                   optionally "nsfw" and "lang".
 * @return array Response array with "status", "spelling", "npt", "answer",
 *               "web", "image", "video", "news" and "related" keys; only
 *               "web" is populated here.
 * @throws Exception When the search term is empty, the request fails, the
 *                   payload is not a JSON object/array, or the API reports
 *                   an error.
 */
private function web_api($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	// parse filters
	$filters = [
		"query" => $search,
		"limit" => 100
	];
	
	// Missing filter keys fall back to "no filter" rather than sending
	// a null language to the API.
	$nsfw = $get["nsfw"] ?? "yes";
	$lang = $get["lang"] ?? "any";
	
	if($nsfw == "no"){ $filters["safe_search"] = true; }
	if($lang != "any"){ $filters["language"] = [ $lang ]; }
	
	// add api key
	$key_data = $this->backend->get_key();
	
	try{
		$json =
			$this->get(
				$this->backend->get_ip($key_data["increment"]),
				"https://platform.yep.com/api/search",
				[],
				true,
				$filters,
				$key_data["key"]
			);
		
	}catch(Exception $error){
		
		// keep the original failure attached for debugging
		throw new Exception("Failed to fetch JSON", 0, $error);
	}
	
	// cloudflare detection is intentionally skipped for the API endpoint
	//$this->detect_cf($json);
	
	$json = json_decode($json, true);
	
	// a bare scalar is valid JSON but useless here
	if(!is_array($json)){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(isset($json["error"])){
		
		// the error field is not guaranteed to be a plain string
		$error_text =
			is_string($json["error"]) ?
			$json["error"] :
			json_encode($json["error"]);
		
		throw new Exception("Yep API returned an error: " . $error_text);
	}
	
	if(isset($json["errors"])){
		
		// prefer the human readable message, fall back to the raw errors
		$details =
			(
				isset($json["message"]) &&
				is_string($json["message"])
			) ?
			$json["message"] :
			json_encode($json["errors"]);
		
		throw new Exception("Yep API returned the following errors: " . $details);
	}
	
	if(
		isset($json["success"]) &&
		$json["success"] !== true
	){
		
		throw new Exception("Yep API returned a false-y success value");
	}
	
	if(
		!isset($json["results"]) ||
		!is_array($json["results"])
	){
		
		throw new Exception("Yep API did not return a results object");
	}
	
	foreach($json["results"] as $item){
		
		$url = $item["url"] ?? null;
		
		if(
			$url === null ||
			$url == ""
		){
			
			// the API occasionally returns entries without a URL
			continue;
		}
		
		$out["web"][] = [
			"title" => $item["title"] ?? null,
			"description" => $item["description"] ?? null,
			"url" => $url,
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
private function detect_cf($payload){ private function detect_cf($payload){
// detect cloudflare page // detect cloudflare page