Compare commits
4 Commits
25e8095d0d
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 95819bfe52 | |||
| e1e92d715e | |||
| 394f401921 | |||
| cf3c77ed04 |
11
Dockerfile
11
Dockerfile
@@ -1,8 +1,17 @@
|
|||||||
|
FROM lwthiker/curl-impersonate:0.6.1-ff-alpine AS curl-impersonate
|
||||||
|
|
||||||
FROM alpine:3.21
|
FROM alpine:3.21
|
||||||
WORKDIR /var/www/html/4get
|
WORKDIR /var/www/html/4get
|
||||||
|
|
||||||
RUN apk update && apk upgrade
|
RUN apk update && apk upgrade
|
||||||
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg
|
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg nss ca-certificates
|
||||||
|
|
||||||
|
COPY --from=curl-impersonate /usr/local/bin /usr/local/bin
|
||||||
|
COPY --from=curl-impersonate /usr/local/lib /usr/local/lib
|
||||||
|
|
||||||
|
ENV LD_PRELOAD=/usr/local/lib/libcurl-impersonate-ff.so
|
||||||
|
ENV CURL_IMPERSONATE=ff117
|
||||||
|
ENV CURL_IMPERSONATE_HEADERS=no
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
|||||||
1
data/api_keys/yep.txt
Normal file
1
data/api_keys/yep.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Paste Yep API keys here
|
||||||
@@ -23,6 +23,16 @@ class config{
|
|||||||
// Enable the API?
|
// Enable the API?
|
||||||
const API_ENABLED = true;
|
const API_ENABLED = true;
|
||||||
|
|
||||||
|
//
|
||||||
|
// 4play (session provider)
|
||||||
|
//
|
||||||
|
// Enable 4play API?
|
||||||
|
const FPLAY_ENABLE_API = true;
|
||||||
|
|
||||||
|
// 4play password. Please set this to something secure if you enable the 4play API.
|
||||||
|
// This password is used to POST sessions to /api/v2/provide_sesh
|
||||||
|
const FPLAY_PASSWORD = "1234";
|
||||||
|
|
||||||
//
|
//
|
||||||
// BOT PROTECTION
|
// BOT PROTECTION
|
||||||
//
|
//
|
||||||
@@ -118,10 +128,10 @@ class config{
|
|||||||
|
|
||||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||||
// Changing this might break things.
|
// Changing this might break things.
|
||||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0";
|
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
|
||||||
|
|
||||||
// User agent to use with 4get-friendly APIs
|
// User agent to use with 4get-friendly APIs
|
||||||
const USER_AGENT_FRIENDLY = "4get-scrapist";
|
const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
|
||||||
|
|
||||||
// Proxy pool assignments for each scraper
|
// Proxy pool assignments for each scraper
|
||||||
// false = Use server's raw IP
|
// false = Use server's raw IP
|
||||||
@@ -131,7 +141,6 @@ class config{
|
|||||||
const PROXY_YAHOO = false;
|
const PROXY_YAHOO = false;
|
||||||
const PROXY_YAHOO_JAPAN = false;
|
const PROXY_YAHOO_JAPAN = false;
|
||||||
const PROXY_BRAVE = false;
|
const PROXY_BRAVE = false;
|
||||||
const PROXY_FB = false; // facebook
|
|
||||||
const PROXY_GOOGLE = false;
|
const PROXY_GOOGLE = false;
|
||||||
const PROXY_GOOGLE_API = false;
|
const PROXY_GOOGLE_API = false;
|
||||||
const PROXY_GOOGLE_CSE = false;
|
const PROXY_GOOGLE_CSE = false;
|
||||||
@@ -155,7 +164,6 @@ class config{
|
|||||||
const PROXY_VIMEO = false;
|
const PROXY_VIMEO = false;
|
||||||
const PROXY_YEP = false;
|
const PROXY_YEP = false;
|
||||||
const PROXY_PINTEREST = false;
|
const PROXY_PINTEREST = false;
|
||||||
const PROXY_SANKAKUCOMPLEX = false;
|
|
||||||
const PROXY_FLICKR = false;
|
const PROXY_FLICKR = false;
|
||||||
const PROXY_PIXABAY = false;
|
const PROXY_PIXABAY = false;
|
||||||
const PROXY_UNSPLASH = false;
|
const PROXY_UNSPLASH = false;
|
||||||
@@ -164,8 +172,6 @@ class config{
|
|||||||
const PROXY_VSCO = false;
|
const PROXY_VSCO = false;
|
||||||
const PROXY_SEZNAM = false;
|
const PROXY_SEZNAM = false;
|
||||||
const PROXY_NAVER = false;
|
const PROXY_NAVER = false;
|
||||||
const PROXY_GREPPR = false;
|
|
||||||
const PROXY_CROWDVIEW = false;
|
|
||||||
const PROXY_MWMBL = false;
|
const PROXY_MWMBL = false;
|
||||||
const PROXY_FTM = false; // findthatmeme
|
const PROXY_FTM = false; // findthatmeme
|
||||||
const PROXY_IMGUR = false;
|
const PROXY_IMGUR = false;
|
||||||
@@ -173,6 +179,11 @@ class config{
|
|||||||
const PROXY_YANDEX_W = false; // yandex web
|
const PROXY_YANDEX_W = false; // yandex web
|
||||||
const PROXY_YANDEX_I = false; // yandex images
|
const PROXY_YANDEX_I = false; // yandex images
|
||||||
const PROXY_YANDEX_V = false; // yandex videos
|
const PROXY_YANDEX_V = false; // yandex videos
|
||||||
|
const PROXY_SAFEBOORU = false;
|
||||||
|
const PROXY_KONACHAN = false;
|
||||||
|
const PROXY_YANDERE = false;
|
||||||
|
const PROXY_TBIB = false;
|
||||||
|
const PROXY_GELBOORU = false;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Scraper-specific parameters
|
// Scraper-specific parameters
|
||||||
@@ -185,4 +196,7 @@ class config{
|
|||||||
// Use "null" to default out to HTML scraping OR specify a string to
|
// Use "null" to default out to HTML scraping OR specify a string to
|
||||||
// use the API (Eg: "public"). API has less filters.
|
// use the API (Eg: "public"). API has less filters.
|
||||||
const MARGINALIA_API_KEY = null;
|
const MARGINALIA_API_KEY = null;
|
||||||
|
|
||||||
|
// Yep
|
||||||
|
const YEP_USE_API = false;
|
||||||
}
|
}
|
||||||
|
|||||||
170
scraper/yep.php
170
scraper/yep.php
@@ -216,7 +216,7 @@ class yep{
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = []){
|
private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
@@ -231,21 +231,37 @@ class yep{
|
|||||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
if($use_api){
|
||||||
"Accept: */*",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
$post_data = json_encode($post_data);
|
||||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
|
||||||
"Referer: https://yep.com/",
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||||
"Origin: https://yep.com",
|
["Content-Type: application/json",
|
||||||
"DNT: 1",
|
"Authorization: Bearer $bearer",
|
||||||
"Connection: keep-alive",
|
"Content-Length: " . strlen($post_data)]
|
||||||
"Sec-Fetch-Dest: empty",
|
);
|
||||||
"Sec-Fetch-Mode: cors",
|
|
||||||
"Sec-Fetch-Site: same-site",
|
curl_setopt($curlproc, CURLOPT_POST, true);
|
||||||
"Priority: u=4",
|
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
|
||||||
"TE: trailers"]
|
}else{
|
||||||
);
|
|
||||||
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||||
|
["User-Agent: " . config::USER_AGENT,
|
||||||
|
"Accept: */*",
|
||||||
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
|
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||||
|
"Referer: https://yep.com/",
|
||||||
|
"Origin: https://yep.com",
|
||||||
|
"DNT: 1",
|
||||||
|
"Connection: keep-alive",
|
||||||
|
"Sec-Fetch-Dest: empty",
|
||||||
|
"Sec-Fetch-Mode: cors",
|
||||||
|
"Sec-Fetch-Site: same-site",
|
||||||
|
"Priority: u=4",
|
||||||
|
"TE: trailers"]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||||
@@ -270,6 +286,11 @@ class yep{
|
|||||||
|
|
||||||
public function web($get){
|
public function web($get){
|
||||||
|
|
||||||
|
if(config::YEP_USE_API){
|
||||||
|
|
||||||
|
return $this->web_api($get);
|
||||||
|
}
|
||||||
|
|
||||||
$search = $get["s"];
|
$search = $get["s"];
|
||||||
if(strlen($search) === 0){
|
if(strlen($search) === 0){
|
||||||
|
|
||||||
@@ -392,6 +413,123 @@ class yep{
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
	Web search through the Yep platform API (JSON over POST) instead of
	scraping the HTML frontend.

	$get is the filter array handed to the scraper. This method reads:
	  "s"    => search term (required, must be non-empty)
	  "nsfw" => when equal to "no", safe_search is requested
	  "lang" => when present and not "any", sent as a one-element language list

	Returns the scraper result array ("status", "spelling", "npt", "answer",
	"web", "image", "video", "news", "related"). Only "web" is populated here.

	Throws Exception when the search term is empty, the request fails, the
	payload is not valid JSON, the API reports an error, or no results
	list is present.
*/
private function web_api($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	// parse filters
	$filters = [
		"query" => $search,
		"limit" => 100
	];
	
	// Missing filter keys fall back to "no restriction" so we never emit
	// undefined-key warnings or send language: [null] to the API.
	if(($get["nsfw"] ?? null) == "no"){ $filters["safe_search"] = true; }
	if(($get["lang"] ?? "any") != "any"){ $filters["language"] = [ $get["lang"] ]; }
	
	// add api key
	// NOTE(review): get_key() is expected to return an array carrying "key"
	// and "increment" -- confirm against the backend class.
	$key_data = $this->backend->get_key();
	
	try{
		
		$json =
			$this->get(
				$this->backend->get_ip($key_data["increment"]),
				"https://platform.yep.com/api/search",
				[],
				true,
				$filters,
				$key_data["key"]
			);
		
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch JSON");
	}
	
	// should never happen
	//$this->detect_cf($json);
	
	$json = json_decode($json, true);
	//$json = json_decode(file_get_contents("scraper/yep.json"), true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(isset($json["error"])){
		
		// "error" may be a structured value; encode it rather than
		// triggering an "Array to string conversion" warning.
		$reason =
			is_scalar($json["error"]) ?
			$json["error"] :
			json_encode($json["error"]);
		
		throw new Exception("Yep API returned an error: " . $reason);
	}
	
	if(isset($json["errors"])){
		
		// Prefer the human-readable message when the API supplies one,
		// otherwise fall back to the raw errors payload.
		$detail =
			(isset($json["message"]) && is_scalar($json["message"])) ?
			$json["message"] :
			json_encode($json["errors"]);
		
		throw new Exception("Yep API returned the following errors: " . $detail);
	}
	
	if(
		isset($json["success"]) &&
		$json["success"] !== true
	){
		
		throw new Exception("Yep API returned a false-y success value");
	}
	
	if(
		!isset($json["results"]) ||
		!is_array($json["results"])
	){
		
		throw new Exception("Yep API did not return a results object");
	}
	
	foreach($json["results"] as $item){
		
		if(
			!is_array($item) ||
			!isset($item["url"]) ||
			$item["url"] == ""
		){
			
			// the API sometimes returns entries without a usable URL
			continue;
		}
		
		$out["web"][] = [
			"title" => $item["title"] ?? null,
			"description" => $item["description"] ?? null,
			"url" => $item["url"],
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
|
||||||
|
|
||||||
|
|
||||||
private function detect_cf($payload){
|
private function detect_cf($payload){
|
||||||
|
|
||||||
// detect cloudflare page
|
// detect cloudflare page
|
||||||
|
|||||||
Reference in New Issue
Block a user