captcha and imgur, findthatmeme, yep imagesearch

This commit is contained in:
lolcat 2023-10-16 02:30:43 -04:00
parent fa9dc4d6ef
commit 3aa0180774
26 changed files with 1710 additions and 63 deletions

View File

@ -23,6 +23,9 @@ https://4get.ca
- Yandex
- Google
- Brave
- Yep
- Imgur
- FindThatMeme
3. Videos
- YouTube
@ -39,6 +42,17 @@ https://4get.ca
5. Music
- SoundCloud
6. Autocompleter
- Brave
- DuckDuckGo
- Yandex
- Google
- Qwant
- Yep
- Marginalia
- YouTube
- SoundCloud
More scrapers are coming soon. I currently want to add Google web/video/news search, HackerNews (durr orange site!!) and Qwant. A shopping and files tab is also in my todo list.
# Setup
@ -166,7 +180,11 @@ certbot --nginx --key-type ecdsa -d www.yourdomain.com -d yourdomain.com
After doing that certbot should deploy the certificate automatically into your 4get nginx config file. It should be ready to use at that point.
Ok bye!!!
## Captcha
Right now the setup for this shit is absolutely awful.
Edit line 190 in `lib/captcha_gen.php` and specify your image sets. You can't disable the captcha right now lol. Just use a previous commit if you want to do that. Call me a shitcoder all you want I've had no energy lately. Images must be stored in `data/captcha`. Create a folder for each category. All files in there should be named from `1.png` to `321839.png`, for example.
## Tor Setup

0
admin.php Normal file
View File

18
api.txt
View File

@ -30,12 +30,25 @@
under your own terms. Please respect the terms of use listed here so
that this website may be available to all in the far future.
P.s fuck whoever botted my site for months on end, choke on my dick
lol!!!!
Get your instance running here ::
https://git.lolcat.ca/lolcat/4get
Thanks!
+ Passes
Depending on the instance, you may need to provide a "pass" token
in the cookies of your request. These can be obtained by solving
a captcha, which will allow you to make 100 requests in the next 24
hours. In the future, you will be able to ask the server maintainer
for a "pass" which will allow you to bypass the captcha requirement.
The captcha doesn't need javascript to work.
+ Decode the data
All payloads returned by the API are encoded in the JSON format. If
you don't know how to tackle the problem, maybe programming is not
@ -47,8 +60,11 @@
+ Check if an API call was successful
All API responses come with an array index named "status". If the
status is anything other than the string "ok", something went wrong.
You can supply the content of the "status" string back to your
application to inform the user of what went wrong.
The HTTP code will always be 200 as to not cause issues with CORS.
The HTTP code will be 429 if your pass is invalid. It is set to 200
otherwise.
+ Get the next page of results

View File

@ -17,7 +17,7 @@ class autocomplete{
"yep" => "https://api.yep.com/ac/?query={searchTerms}",
"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
"sc" => "https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en"
"sc" => "https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=ArYppSEotE3YiXCO4Nsgid2LLqJutiww&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en"
];
/*
@ -100,7 +100,7 @@ class autocomplete{
foreach($js[1] as $item){
$json[] = strip_tags($item[0]);
$json[] = htmlspecialchars_decode(strip_tags($item[0]));
}
echo json_encode(

View File

@ -7,6 +7,12 @@ chdir("../../");
include "lib/frontend.php";
$frontend = new frontend();
/*
Captcha
*/
// Gate this endpoint behind the captcha pass. captcha::__construct() takes
// ($frontend, $get, $filters, $page, $output); all five are required, so the
// page-only arguments are passed as null. $output = false selects the API
// behavior (JSON error instead of an HTML captcha page).
include "lib/captcha_gen.php";
new captcha($frontend, null, null, null, false);
[$scraper, $filters] = $frontend->getscraperfilters(
"images",
isset($_GET["scraper"]) ? $_GET["scraper"] : null

View File

@ -7,6 +7,13 @@ chdir("../../");
include "lib/frontend.php";
$frontend = new frontend();
/*
Captcha
*/
$null = null;
include "lib/captcha_gen.php";
new captcha($null, $null, $null, $null, false);
[$scraper, $filters] = $frontend->getscraperfilters(
"music",
isset($_GET["scraper"]) ? $_GET["scraper"] : null

View File

@ -7,6 +7,12 @@ chdir("../../");
include "lib/frontend.php";
$frontend = new frontend();
/*
Captcha
*/
// Gate this endpoint behind the captcha pass. captcha::__construct() takes
// ($frontend, $get, $filters, $page, $output); all five are required, so the
// page-only arguments are passed as null. $output = false selects the API
// behavior (JSON error instead of an HTML captcha page).
include "lib/captcha_gen.php";
new captcha($frontend, null, null, null, false);
[$scraper, $filters] = $frontend->getscraperfilters(
"news",
isset($_GET["scraper"]) ? $_GET["scraper"] : null

View File

@ -7,6 +7,12 @@ chdir("../../");
include "lib/frontend.php";
$frontend = new frontend();
/*
Captcha
*/
// Gate this endpoint behind the captcha pass. captcha::__construct() takes
// ($frontend, $get, $filters, $page, $output); all five are required, so the
// page-only arguments are passed as null. $output = false selects the API
// behavior (JSON error instead of an HTML captcha page).
include "lib/captcha_gen.php";
new captcha($frontend, null, null, null, false);
[$scraper, $filters] = $frontend->getscraperfilters(
"videos",
isset($_GET["scraper"]) ? $_GET["scraper"] : null

View File

@ -7,6 +7,12 @@ chdir("../../");
include "lib/frontend.php";
$frontend = new frontend();
/*
Captcha
*/
// Gate this endpoint behind the captcha pass. captcha::__construct() takes
// ($frontend, $get, $filters, $page, $output); all five are required, so the
// page-only arguments are passed as null. $output = false selects the API
// behavior (JSON error instead of an HTML captcha page).
include "lib/captcha_gen.php";
new captcha($frontend, null, null, null, false);
[$scraper, $filters] = $frontend->getscraperfilters(
"web",
isset($_GET["scraper"]) ? $_GET["scraper"] : null

147
captcha.php Executable file
View File

@ -0,0 +1,147 @@
<?php

/*
	Captcha image endpoint

	Renders the 4x4 image grid described by the APCu entry named in
	$_GET["k"] and sends it to the client as a JPEG. The entry is created
	by lib/captcha_gen.php and has the layout:
		[0] => list of 16 [category, image count] pairs (one per tile)
		[1] => number of correct tiles
		[2] => category the user must pick
		[3] => whether the image has already been rendered
	Each key can only be rendered once.
*/

/*
	Validate key
	
	Only accept string keys shaped like the ones captcha_gen.php generates
	("c.<counter>.<random>"). Anything else is refused, otherwise a caller
	could point this script at unrelated APCu entries (the "cookie" or
	"captcha_gen" counters, pass tokens) and have them overwritten by the
	apcu_store() call below.
*/
if(
	!isset($_GET["k"]) ||
	!is_string($_GET["k"]) ||
	preg_match(
		'/^c\.[0-9]+\.[0-9]+$/',
		$_GET["k"]
	) !== 1
){
	
	header("Content-Type: text/plain");
	echo "Fuck you";
	die();
}

header("Content-Type: image/jpeg");

$grid = apcu_fetch($_GET["k"]);

if(
	!is_array($grid) || // expired, unknown or not a captcha entry
	($grid[3] ?? true) === true // has already been generated
){
	
	http_response_code(304); // not modified
	die();
}

header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");

// only generate one captcha with this config
apcu_store(
	$_GET["k"],
	[
		$grid[0],
		$grid[1],
		$grid[2],
		true // has captcha been generated?
	],
	120 // we give user another 2 minutes to solve
);

// generate image: pick light-on-dark or dark-on-light at random
if(random_int(0,1) === 0){
	
	$theme = [
		"bg" => "#ebdbb2",
		"fg" => "#1d2021"
	];
}else{
	
	$theme = [
		"bg" => "#1d2021",
		"fg" => "#ebdbb2"
	];
}

$im = new Imagick();
$im->newImage(400, 400, $theme["bg"]);
$im->setImageBackgroundColor($theme["bg"]);
$im->setImageFormat("jpg");

$noise = [
	imagick::NOISE_GAUSSIAN,
	imagick::NOISE_LAPLACIAN
];

$distort = [
	imagick::DISTORTION_AFFINE,
	imagick::DISTORTION_SHEPARDS
];

// paste one randomly chosen, randomly distorted picture per tile,
// walking the grid row by row ($i is the tile index, 0 to 15)
$i = 0;
for($y=0; $y<4; $y++){
	
	for($x=0; $x<4; $x++){
		
		// $grid[0][$i] is [category folder, number of images in it]
		$tmp = new Imagick("./data/captcha/" . $grid[0][$i][0] . "/" . random_int(1, $grid[0][$i][1]) . ".png");
		
		// convert transparency correctly
		$tmp->setImageBackgroundColor("black");
		$tmp->setImageAlphaChannel(Imagick::ALPHACHANNEL_REMOVE);
		
		// distort $tmp
		// control points come in groups of four numbers:
		// source x, source y, destination x, destination y
		$tmp->distortImage(
			$distort[random_int(0,1)],
			[
				0, 0,
				random_int(-15, 15), random_int(-15, 15),
				
				100, 0,
				random_int(80, 120), random_int(-15, 15),
				
				100, 100,
				random_int(80, 120), random_int(80, 120),
				
				0, 100,
				random_int(-15, 15), random_int(80, 120)
			],
			false
		);
		
		// append image
		$im->compositeImage($tmp->getImage(), Imagick::COMPOSITE_DEFAULT, $x * 100, $y * 100);
		
		$i++;
	}
}

// add noise
$im->addNoiseImage($noise[random_int(0, 1)]);

// expand top of image
$im->setImageGravity(Imagick::GRAVITY_SOUTH);
$im->chopImage(0, -27, 400, 400);
$im->extentImage(0, 0, 0, -27);

// add text
$draw = new ImagickDraw();
$draw->setFontSize(20);
$draw->setFillColor($theme["fg"]);
//$draw->setTextAntialias(false);
$draw->setFont("./data/captcha/font.ttf");

$text = "Pick " . $grid[1] . " images of " . str_replace("_", " ", $grid[2]);

// start so that the whole sentence is horizontally centered
$pos = 200 - ($im->queryFontMetrics($draw, $text)["textWidth"] / 2);

// draw the sentence one character at a time so that each character
// gets its own random rotation
for($i=0; $i<strlen($text); $i++){
	
	$im->annotateImage(
		$draw,
		$pos,
		20,
		random_int(-15, 15),
		$text[$i]
	);
	
	$pos += $im->queryFontMetrics($draw, $text[$i])["textWidth"];
}

$im->setFormat("jpeg");
$im->setImageCompressionQuality(90);
$im->setImageCompression(Imagick::COMPRESSION_JPEG2000);
echo $im->getImageBlob();

BIN
data/captcha/font.ttf Normal file

Binary file not shown.

View File

@ -10,11 +10,11 @@ $frontend = new frontend();
$get = $frontend->parsegetfilters($_GET, $filters);
$frontend->loadheader(
$get,
$filters,
"images"
);
/*
Captcha
*/
include "lib/captcha_gen.php";
new captcha($frontend, $get, $filters, "images", true);
$payload = [
"images" => "",

325
lib/captcha_gen.php Normal file
View File

@ -0,0 +1,325 @@
<?php

/*
	Captcha gate
	
	Instantiating this class either lets the request through (valid "pass"
	cookie, or a captcha that was just solved) or terminates the request
	with a captcha page (HTML pages) or a JSON error (API endpoints).
*/
class captcha{
	
	/*
		$frontend  frontend instance used to render the header and the
		           captcha page; unused when $output is false
		$get       forwarded to $frontend->loadheader()
		$filters   forwarded to $frontend->loadheader()
		$page      page name forwarded to $frontend->loadheader()
		$output    true  => HTML page: the header is rendered here and the
		                    captcha form is shown when there is no pass
		           false => API endpoint: a JSON error is returned when
		                    there is no pass, nothing is rendered
		
		Returns normally only when the request may proceed; every other
		path ends in die().
	*/
	public function __construct($frontend, $get, $filters, $page, $output){
		
		/*
			Validate cookie, if it exists
		*/
		if(
			isset($_COOKIE["pass"]) &&
			// an array-valued cookie would make preg_match throw
			is_string($_COOKIE["pass"]) &&
			// check if key is not malformed
			preg_match(
				'/^c[0-9]+\.[A-Za-z0-9]{20}$/',
				$_COOKIE["pass"]
			) === 1 &&
			// does key exist
			apcu_exists($_COOKIE["pass"])
		){
			
			// exists, increment counter
			$inc = apcu_inc($_COOKIE["pass"]);
			
			// we start counting from 1
			// when it has been incremented to 102, it has reached
			// 100 reqs
			if($inc >= 102){
				
				// reached limit, delete and give captcha
				apcu_delete($_COOKIE["pass"]);
			}else{
				
				// the cookie is OK! dont die() and give results
				if($output === true){
					
					$frontend->loadheader(
						$get,
						$filters,
						$page
					);
				}
				return;
			}
		}
		
		if($output === false){
			
			// the API documentation promises HTTP 429 for a missing or
			// invalid pass
			http_response_code(429);
			
			echo json_encode([
				"status" => "The \"pass\" token in your cookies is missing or has expired!!"
			]);
			die();
		}
		
		/*
			Validate form data
			
			The form is submitted as text/plain: one "name=value" pair per
			line. Checked boxes arrive as "c[N]=on", followed by the hidden
			captcha key as "k=c.<...>".
		*/
		$lines =
			explode(
				"\r\n",
				file_get_contents("php://input")
			);
		
		$invalid = false;
		$answers = [];
		$key = false;
		$error = "";
		
		foreach($lines as $line){
			
			$line = explode("=", $line, 2);
			
			if(count($line) !== 2){
				
				$invalid = true;
				break;
			}
			
			preg_match(
				'/^c\[([0-9]+)\]$/',
				$line[0],
				$regex
			);
			
			if(
				$line[1] !== "on" ||
				!isset($regex[1]) // no checkbox index captured
			){
				
				// check if its k
				if(
					$line[0] === "k" &&
					strpos($line[1], "c.") === 0
				){
					
					// a captcha can only be answered once
					$key = apcu_fetch($line[1]);
					apcu_delete($line[1]);
				}
				break;
			}
			
			$position = (int)$regex[1];
			
			if(
				$position >= 16 ||
				$position <= -1
			){
				
				$invalid = true;
				break;
			}
			
			$answers[] = $position;
		}
		
		if(
			!$invalid &&
			is_array($key) // false when the captcha expired or never existed
		){
			
			// $key is [grid, number of correct tiles, wanted category, ...]
			$check = $key[1];
			
			// validate answer: every ticked tile must belong to the wanted
			// category, and all of them must have been ticked
			foreach($key[0] as $i => $tile){
				
				if(!in_array($i, $answers, true)){
					
					continue;
				}
				
				if($tile[0] == $key[2]){
					
					$check--;
				}else{
					
					// got a wrong answer
					$check = -1;
					break;
				}
			}
			
			if($check === 0){
				
				// we passed the captcha
				// set cookie
				$inc = apcu_inc("cookie");
				
				$chars =
					array_merge(
						range("A", "Z"),
						range("a", "z"),
						range(0, 9)
					);
				
				$c = count($chars) - 1;
				
				$pass = "c" . $inc . ".";
				
				for($i=0; $i<20; $i++){
					
					$pass .= $chars[random_int(0, $c)];
				}
				
				// creates the request counter for this pass, valid 24 hours
				apcu_inc($pass, 1, $stupid, 86400);
				
				setcookie(
					"pass",
					$pass,
					[
						"expires" => time() + 86400, // expires in 24 hours
						"samesite" => "Strict",
						"path" => "/"
					]
				);
				
				$frontend->loadheader(
					$get,
					$filters,
					$page
				);
				return;
			}else{
				
				$error = "<div class=\"quote\">You were <a href=\"https://www.youtube.com/watch?v=e1d7fkQx2rk\" target=\"_BLANK\" rel=\"noreferrer nofollow\">kicked out of Mensa.</a> Please try again.</div>";
			}
		}
		
		/*
			Generate random grid data to pass to captcha.php
		*/
		// [category folder under data/captcha, number of images in it]
		$dataset = [
			["birds", 2263],
			["fumo_plushies", 1006],
			["minecraft", 848]
		];
		
		// get the positions for the answers
		// will return between 3 and 6 answer positions
		$range = range(0, 15);
		$answer_pos = [];
		
		// NOTE(review): this drops position 0 from the pool, so the first
		// tile is never a correct answer -- kept as-is, confirm intent
		array_splice($range, 0, 1);
		
		// roll the amount once; rolling it again on every loop check made
		// the higher amounts much less likely than the lower ones
		$answer_count = random_int(3, 6);
		
		for($i=0; $i<$answer_count; $i++){
			
			// $range holds 15 - $i candidates at this point
			$answer_pos_tmp =
				array_splice(
					$range,
					random_int(
						0,
						14 - $i
					),
					1
				);
			
			$answer_pos[] = $answer_pos_tmp[0];
		}
		
		// choose a dataset
		$choosen = $dataset[random_int(0, count($dataset) - 1)];
		
		// every other dataset is used as filler
		$choices = [];
		
		foreach($dataset as $candidate){
			
			if($candidate[0] == $choosen[0]){
				
				continue;
			}
			
			$choices[] = $candidate;
		}
		
		// generate grid data
		$grid = [];
		
		for($i=0; $i<16; $i++){
			
			if(in_array($i, $answer_pos, true)){
				
				$grid[] = $choosen;
			}else{
				
				$grid[] = $choices[random_int(0, count($choices) - 1)];
			}
		}
		
		$key = "c." . apcu_inc("captcha_gen", 1) . "." . random_int(0, 100000000);
		
		apcu_store(
			$key,
			[
				$grid,
				count($answer_pos),
				$choosen[0],
				false // has captcha been generated?
			],
			120 // we give user 2 minutes to get captcha, in case of network error
		);
		
		// one checkbox + label per tile, in grid order
		$controls = "";
		
		for($i=0; $i<16; $i++){
			
			$controls .=
				'<input type="checkbox" name="c[' . $i . ']" id="c' . $i . '">' .
				'<label for="c' . $i . '"></label>';
		}
		
		$payload = [
			"class" => "",
			"right-left" => "",
			"right-right" => "",
			"left" =>
				'<div class="infobox">' .
					'<h1>IQ test</h1>' .
					'Due to getting hit with 20,000 bot requests per day, I had to put this up. Sorry.<br><br>' .
					'Solving this captcha will allow you to make 100 searches today. I will add a way for legit users to bypass the captcha later. Sorry /g/tards!!' .
					$error .
					'<form method="POST" enctype="text/plain" autocomplete="off">' .
						'<div class="captcha-wrapper">' .
							'<div class="captcha">' .
								'<img src="captcha.php?k=' . $key . '" alt="Captcha image">' .
								'<div class="captcha-controls">' .
									$controls .
								'</div>' .
							'</div>' .
						'</div>' .
						'<input type="hidden" name="k" value="' . $key . '">' .
						'<input type="submit" value="Check IQ" class="captcha-submit">' .
					'</form>' .
				'</div>'
		];
		
		http_response_code(429); // too many reqs
		
		// NOTE(review): the header is always rendered for "web" here, not
		// for $page -- presumably because the payload uses the web layout
		$frontend->loadheader(
			$get,
			$filters,
			"web"
		);
		
		echo $frontend->load("search.html", $payload);
		die();
	}
}

View File

@ -127,6 +127,11 @@ class proxy{
throw new Exception("Too many redirects");
}
if($url == "https://i.imgur.com/removed.png"){
throw new Exception("Encountered imgur 404");
}
// sanitize URL
if($this->validateurl($url) === false){

View File

@ -901,7 +901,11 @@ class frontend{
"ddg" => "DuckDuckGo",
"yandex" => "Yandex",
"brave" => "Brave",
"google" => "Google"
"google" => "Google",
"yep" => "Yep",
//"pinterest" => "Pinterest",
"imgur" => "Imgur",
"ftm" => "FindThatMeme"
]
];
break;
@ -1011,10 +1015,30 @@ class frontend{
$lib = new wiby();
break;
case "yep":
include "scraper/yep.php";
$lib = new yep();
break;
case "sc":
include "scraper/sc.php";
$lib = new sc();
break;
case "pinterest":
include "scraper/pinterest.php";
$lib = new pinterest();
break;
case "imgur":
include "scraper/imgur.php";
$lib = new imgur();
break;
case "ftm":
include "scraper/ftm.php";
$lib = new ftm();
break;
}
// set scraper on $_GET

View File

@ -26,7 +26,7 @@ class nextpage{
apcu_store(
$page . "." .
$this->scraper .
(string)($key),
(string)$key,
gzdeflate($salt.$iv.$out.$tag),
900 // cache information for 15 minutes blaze it
);

View File

@ -10,11 +10,11 @@ $frontend = new frontend();
$get = $frontend->parsegetfilters($_GET, $filters);
$frontend->loadheader(
$get,
$filters,
"music"
);
/*
Captcha
*/
include "lib/captcha_gen.php";
new captcha($frontend, $get, $filters, "music", true);
$payload = [
"class" => "",

View File

@ -10,11 +10,11 @@ $frontend = new frontend();
$get = $frontend->parsegetfilters($_GET, $filters);
$frontend->loadheader(
$get,
$filters,
"news"
);
/*
Captcha
*/
include "lib/captcha_gen.php";
new captcha($frontend, $get, $filters, "news", true);
$payload = [
"class" => "",

148
scraper/ftm.php Normal file
View File

@ -0,0 +1,148 @@
<?php

/*
	FindThatMeme image scraper
*/
class ftm{
	
	// nextpage instance used to store and retrieve pagination tokens
	public $nextpage;
	
	public function __construct(){
		
		include "lib/nextpage.php";
		$this->nextpage = new nextpage("ftm");
	}
	
	/*
		This scraper exposes no filters, whatever the page.
	*/
	public function getfilters($page){
		
		return [];
	}
	
	/*
		POSTs a JSON search request to $url and returns the raw response
		body.
		
		$search  search terms
		$offset  offset sent in the "offset" field of the request
		
		Throws an Exception carrying the curl error message on failure.
	*/
	private function get($url, $search, $offset){
		
		$curlproc = curl_init();
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		$payload =
			json_encode(
				[
					"search" => $search,
					"offset" => $offset
				]
			);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Content-Length: " . strlen($payload),
			"Content-Type: application/json",
			"DNT: 1",
			"Connection: keep-alive",
			"Origin: https://findthatmeme.com",
			"Referer: https://findthatmeme.com/?search=" . urlencode($search),
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1",
			"X-Auth-Key: undefined",
			"X-CSRF-Validation-Header: true"]
		);
		
		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle, then release
			// it so a failed request does not leak it
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Image search.
		
		$get["s"]    search terms
		$get["npt"]  next page token, falsy for the first page
		
		Returns ["status" => "ok", "npt" => token|null, "image" => [...]].
		Throws an Exception when the request or the JSON decoding fails.
	*/
	public function image($get){
		
		$search = $get["s"];
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		// the page token stores the offset of the next page
		if($get["npt"]){
			
			$offset = (int)$this->nextpage->get($get["npt"], "images");
		}else{
			
			$offset = 0;
		}
		
		try{
			$json =
				json_decode(
					$this->get(
						"https://findthatmeme.com/api/v1/search",
						$search,
						$offset
					),
					true
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON", 0, $error);
		}
		
		// null on malformed JSON; anything but a list cannot be iterated
		if(!is_array($json)){
			
			throw new Exception("Failed to decode JSON");
		}
		
		// results received on THIS page
		$returned = 0;
		
		foreach($json as $item){
			
			$returned++;
			
			// videos are shown through their thumbnail
			if($item["type"] == "VIDEO"){
				
				$thumb = "thumb/" . $item["thumbnail"];
			}else{
				
				$thumb = $item["image_path"];
			}
			
			$out["image"][] = [
				"title" => date("jS \of F Y @ g:ia", strtotime($item["created_at"])),
				"source" => [
					[
						"url" =>
							"https://findthatmeme.us-southeast-1.linodeobjects.com/" .
							$thumb,
						"width" => null,
						"height" => null
					]
				],
				"url" => $item["source_page_url"]
			];
		}
		
		// a page of exactly 50 results is treated as "there may be more".
		// The per-page count is compared here: comparing the cumulative
		// offset against 50 only ever matched on the first page, which
		// ended pagination after page 2.
		if($returned === 50){
			
			$out["npt"] =
				$this->nextpage->store(
					$offset + $returned,
					"images"
				);
		}
		
		return $out;
	}
}

249
scraper/imgur.php Normal file
View File

@ -0,0 +1,249 @@
<?php

/*
	Imgur image scraper
*/
class imgur{
	
	// nextpage instance used to store and retrieve pagination tokens
	public $nextpage;
	
	// fuckhtml instance used to query the returned HTML
	public $fuckhtml;
	
	public function __construct(){
		
		include "lib/nextpage.php";
		$this->nextpage = new nextpage("imgur");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/*
		Returns the filters offered by this scraper. The trailing comments
		name the URL segment or query parameter each filter ends up in.
	*/
	public function getfilters($page){
		
		return [
			"sort" => [ // /score/
				"display" => "Sort by",
				"option" => [
					"score" => "Highest scoring",
					"relevance" => "Most relevant",
					"time" => "Newest first"
				]
			],
			"time" => [ // /score/day/
				"display" => "Time posted",
				"option" => [
					"all" => "All time",
					"day" => "Today",
					"week" => "This week",
					"month" => "This month",
					"year" => "This year"
				]
			],
			"format" => [ // q_type
				"display" => "Format",
				"option" => [
					"any" => "Any format",
					"jpg" => "JPG",
					"png" => "PNG",
					"gif" => "GIF",
					"anigif" => "Animated GIF",
					"album" => "Albums"
				]
			],
			"size" => [ // q_size_px
				"display" => "Size",
				"option" => [
					"any" => "Any size",
					"small" => "Small (500px or less)",
					"med" => "Medium (500px to 2000px)",
					"big" => "Big (2000px to 5000px)",
					"lrg" => "Large (5000px to 10000px)",
					"huge" => "Huge (10000px and above)"
				]
			]
		];
	}
	
	/*
		GETs $url and returns the raw response body. When $get is not
		empty it is appended as "?scrolled&<query string>".
		
		Throws an Exception carrying the curl error message on failure.
	*/
	private function get($url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$get = http_build_query($get);
			$url .= "?scrolled&" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Referer: https://imgur.com/search/",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-origin",
			"TE: trailers",
			"X-Requested-With: XMLHttpRequest"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle, then release
			// it so a failed request does not leak it
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Image search.
		
		First page: reads "s", "sort", "time", "format" and "size" from
		$get. Following pages: everything is restored from the token in
		$get["npt"].
		
		Returns ["status" => "ok", "npt" => token|null, "image" => [...]].
		Throws an Exception when the page token cannot be decoded or the
		request fails.
	*/
	public function image($get){
		
		if($get["npt"]){
			
			$filter =
				json_decode(
					$this->nextpage->get(
						$get["npt"],
						"images"
					),
					true
				);
			
			if(!is_array($filter)){
				
				throw new Exception("Failed to decode next page token");
			}
			
			// pull our own bookkeeping out of the token; what is left in
			// $filter is the query string to send
			$search = $filter["s"];
			$sort = $filter["sort"];
			$time = $filter["time"];
			$format = $filter["format"];
			$size = $filter["size"];
			$page = $filter["page"];
			
			unset(
				$filter["s"],
				$filter["sort"],
				$filter["time"],
				$filter["format"],
				$filter["size"],
				$filter["page"]
			);
		}else{
			
			$search = $get["s"];
			$sort = $get["sort"];
			$time = $get["time"];
			$format = $get["format"];
			$size = $get["size"];
			$page = 0;
			
			$filter = [
				"q" => $search
			];
			
			if($format != "any"){
				
				$filter["q_type"] = $format;
			}
			
			if($size != "any"){
				
				$filter["q_size_px"] = $size;
				$filter["q_size_is_mpx"] = "off";
			}
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		try{
			$html =
				$this->get(
					"https://imgur.com/search/$sort/$time/page/$page",
					$filter
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch HTML", 0, $error);
		}
		
		$this->fuckhtml->load($html);
		
		$posts =
			$this->fuckhtml
			->getElementsByClassName(
				"post",
				"div"
			);
		
		foreach($posts as $post){
			
			$this->fuckhtml->load($post);
			
			$images =
				$this->fuckhtml
				->getElementsByTagName("img");
			
			// skip posts without a usable thumbnail instead of failing
			// on a missing index
			if(!isset($images[0]["attributes"]["src"])){
				
				continue;
			}
			
			$image = $images[0];
			
			// drop the last 5 characters of the thumbnail URL; an
			// extension is appended again below
			// NOTE(review): presumably a size suffix plus ".jpg" -- confirm
			$image_url = "https:" . substr($this->fuckhtml->getTextContent($image["attributes"]["src"]), 0, -5);
			
			$title =
				$this->fuckhtml
				->getTextContent(
					$image["attributes"]["alt"] ?? ""
				);
			
			$links =
				$this->fuckhtml
				->getElementsByClassName(
					"image-list-link",
					"a"
				);
			
			// skip posts without a link to the post page
			if(!isset($links[0]["attributes"]["href"])){
				
				continue;
			}
			
			$out["image"][] = [
				"title" => $title,
				"source" => [
					[
						"url" => $image_url . ".jpg",
						"width" => null,
						"height" => null
					],
					[
						"url" => $image_url . "m.jpg",
						"width" => null,
						"height" => null
					]
				],
				"url" =>
					"https://imgur.com" .
					$this->fuckhtml
					->getTextContent(
						$links[0]["attributes"]["href"]
					)
			];
		}
		
		if(isset($out["image"][0])){
			
			// store nextpage: the query string plus our own bookkeeping
			$filter["s"] = $search;
			$filter["sort"] = $sort;
			$filter["time"] = $time;
			$filter["format"] = $format;
			$filter["size"] = $size;
			$filter["page"] = $page + 1;
			
			$out["npt"] =
				$this->nextpage->store(
					json_encode($filter),
					"images"
				);
		}
		
		return $out;
	}
}

224
scraper/pinterest.php Normal file
View File

@ -0,0 +1,224 @@
<?php

/*
	Pinterest image scraper
*/
class pinterest{
	
	// nextpage instance used to store and retrieve pagination tokens
	public $nextpage;
	
	public function __construct(){
		
		include "lib/nextpage.php";
		$this->nextpage = new nextpage("pinterest");
	}
	
	/*
		This scraper exposes no filters, whatever the page.
	*/
	public function getfilters($page){
		
		return [];
	}
	
	/*
		GETs $url (with $get appended as the query string when it is not
		empty) and returns the raw response body.
		
		Throws an Exception carrying the curl error message on failure.
	*/
	private function get($url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle, then release
			// it so a failed request does not leak it
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Image search for $get["s"].
		
		Returns ["status" => "ok", "npt" => null, "image" => [...]]; no
		next page token is produced.
		Throws an Exception when the request or the JSON decoding fails.
	*/
	public function image($get){
		
		$search = $get["s"];
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		$filter = [
			"source_url" => "/search/pins/?q=" . urlencode($search),
			"rs" => "typed",
			"data" =>
				json_encode(
					[
						"options" => [
							"article" => null,
							"applied_filters" => null,
							"appliedProductFilters" => "---",
							"auto_correction_disabled" => false,
							"corpus" => null,
							"customized_rerank_type" => null,
							"filters" => null,
							"query" => $search,
							"query_pin_sigs" => null,
							"redux_normalize_feed" => true,
							"rs" => "typed",
							"scope" => "pins", // pins, boards, videos,
							"source_id" => null
						],
						"context" => []
					]
				),
			// current time in milliseconds. Computed arithmetically:
			// slicing the string form of the float came out too short
			// whenever microtime() printed fewer than four decimals
			"_" => (string)(int)(microtime(true) * 1000)
		];
		
		try{
			$json =
				json_decode(
					$this->get(
						"https://www.pinterest.ca/resource/BaseSearchResource/get/",
						$filter
					),
					true
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON", 0, $error);
		}
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		//print_r($json);
		
		// a response without a result list yields an empty result set
		$results = $json["resource_response"]["data"]["results"] ?? [];
		
		if(!is_array($results)){
			
			$results = [];
		}
		
		foreach($results as $item){
			
			switch($item["type"] ?? null){
				
				case "pin":
					
					/*
						Handle image object
					*/
					if(
						!isset($item["images"]) ||
						!is_array($item["images"]) ||
						$item["images"] === []
					){
						
						// nothing to show for this pin
						break;
					}
					
					$images = array_values($item["images"]);
					
					$image = $images[count($images) - 1]; // original
					$thumb = $images[1] ?? $image; // 236x
					
					// title: "grid title: description", falling back to
					// the board name, then to null
					$title = [];
					
					if(
						isset($item["grid_title"]) &&
						trim($item["grid_title"]) != ""
					){
						
						$title[] = $item["grid_title"];
					}
					
					if(
						isset($item["description"]) &&
						trim($item["description"]) != ""
					){
						
						$title[] = $item["description"];
					}
					
					$title = implode(": ", $title);
					
					if(
						$title == "" &&
						isset($item["board"]["name"]) &&
						trim($item["board"]["name"]) != ""
					){
						
						$title = $item["board"]["name"];
					}
					
					if($title == ""){
						
						$title = null;
					}
					
					$out["image"][] = [
						"title" => $title,
						"source" => [
							[
								"url" => $image["url"],
								"width" => (int)$image["width"],
								"height" => (int)$image["height"]
							],
							[
								"url" => $thumb["url"],
								"width" => (int)$thumb["width"],
								"height" => (int)$thumb["height"]
							]
						],
						"url" => "https://www.pinterest.com/pin/" . $item["id"]
					];
					break;
				
				case "board":
					// TODO: boards are recognised but not emitted yet.
					// Their cover is found in $item["cover_pin"]
					// ("image_url", "size" => [width, height]) or, failing
					// that, in $item["image_cover_url_hd"].
					break;
			}
		}
		
		return $out;
	}
	
	/*
		Rewrites the size segment of an i.pinimg.com URL (the first path
		component) to "1200x" when $has_og is truthy, "originals"
		otherwise.
	*/
	private function getfullresimage($image, $has_og){
		
		$has_og = $has_og ? "1200x" : "originals";
		
		return
			preg_replace(
				'/https:\/\/i\.pinimg\.com\/[^\/]+\//',
				"https://i.pinimg.com/" . $has_og . "/",
				$image
			);
	}
}

370
scraper/yep.php Normal file
View File

@ -0,0 +1,370 @@
<?php
class yep{
/*
	Loads lib/nextpage.php and creates the nextpage instance this scraper
	keeps in $this->nextpage, constructed with the scraper name "yep".
*/
public function __construct(){
include "lib/nextpage.php";
$this->nextpage = new nextpage("yep");
}
public function getfilters($page){
return [
"country" => [
"display" => "Country",
"option" => [
"all" => "All regions",
"af" => "Afghanistan",
"al" => "Albania",
"dz" => "Algeria",
"as" => "American Samoa",
"ad" => "Andorra",
"ao" => "Angola",
"ai" => "Anguilla",
"ag" => "Antigua and Barbuda",
"ar" => "Argentina",
"am" => "Armenia",
"aw" => "Aruba",
"au" => "Australia",
"at" => "Austria",
"az" => "Azerbaijan",
"bs" => "Bahamas",
"bh" => "Bahrain",
"bd" => "Bangladesh",
"bb" => "Barbados",
"by" => "Belarus",
"be" => "Belgium",
"bz" => "Belize",
"bj" => "Benin",
"bt" => "Bhutan",
"bo" => "Bolivia",
"ba" => "Bosnia and Herzegovina",
"bw" => "Botswana",
"br" => "Brazil",
"bn" => "Brunei Darussalam",
"bg" => "Bulgaria",
"bf" => "Burkina Faso",
"bi" => "Burundi",
"cv" => "Cabo Verde",
"kh" => "Cambodia",
"cm" => "Cameroon",
"ca" => "Canada",
"ky" => "Cayman Islands",
"cf" => "Central African Republic",
"td" => "Chad",
"cl" => "Chile",
"cn" => "China",
"co" => "Colombia",
"cg" => "Congo",
"cd" => "Congo, Democratic Republic",
"ck" => "Cook Islands",
"cr" => "Costa Rica",
"hr" => "Croatia",
"cu" => "Cuba",
"cy" => "Cyprus",
"cz" => "Czechia",
"ci" => "Côte d'Ivoire",
"dk" => "Denmark",
"dj" => "Djibouti",
"dm" => "Dominica",
"do" => "Dominican Republic",
"ec" => "Ecuador",
"eg" => "Egypt",
"sv" => "El Salvador",
"gq" => "Equatorial Guinea",
"ee" => "Estonia",
"et" => "Ethiopia",
"fo" => "Faroe Islands",
"fj" => "Fiji",
"fi" => "Finland",
"fr" => "France",
"gf" => "French Guiana",
"pf" => "French Polynesia",
"ga" => "Gabon",
"gm" => "Gambia",
"ge" => "Georgia",
"de" => "Germany",
"gh" => "Ghana",