added google api image scraper

This commit is contained in:
2026-04-25 22:58:28 -04:00
parent 4349bf232d
commit e63a17d6db
3 changed files with 295 additions and 96 deletions

View File

@@ -2,6 +2,52 @@
class frontend{ class frontend{
/**
 * Check that $url is an absolute http/https URL and, optionally, that its
 * host does not point at a private or reserved IP address.
 *
 * @param string $url          URL to check.
 * @param bool   $net_validate When true, the host is additionally checked:
 *                             an IP literal is used as-is, anything else is
 *                             resolved with gethostbyname(), and the result
 *                             must be outside the private and reserved ranges.
 * @return bool true when the URL passes every enabled check, false otherwise.
 */
public function validateurl($url, $net_validate = false){
	
	$url_parts = parse_url($url);
	
	// parse_url() yields false on badly malformed input; both a scheme
	// (http or https only) and a host are required
	if(
		!is_array($url_parts) ||
		!isset($url_parts["scheme"], $url_parts["host"]) ||
		!in_array($url_parts["scheme"], ["http", "https"], true)
	){
		
		return false;
	}
	
	if(!$net_validate){
		
		return true;
	}
	
	$host = $url_parts["host"];
	
	// IPv6 literals are written in brackets inside URLs: strip them
	$ip = str_replace(["[", "]"], "", $host);
	
	if(filter_var($ip, FILTER_VALIDATE_IP) === false){
		
		// not an IP literal: resolve the domain's IP.
		// The trailing dot makes the name fully qualified. Any dot that is
		// already there is dropped first, otherwise "example.com." would be
		// looked up as "example.com.." and never resolve.
		// On failure gethostbyname() hands back its input, which is then
		// rejected by the range check below.
		$ip = gethostbyname(rtrim($host, ".") . ".");
	}
	
	// reject loopback, link-local, private (RFC 1918 & co) and other
	// reserved ranges, as well as anything that is still not an IP
	return
		filter_var(
			$ip,
			FILTER_VALIDATE_IP,
			FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
		) !== false;
}
public function load($template, $replacements = []){ public function load($template, $replacements = []){
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME); $replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
@@ -622,6 +668,7 @@ class frontend{
"yandex" => "Yandex", "yandex" => "Yandex",
"brave" => "Brave", "brave" => "Brave",
"google" => "Google", "google" => "Google",
"google_api" => "Google API",
"google_cse" => "Google CSE", "google_cse" => "Google CSE",
"yahoo_japan" => "Yahoo! JAPAN", "yahoo_japan" => "Yahoo! JAPAN",
"startpage" => "Startpage", "startpage" => "Startpage",
@@ -638,8 +685,7 @@ class frontend{
"fivehpx" => "500px", "fivehpx" => "500px",
"vsco" => "VSCO", "vsco" => "VSCO",
"imgur" => "Imgur", "imgur" => "Imgur",
"ftm" => "FindThatMeme", "ftm" => "FindThatMeme"
//"sankakucomplex" => "SankakuComplex"
] ]
]; ];
break; break;
@@ -695,6 +741,22 @@ class frontend{
] ]
]; ];
break; break;
case "booru":
$filters["scraper"] = [
"display" => "Scraper",
"option" => [
"safebooru" => "Safebooru",
"konachan" => "Konachan",
"tbib" => "The Big Imageboard",
"gelbooru" => "Gelbooru",
"yandere" => "Yande.re",
"tbib" => "The Big Imageboard",
"sankakucomplex" => "SankakuComplex",
"soybooru" => "SoyBooru"
]
];
break;
} }
// get scraper name from user input, or default out to preferred scraper // get scraper name from user input, or default out to preferred scraper
@@ -871,6 +933,7 @@ class frontend{
$html = null; $html = null;
//foreach(["web", "images", "videos", "news", "music", "booru"] as $type){
foreach(["web", "images", "videos", "news", "music"] as $type){ foreach(["web", "images", "videos", "news", "music"] as $type){
$html .= '<a href="/' . $type . '?s=' . urlencode($query); $html .= '<a href="/' . $type . '?s=' . urlencode($query);

View File

@@ -264,6 +264,25 @@ class google_api{
"yes" => "Yes", // safe=active "yes" => "Yes", // safe=active
"no" => "No" // safe=off "no" => "No" // safe=off
] ]
],
"sort" => [ // sort
"display" => "Sort by",
"option" => [
"any" => "Any order",
"date:d" => "Oldest",
"date:a" => "Newest"
]
],
"newer" => [
"display" => "Newer than",
"option" => "_DATE"
],
"rm_dupes" => [ // filter
"display" => "Remove duplicates",
"option" => [
"yes" => "Yes", // 1
"no" => "No" // 0
]
] ]
]; ];
@@ -313,109 +332,29 @@ class google_api{
"zh-CN" => "Chinese (Simplified)", "zh-CN" => "Chinese (Simplified)",
"zh-TW" => "Chinese (Traditional)" "zh-TW" => "Chinese (Traditional)"
] ]
],
"sort" => [
"display" => "Sort by",
"option" => [
"any" => "Any order",
"date:d" => "Oldest",
"date:a" => "Newest"
]
],
"newer" => [
"display" => "Newer than",
"option" => "_DATE"
],
"rm_dupes" => [
"display" => "Remove duplicates",
"option" => [
"yes" => "Yes",
"no" => "No"
]
] ]
] ]
); );
break; break;
/*
case "images": case "images":
return array_merge( return array_merge(
$base, $base,
[ [
"time" => [ // tbs=qdr:<time> "size" => [ // imgSize
"display" => "Time posted",
"option" => [
"any" => "Any time",
"d" => "Past 24 hours",
"w" => "Past week",
"m" => "Past month",
"y" => "Past year"
]
],
"size" => [ // imgsz
"display" => "Size", "display" => "Size",
"option" => [ "option" => [
"any" => "Any size", "any" => "Any size",
"l" => "Large", "icon" => "Icon",
"m" => "Medium", "small" => "Small",
"i" => "Icon", "medium" => "Medium",
"qsvga" => "Larger than 400x300", "large" => "Large",
"vga" => "Larger than 640x480", "xlarge" => "X-Large",
"svga" => "Larger than 800x600", "xxlarge" => "XX-Large",
"xga" => "Larger than 1024x768", "huge" => "Huge"
"2mp" => "Larger than 2MP",
"4mp" => "Larger than 4MP",
"6mp" => "Larger than 6MP",
"8mp" => "Larger than 8MP",
"10mp" => "Larger than 10MP",
"12mp" => "Larger than 12MP",
"15mp" => "Larger than 15MP",
"20mp" => "Larger than 20MP",
"40mp" => "Larger than 40MP",
"70mp" => "Larger than 70MP"
] ]
], ],
"ratio" => [ // imgar "format" => [ // fileType
"display" => "Aspect ratio",
"option" => [
"any" => "Any ratio",
"t|xt" => "Tall",
"s" => "Square",
"w" => "Wide",
"xw" => "Panoramic"
]
],
"color" => [ // imgc
"display" => "Color",
"option" => [
"any" => "Any color",
"color" => "Full color",
"bnw" => "Black & white",
"trans" => "Transparent",
// from here, imgcolor
"red" => "Red",
"orange" => "Orange",
"yellow" => "Yellow",
"green" => "Green",
"teal" => "Teal",
"blue" => "Blue",
"purple" => "Purple",
"pink" => "Pink",
"white" => "White",
"gray" => "Gray",
"black" => "Black",
"brown" => "Brown"
]
],
"type" => [ // tbs=itp:<type>
"display" => "Type",
"option" => [
"any" => "Any type",
"clipart" => "Clip Art",
"lineart" => "Line Drawing",
"animated" => "Animated"
]
],
"format" => [ // as_filetype
"display" => "Format", "display" => "Format",
"option" => [ "option" => [
"any" => "Any format", "any" => "Any format",
@@ -429,17 +368,55 @@ class google_api{
"craw" => "RAW" "craw" => "RAW"
] ]
], ],
"rights" => [ // tbs=sur:<rights> "color" => [
"display" => "Color",
"option" => [
"any" => "Any color",
"color" => "Full color", // imgColorType
"mono" => "Black & White",
"trans" => "Transparent background",
"red" => "Red", // imgDominantColor
"orange" => "Orange",
"yellow" => "Yellow",
"green" => "Green",
"teal" => "Teal",
"blue" => "Blue",
"purple" => "Purple",
"pink" => "Pink",
"white" => "White",
"gray" => "Gray",
"black" => "Black",
"brown" => "Brown"
]
],
"type" => [ // imgType
"display" => "Type",
"option" => [
"any" => "Any type",
"clipart" => "Clip Art",
"face" => "Faces",
"lineart" => "Line Drawing",
"stock" => "Stock photos",
"photo" => "Photos",
"animated" => "Animated",
]
],
"rights" => [ // rights
"display" => "Usage rights", "display" => "Usage rights",
"option" => [ "option" => [
"any" => "Any license", "any" => "Any license",
"cl" => "Creative Commons licenses", "cc_publicdomain" => "Public domain",
"ol" => "Commercial & other licenses" "cc_attribute" => "Attribution required",
"cc_sharealike" => "Sharealike",
"cc_noncommercial" => "Non-commercial use only",
"cc_nonderived" => "Original works"
] ]
] ]
] ]
); );
break;*/ break;
} }
} }
@@ -485,6 +462,7 @@ class google_api{
return $data; return $data;
} }
public function web($get){ public function web($get){
// rotate proxy + key on EVERY request // rotate proxy + key on EVERY request
@@ -731,6 +709,160 @@ class google_api{
return $out; return $out;
} }
/**
 * Run an image search through the Google Custom Search JSON API.
 *
 * @param array $get Request parameters. "npt" (next page token) is always
 *                   read; when it is empty the query "s" and the filters
 *                   "newer", "rm_dupes", "country", "nsfw", "sort", "size",
 *                   "format", "color", "type" and "rights" are read as well.
 * @return array ["status" => "ok", "npt" => string|null, "image" => array],
 *               where every image has a "title", a "url" (page the image
 *               was found on) and two "source" entries: full size first,
 *               thumbnail second.
 * @throws Exception When the request fails, the response is not JSON, or
 *                   the API reports an error.
 */
public function image($get){
	
	// rotate proxy + key on EVERY request
	$keydata = $this->backend->get_key();
	$proxy = $this->backend->get_ip($keydata["increment"]);
	
	if($get["npt"]){
		
		// only the stored parameters are reused: the second element that
		// get() returns is ignored because the proxy is rotated above.
		// NOTE(review): image tokens are stored and fetched under the "web"
		// page type -- confirm that sharing it with web() is intended
		[$params] =
			$this->backend->get(
				$get["npt"],
				"web"
			);
		
		$params = json_decode($params, true);
		
		// the key is stripped before storing, put the current one back
		$params["key"] = $keydata["key"];
	}else{
		
		$params = [
			"q" => $get["s"],
			"cx" => config::GOOGLE_CX_ENDPOINT,
			"num" => 10,
			"start" => 1,
			"searchType" => "image",
			"key" => $keydata["key"]
		];
		
		//
		// parse filters
		//
		if($get["newer"] !== false){
			
			// dateRestrict=d<N> means "the past N days". A day is 86400
			// seconds; round up so the requested day is included, and
			// never send less than d1
			$days = (int)ceil((time() - $get["newer"]) / 86400);
			$params["dateRestrict"] = "d" . max(1, $days);
		}
		
		if($get["rm_dupes"] == "no"){ $params["filter"] = "0"; }
		if($get["country"] != "any"){ $params["gl"] = $get["country"]; }
		
		$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
		
		if($get["sort"] != "any"){ $params["sort"] = $get["sort"]; }
		
		// image filters
		if($get["size"] != "any"){ $params["imgSize"] = $get["size"]; }
		if($get["format"] != "any"){ $params["fileType"] = $get["format"]; }
		
		// a single "color" filter feeds two API parameters
		switch($get["color"]){
			
			case "any":
				break;
			
			case "color":
			case "mono":
			case "trans":
				$params["imgColorType"] = $get["color"];
				break;
			
			default:
				$params["imgDominantColor"] = $get["color"];
				break;
		}
		
		if($get["type"] != "any"){ $params["imgType"] = $get["type"]; }
		if($get["rights"] != "any"){ $params["rights"] = $get["rights"]; }
	}
	
	try{
		$json =
			$this->get(
				$proxy,
				"https://www.googleapis.com/customsearch/v1",
				$params
			);
		
	}catch(Exception $error){
		
		// keep the original exception attached for debugging
		throw new Exception("Failed to fetch JSON", 0, $error);
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	if(isset($json["error"]["message"])){
		
		throw new Exception(
			"API returned an error: " .
			$json["error"]["message"] .
			" (key #" . $keydata["increment"] . ")"
		);
	}
	
	if(!isset($json["items"])){
		
		// google just doesnt return items when theres no results
		return $out;
	}
	
	foreach($json["items"] as $image){
		
		// an item without a link or image metadata cannot be displayed
		if(!isset($image["link"], $image["image"])){
			
			continue;
		}
		
		$meta = $image["image"];
		
		$out["image"][] = [
			"title" => $this->titledots($image["title"] ?? ""),
			"source" => [
				[
					"url" => $image["link"],
					"width" => (int)($meta["width"] ?? 0),
					"height" => (int)($meta["height"] ?? 0)
				],
				[
					"url" => $meta["thumbnailLink"] ?? $image["link"],
					"width" => (int)($meta["thumbnailWidth"] ?? 0),
					"height" => (int)($meta["thumbnailHeight"] ?? 0)
				]
			],
			"url" => $meta["contextLink"] ?? $image["link"]
		];
	}
	
	// get npt
	if(isset($json["queries"]["nextPage"][0]["startIndex"])){
		
		// never persist the API key inside the token payload
		unset($params["key"]);
		$params["start"] = (int)$json["queries"]["nextPage"][0]["startIndex"];
		
		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"web",
				$proxy
			);
	}
	
	return $out;
}
private function titledots($title){ private function titledots($title){
return trim($title, " .\t\n\r\0\x0B"); return trim($title, " .\t\n\r\0\x0B");

View File

@@ -223,6 +223,10 @@ $settings = [
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
], ],
[
"value" => "google_api",
"text" => "Google API"
],
[ [
"value" => "google_cse", "value" => "google_cse",
"text" => "Google CSE" "text" => "Google CSE"