added google api image scraper
This commit is contained in:
@@ -2,6 +2,52 @@
|
|||||||
|
|
||||||
class frontend{
|
class frontend{
|
||||||
|
|
||||||
|
/**
 * Check that $url is an absolute http(s) URL and, optionally, that its
 * host does not point at a private or reserved network address.
 *
 * @param string $url          URL to validate
 * @param bool   $net_validate When true, the host is resolved (if it is
 *                             not already an IP literal) and rejected
 *                             when it falls in a private/reserved range
 * @return bool true when the URL passes every enabled check
 */
public function validateurl($url, $net_validate = false){
	
	$url_parts = parse_url($url);
	
	// parse_url() returns false on seriously malformed URLs
	if(!is_array($url_parts)){
		
		return false;
	}
	
	// check if required parts are there
	if(
		!isset($url_parts["scheme"]) ||
		!isset($url_parts["host"]) ||
		// URL schemes are case-insensitive, so "HTTP://" is as valid as "http://"
		!in_array(
			strtolower($url_parts["scheme"]),
			["http", "https"],
			true
		)
	){
		
		return false;
	}
	
	if($net_validate){
		
		$ip =
			str_replace(
				["[", "]"], // handle ipv6
				"",
				$url_parts["host"]
			);
		
		// if its not an IP
		if(filter_var($ip, FILTER_VALIDATE_IP) === false){
			
			// resolve domain's IP. The trailing dot makes the name
			// absolute; strip any dot already present so we never
			// produce "host.." which cannot resolve.
			// gethostbyname() hands back its input unchanged on failure,
			// which then fails the IP check below.
			$ip = gethostbyname(rtrim($url_parts["host"], ".") . ".");
		}
		
		// check if its localhost / a private or reserved address
		if(
			filter_var(
				$ip,
				FILTER_VALIDATE_IP,
				FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
			) === false
		){
			
			return false;
		}
	}
	
	return true;
}
|
||||||
|
|
||||||
public function load($template, $replacements = []){
|
public function load($template, $replacements = []){
|
||||||
|
|
||||||
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
|
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
|
||||||
@@ -622,6 +668,7 @@ class frontend{
|
|||||||
"yandex" => "Yandex",
|
"yandex" => "Yandex",
|
||||||
"brave" => "Brave",
|
"brave" => "Brave",
|
||||||
"google" => "Google",
|
"google" => "Google",
|
||||||
|
"google_api" => "Google API",
|
||||||
"google_cse" => "Google CSE",
|
"google_cse" => "Google CSE",
|
||||||
"yahoo_japan" => "Yahoo! JAPAN",
|
"yahoo_japan" => "Yahoo! JAPAN",
|
||||||
"startpage" => "Startpage",
|
"startpage" => "Startpage",
|
||||||
@@ -638,8 +685,7 @@ class frontend{
|
|||||||
"fivehpx" => "500px",
|
"fivehpx" => "500px",
|
||||||
"vsco" => "VSCO",
|
"vsco" => "VSCO",
|
||||||
"imgur" => "Imgur",
|
"imgur" => "Imgur",
|
||||||
"ftm" => "FindThatMeme",
|
"ftm" => "FindThatMeme"
|
||||||
//"sankakucomplex" => "SankakuComplex"
|
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
break;
|
break;
|
||||||
@@ -695,6 +741,22 @@ class frontend{
|
|||||||
]
|
]
|
||||||
];
|
];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "booru":
|
||||||
|
$filters["scraper"] = [
|
||||||
|
"display" => "Scraper",
|
||||||
|
"option" => [
|
||||||
|
"safebooru" => "Safebooru",
|
||||||
|
"konachan" => "Konachan",
|
||||||
|
"tbib" => "The Big Imageboard",
|
||||||
|
"gelbooru" => "Gelbooru",
|
||||||
|
"yandere" => "Yande.re",
|
||||||
|
"tbib" => "The Big Imageboard",
|
||||||
|
"sankakucomplex" => "SankakuComplex",
|
||||||
|
"soybooru" => "SoyBooru"
|
||||||
|
]
|
||||||
|
];
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// get scraper name from user input, or default out to preferred scraper
|
// get scraper name from user input, or default out to preferred scraper
|
||||||
@@ -871,6 +933,7 @@ class frontend{
|
|||||||
|
|
||||||
$html = null;
|
$html = null;
|
||||||
|
|
||||||
|
//foreach(["web", "images", "videos", "news", "music", "booru"] as $type){
|
||||||
foreach(["web", "images", "videos", "news", "music"] as $type){
|
foreach(["web", "images", "videos", "news", "music"] as $type){
|
||||||
|
|
||||||
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
|
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
|
||||||
|
|||||||
@@ -264,6 +264,25 @@ class google_api{
|
|||||||
"yes" => "Yes", // safe=active
|
"yes" => "Yes", // safe=active
|
||||||
"no" => "No" // safe=off
|
"no" => "No" // safe=off
|
||||||
]
|
]
|
||||||
|
],
|
||||||
|
"sort" => [ // sort
|
||||||
|
"display" => "Sort by",
|
||||||
|
"option" => [
|
||||||
|
"any" => "Any order",
|
||||||
|
"date:d" => "Oldest",
|
||||||
|
"date:a" => "Newest"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"newer" => [
|
||||||
|
"display" => "Newer than",
|
||||||
|
"option" => "_DATE"
|
||||||
|
],
|
||||||
|
"rm_dupes" => [ // filter
|
||||||
|
"display" => "Remove duplicates",
|
||||||
|
"option" => [
|
||||||
|
"yes" => "Yes", // 1
|
||||||
|
"no" => "No" // 0
|
||||||
|
]
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -313,109 +332,29 @@ class google_api{
|
|||||||
"zh-CN" => "Chinese (Simplified)",
|
"zh-CN" => "Chinese (Simplified)",
|
||||||
"zh-TW" => "Chinese (Traditional)"
|
"zh-TW" => "Chinese (Traditional)"
|
||||||
]
|
]
|
||||||
],
|
|
||||||
"sort" => [
|
|
||||||
"display" => "Sort by",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any order",
|
|
||||||
"date:d" => "Oldest",
|
|
||||||
"date:a" => "Newest"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"newer" => [
|
|
||||||
"display" => "Newer than",
|
|
||||||
"option" => "_DATE"
|
|
||||||
],
|
|
||||||
"rm_dupes" => [
|
|
||||||
"display" => "Remove duplicates",
|
|
||||||
"option" => [
|
|
||||||
"yes" => "Yes",
|
|
||||||
"no" => "No"
|
|
||||||
]
|
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
/*
|
|
||||||
case "images":
|
case "images":
|
||||||
return array_merge(
|
return array_merge(
|
||||||
$base,
|
$base,
|
||||||
[
|
[
|
||||||
"time" => [ // tbs=qdr:<time>
|
"size" => [ // imgSize
|
||||||
"display" => "Time posted",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any time",
|
|
||||||
"d" => "Past 24 hours",
|
|
||||||
"w" => "Past week",
|
|
||||||
"m" => "Past month",
|
|
||||||
"y" => "Past year"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"size" => [ // imgsz
|
|
||||||
"display" => "Size",
|
"display" => "Size",
|
||||||
"option" => [
|
"option" => [
|
||||||
"any" => "Any size",
|
"any" => "Any size",
|
||||||
"l" => "Large",
|
"icon" => "Icon",
|
||||||
"m" => "Medium",
|
"small" => "Small",
|
||||||
"i" => "Icon",
|
"medium" => "Medium",
|
||||||
"qsvga" => "Larger than 400x300",
|
"large" => "Large",
|
||||||
"vga" => "Larger than 640x480",
|
"xlarge" => "X-Large",
|
||||||
"svga" => "Larger than 800x600",
|
"xxlarge" => "XX-Large",
|
||||||
"xga" => "Larger than 1024x768",
|
"huge" => "Huge"
|
||||||
"2mp" => "Larger than 2MP",
|
|
||||||
"4mp" => "Larger than 4MP",
|
|
||||||
"6mp" => "Larger than 6MP",
|
|
||||||
"8mp" => "Larger than 8MP",
|
|
||||||
"10mp" => "Larger than 10MP",
|
|
||||||
"12mp" => "Larger than 12MP",
|
|
||||||
"15mp" => "Larger than 15MP",
|
|
||||||
"20mp" => "Larger than 20MP",
|
|
||||||
"40mp" => "Larger than 40MP",
|
|
||||||
"70mp" => "Larger than 70MP"
|
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"ratio" => [ // imgar
|
"format" => [ // fileType
|
||||||
"display" => "Aspect ratio",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any ratio",
|
|
||||||
"t|xt" => "Tall",
|
|
||||||
"s" => "Square",
|
|
||||||
"w" => "Wide",
|
|
||||||
"xw" => "Panoramic"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"color" => [ // imgc
|
|
||||||
"display" => "Color",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any color",
|
|
||||||
"color" => "Full color",
|
|
||||||
"bnw" => "Black & white",
|
|
||||||
"trans" => "Transparent",
|
|
||||||
// from here, imgcolor
|
|
||||||
"red" => "Red",
|
|
||||||
"orange" => "Orange",
|
|
||||||
"yellow" => "Yellow",
|
|
||||||
"green" => "Green",
|
|
||||||
"teal" => "Teal",
|
|
||||||
"blue" => "Blue",
|
|
||||||
"purple" => "Purple",
|
|
||||||
"pink" => "Pink",
|
|
||||||
"white" => "White",
|
|
||||||
"gray" => "Gray",
|
|
||||||
"black" => "Black",
|
|
||||||
"brown" => "Brown"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"type" => [ // tbs=itp:<type>
|
|
||||||
"display" => "Type",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any type",
|
|
||||||
"clipart" => "Clip Art",
|
|
||||||
"lineart" => "Line Drawing",
|
|
||||||
"animated" => "Animated"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"format" => [ // as_filetype
|
|
||||||
"display" => "Format",
|
"display" => "Format",
|
||||||
"option" => [
|
"option" => [
|
||||||
"any" => "Any format",
|
"any" => "Any format",
|
||||||
@@ -429,17 +368,55 @@ class google_api{
|
|||||||
"craw" => "RAW"
|
"craw" => "RAW"
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"rights" => [ // tbs=sur:<rights>
|
"color" => [
|
||||||
|
"display" => "Color",
|
||||||
|
"option" => [
|
||||||
|
"any" => "Any color",
|
||||||
|
|
||||||
|
"color" => "Full color", // imgColorType
|
||||||
|
"mono" => "Black & White",
|
||||||
|
"trans" => "Transparent background",
|
||||||
|
|
||||||
|
"red" => "Red", // imgDominantColor
|
||||||
|
"orange" => "Orange",
|
||||||
|
"yellow" => "Yellow",
|
||||||
|
"green" => "Green",
|
||||||
|
"teal" => "Teal",
|
||||||
|
"blue" => "Blue",
|
||||||
|
"purple" => "Purple",
|
||||||
|
"pink" => "Pink",
|
||||||
|
"white" => "White",
|
||||||
|
"gray" => "Gray",
|
||||||
|
"black" => "Black",
|
||||||
|
"brown" => "Brown"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"type" => [ // imgType
|
||||||
|
"display" => "Type",
|
||||||
|
"option" => [
|
||||||
|
"any" => "Any type",
|
||||||
|
"clipart" => "Clip Art",
|
||||||
|
"face" => "Faces",
|
||||||
|
"lineart" => "Line Drawing",
|
||||||
|
"stock" => "Stock photos",
|
||||||
|
"photo" => "Photos",
|
||||||
|
"animated" => "Animated",
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"rights" => [ // rights
|
||||||
"display" => "Usage rights",
|
"display" => "Usage rights",
|
||||||
"option" => [
|
"option" => [
|
||||||
"any" => "Any license",
|
"any" => "Any license",
|
||||||
"cl" => "Creative Commons licenses",
|
"cc_publicdomain" => "Public domain",
|
||||||
"ol" => "Commercial & other licenses"
|
"cc_attribute" => "Attribution required",
|
||||||
|
"cc_sharealike" => "Sharealike",
|
||||||
|
"cc_noncommercial" => "Non-commercial use only",
|
||||||
|
"cc_nonderived" => "Original works"
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
break;*/
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -485,6 +462,7 @@ class google_api{
|
|||||||
return $data;
|
return $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public function web($get){
|
public function web($get){
|
||||||
|
|
||||||
// rotate proxy + key on EVERY request
|
// rotate proxy + key on EVERY request
|
||||||
@@ -731,6 +709,160 @@ class google_api{
|
|||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Run an image search against the Google Custom Search JSON API.
 *
 * A fresh API key and proxy are taken from the backend on every call.
 * When $get["npt"] is set, the stored request parameters are loaded from
 * the backend; otherwise they are built from the filters in $get.
 *
 * @param array $get Request values: "s", "npt", "newer", "rm_dupes",
 *                   "country", "nsfw", "sort", "size", "format",
 *                   "color", "type" and "rights"
 * @return array ["status" => "ok", "npt" => string|null, "image" => list]
 * @throws Exception when the fetch fails, the response or page token is
 *                   not valid JSON, or the API reports an error
 */
public function image($get){
	
	// rotate proxy + key on EVERY request
	$keydata = $this->backend->get_key();
	$proxy = $this->backend->get_ip($keydata["increment"]);
	
	if($get["npt"]){
		
		// only the stored payload is needed; the second element the
		// backend returns is not used here.
		// NOTE(review): tokens are stored and fetched under the "web"
		// page type, not an image-specific one -- confirm this is intended.
		[$params] = $this->backend->get(
			$get["npt"],
			"web"
		);
		
		$params = json_decode($params, true);
		
		if(!is_array($params)){
			
			throw new Exception("Failed to decode next page token");
		}
		
		// the key is stripped before storing, re-add the current one
		$params["key"] = $keydata["key"];
		
	}else{
		
		$params = [
			"q" => $get["s"],
			"cx" => config::GOOGLE_CX_ENDPOINT,
			"num" => 10,
			"start" => 1,
			"searchType" => "image",
			"key" => $keydata["key"]
		];
		
		//
		// parse filters
		//
		if($get["newer"] !== false){
			
			// dateRestrict takes a number of whole days: 86400 seconds
			// per day, rounded up so the requested date stays inside the
			// window, and never less than one day
			$days = (int)ceil((time() - $get["newer"]) / 86400);
			$params["dateRestrict"] = "d" . max(1, $days);
		}
		
		if($get["rm_dupes"] == "no"){ $params["filter"] = "0"; }
		if($get["country"] != "any"){ $params["gl"] = $get["country"]; }
		
		$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
		
		if($get["sort"] != "any"){ $params["sort"] = $get["sort"]; }
		
		// image filters
		if($get["size"] != "any"){ $params["imgSize"] = $get["size"]; }
		if($get["format"] != "any"){ $params["fileType"] = $get["format"]; }
		
		// the single "color" filter feeds two API parameters
		switch($get["color"]){
			
			case "any":
				break;
			
			case "color":
			case "mono":
			case "trans":
				$params["imgColorType"] = $get["color"];
				break;
			
			default:
				$params["imgDominantColor"] = $get["color"];
				break;
		}
		
		if($get["type"] != "any"){ $params["imgType"] = $get["type"]; }
		if($get["rights"] != "any"){ $params["rights"] = $get["rights"]; }
	}
	
	try{
		$json =
			$this->get(
				$proxy,
				"https://www.googleapis.com/customsearch/v1",
				$params
			);
		
	}catch(Exception $error){
		
		// keep the original failure attached for debugging
		throw new Exception("Failed to fetch JSON", 0, $error);
	}
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	if(isset($json["error"]["message"])){
		
		throw new Exception(
			"API returned an error: " .
			$json["error"]["message"] .
			" (key #" . $keydata["increment"] . ")"
		);
	}
	
	if(!isset($json["items"])){
		
		// google just doesnt return items when theres no results
		return $out;
	}
	
	foreach($json["items"] as $image){
		
		// skip entries that lack the image payload instead of emitting
		// warnings on missing keys
		if(
			!isset($image["link"], $image["image"]) ||
			!is_array($image["image"])
		){
			
			continue;
		}
		
		$meta = $image["image"];
		
		$out["image"][] = [
			"title" => $this->titledots($image["title"] ?? ""),
			"source" => [
				[
					"url" => $image["link"],
					"width" => (int)($meta["width"] ?? 0),
					"height" => (int)($meta["height"] ?? 0)
				],
				[
					"url" => $meta["thumbnailLink"] ?? $image["link"],
					"width" => (int)($meta["thumbnailWidth"] ?? 0),
					"height" => (int)($meta["thumbnailHeight"] ?? 0)
				]
			],
			"url" => $meta["contextLink"] ?? null
		];
	}
	
	// get npt
	if(isset($json["queries"]["nextPage"][0]["startIndex"])){
		
		// never persist the API key inside the page token
		unset($params["key"]);
		$params["start"] = (int)$json["queries"]["nextPage"][0]["startIndex"];
		
		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"web",
				$proxy
			);
	}
	
	return $out;
}
|
||||||
|
|
||||||
|
|
||||||
private function titledots($title){
|
private function titledots($title){
|
||||||
|
|
||||||
return trim($title, " .\t\n\r\0\x0B…");
|
return trim($title, " .\t\n\r\0\x0B…");
|
||||||
|
|||||||
@@ -223,6 +223,10 @@ $settings = [
|
|||||||
"value" => "google",
|
"value" => "google",
|
||||||
"text" => "Google"
|
"text" => "Google"
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
"value" => "google_api",
|
||||||
|
"text" => "Google API"
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"value" => "google_cse",
|
"value" => "google_cse",
|
||||||
"text" => "Google CSE"
|
"text" => "Google CSE"
|
||||||
|
|||||||
Reference in New Issue
Block a user