Compare commits

..

No commits in common. "master" and "dockerfile-fix" have entirely different histories.

11 changed files with 799 additions and 836 deletions

View File

@ -129,7 +129,6 @@ class config{
const PROXY_BRAVE = false; const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false; const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false; const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false; const PROXY_STARTPAGE = false;
const PROXY_QWANT = false; const PROXY_QWANT = false;
@ -144,8 +143,6 @@ class config{
const PROXY_YT = false; // youtube const PROXY_YT = false; // youtube
const PROXY_YEP = false; const PROXY_YEP = false;
const PROXY_PINTEREST = false; const PROXY_PINTEREST = false;
const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false;
const PROXY_FIVEHPX = false; const PROXY_FIVEHPX = false;
const PROXY_VSCO = false; const PROXY_VSCO = false;
const PROXY_SEZNAM = false; const PROXY_SEZNAM = false;
@ -163,7 +160,7 @@ class config{
// Scraper-specific parameters // Scraper-specific parameters
// //
// GOOGLE CSE & GOOGLE API // GOOGLE CSE
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0"; const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA // MARGINALIA

View File

@ -15,12 +15,7 @@ class favicon{
header("Content-Type: image/png"); header("Content-Type: image/png");
if( if(substr_count($url, "/") !== 2){
preg_match(
'/^https?:\/\/[A-Za-z0-9.-]+$/',
$url
) === 0
){
header("X-Error: Only provide the protocol and domain"); header("X-Error: Only provide the protocol and domain");
$this->defaulticon(); $this->defaulticon();

View File

@ -939,7 +939,6 @@ class frontend{
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex", "yandex" => "Yandex",
"google" => "Google", "google" => "Google",
//"google_api" => "Google API",
"google_cse" => "Google CSE", "google_cse" => "Google CSE",
"startpage" => "Startpage", "startpage" => "Startpage",
"qwant" => "Qwant", "qwant" => "Qwant",
@ -971,12 +970,10 @@ class frontend{
"yep" => "Yep", "yep" => "Yep",
"solofield" => "Solofield", "solofield" => "Solofield",
"pinterest" => "Pinterest", "pinterest" => "Pinterest",
"flickr" => "Flickr",
"fivehpx" => "500px", "fivehpx" => "500px",
"vsco" => "VSCO", "vsco" => "VSCO",
"imgur" => "Imgur", "imgur" => "Imgur",
"ftm" => "FindThatMeme", "ftm" => "FindThatMeme"
//"sankakucomplex" => "SankakuComplex"
] ]
]; ];
break; break;

View File

@ -34,46 +34,22 @@ try{
) )
){ ){
if(!isset($image["path"])){ if(
!isset($image["query"]) ||
!isset($image["path"]) ||
$image["path"] != "/th"
){
header("X-Error: Missing bing image path"); header("X-Error: Invalid bing image path");
$proxy->do404(); $proxy->do404();
die(); die();
} }
// parse_str($image["query"], $str);
// get image ID
// formations:
// https://tse2.mm.bing.net/th/id/OIP.3yLBkUPn8EXA1wlhWP2BHwHaE3
// https://tse2.mm.bing.net/th?id=OIP.3yLBkUPn8EXA1wlhWP2BHwHaE3
//
$id = null;
if(isset($image["query"])){
parse_str($image["query"], $str);
if(isset($str["id"])){
$id = $str["id"];
}
}
if($id === null){ if(!isset($str["id"])){
$id = explode("/th/id/", $image["path"], 2); header("X-Error: Missing bing ID");
if(count($id) !== 2){
// malformed
return $url;
}
$id = $id[1];
}
if(is_array($id)){
header("X-Error: Missing bing id parameter");
$proxy->do404(); $proxy->do404();
die(); die();
} }
@ -87,7 +63,7 @@ try{
case "cover": $req = "&w=207&h=270&p=0&qlt=90"; break; case "cover": $req = "&w=207&h=270&p=0&qlt=90"; break;
} }
$proxy->stream_linear_image("https://" . $image["host"] . "/th?id=" . rawurlencode($id) . $req, "https://www.bing.com"); $proxy->stream_linear_image("https://" . $image["host"] . "/th?id=" . urlencode($str["id"]) . $req, "https://www.bing.com");
die(); die();
} }

View File

@ -285,7 +285,6 @@ class ddg{
"display" => "NSFW", "display" => "NSFW",
"option" => [ "option" => [
"yes" => "Yes", "yes" => "Yes",
"maybe" => "Maybe",
"no" => "No" "no" => "No"
] ]
], ],
@ -1346,7 +1345,7 @@ class ddg{
$get_filters["iaf"] = $filters; $get_filters["iaf"] = $filters;
} }
$nsfw = $get["nsfw"] == "yes" ? "-1" : "1"; $nsfw = $get["nsfw"] == "yes" ? "-2" : "-1";
$get_filters["kp"] = $nsfw; $get_filters["kp"] = $nsfw;
try{ try{
@ -1499,12 +1498,8 @@ class ddg{
"ia" => "videos" "ia" => "videos"
]; ];
switch($get["nsfw"]){ $nsfw = $get["nsfw"] == "yes" ? "-2" : "-1";
$get_filters["kp"] = $nsfw;
case "yes": $nsfw = "-2"; break;
case "maybe": $nsfw = "-1"; break;
case "no": $nsfw = "1"; break;
}
$filters = []; $filters = [];
@ -1943,33 +1938,10 @@ class ddg{
private function bingimg($url){ private function bingimg($url){
$image = parse_url($url); $parse = parse_url($url);
parse_str($parse["query"], $parts);
$id = null; return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]);
if(isset($image["query"])){
parse_str($image["query"], $str);
if(isset($str["id"])){
$id = $str["id"];
}
}
if($id === null){
$id = explode("/th/id/", $image["path"], 2);
if(count($id) !== 2){
// malformed
return $url;
}
$id = $id[1];
}
return "https://" . $image["host"] . "/th?id=" . rawurlencode($id);
} }
private function bingratio($width, $height){ private function bingratio($width, $height){

View File

@ -1,415 +0,0 @@
<?php

/**
 * Flickr image scraper.
 *
 * Requests Flickr's HTML search page, extracts the embedded "modelExport"
 * JSON blob from it and converts the photo list into the result array
 * returned by image().
 */
class flickr{
	
	// request flavours understood by get()
	const req_web = 0; // browser-style page navigation headers
	const req_xhr = 1; // XHR-style headers (Origin/Referer set to flickr.com)
	
	// Declared explicitly: creating undeclared (dynamic) properties is
	// deprecated as of PHP 8.2.
	public $backend;
	public $fuckhtml;
	
	public function __construct(){
		
		// *_once so a second scraper pulling in the same helpers cannot
		// trigger a class redeclaration error
		include_once "lib/backend.php";
		$this->backend = new backend("flickr");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Describe the filters this scraper accepts.
	 *
	 * @param mixed $page Requested page type (not used: the same set is
	 *                    returned regardless).
	 * @return array Filter name => ["display" => label, "option" => [value => label]]
	 */
	public function getfilters($page){
		
		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"maybe" => "Maybe",
					"no" => "No",
				]
			],
			"sort" => [
				"display" => "Sort by",
				"option" => [
					"relevance" => "Relevance",
					"date-posted-desc" => "Newest uploads",
					"date-posted-asc" => "Oldest uploads",
					"date-taken-desc" => "Newest taken",
					"date-taken-asc" => "Oldest taken",
					"interestingness-desc" => "Interesting"
				]
			],
			"color" => [
				"display" => "Color",
				"option" => [
					"any" => "Any color",
					// color_codes=
					"0" => "Red",
					"1" => "Brown",
					"2" => "Orange",
					"b" => "Pink",
					"4" => "Yellow",
					"3" => "Golden",
					"5" => "Lime",
					"6" => "Green",
					"7" => "Sky blue",
					"8" => "Blue",
					"9" => "Purple",
					"a" => "Hot pink",
					"c" => "White",
					"d" => "Gray",
					"e" => "Black",
					// styles= override
					"blackandwhite" => "Black & white",
				]
			],
			"style" => [ // styles=
				"display" => "Style",
				"option" => [
					"any" => "Any style",
					"depthoffield" => "Depth of field",
					"minimalism" => "Minimalism",
					"pattern" => "Patterns"
				]
			],
			"license" => [
				"display" => "License",
				"option" => [
					"any" => "Any license",
					"1,2,3,4,5,6,9,11,12,13,14,15,16" => "All creative commons",
					"4,5,6,9,10,11,12,13" => "Commercial use allowed",
					"1,2,4,5,9,10,11,12,14,15" => "Modifications allowed",
					"4,5,9,10,11,12" => "Commercial use & mods allowed",
					"7,9,10" => "No known copyright restrictions",
					"8" => "U.S Government works"
				]
			]
		];
	}
	
	/**
	 * Perform a GET request through the given proxy.
	 *
	 * @param mixed  $proxy   Proxy handed to backend::assign_proxy().
	 * @param string $url     Base URL, without query string.
	 * @param array  $get     Query parameters appended to $url when non-empty.
	 * @param int    $reqtype flickr::req_web or flickr::req_xhr; selects the
	 *                        header set. Defaults to req_web so the optional
	 *                        $get is no longer followed by a required
	 *                        parameter (deprecated since PHP 8.0).
	 * @return string|bool Response body as returned by curl_exec().
	 * @throws Exception When curl reports a transfer error.
	 */
	private function get($proxy, $url, $get = [], $reqtype = self::req_web){
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		if($reqtype === flickr::req_web){
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"]
			);
		}else{
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: */*",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Origin: https://www.flickr.com",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Referer: https://www.flickr.com/",
				// Cookie:
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-site",
				"TE: trailers"]
			);
		}
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle, then release
			// it so the error path does not leak the handle
			$error = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($error);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Run an image search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters. Reads "npt" and "s"; for a
	 *                   first-page search also "sort", "style", "color",
	 *                   "license" and "nsfw".
	 * @return array ["status" => "ok", "npt" => token|null, "image" => [...]]
	 * @throws Exception On an empty search term, an undecodable token, a
	 *                   failed request, or when the expected JSON cannot be
	 *                   located/decoded.
	 */
	public function image($get){
		
		if($get["npt"]){
			
			[$filters, $proxy] =
				$this->backend->get(
					$get["npt"], "images"
				);
			
			$filters = json_decode($filters, true);
			
			// a corrupt token would otherwise blow up in array_merge() and
			// in the page arithmetic further down
			if(
				!is_array($filters) ||
				!isset($filters["page"])
			){
				
				throw new Exception("Failed to decode next page token");
			}
			
			// Workaround for the future, if flickr deprecates &page argument on html page
			/*
			try{
				$json =
					$this->get(
						$proxy,
						"https://api.flickr.com/services/rest",
						[
							"sort" => $data["sort"],
							"parse_tags" => 1,
							// url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_5k,url_6k,url_o
							"extras" => "can_comment,can_print,count_comments,count_faves,description,isfavorite,license,media,needs_interstitial,owner_name,path_alias,realname,rotation,url_sq,url_q,url_t,url_s,url_n,url_w,url_m,url_z,url_c,url_l",
							"per_page" => 100,
							"page" => $data["page"],
							"lang" => "en-US",
							"text" => $data["search"],
							"viewerNSID" => "",
							"method" => "flickr.photos.search",
							"csrf" => "",
							"api_key" => $data["api_key"],
							"format" => "json",
							"hermes" => 1,
							"hermesClient" => 1,
							"reqId" => $data["reqId"],
							"nojsoncallback" => 1
						]
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch JSON");
			}*/
			
		}else{
			
			if(strlen($get["s"]) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			
			// compute filters
			// "text" travels inside the next-page token so a follow-up
			// request does not depend on the caller re-sending "s"
			$filters = [
				"page" => 1,
				"sort" => $get["sort"],
				"text" => $get["s"]
			];
			
			if($get["style"] != "any"){
				
				$filters["styles"] = $get["style"];
			}
			
			if($get["color"] != "any"){
				
				if($get["color"] != "blackandwhite"){
					
					$filters["color_codes"] = $get["color"];
				}else{
					
					// black & white is exposed as a color but is sent
					// through the styles= parameter
					$filters["styles"] = "blackandwhite";
				}
			}
			
			if($get["license"] != "any"){
				
				$filters["license"] = $get["license"];
			}
			
			switch($get["nsfw"]){
				
				case "yes": $filters["safe_search"] = 0; break;
				case "maybe": $filters["safe_search"] = 2; break;
				case "no": $filters["safe_search"] = 1; break;
			}
		}
		
		$get_params = [
			// fallback for tokens that predate storing "text"; otherwise
			// $filters["text"] overrides this in the merge below
			"text" => $get["s"],
			"per_page" => 50,
			// scrape highest resolution
			"extras" => "url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_5k,url_6k,url_o",
			"view_all" => 1
		];
		
		$get_params = array_merge($get_params, $filters);
		
		$html =
			$this->get(
				$proxy,
				"https://www.flickr.com/search/",
				$get_params,
				flickr::req_web
			);
		
		// @TODO
		// get api_key and reqId, if flickr deprecates &page
		
		$this->fuckhtml->load($html);
		
		//
		// get response JSON
		//
		$scripts =
			$this->fuckhtml
			->getElementsByClassName(
				"modelExport",
				"script"
			);
		
		$json = null;
		
		foreach($scripts as $script){
			
			$parts =
				preg_split(
					'/modelExport: ?/',
					$script["innerHTML"],
					2
				);
			
			// preg_split() yields a single element when the marker is
			// absent, so only a 2-element result carries the payload
			if(
				$parts !== false &&
				count($parts) === 2
			){
				
				$json = $parts[1];
				break;
			}
		}
		
		if($json === null){
			
			throw new Exception("Failed to grep JSON");
		}
		
		$json =
			json_decode(
				$this->fuckhtml
				->extract_json(
					$json
				),
				true
			);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if(!isset($json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"]["_data"])){
			
			throw new Exception("Failed to access data object");
		}
		
		$photos = $json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"];
		
		foreach($photos["_data"] as $image){
			
			if(!isset($image["data"])){
				
				// flickr likes to gives us empty array objects
				continue;
			}
			
			$image = $image["data"];
			
			if(
				!isset($image["sizes"]["data"]) ||
				!is_array($image["sizes"]["data"]) ||
				count($image["sizes"]["data"]) === 0
			){
				
				// nothing to display without at least one size
				continue;
			}
			
			$title = [];
			
			if(isset($image["title"])){
				
				$title[] =
					$this->fuckhtml
					->getTextContent(
						$image["title"]
					);
			}
			
			if(isset($image["description"])){
				
				$title[] =
					$this->fuckhtml
					->getTextContent(
						str_replace(
							"\n",
							" ",
							$image["description"]
						)
					);
			}
			
			$title = implode(": ", $title);
			
			$sizes = $image["sizes"]["data"];
			$sources = array_values($sizes);
			
			// thumbnail: first preferred size that exists, else the first
			// entry of the size list
			$thumb = $sources[0]["data"];
			
			foreach(["n", "m", "w", "s"] as $testing_size){
				
				if(isset($sizes[$testing_size])){
					
					$thumb = $sizes[$testing_size]["data"];
					break;
				}
			}
			
			// full-size source: last entry of the size list
			$og = $sources[count($sources) - 1]["data"];
			
			$out["image"][] = [
				"title" => $title,
				"source" => [
					[
						"url" => "https:" . $og["displayUrl"],
						"width" => (int)$og["width"],
						"height" => (int)$og["height"]
					],
					[
						"url" => "https:" . $thumb["displayUrl"],
						"width" => (int)$thumb["width"],
						"height" => (int)$thumb["height"]
					]
				],
				"url" => "https://www.flickr.com/photos/" . $image["ownerNsid"] . "/" . $image["id"] . "/"
			];
		}
		
		$total_items = (int)($photos["totalItems"] ?? 0);
		
		// 50 results are requested per page (per_page above)
		if($filters["page"] * 50 < $total_items){
			
			$filters["page"]++;
			
			$out["npt"] =
				$this->backend->store(
					json_encode($filters),
					"images",
					$proxy
				);
		}
		
		return $out;
	}
}

View File

@ -578,6 +578,697 @@ class google{
} }
/**
 * Parse a Google HTML result page into the scraper's result array.
 *
 * Only the "web" list and the "npt" token are filled in here; the other
 * keys of the returned array are initialised empty.
 *
 * @param string $html     Raw HTML of the result page.
 * @param string $pagetype Page type under which the next-page token is stored.
 * @param string $search   Search query (currently not read by this method).
 * @param mixed  $proxy    Proxy to associate with the next-page token.
 * @param array  $params   Request parameters; "start" is advanced by 20 and
 *                         the array is stored as the next-page token.
 * @return array Result array with "status", "spelling", "npt", "answer",
 *               "web", "image", "video", "news" and "related".
 * @throws Exception When the #center_col result container is missing.
 */
private function parsepage($html, $pagetype, $search, $proxy, $params){
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	$this->fuckhtml->load($html);
	
	$this->detect_sorry();
	
	// parse all <style> tags
	$this->parsestyles();
	
	// get javascript images
	$this->scrape_dimg($html);
	
	// get html blobs
	preg_match_all(
		'/function\(\){window\.jsl\.dh\(\'([^\']+?)\',\'(.+?[^\'])\'\);/',
		$html,
		$blobs
	);
	
	// blob id => decoded html
	$this->blobs = [];
	
	if(isset($blobs[1])){
		
		foreach($blobs[1] as $i => $blob_id){
			
			$this->blobs[$blob_id] =
				$this->fuckhtml
				->parseJsString(
					$blobs[2][$i]
				);
		}
	}
	
	$this->scrape_imagearr($html);
	
	//
	// load result column
	//
	$result_div =
		$this->fuckhtml
		->getElementById(
			"center_col",
			"div"
		);
	
	if($result_div === false){
		
		throw new Exception("Failed to grep result div");
	}
	
	$this->fuckhtml->load($result_div);
	
	// important for later
	$last_page = false;
	
	//
	// Get text results
	//
	$results =
		$this->fuckhtml
		->getElementsByClassName(
			"g",
			"div"
		);
	
	$this->skip_next = false;
	
	foreach($results as $result){
		
		if($this->skip_next){
			
			$this->skip_next = false;
			continue;
		}
		
		$this->fuckhtml->load($result);
		
		$web = [
			"title" => null,
			"description" => null,
			"url" => null,
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
		
		// Detect presence of sublinks
		$g =
			$this->fuckhtml
			->getElementsByClassName(
				"g",
				"div"
			);
		
		if(count($g) > 0){
			
			// a nested .g div was found inside this result: skip the
			// next entry of $results
			$this->skip_next = true;
		}
		
		// get title
		$h3 =
			$this->fuckhtml
			->getElementsByTagName(
				"h3"
			);
		
		if(count($h3) === 0){
			
			continue;
		}
		
		$web["title"] =
			$this->titledots(
				$this->fuckhtml
				->getTextContent(
					$h3[0]
				)
			);
		
		// get url
		$as =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			);
		
		if(
			count($as) === 0 ||
			!isset($as[0]["attributes"]["href"])
		){
			
			// no usable link in this result
			continue;
		}
		
		$web["url"] =
			$this->unshiturl(
				$as[0]
				["attributes"]
				["href"]
			);
		
		if(
			!preg_match(
				'/^http/',
				$web["url"]
			)
		){
			
			// skip if invalid url is found
			continue;
		}
		
		//
		// get viewcount, time posted and follower count from <cite> tag
		//
		$cite =
			$this->fuckhtml
			->getElementsByTagName(
				"cite"
			);
		
		if(count($cite) !== 0){
			
			$this->fuckhtml->load($cite[0]);
			
			$spans =
				$this->fuckhtml
				->getElementsByTagName("span");
			
			if(count($spans) === 0){
				
				$cite_parts =
					explode(
						"·",
						$this->fuckhtml
						->getTextContent(
							$cite[0]
						)
					);
				
				foreach($cite_parts as $cite_part){
					
					$cite_part = trim($cite_part);
					
					if(
						preg_match(
							'/(.+) (views|followers|likes)$/',
							$cite_part,
							$match
						)
					){
						
						$web["table"][ucfirst($match[2])] =
							$match[1];
						
					}elseif(
						preg_match(
							'/ago$/',
							$cite_part
						)
					){
						
						$web["date"] =
							strtotime($cite_part);
					}
				}
			}
			
			// reset
			$this->fuckhtml->load($result);
		}
		
		//
		// attempt to fetch description cleanly
		//
		$description =
			$this->fuckhtml
			->getElementsByAttributeValue(
				"style",
				"-webkit-line-clamp:2"
			);
		
		if(count($description) !== 0){
			
			$web["description"] =
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$description[0]
					)
				);
		}else{
			
			// use ANOTHER method where the description is a header of the result
			$description =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"data-attrid",
					"wa:/description"
				);
			
			if(count($description) !== 0){
				
				// the date sits inside the description element
				$date =
					$this->fuckhtml
					->getElementsByClassName(
						$this->getstyle(
							[
								"font-size" => "12px",
								"line-height" => "1.34",
								"display" => "inline-block",
								"font-family" => "google sans,arial,sans-serif",
								"padding-right" => "0",
								"white-space" => "nowrap"
							]
						),
						"span"
					);
				
				if(count($date) !== 0){
					
					// cut the date element out of the description markup
					$description[0]["innerHTML"] =
						str_replace(
							$date[0]["outerHTML"],
							"",
							$description[0]["innerHTML"]
						);
					
					$web["date"] =
						strtotime(
							$this->fuckhtml
							->getTextContent(
								$date[0]
							)
						);
				}
				
				$web["description"] =
					$this->fuckhtml
					->getTextContent(
						$description[0]
					);
			}else{
				
				// third method: descriptions off youtube containers
				$description =
					$this->fuckhtml
					->getElementsByClassName(
						$this->getstyle(
							[
								"-webkit-box-orient" => "vertical",
								"display" => "-webkit-box",
								"font-size" => "14px",
								"-webkit-line-clamp" => "2",
								"line-height" => "22px",
								"overflow" => "hidden",
								"word-break" => "break-word",
								"color" => "#4d5156"
							]
						),
						"div"
					);
				
				if(count($description) !== 0){
					
					// check for video duration
					$duration =
						$this->fuckhtml
						->getElementsByClassName(
							$this->getstyle(
								[
									"background-color" => "rgba(0,0,0,0.6)",
									"color" => "#fff",
									"fill" => "#fff"
								]
							),
							"div"
						);
					
					if(count($duration) !== 0){
						
						$web["table"]["Duration"] =
							$this->fuckhtml
							->getTextContent(
								$duration[0]
							);
					}
					
					$web["description"] =
						$this->titledots(
							html_entity_decode(
								$this->fuckhtml
								->getTextContent(
									$description[0]
								)
							)
						);
					
					// get author + time posted
					$info =
						$this->fuckhtml
						->getElementsByClassName(
							$this->getstyle(
								[
									"color" => "var(" . $this->getcolorvar("#70757a") . ")",
									"font-size" => "14px",
									"line-height" => "20px",
									"margin-top" => "12px"
								]
							),
							"div"
						);
					
					if(count($info) !== 0){
						
						$info =
							explode(
								"·",
								$this->fuckhtml
								->getTextContent(
									$info[0]
								)
							);
						
						switch(count($info)){
							
							case 3:
								$web["table"]["Author"] = trim($info[1]);
								$web["date"] = strtotime(trim($info[2]));
								break;
							
							case 2:
								$web["date"] = strtotime(trim($info[1]));
								break;
						}
					}
				}
			}
		}
		
		//
		// get categories of content within the search result
		//
		$cats =
			$this->fuckhtml
			->getElementsByAttributeName(
				"data-sncf",
				"div"
			);
		
		foreach($cats as $cat){
			
			$this->fuckhtml->load($cat);
			
			// detect image category
			$images =
				$this->fuckhtml
				->getElementsByTagName(
					"img"
				);
			
			foreach($images as $image){
				
				if(!isset($image["attributes"]["id"])){
					
					continue;
				}
				
				// we found an image: guess the ratio from the width
				// attribute (110 => square)
				$ratio = "1:1";
				
				if(isset($image["attributes"]["width"])){
					
					$width = (int)$image["attributes"]["width"];
					
					if($width > 110){
						
						$ratio = "16:9";
					}elseif($width < 110){
						
						$ratio = "9:16";
					}
				}
				
				$web["thumb"] = [
					"url" => $this->getdimg($image["attributes"]["id"]),
					"ratio" => $ratio
				];
				
				// category handled, move on to the next one
				continue 2;
			}
			
			// Detect rating
			$spans_unfiltered =
				$this->fuckhtml
				->getElementsByTagName(
					"span"
				);
			
			$spans =
				$this->fuckhtml
				->getElementsByAttributeName(
					"aria-label",
					$spans_unfiltered
				);
			
			foreach($spans as $span){
				
				if(
					!preg_match(
						'/^Rated/',
						$span["attributes"]["aria-label"]
					)
				){
					
					continue;
				}
				
				// found rating
				// scrape rating: first two numbers of the label. The
				// numbers are matched whole so a multi-character second
				// value is not cut down to its last character.
				preg_match(
					'/([0-9]+(?:\.[0-9]+)?)[^0-9]+([0-9]+(?:\.[0-9]+)?)/',
					$span["attributes"]["aria-label"],
					$rating
				);
				
				if(isset($rating[2])){
					
					$web["table"]["Rating"] =
						$rating[1] . "/" . $rating[2];
				}
				
				// counts "<number> <word>" spans seen so far; the plain
				// spans that follow are read positionally as price,
				// platform and medium
				$has_seen_reviews = 0;
				
				foreach($spans_unfiltered as $span_unfiltered){
					
					$text =
						$this->fuckhtml
						->getTextContent(
							$span_unfiltered
						);
					
					if(
						preg_match(
							'/([0-9,.]+) +([A-Za-z]+)$/',
							$text,
							$votes
						)
					){
						
						$has_seen_reviews++;
						$web["table"][ucfirst($votes[2])] = $votes[1];
						continue;
					}
					
					if(
						$text == "&nbsp;&nbsp;&nbsp;" ||
						$text == ""
					){
						
						break;
					}
					
					switch($has_seen_reviews){
						
						case 1:
							// scrape price
							$web["table"]["Price"] = $text;
							$has_seen_reviews++;
							break;
						
						case 2:
							// scrape platform
							$web["table"]["Platform"] = $text;
							$has_seen_reviews++;
							break;
						
						case 3:
							// Scrape type
							$web["table"]["Medium"] = $text;
							break;
					}
				}
				
				// category handled, move on to the next one
				continue 2;
			}
			
			// check if its an answer header
			$answer_header =
				$this->fuckhtml
				->getElementsByClassName(
					$this->getstyle(
						[
							"overflow" => "hidden",
							"text-overflow" => "ellipsis"
						]
					),
					"span"
				);
			
			if(count($answer_header) !== 0){
				
				// Answer headers are never used as the description. The
				// previous code also stripped the first link from the
				// loop-local copy of $cat before continuing; that write
				// was never read and indexed $link[0] unguarded, so it
				// has been dropped.
				continue;
			}
			
			// we probed everything, assume this is the description
			// if we didn't find one cleanly previously
			if($web["description"] === null){
				
				$web["description"] =
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$cat
						)
					);
			}
		}
		
		// check if description contains date
		// NOTE(review): the separator literal was empty in the reviewed
		// source, and explode() throws a ValueError for an empty separator
		// on PHP 8. An em dash is assumed here (presumably the text reads
		// "<date> — <description>") -- confirm against live markup.
		if($web["description"] !== null){
			
			$description = explode("—", $web["description"], 2);
			
			if(
				count($description) === 2 &&
				strlen($description[0]) <= 20
			){
				
				$date = strtotime($description[0]);
				
				if($date !== false){
					
					$web["date"] = $date;
					$web["description"] = ltrim($description[1]);
				}
			}
		}
		
		// fetch youtube thumbnail
		$thumbnail =
			$this->fuckhtml
			->getElementsByClassName(
				$this->getstyle(
					[
						"border-radius" => "8px",
						"height" => "fit-content",
						"justify-content" => "center",
						"margin-right" => "20px",
						"margin-top" => "4px",
						"position" => "relative",
						"width" => "fit-content"
					]
				),
				"div"
			);
		
		if(count($thumbnail) !== 0){
			
			// load thumbnail container
			$this->fuckhtml->load($thumbnail[0]);
			
			$image =
				$this->fuckhtml
				->getElementsByTagName(
					"img"
				);
			
			if(
				count($image) !== 0 &&
				isset($image[0]["attributes"]["id"])
			){
				
				$web["thumb"] = [
					"url" =>
						$this->unshit_thumb(
							$this->getdimg(
								$image[0]["attributes"]["id"]
							)
						),
					"ratio" => "16:9"
				];
			}
			
			// reset
			$this->fuckhtml->load($result);
		}
		
		$out["web"][] = $web;
	}
	
	// reset
	$this->fuckhtml->load($result_div);
	
	//
	// craft $npt token
	//
	if(
		$last_page === false &&
		count($out["web"]) !== 0
	){
		
		// advance the offset by 20 results for the next page
		if(!isset($params["start"])){
			
			$params["start"] = 20;
		}else{
			
			$params["start"] += 20;
		}
		
		$out["npt"] =
			$this->backend
			->store(
				json_encode($params),
				$pagetype,
				$proxy
			);
	}
	
	return $out;
}
private function scrape_dimg($html){ private function scrape_dimg($html){
// get images loaded through javascript // get images loaded through javascript
@ -1863,6 +2554,8 @@ class google{
[$params, $proxy] = $this->backend->get($get["npt"], "video"); [$params, $proxy] = $this->backend->get($get["npt"], "video");
$params = json_decode($params, true); $params = json_decode($params, true);
$search = $params["q"];
}else{ }else{
$search = $get["s"]; $search = $get["s"];
$country = $get["country"]; $country = $get["country"];
@ -1876,9 +2569,9 @@ class google{
$params = [ $params = [
"q" => $search, "q" => $search,
"udm" => "7", "tbm" => "vid",
"hl" => "en", "hl" => "en",
"num" => 20 "num" => "20"
]; ];
// country // country
@ -1944,35 +2637,12 @@ class google{
throw new Exception("Failed to get HTML"); throw new Exception("Failed to get HTML");
} }
if(!isset($params["start"])){ //$html = file_get_contents("scraper/google.html");
$params["start"] = 0;
}
$params["start"] += 20;
$this->fuckhtml->load($html);
//
// Parse web video page
//
$this->detect_sorry();
// parse all <style> tags
$this->parsestyles();
// get javascript images
$this->scrape_dimg($html);
$this->scrape_imagearr($html);
$response = $this->parsepage($html, "videos", $search, $proxy, $params);
$out = [ $out = [
"status" => "ok", "status" => "ok",
"npt" => "npt" => $response["npt"],
$this->backend->store(
json_encode($params),
"videos",
$proxy
),
"video" => [], "video" => [],
"author" => [], "author" => [],
"livestream" => [], "livestream" => [],
@ -1980,192 +2650,21 @@ class google{
"reel" => [] "reel" => []
]; ];
$search_div = foreach($response["web"] as $result){
$this->fuckhtml
->getElementById(
"center_col"
);
if($search_div === false){
throw new Exception("Failed to grep search div");
}
$this->fuckhtml->load($search_div);
$results =
$this->fuckhtml
->getElementsByClassName(
$this->getstyle([
"margin" => "0px 0px 30px"
]),
"div"
);
foreach($results as $result){
$this->fuckhtml->load($result);
$url =
$this->fuckhtml
->getElementsByTagName(
"a"
);
if(count($url) === 0){
// no url, weird, continue
continue;
}
$title =
$this->fuckhtml
->getElementsByTagName(
"h3"
);
if(count($title) === 0){
// no title, weird, continue
continue;
}
// get description
$description =
$this->fuckhtml
->getElementsByClassName(
$this->getstyle([
"-webkit-box-orient" => "vertical",
"display" => "-webkit-box",
"-webkit-line-clamp" => "2",
"overflow" => "hidden",
"word-break" => "break-word"
]),
"div"
);
if(count($description) === 0){
$description = null;
}else{
$description =
html_entity_decode(
$this->titledots(
$this->fuckhtml
->getTextContent(
$description[0]
)
)
);
}
// get author + date posted
$metadiv =
$this->fuckhtml
->getElementsByClassName(
$this->getstyle([
"margin-top" => "12px"
]),
"div"
);
$author = null;
$date = null;
if(count($metadiv) !== 0){
$metadiv =
explode(
"·",
$this->fuckhtml
->getTextContent(
$metadiv[0]
)
);
if(count($metadiv) === 3){
$author = trim($metadiv[1]);
$date = strtotime(trim($metadiv[2]));
}elseif(count($metadiv) === 2){
$author = trim($metadiv[0]);
$date = strtotime(trim($metadiv[1]));
}
}
$thumb = [
"url" => null,
"ratio" => null
];
$image =
$this->fuckhtml
->getElementsByTagName(
"img"
);
$duration = null;
if(
count($image) !== 0 &&
isset($image[0]["attributes"]["id"])
){
$thumb = [
"url" => $this->getdimg($image[0]["attributes"]["id"]),
"ratio" => "16:9"
];
// get duration
$duration =
$this->fuckhtml
->getElementsByClassName(
$this->getstyle([
"background-color" => "rgba(0,0,0,0.6)",
"color" => "#fff",
"fill" => "#fff"
])
);
if(count($duration) !== 0){
$duration =
$this->hms2int(
$this->fuckhtml
->getTextContent(
$duration[0]
));
}else{
$duration = null;
}
}
$out["video"][] = [ $out["video"][] = [
"title" => "title" => $result["title"],
$this->titledots( "description" => $result["description"],
$this->fuckhtml
->getTextContent(
$title[0]
)
),
"description" => $description,
"author" => [ "author" => [
"name" => $author, "name" => isset($result["table"]["Author"]) ? $result["table"]["Author"] : null,
"url" => null, "url" => null,
"avatar" => null "avatar" => null
], ],
"date" => $date, "date" => $result["date"],
"duration" => $duration, "duration" => isset($result["table"]["Duration"]) ? $this->hms2int($result["table"]["Duration"]) : null,
"views" => null, "views" => null,
"thumb" => $thumb, "thumb" => $result["thumb"],
"url" => "url" => $result["url"]
$this->fuckhtml
->getTextContent(
$url[0]["attributes"]["href"]
)
]; ];
} }

View File

@ -410,7 +410,10 @@ class qwant{
"thumb" => "thumb" =>
$answer["data"]["result"]["thumbnail"]["landscape"] == null ? $answer["data"]["result"]["thumbnail"]["landscape"] == null ?
null : null :
$this->unshitimage($answer["data"]["result"]["thumbnail"]["landscape"]), $this->unshitimage(
$answer["data"]["result"]["thumbnail"]["landscape"],
false
),
"table" => [], "table" => [],
"sublink" => [] "sublink" => []
]; ];
@ -767,7 +770,7 @@ class qwant{
}else{ }else{
$thumb = [ $thumb = [
"url" => $this->unshitimage($video["thumbnail"]), "url" => $this->unshitimage($video["thumbnail"], false),
"ratio" => "16:9" "ratio" => "16:9"
]; ];
} }
@ -867,7 +870,7 @@ class qwant{
}else{ }else{
$thumb = [ $thumb = [
"url" => $this->unshitimage($news["media"][0]["pict_big"]["url"]), "url" => $this->unshitimage($news["media"][0]["pict_big"]["url"], false),
"ratio" => "16:9" "ratio" => "16:9"
]; ];
} }
@ -917,77 +920,18 @@ class qwant{
return trim($text, ". "); return trim($text, ". ");
} }
private function unshitimage($url){ private function unshitimage($url, $is_bing = true){
// https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0 // https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0
// https://s2.qwant.com/thumbr/474x289/7/f/412d13b3fe3a03eb2b89633c8e88b609b7d0b93cdd9a5e52db3c663e41e65e/th.jpg?u=https%3A%2F%2Ftse.mm.bing.net%2Fth%3Fid%3DOIP.9Tm_Eo6m7V7ltN19mxduDgHaEh%26pid%3DApi&q=0&b=1&p=0&a=0 parse_str(parse_url($url)["query"], $parts);
$image = parse_url($url); if($is_bing){
$parse = parse_url($parts["u"]);
if( parse_str($parse["query"], $parts);
!isset($image["host"]) ||
!isset($image["query"])
){
// cant do anything return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]);
return $url;
} }
$id = null; return $parts["u"];
if(
preg_match(
'/s[0-9]+\.qwant\.com$/',
$image["host"]
)
){
parse_str($image["query"], $str);
// we're being served a proxy URL
if(isset($str["u"])){
$bing_url = $str["u"];
}else{
// give up
return $url;
}
}
// parse bing URL
$id = null;
$image = parse_url($bing_url);
if(isset($image["query"])){
parse_str($image["query"], $str);
if(isset($str["id"])){
$id = $str["id"];
}
}
if($id === null){
$id = explode("/th/id/", $image["path"], 2);
if(count($id) !== 2){
// malformed
return $url;
}
$id = $id[1];
}
if(is_array($id)){
// fuck off, let proxy.php deal with it
return $url;
}
return "https://" . $image["host"] . "/th?id=" . rawurlencode($id);
} }
} }

View File

@ -37,7 +37,7 @@ class yandex{
"Accept-Encoding: gzip", "Accept-Encoding: gzip",
"Accept-Language: en-US,en;q=0.5", "Accept-Language: en-US,en;q=0.5",
"DNT: 1", "DNT: 1",
"Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw, "Cookie: yp=1716337604.sp.family%3A{$nsfw}#1685406411.szm.1:1920x1080:1920x999",
"Referer: https://yandex.com/images/search", "Referer: https://yandex.com/images/search",
"Connection: keep-alive", "Connection: keep-alive",
"Upgrade-Insecure-Requests: 1", "Upgrade-Insecure-Requests: 1",
@ -668,6 +668,7 @@ class yandex{
foreach($json["blocks"] as $block){ foreach($json["blocks"] as $block){
$html .= $block["html"]; $html .= $block["html"];
// get next page // get next page
if( if(
isset($block["params"]["nextPageUrl"]) && isset($block["params"]["nextPageUrl"]) &&

View File

@ -231,10 +231,6 @@ $settings = [
"value" => "pinterest", "value" => "pinterest",
"text" => "Pinterest" "text" => "Pinterest"
], ],
[
"value" => "flickr",
"text" => "Flickr"
],
[ [
"value" => "fivehpx", "value" => "fivehpx",
"text" => "500px" "text" => "500px"

View File

@ -1,45 +1,48 @@
:root{ :root{
--1d2021:#1d2021; --1d2021: #1d2021;
--282828:#282828; --282828: #282828;
--3c3836:#3c3836; --3c3836: #3c3836;
--504945:#504945; --504945: #504945;
/* font */ /* font */
--928374:#928374; --928374: #928374;
--a89984:#c9c5bf; --a89984: #c9c5bf;
--bdae93:#bdae93; --bdae93: #bdae93;
--8ec07c:#8ec07c; --8ec07c: #8ec07c;
--ebdbb2:#ebdbb2; --ebdbb2: #ebdbb2;
} }
body{ body{
padding:15px 4% 40px; padding:15px 4% 40px;
margin:unset; margin:unset;
} }
h1, h2, h3, h4, h5, h6{ h1,h2,h3,h4,h5,h6{
padding:0; padding:0;
margin:0 0 7px 0; margin:0 0 7px 0;
line-height:initial; line-height:initial;
color:var(--bdae93); color:var(--bdae93);
} }
h3, h4, h5, h6{ h3,h4,h5,h6{
margin-bottom:14px; margin-bottom:14px;
} }
/* /*
Web styles Web styles
*/ */
.searchbox input[type="submit"]{ .searchbox input[type="submit"]{
float:right; float:right;
cursor:pointer; cursor:pointer;
padding:0 10px; padding:0 10px;
border-left:1px solid var(--504945); border-left: 1px solid var(--504945);
background:#723c0b; background: #723c0b;
} }
.searchbox input{ .searchbox input{
all:unset; all:unset;
line-height:36px; line-height:36px;
@ -94,6 +97,7 @@ h3, h4, h5, h6{
display:inline-block; display:inline-block;
} }
.tabs .tab.selected{ .tabs .tab.selected{
border-bottom:2px solid #fc92a5; border-bottom:2px solid #fc92a5;
} }
@ -103,7 +107,7 @@ h3, h4, h5, h6{
padding-bottom:12px; padding-bottom:12px;
padding-top:7px; padding-top:7px;
margin-bottom:7px; margin-bottom:7px;
background-color:#232525; background-color:#232525
} }
.filters .filter{ .filters .filter{
@ -166,6 +170,7 @@ h3, h4, h5, h6{
font-size:12px; font-size:12px;
} }
.web .hover{ .web .hover{
display:block; display:block;
text-decoration:none; text-decoration:none;
@ -189,13 +194,16 @@ h3, h4, h5, h6{
color:#9760b1 !important; color:#9760b1 !important;
} }
.web .text-result .greentext{ .web .text-result .greentext{
font-size:14px; font-size:14px;
color:var(--bdae93); color:var(--bdae93);
} }
/* favicon */ /* favicon */
.favicon-dropdown a{ .favicon-dropdown a{
text-decoration:none; text-decoration:none;
color:#d3d0c1; color:#d3d0c1;
@ -204,33 +212,39 @@ h3, h4, h5, h6{
font-size:13px; font-size:13px;
} }
.web .favicon img, .favicon-dropdown img{
.web .favicon img,
.favicon-dropdown img{
margin:3px 7px 0 0; margin:3px 7px 0 0;
height:16px; height:16px;
font-size:12px; font-size:12px;
line-height:16px; line-height:16px;;
display:block; display:block;
text-align:left; text-align:left;
} }
.web .sublinks{ .web .sublinks{
padding:17px 10px; padding:17px 10px;
font-size:15px; font-size:15px;
color:var(--#928374); color:var(--#928374);
} }
.web .text-result .sublinks:last-child{ .web .text-result .sublinks:last-child{
padding-bottom:0; padding-bottom:0;
} }
/* Wikipedia head */ /* Wikipedia head */
.wiki-head{ .wiki-head{
padding:5px; padding:5px;
background-color:#322f2b; background-color: #322f2b
} }
/* /*
Images tab Images tab
*/ */
#images{ #images{
@ -244,14 +258,17 @@ h3, h4, h5, h6{
float:left; float:left;
} }
#images .image .title{ #images .image .title{
white-space:nowrap; white-space:nowrap;
overflow:hidden; overflow:hidden;
margin-bottom:7px; margin-bottom:7px;
font-weight:bold; font-weight:bold;
color:var(--bdae93); color:var(--bdae93);
} }
#popup-status{ #popup-status{
display:none; display:none;
position:fixed; position:fixed;
@ -264,59 +281,43 @@ h3, h4, h5, h6{
} }
/* /*
Settings page Settings page
*/ */
.web .settings-submit a{ .web .settings-submit a{
margin-right:17px; margin-right:17px;
color:#bdae93; color:#bdae93;
} }
/*
Responsive image
*/
@media only screen and (max-width:1454px){
#images .image-wrapper{
width:25%;
}
}
@media only screen and (max-width:1161px){
#images .image-wrapper{
width:25%;
}
}
@media only screen and (max-width:750px){
#images .image-wrapper{
width:50%;
}
}
@media only screen and (max-width:450px){
#images .image-wrapper{
width:100%;
}
}
/* /*
Responsive design Responsive image
*/ */
@media only screen and (max-width: 1454px){ #images .image-wrapper{ width:25%; } }
@media only screen and (max-width: 1161px){ #images .image-wrapper{ width:25%; } }
@media only screen and (max-width: 750px){ #images .image-wrapper{ width:50%; } }
@media only screen and (max-width: 450px){ #images .image-wrapper{ width:100%; } }
@media only screen and (max-width:1550px){
.web .left, /*
Responsive design
*/
@media only screen and (max-width: 1550px){
.web .left,
.searchbox{ .searchbox{
width:60%; width:60%;
} }
} }
@media only screen and (max-width:1100px){ @media only screen and (max-width: 1000px){
.web .left,
.searchbox{
width:100%;
}
} }
.type{ .type{
color:var(--bdae93); color:var(--bdae93);
}
} }