1580 lines
34 KiB
PHP
1580 lines
34 KiB
PHP
<?php
|
|
|
|
class startpage{
|
|
|
|
public function __construct(){
|
|
|
|
include "lib/backend.php";
|
|
$this->backend = new backend("startpage");
|
|
|
|
include "lib/fuckhtml.php";
|
|
$this->fuckhtml = new fuckhtml();
|
|
}
|
|
|
|
public function getfilters($page){
|
|
|
|
switch($page){
|
|
case "web":
|
|
return [
|
|
"country" => [
|
|
"display" => "Country",
|
|
"option" => [
|
|
"any" => "All Regions",
|
|
"es_AR" => "Argentina",
|
|
"en_AU" => "Australia",
|
|
"de_AT" => "Austria",
|
|
"ru_BY" => "Belarus",
|
|
"fr_BE" => "Belgium (FR)",
|
|
"nl_BE" => "Belgium (NL)",
|
|
"bg_BG" => "Bulgaria",
|
|
"en_CA" => "Canada (EN)",
|
|
"fr_CA" => "Canada (FR)",
|
|
"es_CL" => "Chile",
|
|
"es_CO" => "Colombia",
|
|
"cs_CZ" => "Czech Republic",
|
|
"da_DK" => "Denmark",
|
|
"ar_EG" => "Egypt",
|
|
"et_EE" => "Estonia",
|
|
"fi_FI" => "Finland",
|
|
"fr_FR" => "France",
|
|
"de_DE" => "Germany",
|
|
"el_GR" => "Greece",
|
|
"hu_HU" => "Hungary",
|
|
"hi_IN" => "India (HI)",
|
|
"en_IN" => "India (EN)",
|
|
"id_ID" => "Indonesia (ID)",
|
|
"en_ID" => "Indonesia (EN)",
|
|
"en_IE" => "Ireland",
|
|
"it_IT" => "Italy",
|
|
"ja_JP" => "Japan",
|
|
"ko_KR" => "Korea",
|
|
"ms_MY" => "Malaysia (MS)",
|
|
"en_MY" => "Malaysia (EN)",
|
|
"es_MX" => "Mexico",
|
|
"nl_NL" => "Netherlands",
|
|
"en_NZ" => "New Zealand",
|
|
"no_NO" => "Norway",
|
|
"es_PE" => "Peru",
|
|
"fil_PH" => "Philippines (FIL)",
|
|
"en_PH" => "Philippines (EN)",
|
|
"pl_PL" => "Poland",
|
|
"pt_PT" => "Portugal",
|
|
"ro_RO" => "Romania",
|
|
"ru_RU" => "Russia",
|
|
"ms_SG" => "Singapore (MS)",
|
|
"en_SG" => "Singapore (EN)",
|
|
"es_ES" => "Spain (ES)",
|
|
"ca_ES" => "Spain (CA)",
|
|
"sv_SE" => "Sweden",
|
|
"de_CH" => "Switzerland (DE)",
|
|
"fr_CH" => "Switzerland (FR)",
|
|
"it_CH" => "Switzerland (IT)",
|
|
"tr_TR" => "Turkey",
|
|
"uk_UA" => "Ukraine",
|
|
"en_US" => "US (EN)",
|
|
"es_US" => "US (ES)",
|
|
"es_UY" => "Uruguay",
|
|
"es_VE" => "Venezuela",
|
|
"vi_VN" => "Vietnam (VI)",
|
|
"en_VN" => "Vietnam (EN)",
|
|
"en_ZA" => "South Africa"
|
|
]
|
|
],
|
|
"nsfw" => [ // qadf
|
|
"display" => "NSFW",
|
|
"option" => [
|
|
"yes" => "Yes", // qadf=none
|
|
"no" => "No" // qadf=heavy
|
|
]
|
|
],
|
|
"time" => [ // with_date
|
|
"display" => "Time posted",
|
|
"option" => [
|
|
"any" => "Any time",
|
|
"d" => "Past 24 hours",
|
|
"w" => "Past week",
|
|
"m" => "Past month",
|
|
"y" => "Past year",
|
|
]
|
|
],
|
|
"extendedsearch" => [
|
|
// undefined display, so it wont show in frontend
|
|
"option" => [
|
|
"yes" => "Yes",
|
|
"no" => "No"
|
|
]
|
|
]
|
|
];
|
|
break;
|
|
|
|
case "images":
|
|
return [
|
|
"nsfw" => [ // qadf
|
|
"display" => "NSFW",
|
|
"option" => [
|
|
"yes" => "Yes", // qadf=none
|
|
"no" => "No" // qadf=heavy
|
|
]
|
|
],
|
|
"size" => [ // flimgsize
|
|
"display" => "Size",
|
|
"option" => [
|
|
"any" => "Any size",
|
|
"Small" => "Small",
|
|
"Medium" => "Medium",
|
|
"Large" => "Large",
|
|
"Wallpaper" => "Wallpaper",
|
|
// from here, image-size-select, var prefix = isz:lt,islt:
|
|
"qsvgs" => "Larger than 400x300",
|
|
"vga" => "Larger than 640x480",
|
|
"svga" => "Larger than 800x600",
|
|
"xga" => "Larger than 1024x768",
|
|
"qsvgs" => "Larger than 400x300",
|
|
"2mp" => "Larger than 2 MP (1600x1200)",
|
|
"4mp" => "Larger than 4 MP (2272x1704)",
|
|
"6mp" => "Larger than 6 MP (2816x2112)",
|
|
"8mp" => "Larger than 8 MP (3264x2448)",
|
|
"10mp" => "Larger than 10 MP (3648x2736)",
|
|
"12mp" => "Larger than 12 MP (4096x3072)",
|
|
"15mp" => "Larger than 15 MP (4480x3360)",
|
|
"20mp" => "Larger than 20 MP (5120x3840)",
|
|
"40mp" => "Larger than 40 MP (7216x5412)",
|
|
"70mp" => "Larger than 70 MP (9600x7200)"
|
|
]
|
|
],
|
|
"color" => [ // flimgcolor
|
|
"display" => "Color",
|
|
"option" => [
|
|
"any" => "Any color",
|
|
// from here, var prefix = ic:
|
|
"color" => "Color only",
|
|
"bnw" => "Black & white", // set to "gray"
|
|
// from here, var prefix = ic:specific,isc:
|
|
"red" => "Red",
|
|
"orange" => "Orange",
|
|
"yellow" => "Yellow",
|
|
"green" => "Green",
|
|
"teal" => "Teal",
|
|
"blue" => "Blue",
|
|
"purple" => "Purple",
|
|
"pink" => "Pink",
|
|
"white" => "White",
|
|
"gray" => "Gray",
|
|
"black" => "Black",
|
|
"brown" => "Brown"
|
|
]
|
|
],
|
|
"type" => [ // flimgtype
|
|
"display" => "Type",
|
|
"option" => [
|
|
"any" => "Any type",
|
|
"AnimatedGif" => "Animated GIF",
|
|
"Clipart" => "Clip Art",
|
|
"Line" => "Line Drawing",
|
|
"Photo" => "Photograph",
|
|
"Transparent" => "Transparent Background"
|
|
]
|
|
],
|
|
"license" => [ // flimglicense
|
|
"display" => "License",
|
|
"option" => [
|
|
"any" => "Any license",
|
|
"p" => "Public domain",
|
|
"s" => "Free to share",
|
|
"sc" => "Free to share commercially",
|
|
"m" => "Free to modify",
|
|
"mc" => "Free to modify commercially"
|
|
]
|
|
]
|
|
];
|
|
break;
|
|
|
|
case "videos":
|
|
return [
|
|
"nsfw" => [ // qadf
|
|
"display" => "NSFW",
|
|
"option" => [
|
|
"yes" => "Yes", // qadf=none
|
|
"no" => "No" // qadf=heavy
|
|
]
|
|
],
|
|
"sort" => [
|
|
"display" => "Sort by",
|
|
"option" => [
|
|
"relevance" => "Most relevant",
|
|
"popular" => "Most popular",
|
|
"recent" => "Most recent"
|
|
]
|
|
],
|
|
"duration" => [ // with_duration
|
|
"display" => "Duration",
|
|
"option" => [
|
|
"any" => "Any duration",
|
|
"short" => "Short",
|
|
"medium" => "Medium",
|
|
"long" => "Long"
|
|
]
|
|
]
|
|
];
|
|
break;
|
|
|
|
case "news":
|
|
return [
|
|
"nsfw" => [ // qadf
|
|
"display" => "NSFW",
|
|
"option" => [
|
|
"yes" => "Yes", // qadf=none
|
|
"no" => "No" // qadf=heavy
|
|
]
|
|
],
|
|
"time" => [ // with_date
|
|
"display" => "Time posted",
|
|
"option" => [
|
|
"any" => "Any time",
|
|
"d" => "Past 24 hours",
|
|
"w" => "Past week",
|
|
"m" => "Past month"
|
|
]
|
|
]
|
|
];
|
|
break;
|
|
|
|
//preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEazerbaijaniN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:21:58 GMT; Secure; Path=/
|
|
//preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:22:52 GMT; Secure; Path=/
|
|
}
|
|
}
|
|
|
|
private function get($proxy, $url, $get = [], $post = false, $is_xhr = false){
|
|
|
|
$curlproc = curl_init();
|
|
|
|
if($post === true){
|
|
|
|
curl_setopt($curlproc, CURLOPT_POST, true);
|
|
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
|
|
|
|
}elseif($get !== []){
|
|
|
|
$get = http_build_query($get);
|
|
$url .= "?" . $get;
|
|
}
|
|
|
|
curl_setopt($curlproc, CURLOPT_URL, $url);
|
|
|
|
// http2 bypass
|
|
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
|
|
|
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
|
|
|
if($is_xhr === true){
|
|
|
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
["User-Agent: " . config::USER_AGENT,
|
|
"Accept: application/json",
|
|
"Accept-Language: en-US,en;q=0.5",
|
|
"Accept-Encoding: gzip",
|
|
"Referer: https://www.startpage.com/",
|
|
"Content-Type: application/json",
|
|
"Content-Length: " . strlen($get),
|
|
"Origin: https://www.startpage.com/",
|
|
"DNT: 1",
|
|
"Connection: keep-alive",
|
|
"Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius",
|
|
"Sec-Fetch-Dest: empty",
|
|
"Sec-Fetch-Mode: cors",
|
|
"Sec-Fetch-Site: same-origin",
|
|
"TE: trailers"]
|
|
);
|
|
|
|
}elseif($post === true){
|
|
|
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
["User-Agent: " . config::USER_AGENT,
|
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
"Accept-Language: en-US,en;q=0.5",
|
|
"Accept-Encoding: gzip",
|
|
"Referer: https://www.startpage.com/",
|
|
"Content-Type: application/x-www-form-urlencoded",
|
|
"Content-Length: " . strlen($get),
|
|
"DNT: 1",
|
|
"Connection: keep-alive",
|
|
"Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius",
|
|
"Upgrade-Insecure-Requests: 1",
|
|
"Sec-Fetch-Dest: document",
|
|
"Sec-Fetch-Mode: navigate",
|
|
"Sec-Fetch-Site: none",
|
|
"Sec-Fetch-User: ?1",
|
|
"Priority: u=0, i",
|
|
"TE: trailers"]
|
|
);
|
|
}else{
|
|
|
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
["User-Agent: " . config::USER_AGENT,
|
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
"Accept-Language: en-US,en;q=0.5",
|
|
"Accept-Encoding: gzip",
|
|
"DNT: 1",
|
|
"Connection: keep-alive",
|
|
"Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius",
|
|
"Sec-Fetch-Dest: document",
|
|
"Sec-Fetch-Mode: navigate",
|
|
"Sec-Fetch-Site: none",
|
|
"Sec-Fetch-User: ?1",
|
|
"Priority: u=0, i",
|
|
"TE: trailers"]
|
|
);
|
|
}
|
|
|
|
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
|
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
|
|
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
|
|
|
|
$this->backend->assign_proxy($curlproc, $proxy);
|
|
|
|
$data = curl_exec($curlproc);
|
|
|
|
if(curl_errno($curlproc)){
|
|
|
|
throw new Exception(curl_error($curlproc));
|
|
}
|
|
|
|
curl_close($curlproc);
|
|
return $data;
|
|
}
|
|
|
|
public function web($get){
|
|
|
|
if($get["npt"]){
|
|
|
|
[$post, $proxy] = $this->backend->get($get["npt"], "web");
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$post,
|
|
true
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
|
|
$get_instant_answer = false;
|
|
|
|
}else{
|
|
|
|
$proxy = $this->backend->get_ip();
|
|
|
|
$params = [
|
|
"query" => $get["s"],
|
|
"cat" => "web",
|
|
"pl" => "opensearch"
|
|
];
|
|
|
|
if($get["nsfw"] == "no"){
|
|
|
|
$params["qadf"] = "heavy";
|
|
$get_instant_answer = false;
|
|
}else{
|
|
|
|
$get_instant_answer = true;
|
|
}
|
|
|
|
if($get["country"] !== "any"){
|
|
|
|
$params["qsr"] = $get["country"];
|
|
}
|
|
|
|
if($get["time"] !== "any"){
|
|
|
|
$params["with_date"] = $get["time"];
|
|
}
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$params
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
|
|
//$html = file_get_contents("scraper/startpage.html");
|
|
}
|
|
|
|
$this->detect_captcha($html);
|
|
|
|
if(
|
|
preg_match(
|
|
'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),?$/m',
|
|
$html,
|
|
$matches
|
|
) === 0
|
|
){
|
|
|
|
throw new Exception("Failed to grep JSON object");
|
|
}
|
|
|
|
$json = json_decode($matches[1], true);
|
|
|
|
if($json === null){
|
|
|
|
throw new Exception("Failed to decode JSON");
|
|
}
|
|
|
|
//print_r($json);
|
|
|
|
$out = [
|
|
"status" => "ok",
|
|
"spelling" => [
|
|
"type" => "no_correction",
|
|
"using" => null,
|
|
"correction" => null
|
|
],
|
|
"npt" => null,
|
|
"answer" => [],
|
|
"web" => [],
|
|
"image" => [],
|
|
"video" => [],
|
|
"news" => [],
|
|
"related" => []
|
|
];
|
|
|
|
// get npt
|
|
$out["npt"] = $this->parse_npt($json, "web", $proxy);
|
|
|
|
foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
|
|
|
|
if(!isset($category["display_type"])){
|
|
|
|
continue;
|
|
}
|
|
|
|
switch($category["display_type"]){
|
|
|
|
case "web-google":
|
|
foreach($category["results"] as $result){
|
|
|
|
$sublinks = [];
|
|
|
|
foreach($result["siteLinks"] as $sublink){
|
|
|
|
$sublinks[] = [
|
|
"title" => $sublink["title"],
|
|
"description" => null,
|
|
"url" => $sublink["clickUrl"]
|
|
];
|
|
}
|
|
|
|
$description =
|
|
explode(
|
|
"...",
|
|
$this->titledots(
|
|
html_entity_decode(
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$result["description"]
|
|
)
|
|
)
|
|
),
|
|
2
|
|
);
|
|
|
|
$date = strtotime(trim($description[0]));
|
|
|
|
if(
|
|
$date === false ||
|
|
count($description) !== 2 ||
|
|
strlen($description[0]) > 14
|
|
){
|
|
|
|
// no date found
|
|
$description =
|
|
implode(
|
|
" ... ",
|
|
$description
|
|
);
|
|
|
|
$date = null;
|
|
}else{
|
|
|
|
// date found
|
|
$description = ltrim($description[1]);
|
|
}
|
|
|
|
$out["web"][] = [
|
|
"title" =>
|
|
$this->titledots(
|
|
html_entity_decode(
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$result["title"]
|
|
)
|
|
)
|
|
),
|
|
"description" => $description,
|
|
"url" => $result["clickUrl"],
|
|
"date" => $date,
|
|
"type" => "web",
|
|
"thumb" => [
|
|
"url" => null,
|
|
"ratio" => null
|
|
],
|
|
"sublink" => $sublinks,
|
|
"table" => []
|
|
];
|
|
}
|
|
break;
|
|
|
|
case "images-qi-top":
|
|
foreach($category["results"] as $result){
|
|
|
|
$out["image"][] = [
|
|
"title" =>
|
|
$this->titledots(
|
|
html_entity_decode(
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$result["title"]
|
|
)
|
|
)
|
|
),
|
|
"source" => [
|
|
[
|
|
"url" => $result["rawImageUrl"],
|
|
"width" => (int)$result["width"],
|
|
"height" => (int)$result["height"]
|
|
],
|
|
[
|
|
"url" => $this->unshitimage($result["mdThumbnailUrl"]),
|
|
"width" => (int)$result["mdThumbnailWidth"],
|
|
"height" => (int)$result["mdThumbnailHeight"]
|
|
]
|
|
],
|
|
"url" =>
|
|
$result["altClickUrl"]
|
|
];
|
|
}
|
|
break;
|
|
|
|
case "spellsuggest-google":
|
|
$out["spelling"] =
|
|
[
|
|
"type" => "including",
|
|
"using" => $json["render"]["query"],
|
|
"correction" => $category["results"][0]["query"]
|
|
];
|
|
break;
|
|
|
|
case "dictionary-qi":
|
|
foreach($category["results"] as $result){
|
|
|
|
$answer = [
|
|
"title" => $result["word"],
|
|
"description" => [],
|
|
"url" => null,
|
|
"thumb" => null,
|
|
"table" => [],
|
|
"sublink" => []
|
|
];
|
|
|
|
foreach($result["lexical_categories"] as $lexic_type => $definitions){
|
|
|
|
$answer["description"][] = [
|
|
"type" => "title",
|
|
"value" => $lexic_type
|
|
];
|
|
|
|
$i = 0;
|
|
|
|
foreach($definitions as $definition){
|
|
|
|
$text_definition = trim($definition["definition"]);
|
|
$text_example = trim($definition["example"]);
|
|
$text_synonyms = implode(", ", $definition["synonyms"]);
|
|
|
|
if($text_definition != ""){
|
|
|
|
$i++;
|
|
|
|
$c = count($answer["description"]) - 1;
|
|
if(
|
|
$c !== 0 &&
|
|
$answer["description"][$c]["type"] == "text"
|
|
){
|
|
|
|
$answer["description"][$c]["value"] .=
|
|
"\n\n" . $i . ". " . $text_definition;
|
|
|
|
}else{
|
|
|
|
$answer["description"][] = [
|
|
"type" => "text",
|
|
"value" => $i . ". " . $text_definition
|
|
];
|
|
}
|
|
}
|
|
|
|
if($text_example != ""){
|
|
|
|
$answer["description"][] = [
|
|
"type" => "quote",
|
|
"value" => $text_example
|
|
];
|
|
}
|
|
|
|
if($text_synonyms != ""){
|
|
|
|
$answer["description"][] = [
|
|
"type" => "text",
|
|
"value" => "Synonyms: " . $text_synonyms
|
|
];
|
|
}
|
|
}
|
|
}
|
|
|
|
$out["answer"][] = $answer;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// parse instant answers
|
|
if(
|
|
$get["extendedsearch"] == "yes" &&
|
|
$get_instant_answer === true
|
|
){
|
|
|
|
// https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
|
|
try{
|
|
$post = [
|
|
"se" => "n0vze2y9dqwy",
|
|
"q" => $json["render"]["query"],
|
|
"results" => [], // populate
|
|
"enableKnowledgePanel" => true,
|
|
"enableMediaThumbBar" => false,
|
|
"enableSearchSuggestions" => false,
|
|
"enableTripadvisorProperties" => [],
|
|
"enableTripadvisorPlaces" => [],
|
|
"enableTripadvisorPlacesForLocations" => [],
|
|
"enableWebProducts" => false,
|
|
"tripadvisorPartnerId" => null,
|
|
"tripadvisorMapColorMode" => "light",
|
|
"tripadvisorDisablesKnowledgePanel" => false,
|
|
"instantAnswers" => [
|
|
"smartAnswers",
|
|
"youtube",
|
|
"tripadvisor"
|
|
],
|
|
"iaType" => null,
|
|
"forceEnhancedKnowledgePanel" => false,
|
|
"shoppingOnly" => false,
|
|
"allowAdultProducts" => true,
|
|
"lang" => "en",
|
|
"browserLang" => "en-US",
|
|
"browserTimezone" => "America/New_York",
|
|
"market" => null,
|
|
"userLocation" => null,
|
|
"userDate" => date("Y-m-d"),
|
|
"userAgentType" => "unknown"
|
|
];
|
|
|
|
foreach($out["web"] as $result){
|
|
|
|
$post["results"][] = [
|
|
"url" => $result["url"],
|
|
"title" => $result["title"]
|
|
];
|
|
}
|
|
|
|
$post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
|
|
|
|
$additional_data =
|
|
$this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . $json["render"]["callback_sc"] . "&sr=1",
|
|
$post,
|
|
true,
|
|
true
|
|
);
|
|
|
|
$additional_data = json_decode($additional_data, true);
|
|
|
|
if($additional_data === null){
|
|
|
|
throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
|
|
}
|
|
|
|
if(!isset($additional_data["knowledgePanel"])){
|
|
|
|
throw new Exception("Response has missing data (knowledgePanel)");
|
|
}
|
|
|
|
$additional_data = $additional_data["knowledgePanel"];
|
|
|
|
$answer = [
|
|
"title" => $additional_data["meta"]["title"],
|
|
"description" => [
|
|
[
|
|
"type" => "quote",
|
|
"value" => $additional_data["meta"]["description"]
|
|
]
|
|
],
|
|
"url" => $additional_data["meta"]["origWikiUrl"],
|
|
"thumb" => $additional_data["meta"]["image"],
|
|
"table" => [],
|
|
"sublink" => []
|
|
];
|
|
|
|
// parse html for instant answer
|
|
$this->fuckhtml->load($additional_data["html"]);
|
|
|
|
$div =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"div"
|
|
);
|
|
|
|
// get description
|
|
$description =
|
|
$this->fuckhtml
|
|
->getElementsByClassName(
|
|
"sx-kp-short-extract sx-kp-short-extract-complete",
|
|
$div
|
|
);
|
|
|
|
if(count($description) !== 0){
|
|
|
|
$answer["description"][] = [
|
|
"type" => "text",
|
|
"value" =>
|
|
html_entity_decode(
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$description[0]
|
|
)
|
|
)
|
|
];
|
|
}
|
|
|
|
// get socials
|
|
$socials =
|
|
$this->fuckhtml
|
|
->getElementsByClassName(
|
|
"sx-wiki-social-link",
|
|
"a"
|
|
);
|
|
|
|
foreach($socials as $social){
|
|
|
|
$title =
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$social["attributes"]["title"]
|
|
);
|
|
|
|
$url =
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$social["attributes"]["href"]
|
|
);
|
|
|
|
switch($title){
|
|
|
|
case "Official Website":
|
|
$title = "Website";
|
|
break;
|
|
}
|
|
|
|
$answer["sublink"][$title] = $url;
|
|
}
|
|
|
|
// get videos
|
|
$videos =
|
|
$this->fuckhtml
|
|
->getElementsByClassName(
|
|
"sx-kp-video-grid-item",
|
|
$div
|
|
);
|
|
|
|
foreach($videos as $video){
|
|
|
|
$this->fuckhtml->load($video);
|
|
|
|
$as =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"a"
|
|
);
|
|
|
|
if(count($as) === 0){
|
|
|
|
// ?? invalid
|
|
continue;
|
|
}
|
|
|
|
$image =
|
|
$this->fuckhtml
|
|
->getElementsByAttributeName(
|
|
"data-sx-src",
|
|
"img"
|
|
);
|
|
|
|
if(count($image) !== 0){
|
|
|
|
$thumb = [
|
|
"ratio" => "16:9",
|
|
"url" =>
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$image[0]["attributes"]["data-sx-src"]
|
|
)
|
|
];
|
|
}else{
|
|
|
|
$thumb = [
|
|
"ratio" => null,
|
|
"url" => null
|
|
];
|
|
}
|
|
|
|
$out["video"][] = [
|
|
"title" =>
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$as[0]["attributes"]["title"]
|
|
),
|
|
"description" => null,
|
|
"date" => null,
|
|
"duration" => null,
|
|
"views" => null,
|
|
"thumb" => $thumb,
|
|
"url" =>
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$as[0]["attributes"]["href"]
|
|
)
|
|
];
|
|
}
|
|
|
|
// reset
|
|
$this->fuckhtml->load($additional_data["html"]);
|
|
|
|
// get table elements
|
|
$table =
|
|
$this->fuckhtml
|
|
->getElementsByClassName(
|
|
"sx-infobox",
|
|
"table"
|
|
);
|
|
|
|
if(count($table) !== 0){
|
|
|
|
$trs =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"tr"
|
|
);
|
|
|
|
foreach($trs as $tr){
|
|
|
|
$this->fuckhtml->load($tr);
|
|
|
|
// ok so startpage devs cant fucking code a table
|
|
// td = content
|
|
// th (AAAHH) = title
|
|
$tds =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"td"
|
|
);
|
|
|
|
$ths =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"th"
|
|
);
|
|
|
|
if(
|
|
count($ths) === 1 &&
|
|
count($tds) === 1
|
|
){
|
|
|
|
$title =
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$ths[0]
|
|
);
|
|
|
|
$description = [];
|
|
|
|
$this->fuckhtml->load($tds[0]);
|
|
|
|
$lis =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"li"
|
|
);
|
|
|
|
if(count($lis) !== 0){
|
|
|
|
foreach($lis as $li){
|
|
|
|
$description[] =
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$li
|
|
);
|
|
}
|
|
|
|
$description = implode(", ", $description);
|
|
}else{
|
|
|
|
$description =
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$tds[0]
|
|
);
|
|
}
|
|
|
|
$answer["table"][$title] = $description;
|
|
}
|
|
}
|
|
}
|
|
|
|
$out["answer"][] = $answer;
|
|
|
|
}catch(Exception $error){
|
|
|
|
// do nothing
|
|
//echo "error!";
|
|
}
|
|
}
|
|
|
|
return $out;
|
|
}
|
|
|
|
public function image($get){
|
|
|
|
if($get["npt"]){
|
|
|
|
[$post, $proxy] = $this->backend->get($get["npt"], "images");
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$post,
|
|
true
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
|
|
}else{
|
|
|
|
$search = $get["s"];
|
|
if(strlen($search) === 0){
|
|
|
|
throw new Exception("Search term is empty!");
|
|
}
|
|
|
|
try{
|
|
|
|
$proxy = $this->backend->get_ip();
|
|
|
|
$params = [
|
|
"query" => $get["s"],
|
|
"cat" => "images",
|
|
"pl" => "opensearch"
|
|
];
|
|
|
|
if($get["nsfw"] == "no"){
|
|
|
|
$params["qadf"] = "heavy";
|
|
}
|
|
|
|
if($get["size"] != "any"){
|
|
|
|
if(
|
|
$get["size"] == "Small" ||
|
|
$get["size"] == "Medium" ||
|
|
$get["size"] == "Large" ||
|
|
$get["size"] == "Wallpaper"
|
|
){
|
|
|
|
$params["flimgsize"] = $get["size"];
|
|
}else{
|
|
|
|
$params["image-size-select"] = "isz:lt,islt:" . $get["size"];
|
|
}
|
|
}
|
|
|
|
if($get["color"] != "any"){
|
|
|
|
if($get["color"] == "color"){
|
|
|
|
$params["flimgcolor"] = "ic:color";
|
|
}elseif($get["color"] == "bnw"){
|
|
|
|
$params["flimgcolor"] = "ic:gray";
|
|
}else{
|
|
|
|
$params["flimgcolor"] = "ic:specific,isc:" . $get["color"];
|
|
}
|
|
}
|
|
|
|
if($get["type"] != "any"){
|
|
|
|
$params["flimgtype"] = $get["type"];
|
|
}
|
|
|
|
if($get["license"] != "any"){
|
|
|
|
$params["flimglicense"] = $get["license"];
|
|
}
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$params
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
//$html = file_get_contents("scraper/startpage.html");
|
|
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
}
|
|
|
|
$this->detect_captcha($html);
|
|
|
|
$out = [
|
|
"status" => "ok",
|
|
"npt" => null,
|
|
"image" => []
|
|
];
|
|
|
|
if(
|
|
preg_match(
|
|
'/React\.createElement\(UIStartpage\.AppSerpImages, ?(.+)\),?$/m',
|
|
$html,
|
|
$matches
|
|
) === 0
|
|
){
|
|
|
|
throw new Exception("Failed to grep JSON object");
|
|
}
|
|
|
|
$json = json_decode($matches[1], true);
|
|
|
|
if($json === null){
|
|
|
|
throw new Exception("Failed to decode JSON object");
|
|
}
|
|
|
|
// get npt
|
|
$out["npt"] = $this->parse_npt($json, "images", $proxy);
|
|
|
|
// get images
|
|
foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
|
|
|
|
if($category["display_type"] != "images-bing"){
|
|
|
|
// ignore ads and !! suggestions !! @todo
|
|
continue;
|
|
}
|
|
|
|
foreach($category["results"] as $image){
|
|
|
|
$out["image"][] = [
|
|
"title" => $this->titledots($image["title"]),
|
|
"source" => [
|
|
[
|
|
"url" => $this->unshitimage($image["clickUrl"]),
|
|
"width" => (int)$image["width"],
|
|
"height" => (int)$image["height"]
|
|
],
|
|
[
|
|
"url" => $this->unshitimage($image["thumbnailUrl"]),
|
|
"width" => (int)$image["thumbnailWidth"],
|
|
"height" => (int)$image["thumbnailHeight"]
|
|
]
|
|
],
|
|
"url" => $image["altClickUrl"]
|
|
];
|
|
}
|
|
}
|
|
|
|
return $out;
|
|
}
|
|
|
|
public function video($get){
|
|
|
|
if($get["npt"]){
|
|
|
|
[$post, $proxy] = $this->backend->get($get["npt"], "videos");
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$post,
|
|
true
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
|
|
}else{
|
|
|
|
$search = $get["s"];
|
|
if(strlen($search) === 0){
|
|
|
|
throw new Exception("Search term is empty!");
|
|
}
|
|
|
|
try{
|
|
|
|
$proxy = $this->backend->get_ip();
|
|
|
|
$params = [
|
|
"query" => $get["s"],
|
|
"cat" => "video",
|
|
"pl" => "opensearch"
|
|
];
|
|
|
|
if($get["nsfw"] == "no"){
|
|
|
|
$params["qadf"] = "heavy";
|
|
}
|
|
|
|
if($get["sort"] != "relevance"){
|
|
|
|
$params["sort_by"] = $get["sort"];
|
|
}
|
|
|
|
if($get["duration"] != "any"){
|
|
|
|
$params["with_duration"] = $get["duration"];
|
|
}
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$params
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
//$html = file_get_contents("scraper/startpage.html");
|
|
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
}
|
|
|
|
$this->detect_captcha($html);
|
|
|
|
if(
|
|
preg_match(
|
|
'/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),?$/m',
|
|
$html,
|
|
$matches
|
|
) === 0
|
|
){
|
|
|
|
throw new Exception("Failed to get JSON object");
|
|
}
|
|
|
|
$json = json_decode($matches[1], true);
|
|
|
|
if($json === null){
|
|
|
|
throw new Exception("Failed to decode JSON object");
|
|
}
|
|
|
|
$out = [
|
|
"status" => "ok",
|
|
"npt" => null,
|
|
"video" => [],
|
|
"author" => [],
|
|
"livestream" => [],
|
|
"playlist" => [],
|
|
"reel" => []
|
|
];
|
|
|
|
// get npt
|
|
$out["npt"] = $this->parse_npt($json, "video", $proxy);
|
|
|
|
// get results
|
|
foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
|
|
|
|
if($category["display_type"] == "video-youtube"){
|
|
|
|
foreach($category["results"] as $video){
|
|
|
|
if(
|
|
isset($video["thumbnailUrl"]) &&
|
|
$video["thumbnailUrl"] !== null
|
|
){
|
|
|
|
$thumb = [
|
|
"ratio" => "16:9",
|
|
"url" => $this->unshitimage($video["thumbnailUrl"])
|
|
];
|
|
}else{
|
|
|
|
$thumb = [
|
|
"ratio" => null,
|
|
"url" => null
|
|
];
|
|
}
|
|
|
|
$out["video"][] = [
|
|
"title" => $video["title"],
|
|
"description" => $this->limitstrlen($video["description"]),
|
|
"author" => [
|
|
"name" => $video["channelTitle"],
|
|
"url" => null,
|
|
"avatar" => null
|
|
],
|
|
"date" => strtotime($video["publishDate"]),
|
|
"duration" => $this->hms2int($video["duration"]),
|
|
"views" => (int)$video["viewCount"],
|
|
"thumb" => $thumb,
|
|
"url" => $video["clickUrl"]
|
|
];
|
|
}
|
|
}
|
|
}
|
|
|
|
return $out;
|
|
}
|
|
|
|
public function news($get){
|
|
|
|
if($get["npt"]){
|
|
|
|
[$post, $proxy] = $this->backend->get($get["npt"], "news");
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$post,
|
|
true
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
|
|
}else{
|
|
|
|
$search = $get["s"];
|
|
if(strlen($search) === 0){
|
|
|
|
throw new Exception("Search term is empty!");
|
|
}
|
|
|
|
try{
|
|
|
|
$proxy = $this->backend->get_ip();
|
|
|
|
$params = [
|
|
"query" => $get["s"],
|
|
"cat" => "news",
|
|
"pl" => "opensearch"
|
|
];
|
|
|
|
if($get["nsfw"] == "no"){
|
|
|
|
$params["qadf"] = "heavy";
|
|
}
|
|
|
|
if($get["time"] != "any"){
|
|
|
|
$params["with_date"] = $get["time"];
|
|
}
|
|
|
|
try{
|
|
$html = $this->get(
|
|
$proxy,
|
|
"https://www.startpage.com/sp/search",
|
|
$params
|
|
);
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
//$html = file_get_contents("scraper/startpage.html");
|
|
|
|
}catch(Exception $error){
|
|
|
|
throw new Exception("Failed to fetch search page");
|
|
}
|
|
}
|
|
|
|
$this->detect_captcha($html);
|
|
|
|
if(
|
|
preg_match(
|
|
'/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),?$/m',
|
|
$html,
|
|
$matches
|
|
) === 0
|
|
){
|
|
|
|
throw new Exception("Failed to get JSON object");
|
|
}
|
|
|
|
$json = json_decode($matches[1], true);
|
|
|
|
if($json === null){
|
|
|
|
throw new Exception("Failed to decode JSON object");
|
|
}
|
|
|
|
$out = [
|
|
"status" => "ok",
|
|
"npt" => null,
|
|
"news" => []
|
|
];
|
|
|
|
// get npt
|
|
$out["npt"] = $this->parse_npt($json, "news", $proxy);
|
|
|
|
foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
|
|
|
|
if($category["display_type"] != "news-bing"){
|
|
|
|
// unsupported category
|
|
continue;
|
|
}
|
|
|
|
foreach($category["results"] as $news){
|
|
|
|
if(
|
|
isset($news["thumbnailUrl"]) &&
|
|
$news["thumbnailUrl"] !== null
|
|
){
|
|
|
|
$thumb = [
|
|
"ratio" => "16:9",
|
|
"url" => $this->unshitimage($news["thumbnailUrl"])
|
|
];
|
|
}else{
|
|
|
|
$thumb = [
|
|
"ratio" => null,
|
|
"url" => null
|
|
];
|
|
}
|
|
|
|
$out["news"][] = [
|
|
"title" => $this->titledots($this->remove_penguins($news["title"])),
|
|
"author" => $news["source"],
|
|
"description" => $this->titledots($this->remove_penguins($news["description"])),
|
|
"date" => (int)substr((string)$news["date"], 0, -3),
|
|
"thumb" => $thumb,
|
|
"url" => $news["clickUrl"]
|
|
];
|
|
}
|
|
}
|
|
|
|
return $out;
|
|
}
|
|
|
|
private function parse_npt($json, $pagetype, $proxy){
|
|
|
|
foreach($json["render"]["presenter"]["pagination"]["pages"] as $page){
|
|
|
|
if($page["name"] == "Next"){
|
|
|
|
parse_str(
|
|
explode(
|
|
"?",
|
|
$page["url"],
|
|
2
|
|
)[1],
|
|
$str
|
|
);
|
|
|
|
return
|
|
$this->backend->store(
|
|
http_build_query(
|
|
[
|
|
"lui" => "english",
|
|
"language" => "english",
|
|
"query" => $str["q"],
|
|
"cat" => $pagetype,
|
|
"sc" => $str["sc"],
|
|
"t" => "device",
|
|
"segment" => "startpage.udog",
|
|
"page" => $str["page"]
|
|
]
|
|
),
|
|
$pagetype,
|
|
$proxy
|
|
);
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
private function unshitimage($url){
|
|
|
|
$query = parse_url($url, PHP_URL_QUERY);
|
|
parse_str($query, $query);
|
|
|
|
if(isset($query["piurl"])){
|
|
|
|
if(strpos($query["piurl"], "gstatic.com/")){
|
|
|
|
return
|
|
explode(
|
|
"&",
|
|
$query["piurl"],
|
|
2
|
|
)[0];
|
|
}
|
|
|
|
if(
|
|
strpos($query["piurl"], "bing.net/") ||
|
|
strpos($query["piurl"], "bing.com/")
|
|
){
|
|
|
|
return
|
|
explode(
|
|
"&",
|
|
$query["piurl"],
|
|
2
|
|
)[0];
|
|
}
|
|
|
|
return $query["piurl"];
|
|
}
|
|
|
|
return $url;
|
|
}
|
|
|
|
private function limitstrlen($text){
|
|
|
|
return
|
|
explode(
|
|
"\n",
|
|
wordwrap(
|
|
str_replace(
|
|
["\n\r", "\r\n", "\n", "\r"],
|
|
" ",
|
|
$text
|
|
),
|
|
300,
|
|
"\n"
|
|
),
|
|
2
|
|
)[0];
|
|
}
|
|
|
|
private function titledots($title){
|
|
|
|
return trim($title, " .\t\n\r\0\x0B…");
|
|
}
|
|
|
|
private function hms2int($time){
|
|
|
|
$parts = explode(":", $time, 3);
|
|
$time = 0;
|
|
|
|
if(count($parts) === 3){
|
|
|
|
// hours
|
|
$time = $time + ((int)$parts[0] * 3600);
|
|
array_shift($parts);
|
|
}
|
|
|
|
if(count($parts) === 2){
|
|
|
|
// minutes
|
|
$time = $time + ((int)$parts[0] * 60);
|
|
array_shift($parts);
|
|
}
|
|
|
|
// seconds
|
|
$time = $time + (int)$parts[0];
|
|
|
|
return $time;
|
|
}
|
|
|
|
private function remove_penguins($text){
|
|
|
|
return str_replace(
|
|
["", ""],
|
|
"",
|
|
$text
|
|
);
|
|
}
|
|
|
|
private function detect_captcha($html){
|
|
|
|
$this->fuckhtml->load($html);
|
|
|
|
$title =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"title"
|
|
);
|
|
|
|
if(
|
|
count($title) !== 0 &&
|
|
$title[0]["innerHTML"] == "Redirecting..."
|
|
){
|
|
|
|
// check if it's a captcha
|
|
$as =
|
|
$this->fuckhtml
|
|
->getElementsByTagName(
|
|
"a"
|
|
);
|
|
|
|
foreach($as as $a){
|
|
|
|
if(
|
|
strpos(
|
|
$this->fuckhtml
|
|
->getTextContent(
|
|
$a["innerHTML"]
|
|
),
|
|
"https://www.startpage.com/sp/captcha"
|
|
) !== false
|
|
){
|
|
|
|
throw new Exception("Startpage returned a captcha");
|
|
}
|
|
}
|
|
|
|
throw new Exception("Startpage redirected the scraper to an unhandled page");
|
|
}
|
|
}
|
|
}
|