forked from lolcat/4get
		
	
		
			
				
	
	
		
			1580 lines
		
	
	
		
			34 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			1580 lines
		
	
	
		
			34 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
class startpage{
 | 
						|
	
 | 
						|
	/**
	 * Loads the helper libraries this scraper relies on and creates one
	 * instance of each.
	 *
	 * include_once is used instead of include so that building this class
	 * more than once in a request (or after the same library file was already
	 * loaded elsewhere) does not execute the library files a second time.
	 * NOTE(review): presumably lib/backend.php and lib/fuckhtml.php declare the
	 * `backend` and `fuckhtml` classes instantiated below -- confirm.
	 *
	 * NOTE(review): $this->backend and $this->fuckhtml are not declared in the
	 * part of the class visible here; if they are not declared elsewhere they
	 * are dynamic properties, which PHP 8.2+ reports as deprecated.
	 */
	public function __construct(){
		
		include_once "lib/backend.php";
		$this->backend = new backend("startpage");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
 | 
						|
	
 | 
						|
	/**
	 * Returns the filter definitions available for one search page.
	 *
	 * Every filter maps its name to an "option" list (value => label) and,
	 * in most cases, a "display" label. "extendedsearch" deliberately has no
	 * "display" entry so that it does not show up in the frontend.
	 * The short comment next to each filter appears to name the Startpage
	 * request parameter it corresponds to (qadf, with_date, flimgsize, ...).
	 *
	 * @param string $page One of "web", "images", "videos" or "news"
	 * @return array|null Filter definitions, or null when $page is not recognised
	 */
	public function getfilters($page){
		
		// the NSFW toggle is identical on every page
		$nsfw = [ // qadf
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes", // qadf=none
				"no" => "No" // qadf=heavy
			]
		];
		
		switch($page){
			
			case "web":
				return [
					"country" => [
						"display" => "Country",
						"option" => [
							"any" => "All Regions",
							"es_AR" => "Argentina",
							"en_AU" => "Australia",
							"de_AT" => "Austria",
							"ru_BY" => "Belarus",
							"fr_BE" => "Belgium (FR)",
							"nl_BE" => "Belgium (NL)",
							"bg_BG" => "Bulgaria",
							"en_CA" => "Canada (EN)",
							"fr_CA" => "Canada (FR)",
							"es_CL" => "Chile",
							"es_CO" => "Colombia",
							"cs_CZ" => "Czech Republic",
							"da_DK" => "Denmark",
							"ar_EG" => "Egypt",
							"et_EE" => "Estonia",
							"fi_FI" => "Finland",
							"fr_FR" => "France",
							"de_DE" => "Germany",
							"el_GR" => "Greece",
							"hu_HU" => "Hungary",
							"hi_IN" => "India (HI)",
							"en_IN" => "India (EN)",
							"id_ID" => "Indonesia (ID)",
							"en_ID" => "Indonesia (EN)",
							"en_IE" => "Ireland",
							"it_IT" => "Italy",
							"ja_JP" => "Japan",
							"ko_KR" => "Korea",
							"ms_MY" => "Malaysia (MS)",
							"en_MY" => "Malaysia (EN)",
							"es_MX" => "Mexico",
							"nl_NL" => "Netherlands",
							"en_NZ" => "New Zealand",
							"no_NO" => "Norway",
							"es_PE" => "Peru",
							"fil_PH" => "Philippines (FIL)",
							"en_PH" => "Philippines (EN)",
							"pl_PL" => "Poland",
							"pt_PT" => "Portugal",
							"ro_RO" => "Romania",
							"ru_RU" => "Russia",
							"ms_SG" => "Singapore (MS)",
							"en_SG" => "Singapore (EN)",
							"es_ES" => "Spain (ES)",
							"ca_ES" => "Spain (CA)",
							"sv_SE" => "Sweden",
							"de_CH" => "Switzerland (DE)",
							"fr_CH" => "Switzerland (FR)",
							"it_CH" => "Switzerland (IT)",
							"tr_TR" => "Turkey",
							"uk_UA" => "Ukraine",
							"en_US" => "US (EN)",
							"es_US" => "US (ES)",
							"es_UY" => "Uruguay",
							"es_VE" => "Venezuela",
							"vi_VN" => "Vietnam (VI)",
							"en_VN" => "Vietnam (EN)",
							"en_ZA" => "South Africa"
						]
					],
					"nsfw" => $nsfw,
					"time" => [ // with_date
						"display" => "Time posted",
						"option" => [
							"any" => "Any time",
							"d" => "Past 24 hours",
							"w" => "Past week",
							"m" => "Past month",
							"y" => "Past year",
						]
					],
					"extendedsearch" => [
						// undefined display, so it wont show in frontend
						"option" => [
							"yes" => "Yes",
							"no" => "No"
						]
					]
				];
			
			case "images":
				return [
					"nsfw" => $nsfw,
					"size" => [ // flimgsize
						"display" => "Size",
						"option" => [
							"any" => "Any size",
							"Small" => "Small",
							"Medium" => "Medium",
							"Large" => "Large",
							"Wallpaper" => "Wallpaper",
							// from here, image-size-select, var prefix = isz:lt,islt:
							// (each key is listed once; a repeated "qsvgs" entry would
							// only overwrite the first one)
							"qsvgs" => "Larger than 400x300",
							"vga" => "Larger than 640x480",
							"svga" => "Larger than 800x600",
							"xga" => "Larger than 1024x768",
							"2mp" => "Larger than 2 MP (1600x1200)",
							"4mp" => "Larger than 4 MP (2272x1704)",
							"6mp" => "Larger than 6 MP (2816x2112)",
							"8mp" => "Larger than 8 MP (3264x2448)",
							"10mp" => "Larger than 10 MP (3648x2736)",
							"12mp" => "Larger than 12 MP (4096x3072)",
							"15mp" => "Larger than 15 MP (4480x3360)",
							"20mp" => "Larger than 20 MP (5120x3840)",
							"40mp" => "Larger than 40 MP (7216x5412)",
							"70mp" => "Larger than 70 MP (9600x7200)"
						]
					],
					"color" => [ // flimgcolor
						"display" => "Color",
						"option" => [
							"any" => "Any color",
							// from here, var prefix = ic:
							"color" => "Color only",
							"bnw" => "Black & white", // set to "gray"
							// from here, var prefix = ic:specific,isc:
							"red" => "Red",
							"orange" => "Orange",
							"yellow" => "Yellow",
							"green" => "Green",
							"teal" => "Teal",
							"blue" => "Blue",
							"purple" => "Purple",
							"pink" => "Pink",
							"white" => "White",
							"gray" => "Gray",
							"black" => "Black",
							"brown" => "Brown"
						]
					],
					"type" => [ // flimgtype
						"display" => "Type",
						"option" => [
							"any" => "Any type",
							"AnimatedGif" => "Animated GIF",
							"Clipart" => "Clip Art",
							"Line" => "Line Drawing",
							"Photo" => "Photograph",
							"Transparent" => "Transparent Background"
						]
					],
					"license" => [ // flimglicense
						"display" => "License",
						"option" => [
							"any" => "Any license",
							"p" => "Public domain",
							"s" => "Free to share",
							"sc" => "Free to share commercially",
							"m" => "Free to modify",
							"mc" => "Free to modify commercially"
						]
					]
				];
			
			case "videos":
				return [
					"nsfw" => $nsfw,
					"sort" => [
						"display" => "Sort by",
						"option" => [
							"relevance" => "Most relevant",
							"popular" => "Most popular",
							"recent" => "Most recent"
						]
					],
					"duration" => [ // with_duration
						"display" => "Duration",
						"option" => [
							"any" => "Any duration",
							"short" => "Short",
							"medium" => "Medium",
							"long" => "Long"
						]
					]
				];
			
			case "news":
				return [
					"nsfw" => $nsfw,
					"time" => [ // with_date
						"display" => "Time posted",
						"option" => [
							"any" => "Any time",
							"d" => "Past 24 hours",
							"w" => "Past week",
							"m" => "Past month"
						]
					]
				];
		}
		
		// reference "preferences" cookie values kept from the original source
		// (they differ only in the languageEEE... field):
		//preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEazerbaijaniN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:21:58 GMT; Secure; Path=/
		//preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:22:52 GMT; Secure; Path=/
		
		// unknown page: same result as falling off the end of the function
		return null;
	}
 | 
						|
	
 | 
						|
	/**
	 * Sends one HTTP request to Startpage through cURL and returns the
	 * response body.
	 *
	 * Three header profiles are used: an XHR/JSON profile ($is_xhr), a form
	 * POST profile ($post) and a plain page-load profile. The header lists
	 * and their order are kept exactly as they were.
	 *
	 * @param mixed $proxy Passed through to $this->backend->assign_proxy()
	 * @param string $url Request URL
	 * @param array|string $get For GET requests, an array that is appended to
	 *        $url as a query string. For POST requests it is handed to cURL as
	 *        the body. NOTE(review): strlen() is applied to it for the
	 *        Content-Length header in the XHR and POST profiles, so it
	 *        presumably has to be a string there -- confirm against callers.
	 * @param bool $post Send the request as a POST
	 * @param bool $is_xhr Use the JSON/XHR header profile
	 * @return string|bool Result of curl_exec() with CURLOPT_RETURNTRANSFER set
	 * @throws Exception Carrying the cURL error message when the transfer fails
	 */
	private function get($proxy, $url, $get = [], $post = false, $is_xhr = false){
		
		$curlproc = curl_init();
		
		if($post === true){
			
			curl_setopt($curlproc, CURLOPT_POST, true);
			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
			
		}elseif($get !== []){
			
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		// the same preferences cookie is sent with every header profile
		$cookie = "Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius";
		
		if($is_xhr === true){
			
			// JSON request profile
			$headers = [
				"User-Agent: " . config::USER_AGENT,
				"Accept: application/json",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://www.startpage.com/",
				"Content-Type: application/json",
				"Content-Length: " . strlen($get),
				"Origin: https://www.startpage.com/",
				"DNT: 1",
				"Connection: keep-alive",
				$cookie,
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"TE: trailers"
			];
			
		}elseif($post === true){
			
			// form POST profile
			$headers = [
				"User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://www.startpage.com/",
				"Content-Type: application/x-www-form-urlencoded",
				"Content-Length: " . strlen($get),
				"DNT: 1",
				"Connection: keep-alive",
				$cookie,
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"
			];
			
		}else{
			
			// plain page-load profile
			$headers = [
				"User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				$cookie,
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"
			];
		}
		
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle on the
			// failure path as well before raising
			$error = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($error);
		}
		
		curl_close($curlproc);
		return $data;
	}
 | 
						|
	
 | 
						|
	public function web($get){
 | 
						|
		
 | 
						|
		if($get["npt"]){
 | 
						|
			
 | 
						|
			[$post, $proxy] = $this->backend->get($get["npt"], "web");
 | 
						|
			
 | 
						|
			try{
 | 
						|
				$html = $this->get(
 | 
						|
					$proxy,
 | 
						|
					"https://www.startpage.com/sp/search",
 | 
						|
					$post,
 | 
						|
					true
 | 
						|
				);
 | 
						|
			}catch(Exception $error){
 | 
						|
				
 | 
						|
				throw new Exception("Failed to fetch search page");
 | 
						|
			}
 | 
						|
			
 | 
						|
			$get_instant_answer = false;
 | 
						|
			
 | 
						|
		}else{
 | 
						|
			
 | 
						|
			$proxy = $this->backend->get_ip();
 | 
						|
			
 | 
						|
			$params = [
 | 
						|
				"query" => $get["s"],
 | 
						|
				"cat" => "web",
 | 
						|
				"pl" => "opensearch"
 | 
						|
			];
 | 
						|
			
 | 
						|
			if($get["nsfw"] == "no"){
 | 
						|
				
 | 
						|
				$params["qadf"] = "heavy";
 | 
						|
				$get_instant_answer = false;
 | 
						|
			}else{
 | 
						|
				
 | 
						|
				$get_instant_answer = true;
 | 
						|
			}
 | 
						|
			
 | 
						|
			if($get["country"] !== "any"){
 | 
						|
				
 | 
						|
				$params["qsr"] = $get["country"];
 | 
						|
			}
 | 
						|
			
 | 
						|
			if($get["time"] !== "any"){
 | 
						|
				
 | 
						|
				$params["with_date"] = $get["time"];
 | 
						|
			}
 | 
						|
			
 | 
						|
			try{
 | 
						|
				$html = $this->get(
 | 
						|
					$proxy,
 | 
						|
					"https://www.startpage.com/sp/search",
 | 
						|
					$params
 | 
						|
				);
 | 
						|
			}catch(Exception $error){
 | 
						|
				
 | 
						|
				throw new Exception("Failed to fetch search page");
 | 
						|
			}
 | 
						|
			
 | 
						|
			//$html = file_get_contents("scraper/startpage.html");
 | 
						|
		}
 | 
						|
		
 | 
						|
		$this->detect_captcha($html);
 | 
						|
		
 | 
						|
		if(
 | 
						|
			preg_match(
 | 
						|
				'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),?$/m',
 | 
						|
				$html,
 | 
						|
				$matches
 | 
						|
			) === 0
 | 
						|
		){
 | 
						|
			
 | 
						|
			throw new Exception("Failed to grep JSON object");
 | 
						|
		}
 | 
						|
		
 | 
						|
		$json = json_decode($matches[1], true);
 | 
						|
		
 | 
						|
		if($json === null){
 | 
						|
			
 | 
						|
			throw new Exception("Failed to decode JSON");
 | 
						|
		}
 | 
						|
		
 | 
						|
		//print_r($json);
 | 
						|
		
 | 
						|
		$out = [
 | 
						|
			"status" => "ok",
 | 
						|
			"spelling" => [
 | 
						|
				"type" => "no_correction",
 | 
						|
				"using" => null,
 | 
						|
				"correction" => null
 | 
						|
			],
 | 
						|
			"npt" => null,
 | 
						|
			"answer" => [],
 | 
						|
			"web" => [],
 | 
						|
			"image" => [],
 | 
						|
			"video" => [],
 | 
						|
			"news" => [],
 | 
						|
			"related" => []
 | 
						|
		];
 | 
						|
		
 | 
						|
		// get npt
 | 
						|
		$out["npt"] = $this->parse_npt($json, "web", $proxy);
 | 
						|
		
 | 
						|
		foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
 | 
						|
			
 | 
						|
			if(!isset($category["display_type"])){
 | 
						|
				
 | 
						|
				continue;
 | 
						|
			}
 | 
						|
			
 | 
						|
			switch($category["display_type"]){
 | 
						|
				
 | 
						|
				case "web-google":
 | 
						|
					foreach($category["results"] as $result){
 | 
						|
						
 | 
						|
						$sublinks = [];
 | 
						|
						
 | 
						|
						foreach($result["siteLinks"] as $sublink){
 | 
						|
							
 | 
						|
							$sublinks[] = [
 | 
						|
								"title" => $sublink["title"],
 | 
						|
								"description" => null,
 | 
						|
								"url" => $sublink["clickUrl"]
 | 
						|
							];
 | 
						|
						}
 | 
						|
						
 | 
						|
						$description =
 | 
						|
							explode(
 | 
						|
								"...",
 | 
						|
								$this->titledots(
 | 
						|
									html_entity_decode(
 | 
						|
										$this->fuckhtml
 | 
						|
										->getTextContent(
 | 
						|
											$result["description"]
 | 
						|
										)
 | 
						|
									)
 | 
						|
								),
 | 
						|
								2
 | 
						|
							);
 | 
						|
						
 | 
						|
						$date = strtotime(trim($description[0]));
 | 
						|
						
 | 
						|
						if(
 | 
						|
							$date === false ||
 | 
						|
							count($description) !== 2 ||
 | 
						|
							strlen($description[0]) > 14
 | 
						|
						){
 | 
						|
							
 | 
						|
							// no date found
 | 
						|
							$description =
 | 
						|
								implode(
 | 
						|
									" ... ",
 | 
						|
									$description
 | 
						|
								);
 | 
						|
							
 | 
						|
							$date = null;
 | 
						|
						}else{
 | 
						|
							
 | 
						|
							// date found
 | 
						|
							$description = ltrim($description[1]);
 | 
						|
						}
 | 
						|
						
 | 
						|
						$out["web"][] = [
 | 
						|
							"title" =>
 | 
						|
								$this->titledots(
 | 
						|
									html_entity_decode(
 | 
						|
										$this->fuckhtml
 | 
						|
										->getTextContent(
 | 
						|
											$result["title"]
 | 
						|
										)
 | 
						|
									)
 | 
						|
								),
 | 
						|
							"description" => $description,
 | 
						|
							"url" => $result["clickUrl"],
 | 
						|
							"date" => $date,
 | 
						|
							"type" => "web",
 | 
						|
							"thumb" => [
 | 
						|
								"url" => null,
 | 
						|
								"ratio" => null
 | 
						|
							],
 | 
						|
							"sublink" => $sublinks,
 | 
						|
							"table" => []
 | 
						|
						];
 | 
						|
					}
 | 
						|
					break;
 | 
						|
				
 | 
						|
				case "images-qi-top":
 | 
						|
					foreach($category["results"] as $result){
 | 
						|
						
 | 
						|
						$out["image"][] = [
 | 
						|
							"title" =>
 | 
						|
								$this->titledots(
 | 
						|
									html_entity_decode(
 | 
						|
										$this->fuckhtml
 | 
						|
										->getTextContent(
 | 
						|
											$result["title"]
 | 
						|
										)
 | 
						|
									)
 | 
						|
								),
 | 
						|
							"source" => [
 | 
						|
								[
 | 
						|
									"url" => $result["rawImageUrl"],
 | 
						|
									"width" => (int)$result["width"],	
 | 
						|
									"height" => (int)$result["height"]
 | 
						|
								],
 | 
						|
								[
 | 
						|
									"url" => $this->unshitimage($result["mdThumbnailUrl"]),
 | 
						|
									"width" => (int)$result["mdThumbnailWidth"],
 | 
						|
									"height" => (int)$result["mdThumbnailHeight"]
 | 
						|
								]
 | 
						|
							],
 | 
						|
							"url" =>
 | 
						|
								$result["altClickUrl"]
 | 
						|
						];
 | 
						|
					}
 | 
						|
					break;
 | 
						|
				
 | 
						|
				case "spellsuggest-google":
 | 
						|
					$out["spelling"] =
 | 
						|
						[
 | 
						|
							"type" => "including",
 | 
						|
							"using" => $json["render"]["query"],
 | 
						|
							"correction" => $category["results"][0]["query"]
 | 
						|
						];
 | 
						|
					break;
 | 
						|
				
 | 
						|
				case "dictionary-qi":
 | 
						|
					foreach($category["results"] as $result){
 | 
						|
						
 | 
						|
						$answer = [
 | 
						|
							"title" => $result["word"],
 | 
						|
							"description" => [],
 | 
						|
							"url" => null,
 | 
						|
							"thumb" => null,
 | 
						|
							"table" => [],
 | 
						|
							"sublink" => []
 | 
						|
						];
 | 
						|
						
 | 
						|
						foreach($result["lexical_categories"] as $lexic_type => $definitions){
 | 
						|
							
 | 
						|
							$answer["description"][] = [
 | 
						|
								"type" => "title",
 | 
						|
								"value" => $lexic_type
 | 
						|
							];
 | 
						|
							
 | 
						|
							$i = 0;
 | 
						|
							
 | 
						|
							foreach($definitions as $definition){
 | 
						|
								
 | 
						|
								$text_definition = trim($definition["definition"]);
 | 
						|
								$text_example = trim($definition["example"]);
 | 
						|
								$text_synonyms = implode(", ", $definition["synonyms"]);
 | 
						|
								
 | 
						|
								if($text_definition != ""){
 | 
						|
									
 | 
						|
									$i++;
 | 
						|
									
 | 
						|
									$c = count($answer["description"]) - 1;
 | 
						|
									if(
 | 
						|
										$c !== 0 &&
 | 
						|
										$answer["description"][$c]["type"] == "text"
 | 
						|
									){
 | 
						|
										
 | 
						|
										$answer["description"][$c]["value"] .=
 | 
						|
											"\n\n" . $i . ". " . $text_definition;
 | 
						|
										
 | 
						|
									}else{
 | 
						|
										
 | 
						|
										$answer["description"][] = [
 | 
						|
											"type" => "text",
 | 
						|
											"value" => $i . ". " . $text_definition
 | 
						|
										];
 | 
						|
									}
 | 
						|
								}
 | 
						|
								
 | 
						|
								if($text_example != ""){
 | 
						|
									
 | 
						|
									$answer["description"][] = [
 | 
						|
										"type" => "quote",
 | 
						|
										"value" => $text_example
 | 
						|
									];
 | 
						|
								}
 | 
						|
								
 | 
						|
								if($text_synonyms != ""){
 | 
						|
									
 | 
						|
									$answer["description"][] = [
 | 
						|
										"type" => "text",
 | 
						|
										"value" => "Synonyms: " . $text_synonyms
 | 
						|
									];
 | 
						|
								}
 | 
						|
							}
 | 
						|
						}
 | 
						|
						
 | 
						|
						$out["answer"][] = $answer;
 | 
						|
					}
 | 
						|
					break;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		
 | 
						|
		// parse instant answers
 | 
						|
		if(
 | 
						|
			$get["extendedsearch"] == "yes" &&
 | 
						|
			$get_instant_answer === true
 | 
						|
		){
 | 
						|
			
 | 
						|
			// https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
 | 
						|
			try{
 | 
						|
				$post = [
 | 
						|
					"se" => "n0vze2y9dqwy",
 | 
						|
					"q" => $json["render"]["query"],
 | 
						|
					"results" => [], // populate
 | 
						|
					"enableKnowledgePanel" => true,
 | 
						|
					"enableMediaThumbBar" => false,
 | 
						|
					"enableSearchSuggestions" => false,
 | 
						|
					"enableTripadvisorProperties" => [],
 | 
						|
					"enableTripadvisorPlaces" => [],
 | 
						|
					"enableTripadvisorPlacesForLocations" => [],
 | 
						|
					"enableWebProducts" => false,
 | 
						|
					"tripadvisorPartnerId" => null,
 | 
						|
					"tripadvisorMapColorMode" => "light",
 | 
						|
					"tripadvisorDisablesKnowledgePanel" => false,
 | 
						|
					"instantAnswers" => [
 | 
						|
						"smartAnswers",
 | 
						|
						"youtube",
 | 
						|
						"tripadvisor"
 | 
						|
					],
 | 
						|
					"iaType" => null,
 | 
						|
					"forceEnhancedKnowledgePanel" => false,
 | 
						|
					"shoppingOnly" => false,
 | 
						|
					"allowAdultProducts" => true,
 | 
						|
					"lang" => "en",
 | 
						|
					"browserLang" => "en-US",
 | 
						|
					"browserTimezone" => "America/New_York",
 | 
						|
					"market" => null,
 | 
						|
					"userLocation" => null,
 | 
						|
					"userDate" => date("Y-m-d"),
 | 
						|
					"userAgentType" => "unknown"
 | 
						|
				];
 | 
						|
				
 | 
						|
				foreach($out["web"] as $result){
 | 
						|
					
 | 
						|
					$post["results"][] = [
 | 
						|
						"url" => $result["url"],
 | 
						|
						"title" => $result["title"]
 | 
						|
					];
 | 
						|
				}
 | 
						|
				
 | 
						|
				$post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
 | 
						|
				
 | 
						|
				$additional_data =
 | 
						|
					$this->get(
 | 
						|
						$proxy,
 | 
						|
						"https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . $json["render"]["callback_sc"] . "&sr=1",
 | 
						|
						$post,
 | 
						|
						true,
 | 
						|
						true
 | 
						|
					);
 | 
						|
				
 | 
						|
				$additional_data = json_decode($additional_data, true);
 | 
						|
				
 | 
						|
				if($additional_data === null){
 | 
						|
					
 | 
						|
					throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
 | 
						|
				}
 | 
						|
				
 | 
						|
				if(!isset($additional_data["knowledgePanel"])){
 | 
						|
					
 | 
						|
					throw new Exception("Response has missing data (knowledgePanel)");
 | 
						|
				}
 | 
						|
				
 | 
						|
				$additional_data = $additional_data["knowledgePanel"];
 | 
						|
				
 | 
						|
				$answer = [
 | 
						|
					"title" => $additional_data["meta"]["title"],
 | 
						|
					"description" => [
 | 
						|
						[
 | 
						|
							"type" => "quote",
 | 
						|
							"value" => $additional_data["meta"]["description"]
 | 
						|
						]
 | 
						|
					],
 | 
						|
					"url" => $additional_data["meta"]["origWikiUrl"],
 | 
						|
					"thumb" => $additional_data["meta"]["image"],
 | 
						|
					"table" => [],
 | 
						|
					"sublink" => []
 | 
						|
				];
 | 
						|
				
 | 
						|
				// parse html for instant answer
 | 
						|
				$this->fuckhtml->load($additional_data["html"]);
 | 
						|
				
 | 
						|
				$div =
 | 
						|
					$this->fuckhtml
 | 
						|
					->getElementsByTagName(
 | 
						|
						"div"
 | 
						|
					);
 | 
						|
				
 | 
						|
				// get description
 | 
						|
				$description =
 | 
						|
					$this->fuckhtml
 | 
						|
					->getElementsByClassName(
 | 
						|
						"sx-kp-short-extract sx-kp-short-extract-complete",
 | 
						|
						$div
 | 
						|
					);
 | 
						|
				
 | 
						|
				if(count($description) !== 0){
 | 
						|
					
 | 
						|
					$answer["description"][] = [
 | 
						|
						"type" => "text",
 | 
						|
						"value" =>
 | 
						|
							html_entity_decode(
 | 
						|
								$this->fuckhtml
 | 
						|
								->getTextContent(
 | 
						|
									$description[0]
 | 
						|
								)
 | 
						|
							)
 | 
						|
					];
 | 
						|
				}
 | 
						|
				
 | 
						|
				// get socials
 | 
						|
				$socials =
 | 
						|
					$this->fuckhtml
 | 
						|
					->getElementsByClassName(
 | 
						|
						"sx-wiki-social-link",
 | 
						|
						"a"
 | 
						|
					);
 | 
						|
				
 | 
						|
				foreach($socials as $social){
 | 
						|
					
 | 
						|
					$title =
 | 
						|
						$this->fuckhtml
 | 
						|
						->getTextContent(
 | 
						|
							$social["attributes"]["title"]
 | 
						|
						);
 | 
						|
					
 | 
						|
					$url =
 | 
						|
						$this->fuckhtml
 | 
						|
						->getTextContent(
 | 
						|
							$social["attributes"]["href"]
 | 
						|
						);
 | 
						|
					
 | 
						|
					switch($title){
 | 
						|
						
 | 
						|
						case "Official Website":
 | 
						|
							$title = "Website";
 | 
						|
							break;
 | 
						|
					}
 | 
						|
					
 | 
						|
					$answer["sublink"][$title] = $url;
 | 
						|
				}
 | 
						|
				
 | 
						|
				// get videos
 | 
						|
				$videos =
 | 
						|
					$this->fuckhtml
 | 
						|
					->getElementsByClassName(
 | 
						|
						"sx-kp-video-grid-item",
 | 
						|
						$div
 | 
						|
					);
 | 
						|
				
 | 
						|
				foreach($videos as $video){
 | 
						|
					
 | 
						|
					$this->fuckhtml->load($video);
 | 
						|
					
 | 
						|
					$as =
 | 
						|
						$this->fuckhtml
 | 
						|
						->getElementsByTagName(
 | 
						|
							"a"
 | 
						|
						);
 | 
						|
					
 | 
						|
					if(count($as) === 0){
 | 
						|
						
 | 
						|
						// ?? invalid
 | 
						|
						continue;
 | 
						|
					}
 | 
						|
					
 | 
						|
					$image =
 | 
						|
						$this->fuckhtml
 | 
						|
						->getElementsByAttributeName(
 | 
						|
							"data-sx-src",
 | 
						|
							"img"
 | 
						|
						);
 | 
						|
					
 | 
						|
					if(count($image) !== 0){
 | 
						|
						
 | 
						|
						$thumb = [
 | 
						|
							"ratio" => "16:9",
 | 
						|
							"url" =>
 | 
						|
								$this->fuckhtml
 | 
						|
								->getTextContent(
 | 
						|
									$image[0]["attributes"]["data-sx-src"]
 | 
						|
								)
 | 
						|
						];
 | 
						|
					}else{
 | 
						|
 | 
						|
						$thumb = [
 | 
						|
							"ratio" => null,
 | 
						|
							"url" => null
 | 
						|
						];
 | 
						|
					}
 | 
						|
					
 | 
						|
					$out["video"][] = [
 | 
						|
						"title" =>
 | 
						|
							$this->fuckhtml
 | 
						|
							->getTextContent(
 | 
						|
								$as[0]["attributes"]["title"]
 | 
						|
							),
 | 
						|
						"description" => null,
 | 
						|
						"date" => null,
 | 
						|
						"duration" => null,
 | 
						|
						"views" => null,
 | 
						|
						"thumb" => $thumb,
 | 
						|
						"url" =>
 | 
						|
							$this->fuckhtml
 | 
						|
							->getTextContent(
 | 
						|
								$as[0]["attributes"]["href"]
 | 
						|
							)
 | 
						|
					];
 | 
						|
				}
 | 
						|
				
 | 
						|
				// reset
 | 
						|
				$this->fuckhtml->load($additional_data["html"]);
 | 
						|
				
 | 
						|
				// get table elements
 | 
						|
				$table =
 | 
						|
					$this->fuckhtml
 | 
						|
					->getElementsByClassName(
 | 
						|
						"sx-infobox",
 | 
						|
						"table"
 | 
						|
					);
 | 
						|
				
 | 
						|
				if(count($table) !== 0){
 | 
						|
					
 | 
						|
					$trs =
 | 
						|
						$this->fuckhtml
 | 
						|
						->getElementsByTagName(
 | 
						|
							"tr"
 | 
						|
						);
 | 
						|
					
 | 
						|
					foreach($trs as $tr){
 | 
						|
						
 | 
						|
						$this->fuckhtml->load($tr);
 | 
						|
						
 | 
						|
						// ok so startpage devs cant fucking code a table
 | 
						|
						// td = content
 | 
						|
						// th (AAAHH) = title
 | 
						|
						$tds =
 | 
						|
							$this->fuckhtml
 | 
						|
							->getElementsByTagName(
 | 
						|
								"td"	
 | 
						|
							);
 | 
						|
						
 | 
						|
						$ths =
 | 
						|
							$this->fuckhtml
 | 
						|
							->getElementsByTagName(
 | 
						|
								"th"
 | 
						|
							);
 | 
						|
						
 | 
						|
						if(
 | 
						|
							count($ths) === 1 &&
 | 
						|
							count($tds) === 1
 | 
						|
						){
 | 
						|
							
 | 
						|
							$title =
 | 
						|
								$this->fuckhtml
 | 
						|
								->getTextContent(
 | 
						|
									$ths[0]
 | 
						|
								);
 | 
						|
							
 | 
						|
							$description = [];
 | 
						|
							
 | 
						|
							$this->fuckhtml->load($tds[0]);
 | 
						|
							
 | 
						|
							$lis =
 | 
						|
								$this->fuckhtml
 | 
						|
								->getElementsByTagName(
 | 
						|
									"li"
 | 
						|
								);
 | 
						|
							
 | 
						|
							if(count($lis) !== 0){
 | 
						|
								
 | 
						|
								foreach($lis as $li){
 | 
						|
									
 | 
						|
									$description[] =
 | 
						|
										$this->fuckhtml
 | 
						|
										->getTextContent(
 | 
						|
											$li
 | 
						|
										);
 | 
						|
								}
 | 
						|
								
 | 
						|
								$description = implode(", ", $description);
 | 
						|
							}else{
 | 
						|
								
 | 
						|
								$description =
 | 
						|
									$this->fuckhtml
 | 
						|
									->getTextContent(
 | 
						|
										$tds[0]
 | 
						|
									);
 | 
						|
							}
 | 
						|
							
 | 
						|
							$answer["table"][$title] = $description;
 | 
						|
						}
 | 
						|
					}
 | 
						|
				}
 | 
						|
				
 | 
						|
				$out["answer"][] = $answer;
 | 
						|
				
 | 
						|
			}catch(Exception $error){
 | 
						|
				
 | 
						|
				// do nothing
 | 
						|
				//echo "error!";
 | 
						|
			}
 | 
						|
		}
 | 
						|
		
 | 
						|
		return $out;
 | 
						|
	}
 | 
						|
	
 | 
						|
	/**
	 * Scrape one page of Startpage image results.
	 *
	 * When $get["npt"] is set, the stored request payload and proxy are read
	 * back from the backend and replayed. Otherwise a new query is built from
	 * $get["s"] and the nsfw/size/color/type/license filters.
	 *
	 * @param array $get Request parameters; reads "npt", "s", "nsfw", "size",
	 *                   "color", "type" and "license"
	 * @return array "status", "npt" (result of parse_npt()) and "image" (list of results)
	 * @throws Exception On an empty search term, a failed fetch, a page rejected
	 *                   by detect_captcha(), or a page whose JSON cannot be found/decoded
	 */
	public function image($get){

		if($get["npt"]){

			// continue a previous search with the payload and proxy kept by the backend
			[$post, $proxy] = $this->backend->get($get["npt"], "images");

			try{
				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$post,
					true
				);
			}catch(Exception $error){

				throw new Exception("Failed to fetch search page", 0, $error);
			}

		}else{

			$search = $get["s"];
			if(strlen($search) === 0){

				throw new Exception("Search term is empty!");
			}

			try{

				$proxy = $this->backend->get_ip();

				$params = [
					"query" => $search,
					"cat" => "images",
					"pl" => "opensearch"
				];

				if($get["nsfw"] == "no"){

					$params["qadf"] = "heavy";
				}

				if($get["size"] != "any"){

					if(
						in_array(
							$get["size"],
							["Small", "Medium", "Large", "Wallpaper"],
							true
						)
					){

						// named size buckets go into their own parameter
						$params["flimgsize"] = $get["size"];
					}else{

						// every other value is sent as an "isz:lt,islt:" expression
						$params["image-size-select"] = "isz:lt,islt:" . $get["size"];
					}
				}

				if($get["color"] != "any"){

					if($get["color"] == "color"){

						$params["flimgcolor"] = "ic:color";
					}elseif($get["color"] == "bnw"){

						$params["flimgcolor"] = "ic:gray";
					}else{

						$params["flimgcolor"] = "ic:specific,isc:" . $get["color"];
					}
				}

				if($get["type"] != "any"){

					$params["flimgtype"] = $get["type"];
				}

				if($get["license"] != "any"){

					$params["flimglicense"] = $get["license"];
				}

				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$params
				);

			}catch(Exception $error){

				// a failure in get_ip() or get() is reported the same way
				throw new Exception("Failed to fetch search page", 0, $error);
			}
		}

		$this->detect_captcha($html);

		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];

		// preg_match() returns false on a PCRE error, so anything but 1 is a miss
		if(
			preg_match(
				'/React\.createElement\(UIStartpage\.AppSerpImages, ?(.+)\),?$/m',
				$html,
				$matches
			) !== 1
		){

			throw new Exception("Failed to grep JSON object");
		}

		$json = json_decode($matches[1], true);

		if($json === null){

			throw new Exception("Failed to decode JSON object");
		}

		// get npt
		$out["npt"] = $this->parse_npt($json, "images", $proxy);

		// get images
		$mainline = $json["render"]["presenter"]["regions"]["mainline"] ?? [];

		foreach($mainline as $category){

			if(($category["display_type"] ?? null) != "images-bing"){

				// ignore ads and !! suggestions !! @todo
				continue;
			}

			foreach(($category["results"] ?? []) as $image){

				$out["image"][] = [
					"title" => $this->titledots($image["title"]),
					"source" => [
						// full size image first, thumbnail second
						[
							"url" => $this->unshitimage($image["clickUrl"]),
							"width" => (int)$image["width"],
							"height" => (int)$image["height"]
						],
						[
							"url" => $this->unshitimage($image["thumbnailUrl"]),
							"width" => (int)$image["thumbnailWidth"],
							"height" => (int)$image["thumbnailHeight"]
						]
					],
					"url" => $image["altClickUrl"]
				];
			}
		}

		return $out;
	}
 | 
						|
	
 | 
						|
	/**
	 * Scrape one page of Startpage video results.
	 *
	 * When $get["npt"] is set, the stored request payload and proxy are read
	 * back from the backend and replayed. Otherwise a new query is built from
	 * $get["s"] and the nsfw/sort/duration filters.
	 *
	 * @param array $get Request parameters; reads "npt", "s", "nsfw", "sort" and "duration"
	 * @return array "status", "npt" (result of parse_npt()), "video" (list of results)
	 *               and the always-empty "author", "livestream", "playlist" and "reel" lists
	 * @throws Exception On an empty search term, a failed fetch, a page rejected
	 *                   by detect_captcha(), or a page whose JSON cannot be found/decoded
	 */
	public function video($get){

		if($get["npt"]){

			// NOTE(review): the token is read back with page "videos" while
			// parse_npt() below stores it with "video" -- confirm the backend
			// treats both as the same page type
			[$post, $proxy] = $this->backend->get($get["npt"], "videos");

			try{
				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$post,
					true
				);
			}catch(Exception $error){

				throw new Exception("Failed to fetch search page", 0, $error);
			}

		}else{

			$search = $get["s"];
			if(strlen($search) === 0){

				throw new Exception("Search term is empty!");
			}

			try{

				$proxy = $this->backend->get_ip();

				$params = [
					"query" => $search,
					"cat" => "video",
					"pl" => "opensearch"
				];

				if($get["nsfw"] == "no"){

					$params["qadf"] = "heavy";
				}

				if($get["sort"] != "relevance"){

					$params["sort_by"] = $get["sort"];
				}

				if($get["duration"] != "any"){

					$params["with_duration"] = $get["duration"];
				}

				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$params
				);

			}catch(Exception $error){

				// a failure in get_ip() or get() is reported the same way
				throw new Exception("Failed to fetch search page", 0, $error);
			}
		}

		$this->detect_captcha($html);

		// preg_match() returns false on a PCRE error, so anything but 1 is a miss
		if(
			preg_match(
				'/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),?$/m',
				$html,
				$matches
			) !== 1
		){

			throw new Exception("Failed to get JSON object");
		}

		$json = json_decode($matches[1], true);

		if($json === null){

			throw new Exception("Failed to decode JSON object");
		}

		$out = [
			"status" => "ok",
			"npt" => null,
			"video" => [],
			"author" => [],
			"livestream" => [],
			"playlist" => [],
			"reel" => []
		];

		// get npt
		$out["npt"] = $this->parse_npt($json, "video", $proxy);

		// get results
		$mainline = $json["render"]["presenter"]["regions"]["mainline"] ?? [];

		foreach($mainline as $category){

			if(($category["display_type"] ?? null) != "video-youtube"){

				// only this category is parsed
				continue;
			}

			foreach(($category["results"] ?? []) as $video){

				if(isset($video["thumbnailUrl"])){

					$thumb = [
						"ratio" => "16:9",
						"url" => $this->unshitimage($video["thumbnailUrl"])
					];
				}else{

					$thumb = [
						"ratio" => null,
						"url" => null
					];
				}

				$out["video"][] = [
					"title" => $video["title"],
					"description" => $this->limitstrlen($video["description"]),
					"author" => [
						"name" => $video["channelTitle"],
						"url" => null,
						"avatar" => null
					],
					"date" => strtotime($video["publishDate"]),
					"duration" => $this->hms2int($video["duration"]),
					"views" => (int)$video["viewCount"],
					"thumb" => $thumb,
					"url" => $video["clickUrl"]
				];
			}
		}

		return $out;
	}
 | 
						|
	
 | 
						|
	/**
	 * Scrape one page of Startpage news results.
	 *
	 * When $get["npt"] is set, the stored request payload and proxy are read
	 * back from the backend and replayed. Otherwise a new query is built from
	 * $get["s"] and the nsfw/time filters.
	 *
	 * @param array $get Request parameters; reads "npt", "s", "nsfw" and "time"
	 * @return array "status", "npt" (result of parse_npt()) and "news" (list of results)
	 * @throws Exception On an empty search term, a failed fetch, a page rejected
	 *                   by detect_captcha(), or a page whose JSON cannot be found/decoded
	 */
	public function news($get){

		if($get["npt"]){

			// continue a previous search with the payload and proxy kept by the backend
			[$post, $proxy] = $this->backend->get($get["npt"], "news");

			try{
				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$post,
					true
				);
			}catch(Exception $error){

				throw new Exception("Failed to fetch search page", 0, $error);
			}

		}else{

			$search = $get["s"];
			if(strlen($search) === 0){

				throw new Exception("Search term is empty!");
			}

			try{

				$proxy = $this->backend->get_ip();

				$params = [
					"query" => $search,
					"cat" => "news",
					"pl" => "opensearch"
				];

				if($get["nsfw"] == "no"){

					$params["qadf"] = "heavy";
				}

				if($get["time"] != "any"){

					$params["with_date"] = $get["time"];
				}

				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$params
				);

			}catch(Exception $error){

				// a failure in get_ip() or get() is reported the same way
				throw new Exception("Failed to fetch search page", 0, $error);
			}
		}

		$this->detect_captcha($html);

		// preg_match() returns false on a PCRE error, so anything but 1 is a miss
		if(
			preg_match(
				'/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),?$/m',
				$html,
				$matches
			) !== 1
		){

			throw new Exception("Failed to get JSON object");
		}

		$json = json_decode($matches[1], true);

		if($json === null){

			throw new Exception("Failed to decode JSON object");
		}

		$out = [
			"status" => "ok",
			"npt" => null,
			"news" => []
		];

		// get npt
		$out["npt"] = $this->parse_npt($json, "news", $proxy);

		$mainline = $json["render"]["presenter"]["regions"]["mainline"] ?? [];

		foreach($mainline as $category){

			if(($category["display_type"] ?? null) != "news-bing"){

				// unsupported category
				continue;
			}

			foreach(($category["results"] ?? []) as $news){

				if(isset($news["thumbnailUrl"])){

					$thumb = [
						"ratio" => "16:9",
						"url" => $this->unshitimage($news["thumbnailUrl"])
					];
				}else{

					$thumb = [
						"ratio" => null,
						"url" => null
					];
				}

				$out["news"][] = [
					"title" => $this->titledots($this->remove_penguins($news["title"])),
					"author" => $news["source"],
					"description" => $this->titledots($this->remove_penguins($news["description"])),
					// drops the last three characters of the value
					// (presumably milliseconds -> seconds, confirm against the payload)
					"date" => (int)substr((string)$news["date"], 0, -3),
					"thumb" => $thumb,
					"url" => $news["clickUrl"]
				];
			}
		}

		return $out;
	}
 | 
						|
	
 | 
						|
	/**
	 * Build and store the request for the next result page.
	 *
	 * Finds the pagination entry named "Next", takes "q", "sc" and "page"
	 * from the query string of its URL and hands the rebuilt query string
	 * to the backend together with the page type and proxy.
	 *
	 * @param array $json Decoded page JSON; reads render.presenter.pagination.pages
	 * @param string $pagetype Sent as the "cat" parameter and passed to backend->store()
	 * @param mixed $proxy Passed to backend->store() unchanged
	 * @return mixed The value returned by backend->store(), or null when there
	 *               is no "Next" entry or its URL has no query string
	 */
	private function parse_npt($json, $pagetype, $proxy){

		$pages = $json["render"]["presenter"]["pagination"]["pages"] ?? [];

		if(!is_array($pages)){

			return null;
		}

		foreach($pages as $page){

			if(($page["name"] ?? null) != "Next"){

				continue;
			}

			$parts = explode("?", (string)($page["url"] ?? ""), 2);

			if(count($parts) !== 2){

				// no query string: nothing to build the next request from
				return null;
			}

			parse_str($parts[1], $str);

			// http_build_query() leaves out null values, so absent keys are simply not sent
			return
				$this->backend->store(
					http_build_query(
						[
							"lui" => "english",
							"language" => "english",
							"query" => $str["q"] ?? null,
							"cat" => $pagetype,
							"sc" => $str["sc"] ?? null,
							"t" => "device",
							"segment" => "startpage.udog",
							"page" => $str["page"] ?? null
						]
					),
					$pagetype,
					$proxy
				);
		}

		return null;
	}
 | 
						|
	
 | 
						|
	/**
	 * Unwrap an image URL that carries the real image in a "piurl" parameter.
	 *
	 * If the URL has no "piurl" query parameter it is returned untouched.
	 * For gstatic.com, bing.net and bing.com targets everything from the
	 * first "&" of the unwrapped URL onwards is dropped.
	 *
	 * @param string|null $url URL to unwrap
	 * @return string|null The unwrapped URL, or $url itself when there is nothing to unwrap
	 */
	private function unshitimage($url){

		$query = parse_url((string)$url, PHP_URL_QUERY);

		if(!is_string($query)){

			// no query string or unparsable URL
			return $url;
		}

		parse_str($query, $params);

		if(
			!isset($params["piurl"]) ||
			!is_string($params["piurl"])
		){

			return $url;
		}

		$piurl = $params["piurl"];

		foreach(["gstatic.com/", "bing.net/", "bing.com/"] as $host){

			// compare against false: a match at offset 0 is still a match
			if(strpos($piurl, $host) !== false){

				return explode("&", $piurl, 2)[0];
			}
		}

		return $piurl;
	}
 | 
						|
	
 | 
						|
	/**
	 * Shorten a text to its first wrapped line.
	 *
	 * Line breaks are replaced by spaces, the result is word-wrapped at
	 * 300 columns and only the first line of that wrap is returned.
	 *
	 * @param string $text Text to shorten
	 * @return string First wrapped line
	 */
	private function limitstrlen($text){

		// flatten every kind of line break into a space
		$flat = str_replace(["\n\r", "\r\n", "\n", "\r"], " ", $text);

		// wrap on word boundaries, then split off the first line
		$wrapped = wordwrap($flat, 300, "\n");
		$lines = explode("\n", $wrapped, 2);

		return $lines[0];
	}
 | 
						|
	
 | 
						|
	/**
	 * Strip whitespace, dots, NUL bytes and ellipsis characters from both
	 * ends of a title.
	 *
	 * trim() works on bytes, so listing the multi-byte ellipsis in its
	 * character list also strips single bytes of other UTF-8 characters
	 * (a leading opening curly quote, for example) and leaves invalid
	 * UTF-8 behind. A regex in UTF-8 mode removes whole characters only.
	 *
	 * @param string|null $title Title to clean
	 * @return string Cleaned title
	 */
	private function titledots($title){

		$title = (string)$title;

		$trimmed =
			preg_replace(
				'/\A[ .\t\n\r\0\x0B\x{2026}]+|[ .\t\n\r\0\x0B\x{2026}]+\z/u',
				"",
				$title
			);

		if($trimmed === null){

			// input is not valid UTF-8: only strip the single-byte characters
			return trim($title, " .\t\n\r\0\x0B");
		}

		return $trimmed;
	}
 | 
						|
	
 | 
						|
	/**
	 * Convert a "h:m:s", "m:s" or "s" duration into seconds.
	 *
	 * @param string $time Colon separated duration; at most three fields are split off
	 * @return int Duration in seconds
	 */
	private function hms2int($time){

		$seconds = 0;

		// each further field scales what came before by 60
		foreach(explode(":", $time, 3) as $field){

			$seconds = ($seconds * 60) + (int)$field;
		}

		return $seconds;
	}
 | 
						|
	
 | 
						|
	/**
	 * Remove the two private-use marker characters from a text.
	 *
	 * The characters are written as escapes so they stay visible and
	 * survive editors and tools that drop unprintable characters.
	 *
	 * NOTE(review): assumes the markers are U+E000 and U+E001 (the pair
	 * Bing uses for hit highlighting) -- confirm against a live payload.
	 *
	 * @param string $text Text to clean
	 * @return string Text without the marker characters
	 */
	private function remove_penguins($text){

		return str_replace(
			["\u{E000}", "\u{E001}"],
			"",
			$text
		);
	}
 | 
						|
	
 | 
						|
	/**
	 * Throw when the fetched page is a redirect page instead of results.
	 *
	 * The HTML is loaded into the shared fuckhtml instance. A page whose
	 * first <title> is "Redirecting..." is rejected: as a captcha when one
	 * of its links contains the captcha URL, as an unhandled page otherwise.
	 *
	 * @param string $html Page source to inspect
	 * @return void
	 * @throws Exception When the page is a captcha or another redirect page
	 */
	private function detect_captcha($html){

		$this->fuckhtml->load($html);

		$titles = $this->fuckhtml->getElementsByTagName("title");

		if(
			count($titles) === 0 ||
			$titles[0]["innerHTML"] != "Redirecting..."
		){

			// regular page, nothing to report
			return;
		}

		// check if it's a captcha
		$links = $this->fuckhtml->getElementsByTagName("a");

		foreach($links as $link){

			$text = $this->fuckhtml->getTextContent($link["innerHTML"]);

			if(strpos($text, "https://www.startpage.com/sp/captcha") !== false){

				throw new Exception("Startpage returned a captcha");
			}
		}

		throw new Exception("Startpage redirected the scraper to an unhandled page");
	}
 | 
						|
}
 |