1580 lines
		
	
	
		
			34 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			1580 lines
		
	
	
		
			34 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| class startpage{
 | |
| 	
 | |
| 	public function __construct(){
 | |
| 		
 | |
| 		include "lib/backend.php";
 | |
| 		$this->backend = new backend("startpage");
 | |
| 		
 | |
| 		include "lib/fuckhtml.php";
 | |
| 		$this->fuckhtml = new fuckhtml();
 | |
| 	}
 | |
| 	
 | |
| 	public function getfilters($page){
 | |
| 		
 | |
| 		switch($page){
 | |
| 			case "web":
 | |
| 				return [
 | |
| 					"country" => [
 | |
| 						"display" => "Country",
 | |
| 						"option" => [
 | |
| 							"any" => "All Regions",
 | |
| 							"es_AR" => "Argentina",
 | |
| 							"en_AU" => "Australia",
 | |
| 							"de_AT" => "Austria",
 | |
| 							"ru_BY" => "Belarus",
 | |
| 							"fr_BE" => "Belgium (FR)",
 | |
| 							"nl_BE" => "Belgium (NL)",
 | |
| 							"bg_BG" => "Bulgaria",
 | |
| 							"en_CA" => "Canada (EN)",
 | |
| 							"fr_CA" => "Canada (FR)",
 | |
| 							"es_CL" => "Chile",
 | |
| 							"es_CO" => "Colombia",
 | |
| 							"cs_CZ" => "Czech Republic",
 | |
| 							"da_DK" => "Denmark",
 | |
| 							"ar_EG" => "Egypt",
 | |
| 							"et_EE" => "Estonia",
 | |
| 							"fi_FI" => "Finland",
 | |
| 							"fr_FR" => "France",
 | |
| 							"de_DE" => "Germany",
 | |
| 							"el_GR" => "Greece",
 | |
| 							"hu_HU" => "Hungary",
 | |
| 							"hi_IN" => "India (HI)",
 | |
| 							"en_IN" => "India (EN)",
 | |
| 							"id_ID" => "Indonesia (ID)",
 | |
| 							"en_ID" => "Indonesia (EN)",
 | |
| 							"en_IE" => "Ireland",
 | |
| 							"it_IT" => "Italy",
 | |
| 							"ja_JP" => "Japan",
 | |
| 							"ko_KR" => "Korea",
 | |
| 							"ms_MY" => "Malaysia (MS)",
 | |
| 							"en_MY" => "Malaysia (EN)",
 | |
| 							"es_MX" => "Mexico",
 | |
| 							"nl_NL" => "Netherlands",
 | |
| 							"en_NZ" => "New Zealand",
 | |
| 							"no_NO" => "Norway",
 | |
| 							"es_PE" => "Peru",
 | |
| 							"fil_PH" => "Philippines (FIL)",
 | |
| 							"en_PH" => "Philippines (EN)",
 | |
| 							"pl_PL" => "Poland",
 | |
| 							"pt_PT" => "Portugal",
 | |
| 							"ro_RO" => "Romania",
 | |
| 							"ru_RU" => "Russia",
 | |
| 							"ms_SG" => "Singapore (MS)",
 | |
| 							"en_SG" => "Singapore (EN)",
 | |
| 							"es_ES" => "Spain (ES)",
 | |
| 							"ca_ES" => "Spain (CA)",
 | |
| 							"sv_SE" => "Sweden",
 | |
| 							"de_CH" => "Switzerland (DE)",
 | |
| 							"fr_CH" => "Switzerland (FR)",
 | |
| 							"it_CH" => "Switzerland (IT)",
 | |
| 							"tr_TR" => "Turkey",
 | |
| 							"uk_UA" => "Ukraine",
 | |
| 							"en_US" => "US (EN)",
 | |
| 							"es_US" => "US (ES)",
 | |
| 							"es_UY" => "Uruguay",
 | |
| 							"es_VE" => "Venezuela",
 | |
| 							"vi_VN" => "Vietnam (VI)",
 | |
| 							"en_VN" => "Vietnam (EN)",
 | |
| 							"en_ZA" => "South Africa"
 | |
| 						]
 | |
| 					],
 | |
| 					"nsfw" => [ // qadf
 | |
| 						"display" => "NSFW",
 | |
| 						"option" => [
 | |
| 							"yes" => "Yes", // qadf=none
 | |
| 							"no" => "No" // qadf=heavy
 | |
| 						]
 | |
| 					],
 | |
| 					"time" => [ // with_date
 | |
| 						"display" => "Time posted",
 | |
| 						"option" => [
 | |
| 							"any" => "Any time",
 | |
| 							"d" => "Past 24 hours",
 | |
| 							"w" => "Past week",
 | |
| 							"m" => "Past month",
 | |
| 							"y" => "Past year",
 | |
| 						]
 | |
| 					],
 | |
| 					"extendedsearch" => [
 | |
| 						// undefined display, so it wont show in frontend
 | |
| 						"option" => [
 | |
| 							"yes" => "Yes",
 | |
| 							"no" => "No"
 | |
| 						]
 | |
| 					]
 | |
| 				];
 | |
| 				break;
 | |
| 			
 | |
| 			case "images":
 | |
| 				return [
 | |
| 					"nsfw" => [ // qadf
 | |
| 						"display" => "NSFW",
 | |
| 						"option" => [
 | |
| 							"yes" => "Yes", // qadf=none
 | |
| 							"no" => "No" // qadf=heavy
 | |
| 						]
 | |
| 					],
 | |
| 					"size" => [ // flimgsize
 | |
| 						"display" => "Size",
 | |
| 						"option" => [
 | |
| 							"any" => "Any size",
 | |
| 							"Small" => "Small",
 | |
| 							"Medium" => "Medium",
 | |
| 							"Large" => "Large",
 | |
| 							"Wallpaper" => "Wallpaper",
 | |
| 							// from here, image-size-select, var prefix = isz:lt,islt:
 | |
| 							"qsvgs" => "Larger than 400x300",
 | |
| 							"vga" => "Larger than 640x480",
 | |
| 							"svga" => "Larger than 800x600",
 | |
| 							"xga" => "Larger than 1024x768",
 | |
| 							"qsvgs" => "Larger than 400x300",
 | |
| 							"2mp" => "Larger than 2 MP (1600x1200)",
 | |
| 							"4mp" => "Larger than 4 MP (2272x1704)",
 | |
| 							"6mp" => "Larger than 6 MP (2816x2112)",
 | |
| 							"8mp" => "Larger than 8 MP (3264x2448)",
 | |
| 							"10mp" => "Larger than 10 MP (3648x2736)",
 | |
| 							"12mp" => "Larger than 12 MP (4096x3072)",
 | |
| 							"15mp" => "Larger than 15 MP (4480x3360)",
 | |
| 							"20mp" => "Larger than 20 MP (5120x3840)",
 | |
| 							"40mp" => "Larger than 40 MP (7216x5412)",
 | |
| 							"70mp" => "Larger than 70 MP (9600x7200)"
 | |
| 						]
 | |
| 					],
 | |
| 					"color" => [ // flimgcolor
 | |
| 						"display" => "Color",
 | |
| 						"option" => [
 | |
| 							"any" => "Any color",
 | |
| 							// from here, var prefix = ic:
 | |
| 							"color" => "Color only",
 | |
| 							"bnw" => "Black & white", // set to "gray"
 | |
| 							// from here, var prefix = ic:specific,isc:
 | |
| 							"red" => "Red",
 | |
| 							"orange" => "Orange",
 | |
| 							"yellow" => "Yellow",
 | |
| 							"green" => "Green",
 | |
| 							"teal" => "Teal",
 | |
| 							"blue" => "Blue",
 | |
| 							"purple" => "Purple",
 | |
| 							"pink" => "Pink",
 | |
| 							"white" => "White",
 | |
| 							"gray" => "Gray",
 | |
| 							"black" => "Black",
 | |
| 							"brown" => "Brown"
 | |
| 						]
 | |
| 					],
 | |
| 					"type" => [ // flimgtype
 | |
| 						"display" => "Type",
 | |
| 						"option" => [
 | |
| 							"any" => "Any type",
 | |
| 							"AnimatedGif" => "Animated GIF",
 | |
| 							"Clipart" => "Clip Art",
 | |
| 							"Line" => "Line Drawing",
 | |
| 							"Photo" => "Photograph",
 | |
| 							"Transparent" => "Transparent Background"
 | |
| 						]
 | |
| 					],
 | |
| 					"license" => [ // flimglicense
 | |
| 						"display" => "License",
 | |
| 						"option" => [
 | |
| 							"any" => "Any license",
 | |
| 							"p" => "Public domain",
 | |
| 							"s" => "Free to share",
 | |
| 							"sc" => "Free to share commercially",
 | |
| 							"m" => "Free to modify",
 | |
| 							"mc" => "Free to modify commercially"
 | |
| 						]
 | |
| 					]
 | |
| 				];
 | |
| 				break;
 | |
| 			
 | |
| 			case "videos":
 | |
| 				return [
 | |
| 					"nsfw" => [ // qadf
 | |
| 						"display" => "NSFW",
 | |
| 						"option" => [
 | |
| 							"yes" => "Yes", // qadf=none
 | |
| 							"no" => "No" // qadf=heavy
 | |
| 						]
 | |
| 					],
 | |
| 					"sort" => [
 | |
| 						"display" => "Sort by",
 | |
| 						"option" => [
 | |
| 							"relevance" => "Most relevant",
 | |
| 							"popular" => "Most popular",
 | |
| 							"recent" => "Most recent"
 | |
| 						]
 | |
| 					],
 | |
| 					"duration" => [ // with_duration
 | |
| 						"display" => "Duration",
 | |
| 						"option" => [
 | |
| 							"any" => "Any duration",
 | |
| 							"short" => "Short",
 | |
| 							"medium" => "Medium",
 | |
| 							"long" => "Long"
 | |
| 						]
 | |
| 					]
 | |
| 				];
 | |
| 				break;
 | |
| 			
 | |
| 			case "news":
 | |
| 				return [
 | |
| 					"nsfw" => [ // qadf
 | |
| 						"display" => "NSFW",
 | |
| 						"option" => [
 | |
| 							"yes" => "Yes", // qadf=none
 | |
| 							"no" => "No" // qadf=heavy
 | |
| 						]
 | |
| 					],
 | |
| 					"time" => [ // with_date
 | |
| 						"display" => "Time posted",
 | |
| 						"option" => [
 | |
| 							"any" => "Any time",
 | |
| 							"d" => "Past 24 hours",
 | |
| 							"w" => "Past week",
 | |
| 							"m" => "Past month"
 | |
| 						]
 | |
| 					]
 | |
| 				];
 | |
| 				break;
 | |
| 				
 | |
| 				//preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEazerbaijaniN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:21:58 GMT; Secure; Path=/
 | |
| 				//preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius; Domain=startpage.com; Expires=Mon, 28 Oct 2024 20:22:52 GMT; Secure; Path=/
 | |
| 		}
 | |
| 	}
 | |
| 	
 | |
| 	private function get($proxy, $url, $get = [], $post = false, $is_xhr = false){
 | |
| 		
 | |
| 		$curlproc = curl_init();
 | |
| 		
 | |
| 		if($post === true){
 | |
| 			
 | |
| 			curl_setopt($curlproc, CURLOPT_POST, true);
 | |
| 			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
 | |
| 			
 | |
| 		}elseif($get !== []){
 | |
| 			
 | |
| 			$get = http_build_query($get);
 | |
| 			$url .= "?" . $get;
 | |
| 		}
 | |
| 		
 | |
| 		curl_setopt($curlproc, CURLOPT_URL, $url);
 | |
| 		
 | |
| 		// http2 bypass
 | |
| 		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
 | |
| 		
 | |
| 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
 | |
| 		
 | |
| 		if($is_xhr === true){
 | |
| 			
 | |
| 			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
 | |
| 				["User-Agent: " . config::USER_AGENT,
 | |
| 				"Accept: application/json",
 | |
| 				"Accept-Language: en-US,en;q=0.5",
 | |
| 				"Accept-Encoding: gzip",
 | |
| 				"Referer: https://www.startpage.com/",
 | |
| 				"Content-Type: application/json",
 | |
| 				"Content-Length: " . strlen($get),
 | |
| 				"Origin: https://www.startpage.com/",
 | |
| 				"DNT: 1",
 | |
| 				"Connection: keep-alive",
 | |
| 				"Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius",
 | |
| 				"Sec-Fetch-Dest: empty",
 | |
| 				"Sec-Fetch-Mode: cors",
 | |
| 				"Sec-Fetch-Site: same-origin",
 | |
| 				"TE: trailers"]
 | |
| 			);
 | |
| 			
 | |
| 		}elseif($post === true){
 | |
| 			
 | |
| 			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
 | |
| 				["User-Agent: " . config::USER_AGENT,
 | |
| 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
 | |
| 				"Accept-Language: en-US,en;q=0.5",
 | |
| 				"Accept-Encoding: gzip",
 | |
| 				"Referer: https://www.startpage.com/",
 | |
| 				"Content-Type: application/x-www-form-urlencoded",
 | |
| 				"Content-Length: " . strlen($get),
 | |
| 				"DNT: 1",
 | |
| 				"Connection: keep-alive",
 | |
| 				"Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius",
 | |
| 				"Upgrade-Insecure-Requests: 1",
 | |
| 				"Sec-Fetch-Dest: document",
 | |
| 				"Sec-Fetch-Mode: navigate",
 | |
| 				"Sec-Fetch-Site: none",
 | |
| 				"Sec-Fetch-User: ?1",
 | |
| 				"Priority: u=0, i",
 | |
| 				"TE: trailers"]
 | |
| 			);
 | |
| 		}else{
 | |
| 			
 | |
| 			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
 | |
| 				["User-Agent: " . config::USER_AGENT,
 | |
| 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
 | |
| 				"Accept-Language: en-US,en;q=0.5",
 | |
| 				"Accept-Encoding: gzip",
 | |
| 				"DNT: 1",
 | |
| 				"Connection: keep-alive",
 | |
| 				"Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius",
 | |
| 				"Sec-Fetch-Dest: document",
 | |
| 				"Sec-Fetch-Mode: navigate",
 | |
| 				"Sec-Fetch-Site: none",
 | |
| 				"Sec-Fetch-User: ?1",
 | |
| 				"Priority: u=0, i",
 | |
| 				"TE: trailers"]
 | |
| 			);
 | |
| 		}
 | |
| 		
 | |
| 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
 | |
| 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
 | |
| 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
 | |
| 		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
 | |
| 		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
 | |
| 		
 | |
| 		$this->backend->assign_proxy($curlproc, $proxy);
 | |
| 		
 | |
| 		$data = curl_exec($curlproc);
 | |
| 		
 | |
| 		if(curl_errno($curlproc)){
 | |
| 			
 | |
| 			throw new Exception(curl_error($curlproc));
 | |
| 		}
 | |
| 		
 | |
| 		curl_close($curlproc);
 | |
| 		return $data;
 | |
| 	}
 | |
| 	
 | |
| 	public function web($get){
 | |
| 		
 | |
| 		if($get["npt"]){
 | |
| 			
 | |
| 			[$post, $proxy] = $this->backend->get($get["npt"], "web");
 | |
| 			
 | |
| 			try{
 | |
| 				$html = $this->get(
 | |
| 					$proxy,
 | |
| 					"https://www.startpage.com/sp/search",
 | |
| 					$post,
 | |
| 					true
 | |
| 				);
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 			
 | |
| 			$get_instant_answer = false;
 | |
| 			
 | |
| 		}else{
 | |
| 			
 | |
| 			$proxy = $this->backend->get_ip();
 | |
| 			
 | |
| 			$params = [
 | |
| 				"query" => $get["s"],
 | |
| 				"cat" => "web",
 | |
| 				"pl" => "opensearch"
 | |
| 			];
 | |
| 			
 | |
| 			if($get["nsfw"] == "no"){
 | |
| 				
 | |
| 				$params["qadf"] = "heavy";
 | |
| 				$get_instant_answer = false;
 | |
| 			}else{
 | |
| 				
 | |
| 				$get_instant_answer = true;
 | |
| 			}
 | |
| 			
 | |
| 			if($get["country"] !== "any"){
 | |
| 				
 | |
| 				$params["qsr"] = $get["country"];
 | |
| 			}
 | |
| 			
 | |
| 			if($get["time"] !== "any"){
 | |
| 				
 | |
| 				$params["with_date"] = $get["time"];
 | |
| 			}
 | |
| 			
 | |
| 			try{
 | |
| 				$html = $this->get(
 | |
| 					$proxy,
 | |
| 					"https://www.startpage.com/sp/search",
 | |
| 					$params
 | |
| 				);
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 			
 | |
| 			//$html = file_get_contents("scraper/startpage.html");
 | |
| 		}
 | |
| 		
 | |
| 		$this->detect_captcha($html);
 | |
| 		
 | |
| 		if(
 | |
| 			preg_match(
 | |
| 				'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),?$/m',
 | |
| 				$html,
 | |
| 				$matches
 | |
| 			) === 0
 | |
| 		){
 | |
| 			
 | |
| 			throw new Exception("Failed to grep JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		$json = json_decode($matches[1], true);
 | |
| 		
 | |
| 		if($json === null){
 | |
| 			
 | |
| 			throw new Exception("Failed to decode JSON");
 | |
| 		}
 | |
| 		
 | |
| 		//print_r($json);
 | |
| 		
 | |
| 		$out = [
 | |
| 			"status" => "ok",
 | |
| 			"spelling" => [
 | |
| 				"type" => "no_correction",
 | |
| 				"using" => null,
 | |
| 				"correction" => null
 | |
| 			],
 | |
| 			"npt" => null,
 | |
| 			"answer" => [],
 | |
| 			"web" => [],
 | |
| 			"image" => [],
 | |
| 			"video" => [],
 | |
| 			"news" => [],
 | |
| 			"related" => []
 | |
| 		];
 | |
| 		
 | |
| 		// get npt
 | |
| 		$out["npt"] = $this->parse_npt($json, "web", $proxy);
 | |
| 		
 | |
| 		foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
 | |
| 			
 | |
| 			if(!isset($category["display_type"])){
 | |
| 				
 | |
| 				continue;
 | |
| 			}
 | |
| 			
 | |
| 			switch($category["display_type"]){
 | |
| 				
 | |
| 				case "web-google":
 | |
| 					foreach($category["results"] as $result){
 | |
| 						
 | |
| 						$sublinks = [];
 | |
| 						
 | |
| 						foreach($result["siteLinks"] as $sublink){
 | |
| 							
 | |
| 							$sublinks[] = [
 | |
| 								"title" => $sublink["title"],
 | |
| 								"description" => null,
 | |
| 								"url" => $sublink["clickUrl"]
 | |
| 							];
 | |
| 						}
 | |
| 						
 | |
| 						$description =
 | |
| 							explode(
 | |
| 								"...",
 | |
| 								$this->titledots(
 | |
| 									html_entity_decode(
 | |
| 										$this->fuckhtml
 | |
| 										->getTextContent(
 | |
| 											$result["description"]
 | |
| 										)
 | |
| 									)
 | |
| 								),
 | |
| 								2
 | |
| 							);
 | |
| 						
 | |
| 						$date = strtotime(trim($description[0]));
 | |
| 						
 | |
| 						if(
 | |
| 							$date === false ||
 | |
| 							count($description) !== 2 ||
 | |
| 							strlen($description[0]) > 14
 | |
| 						){
 | |
| 							
 | |
| 							// no date found
 | |
| 							$description =
 | |
| 								implode(
 | |
| 									" ... ",
 | |
| 									$description
 | |
| 								);
 | |
| 							
 | |
| 							$date = null;
 | |
| 						}else{
 | |
| 							
 | |
| 							// date found
 | |
| 							$description = ltrim($description[1]);
 | |
| 						}
 | |
| 						
 | |
| 						$out["web"][] = [
 | |
| 							"title" =>
 | |
| 								$this->titledots(
 | |
| 									html_entity_decode(
 | |
| 										$this->fuckhtml
 | |
| 										->getTextContent(
 | |
| 											$result["title"]
 | |
| 										)
 | |
| 									)
 | |
| 								),
 | |
| 							"description" => $description,
 | |
| 							"url" => $result["clickUrl"],
 | |
| 							"date" => $date,
 | |
| 							"type" => "web",
 | |
| 							"thumb" => [
 | |
| 								"url" => null,
 | |
| 								"ratio" => null
 | |
| 							],
 | |
| 							"sublink" => $sublinks,
 | |
| 							"table" => []
 | |
| 						];
 | |
| 					}
 | |
| 					break;
 | |
| 				
 | |
| 				case "images-qi-top":
 | |
| 					foreach($category["results"] as $result){
 | |
| 						
 | |
| 						$out["image"][] = [
 | |
| 							"title" =>
 | |
| 								$this->titledots(
 | |
| 									html_entity_decode(
 | |
| 										$this->fuckhtml
 | |
| 										->getTextContent(
 | |
| 											$result["title"]
 | |
| 										)
 | |
| 									)
 | |
| 								),
 | |
| 							"source" => [
 | |
| 								[
 | |
| 									"url" => $result["rawImageUrl"],
 | |
| 									"width" => (int)$result["width"],	
 | |
| 									"height" => (int)$result["height"]
 | |
| 								],
 | |
| 								[
 | |
| 									"url" => $this->unshitimage($result["mdThumbnailUrl"]),
 | |
| 									"width" => (int)$result["mdThumbnailWidth"],
 | |
| 									"height" => (int)$result["mdThumbnailHeight"]
 | |
| 								]
 | |
| 							],
 | |
| 							"url" =>
 | |
| 								$result["altClickUrl"]
 | |
| 						];
 | |
| 					}
 | |
| 					break;
 | |
| 				
 | |
| 				case "spellsuggest-google":
 | |
| 					$out["spelling"] =
 | |
| 						[
 | |
| 							"type" => "including",
 | |
| 							"using" => $json["render"]["query"],
 | |
| 							"correction" => $category["results"][0]["query"]
 | |
| 						];
 | |
| 					break;
 | |
| 				
 | |
| 				case "dictionary-qi":
 | |
| 					foreach($category["results"] as $result){
 | |
| 						
 | |
| 						$answer = [
 | |
| 							"title" => $result["word"],
 | |
| 							"description" => [],
 | |
| 							"url" => null,
 | |
| 							"thumb" => null,
 | |
| 							"table" => [],
 | |
| 							"sublink" => []
 | |
| 						];
 | |
| 						
 | |
| 						foreach($result["lexical_categories"] as $lexic_type => $definitions){
 | |
| 							
 | |
| 							$answer["description"][] = [
 | |
| 								"type" => "title",
 | |
| 								"value" => $lexic_type
 | |
| 							];
 | |
| 							
 | |
| 							$i = 0;
 | |
| 							
 | |
| 							foreach($definitions as $definition){
 | |
| 								
 | |
| 								$text_definition = trim($definition["definition"]);
 | |
| 								$text_example = trim($definition["example"]);
 | |
| 								$text_synonyms = implode(", ", $definition["synonyms"]);
 | |
| 								
 | |
| 								if($text_definition != ""){
 | |
| 									
 | |
| 									$i++;
 | |
| 									
 | |
| 									$c = count($answer["description"]) - 1;
 | |
| 									if(
 | |
| 										$c !== 0 &&
 | |
| 										$answer["description"][$c]["type"] == "text"
 | |
| 									){
 | |
| 										
 | |
| 										$answer["description"][$c]["value"] .=
 | |
| 											"\n\n" . $i . ". " . $text_definition;
 | |
| 										
 | |
| 									}else{
 | |
| 										
 | |
| 										$answer["description"][] = [
 | |
| 											"type" => "text",
 | |
| 											"value" => $i . ". " . $text_definition
 | |
| 										];
 | |
| 									}
 | |
| 								}
 | |
| 								
 | |
| 								if($text_example != ""){
 | |
| 									
 | |
| 									$answer["description"][] = [
 | |
| 										"type" => "quote",
 | |
| 										"value" => $text_example
 | |
| 									];
 | |
| 								}
 | |
| 								
 | |
| 								if($text_synonyms != ""){
 | |
| 									
 | |
| 									$answer["description"][] = [
 | |
| 										"type" => "text",
 | |
| 										"value" => "Synonyms: " . $text_synonyms
 | |
| 									];
 | |
| 								}
 | |
| 							}
 | |
| 						}
 | |
| 						
 | |
| 						$out["answer"][] = $answer;
 | |
| 					}
 | |
| 					break;
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		// parse instant answers
 | |
| 		if(
 | |
| 			$get["extendedsearch"] == "yes" &&
 | |
| 			$get_instant_answer === true
 | |
| 		){
 | |
| 			
 | |
| 			// https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
 | |
| 			try{
 | |
| 				$post = [
 | |
| 					"se" => "n0vze2y9dqwy",
 | |
| 					"q" => $json["render"]["query"],
 | |
| 					"results" => [], // populate
 | |
| 					"enableKnowledgePanel" => true,
 | |
| 					"enableMediaThumbBar" => false,
 | |
| 					"enableSearchSuggestions" => false,
 | |
| 					"enableTripadvisorProperties" => [],
 | |
| 					"enableTripadvisorPlaces" => [],
 | |
| 					"enableTripadvisorPlacesForLocations" => [],
 | |
| 					"enableWebProducts" => false,
 | |
| 					"tripadvisorPartnerId" => null,
 | |
| 					"tripadvisorMapColorMode" => "light",
 | |
| 					"tripadvisorDisablesKnowledgePanel" => false,
 | |
| 					"instantAnswers" => [
 | |
| 						"smartAnswers",
 | |
| 						"youtube",
 | |
| 						"tripadvisor"
 | |
| 					],
 | |
| 					"iaType" => null,
 | |
| 					"forceEnhancedKnowledgePanel" => false,
 | |
| 					"shoppingOnly" => false,
 | |
| 					"allowAdultProducts" => true,
 | |
| 					"lang" => "en",
 | |
| 					"browserLang" => "en-US",
 | |
| 					"browserTimezone" => "America/New_York",
 | |
| 					"market" => null,
 | |
| 					"userLocation" => null,
 | |
| 					"userDate" => date("Y-m-d"),
 | |
| 					"userAgentType" => "unknown"
 | |
| 				];
 | |
| 				
 | |
| 				foreach($out["web"] as $result){
 | |
| 					
 | |
| 					$post["results"][] = [
 | |
| 						"url" => $result["url"],
 | |
| 						"title" => $result["title"]
 | |
| 					];
 | |
| 				}
 | |
| 				
 | |
| 				$post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
 | |
| 				
 | |
| 				$additional_data =
 | |
| 					$this->get(
 | |
| 						$proxy,
 | |
| 						"https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . $json["render"]["callback_sc"] . "&sr=1",
 | |
| 						$post,
 | |
| 						true,
 | |
| 						true
 | |
| 					);
 | |
| 				
 | |
| 				$additional_data = json_decode($additional_data, true);
 | |
| 				
 | |
| 				if($additional_data === null){
 | |
| 					
 | |
| 					throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
 | |
| 				}
 | |
| 				
 | |
| 				if(!isset($additional_data["knowledgePanel"])){
 | |
| 					
 | |
| 					throw new Exception("Response has missing data (knowledgePanel)");
 | |
| 				}
 | |
| 				
 | |
| 				$additional_data = $additional_data["knowledgePanel"];
 | |
| 				
 | |
| 				$answer = [
 | |
| 					"title" => $additional_data["meta"]["title"],
 | |
| 					"description" => [
 | |
| 						[
 | |
| 							"type" => "quote",
 | |
| 							"value" => $additional_data["meta"]["description"]
 | |
| 						]
 | |
| 					],
 | |
| 					"url" => $additional_data["meta"]["origWikiUrl"],
 | |
| 					"thumb" => $additional_data["meta"]["image"],
 | |
| 					"table" => [],
 | |
| 					"sublink" => []
 | |
| 				];
 | |
| 				
 | |
| 				// parse html for instant answer
 | |
| 				$this->fuckhtml->load($additional_data["html"]);
 | |
| 				
 | |
| 				$div =
 | |
| 					$this->fuckhtml
 | |
| 					->getElementsByTagName(
 | |
| 						"div"
 | |
| 					);
 | |
| 				
 | |
| 				// get description
 | |
| 				$description =
 | |
| 					$this->fuckhtml
 | |
| 					->getElementsByClassName(
 | |
| 						"sx-kp-short-extract sx-kp-short-extract-complete",
 | |
| 						$div
 | |
| 					);
 | |
| 				
 | |
| 				if(count($description) !== 0){
 | |
| 					
 | |
| 					$answer["description"][] = [
 | |
| 						"type" => "text",
 | |
| 						"value" =>
 | |
| 							html_entity_decode(
 | |
| 								$this->fuckhtml
 | |
| 								->getTextContent(
 | |
| 									$description[0]
 | |
| 								)
 | |
| 							)
 | |
| 					];
 | |
| 				}
 | |
| 				
 | |
| 				// get socials
 | |
| 				$socials =
 | |
| 					$this->fuckhtml
 | |
| 					->getElementsByClassName(
 | |
| 						"sx-wiki-social-link",
 | |
| 						"a"
 | |
| 					);
 | |
| 				
 | |
| 				foreach($socials as $social){
 | |
| 					
 | |
| 					$title =
 | |
| 						$this->fuckhtml
 | |
| 						->getTextContent(
 | |
| 							$social["attributes"]["title"]
 | |
| 						);
 | |
| 					
 | |
| 					$url =
 | |
| 						$this->fuckhtml
 | |
| 						->getTextContent(
 | |
| 							$social["attributes"]["href"]
 | |
| 						);
 | |
| 					
 | |
| 					switch($title){
 | |
| 						
 | |
| 						case "Official Website":
 | |
| 							$title = "Website";
 | |
| 							break;
 | |
| 					}
 | |
| 					
 | |
| 					$answer["sublink"][$title] = $url;
 | |
| 				}
 | |
| 				
 | |
| 				// get videos
 | |
| 				$videos =
 | |
| 					$this->fuckhtml
 | |
| 					->getElementsByClassName(
 | |
| 						"sx-kp-video-grid-item",
 | |
| 						$div
 | |
| 					);
 | |
| 				
 | |
| 				foreach($videos as $video){
 | |
| 					
 | |
| 					$this->fuckhtml->load($video);
 | |
| 					
 | |
| 					$as =
 | |
| 						$this->fuckhtml
 | |
| 						->getElementsByTagName(
 | |
| 							"a"
 | |
| 						);
 | |
| 					
 | |
| 					if(count($as) === 0){
 | |
| 						
 | |
| 						// ?? invalid
 | |
| 						continue;
 | |
| 					}
 | |
| 					
 | |
| 					$image =
 | |
| 						$this->fuckhtml
 | |
| 						->getElementsByAttributeName(
 | |
| 							"data-sx-src",
 | |
| 							"img"
 | |
| 						);
 | |
| 					
 | |
| 					if(count($image) !== 0){
 | |
| 						
 | |
| 						$thumb = [
 | |
| 							"ratio" => "16:9",
 | |
| 							"url" =>
 | |
| 								$this->fuckhtml
 | |
| 								->getTextContent(
 | |
| 									$image[0]["attributes"]["data-sx-src"]
 | |
| 								)
 | |
| 						];
 | |
| 					}else{
 | |
| 
 | |
| 						$thumb = [
 | |
| 							"ratio" => null,
 | |
| 							"url" => null
 | |
| 						];
 | |
| 					}
 | |
| 					
 | |
| 					$out["video"][] = [
 | |
| 						"title" =>
 | |
| 							$this->fuckhtml
 | |
| 							->getTextContent(
 | |
| 								$as[0]["attributes"]["title"]
 | |
| 							),
 | |
| 						"description" => null,
 | |
| 						"date" => null,
 | |
| 						"duration" => null,
 | |
| 						"views" => null,
 | |
| 						"thumb" => $thumb,
 | |
| 						"url" =>
 | |
| 							$this->fuckhtml
 | |
| 							->getTextContent(
 | |
| 								$as[0]["attributes"]["href"]
 | |
| 							)
 | |
| 					];
 | |
| 				}
 | |
| 				
 | |
| 				// reset
 | |
| 				$this->fuckhtml->load($additional_data["html"]);
 | |
| 				
 | |
| 				// get table elements
 | |
| 				$table =
 | |
| 					$this->fuckhtml
 | |
| 					->getElementsByClassName(
 | |
| 						"sx-infobox",
 | |
| 						"table"
 | |
| 					);
 | |
| 				
 | |
| 				if(count($table) !== 0){
 | |
| 					
 | |
| 					$trs =
 | |
| 						$this->fuckhtml
 | |
| 						->getElementsByTagName(
 | |
| 							"tr"
 | |
| 						);
 | |
| 					
 | |
| 					foreach($trs as $tr){
 | |
| 						
 | |
| 						$this->fuckhtml->load($tr);
 | |
| 						
 | |
| 						// ok so startpage devs cant fucking code a table
 | |
| 						// td = content
 | |
| 						// th (AAAHH) = title
 | |
| 						$tds =
 | |
| 							$this->fuckhtml
 | |
| 							->getElementsByTagName(
 | |
| 								"td"	
 | |
| 							);
 | |
| 						
 | |
| 						$ths =
 | |
| 							$this->fuckhtml
 | |
| 							->getElementsByTagName(
 | |
| 								"th"
 | |
| 							);
 | |
| 						
 | |
| 						if(
 | |
| 							count($ths) === 1 &&
 | |
| 							count($tds) === 1
 | |
| 						){
 | |
| 							
 | |
| 							$title =
 | |
| 								$this->fuckhtml
 | |
| 								->getTextContent(
 | |
| 									$ths[0]
 | |
| 								);
 | |
| 							
 | |
| 							$description = [];
 | |
| 							
 | |
| 							$this->fuckhtml->load($tds[0]);
 | |
| 							
 | |
| 							$lis =
 | |
| 								$this->fuckhtml
 | |
| 								->getElementsByTagName(
 | |
| 									"li"
 | |
| 								);
 | |
| 							
 | |
| 							if(count($lis) !== 0){
 | |
| 								
 | |
| 								foreach($lis as $li){
 | |
| 									
 | |
| 									$description[] =
 | |
| 										$this->fuckhtml
 | |
| 										->getTextContent(
 | |
| 											$li
 | |
| 										);
 | |
| 								}
 | |
| 								
 | |
| 								$description = implode(", ", $description);
 | |
| 							}else{
 | |
| 								
 | |
| 								$description =
 | |
| 									$this->fuckhtml
 | |
| 									->getTextContent(
 | |
| 										$tds[0]
 | |
| 									);
 | |
| 							}
 | |
| 							
 | |
| 							$answer["table"][$title] = $description;
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 				
 | |
| 				$out["answer"][] = $answer;
 | |
| 				
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				// do nothing
 | |
| 				//echo "error!";
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		return $out;
 | |
| 	}
 | |
| 	
 | |
| 	public function image($get){
 | |
| 		
 | |
| 		if($get["npt"]){
 | |
| 			
 | |
| 			[$post, $proxy] = $this->backend->get($get["npt"], "images");
 | |
| 			
 | |
| 			try{
 | |
| 				$html = $this->get(
 | |
| 					$proxy,
 | |
| 					"https://www.startpage.com/sp/search",
 | |
| 					$post,
 | |
| 					true
 | |
| 				);
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 			
 | |
| 		}else{
 | |
| 			
 | |
| 			$search = $get["s"];
 | |
| 			if(strlen($search) === 0){
 | |
| 				
 | |
| 				throw new Exception("Search term is empty!");
 | |
| 			}
 | |
| 			
 | |
| 			try{
 | |
| 				
 | |
| 				$proxy = $this->backend->get_ip();
 | |
| 				
 | |
| 				$params = [
 | |
| 					"query" => $get["s"],
 | |
| 					"cat" => "images",
 | |
| 					"pl" => "opensearch"
 | |
| 				];
 | |
| 				
 | |
| 				if($get["nsfw"] == "no"){
 | |
| 					
 | |
| 					$params["qadf"] = "heavy";
 | |
| 				}
 | |
| 				
 | |
| 				if($get["size"] != "any"){
 | |
| 					
 | |
| 					if(
 | |
| 						$get["size"] == "Small" ||
 | |
| 						$get["size"] == "Medium" ||
 | |
| 						$get["size"] == "Large" ||
 | |
| 						$get["size"] == "Wallpaper"
 | |
| 					){
 | |
| 						
 | |
| 						$params["flimgsize"] = $get["size"];
 | |
| 					}else{
 | |
| 						
 | |
| 						$params["image-size-select"] = "isz:lt,islt:" . $get["size"];
 | |
| 					}
 | |
| 				}
 | |
| 				
 | |
| 				if($get["color"] != "any"){
 | |
| 					
 | |
| 					if($get["color"] == "color"){
 | |
| 						
 | |
| 						$params["flimgcolor"] = "ic:color";
 | |
| 					}elseif($get["color"] == "bnw"){
 | |
| 						
 | |
| 						$params["flimgcolor"] = "ic:gray";
 | |
| 					}else{
 | |
| 						
 | |
| 						$params["flimgcolor"] = "ic:specific,isc:" . $get["color"];
 | |
| 					}
 | |
| 				}
 | |
| 				
 | |
| 				if($get["type"] != "any"){
 | |
| 					
 | |
| 					$params["flimgtype"] = $get["type"];
 | |
| 				}
 | |
| 				
 | |
| 				if($get["license"] != "any"){
 | |
| 					
 | |
| 					$params["flimglicense"] = $get["license"];
 | |
| 				}
 | |
| 				
 | |
| 				try{
 | |
| 					$html = $this->get(
 | |
| 						$proxy,
 | |
| 						"https://www.startpage.com/sp/search",
 | |
| 						$params
 | |
| 					);
 | |
| 				}catch(Exception $error){
 | |
| 					
 | |
| 					throw new Exception("Failed to fetch search page");
 | |
| 				}
 | |
| 				//$html = file_get_contents("scraper/startpage.html");
 | |
| 				
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		$this->detect_captcha($html);
 | |
| 		
 | |
| 		$out = [
 | |
| 			"status" => "ok",
 | |
| 			"npt" => null,
 | |
| 			"image" => []
 | |
| 		];
 | |
| 		
 | |
| 		if(
 | |
| 			preg_match(
 | |
| 				'/React\.createElement\(UIStartpage\.AppSerpImages, ?(.+)\),?$/m',
 | |
| 				$html,
 | |
| 				$matches
 | |
| 			) === 0
 | |
| 		){
 | |
| 			
 | |
| 			throw new Exception("Failed to grep JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		$json = json_decode($matches[1], true);
 | |
| 		
 | |
| 		if($json === null){
 | |
| 			
 | |
| 			throw new Exception("Failed to decode JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		// get npt
 | |
| 		$out["npt"] = $this->parse_npt($json, "images", $proxy);
 | |
| 		
 | |
| 		// get images
 | |
| 		foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
 | |
| 			
 | |
| 			if($category["display_type"] != "images-bing"){
 | |
| 				
 | |
| 				// ignore ads and !! suggestions !! @todo
 | |
| 				continue;
 | |
| 			}
 | |
| 			
 | |
| 			foreach($category["results"] as $image){
 | |
| 				
 | |
| 				$out["image"][] = [
 | |
| 					"title" => $this->titledots($image["title"]),
 | |
| 					"source" => [
 | |
| 						[
 | |
| 							"url" => $this->unshitimage($image["clickUrl"]),
 | |
| 							"width" => (int)$image["width"],
 | |
| 							"height" => (int)$image["height"]
 | |
| 						],
 | |
| 						[
 | |
| 							"url" => $this->unshitimage($image["thumbnailUrl"]),
 | |
| 							"width" => (int)$image["thumbnailWidth"],
 | |
| 							"height" => (int)$image["thumbnailHeight"]
 | |
| 						]
 | |
| 					],
 | |
| 					"url" => $image["altClickUrl"]
 | |
| 				];
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		return $out;
 | |
| 	}
 | |
| 	
 | |
| 	public function video($get){
 | |
| 		
 | |
| 		if($get["npt"]){
 | |
| 			
 | |
| 			[$post, $proxy] = $this->backend->get($get["npt"], "videos");
 | |
| 			
 | |
| 			try{
 | |
| 				$html = $this->get(
 | |
| 					$proxy,
 | |
| 					"https://www.startpage.com/sp/search",
 | |
| 					$post,
 | |
| 					true
 | |
| 				);
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 			
 | |
| 		}else{
 | |
| 			
 | |
| 			$search = $get["s"];
 | |
| 			if(strlen($search) === 0){
 | |
| 				
 | |
| 				throw new Exception("Search term is empty!");
 | |
| 			}
 | |
| 			
 | |
| 			try{
 | |
| 				
 | |
| 				$proxy = $this->backend->get_ip();
 | |
| 				
 | |
| 				$params = [
 | |
| 					"query" => $get["s"],
 | |
| 					"cat" => "video",
 | |
| 					"pl" => "opensearch"
 | |
| 				];
 | |
| 				
 | |
| 				if($get["nsfw"] == "no"){
 | |
| 					
 | |
| 					$params["qadf"] = "heavy";
 | |
| 				}
 | |
| 				
 | |
| 				if($get["sort"] != "relevance"){
 | |
| 					
 | |
| 					$params["sort_by"] = $get["sort"];
 | |
| 				}
 | |
| 				
 | |
| 				if($get["duration"] != "any"){
 | |
| 					
 | |
| 					$params["with_duration"] = $get["duration"];
 | |
| 				}
 | |
| 				
 | |
| 				try{
 | |
| 					$html = $this->get(
 | |
| 						$proxy,
 | |
| 						"https://www.startpage.com/sp/search",
 | |
| 						$params
 | |
| 					);
 | |
| 				}catch(Exception $error){
 | |
| 					
 | |
| 					throw new Exception("Failed to fetch search page");
 | |
| 				}
 | |
| 				//$html = file_get_contents("scraper/startpage.html");
 | |
| 				
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		$this->detect_captcha($html);
 | |
| 		
 | |
| 		if(
 | |
| 			preg_match(
 | |
| 				'/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),?$/m',
 | |
| 				$html,
 | |
| 				$matches
 | |
| 			) === 0
 | |
| 		){
 | |
| 			
 | |
| 			throw new Exception("Failed to get JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		$json = json_decode($matches[1], true);
 | |
| 		
 | |
| 		if($json === null){
 | |
| 			
 | |
| 			throw new Exception("Failed to decode JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		$out = [
 | |
| 			"status" => "ok",
 | |
| 			"npt" => null,
 | |
| 			"video" => [],
 | |
| 			"author" => [],
 | |
| 			"livestream" => [],
 | |
| 			"playlist" => [],
 | |
| 			"reel" => []
 | |
| 		];
 | |
| 		
 | |
| 		// get npt
 | |
| 		$out["npt"] = $this->parse_npt($json, "video", $proxy);
 | |
| 		
 | |
| 		// get results
 | |
| 		foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
 | |
| 			
 | |
| 			if($category["display_type"] == "video-youtube"){
 | |
| 				
 | |
| 				foreach($category["results"] as $video){
 | |
| 					
 | |
| 					if(
 | |
| 						isset($video["thumbnailUrl"]) &&
 | |
| 						$video["thumbnailUrl"] !== null
 | |
| 					){
 | |
| 						
 | |
| 						$thumb = [
 | |
| 							"ratio" => "16:9",
 | |
| 							"url" => $this->unshitimage($video["thumbnailUrl"])
 | |
| 						];
 | |
| 					}else{
 | |
| 						
 | |
| 						$thumb = [
 | |
| 							"ratio" => null,
 | |
| 							"url" => null
 | |
| 						];
 | |
| 					}
 | |
| 					
 | |
| 					$out["video"][] = [
 | |
| 						"title" => $video["title"],
 | |
| 						"description" => $this->limitstrlen($video["description"]),
 | |
| 						"author" => [
 | |
| 							"name" => $video["channelTitle"],
 | |
| 							"url" => null,
 | |
| 							"avatar" => null
 | |
| 						],
 | |
| 						"date" => strtotime($video["publishDate"]),
 | |
| 						"duration" => $this->hms2int($video["duration"]),
 | |
| 						"views" => (int)$video["viewCount"],
 | |
| 						"thumb" => $thumb,
 | |
| 						"url" => $video["clickUrl"]
 | |
| 					];
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		return $out;
 | |
| 	}
 | |
| 	
 | |
| 	public function news($get){
 | |
| 		
 | |
| 		if($get["npt"]){
 | |
| 			
 | |
| 			[$post, $proxy] = $this->backend->get($get["npt"], "news");
 | |
| 			
 | |
| 			try{
 | |
| 				$html = $this->get(
 | |
| 					$proxy,
 | |
| 					"https://www.startpage.com/sp/search",
 | |
| 					$post,
 | |
| 					true
 | |
| 				);
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 			
 | |
| 		}else{
 | |
| 			
 | |
| 			$search = $get["s"];
 | |
| 			if(strlen($search) === 0){
 | |
| 				
 | |
| 				throw new Exception("Search term is empty!");
 | |
| 			}
 | |
| 			
 | |
| 			try{
 | |
| 				
 | |
| 				$proxy = $this->backend->get_ip();
 | |
| 				
 | |
| 				$params = [
 | |
| 					"query" => $get["s"],
 | |
| 					"cat" => "news",
 | |
| 					"pl" => "opensearch"
 | |
| 				];
 | |
| 								
 | |
| 				if($get["nsfw"] == "no"){
 | |
| 					
 | |
| 					$params["qadf"] = "heavy";
 | |
| 				}
 | |
| 				
 | |
| 				if($get["time"] != "any"){
 | |
| 					
 | |
| 					$params["with_date"] = $get["time"];
 | |
| 				}
 | |
| 				
 | |
| 				try{
 | |
| 					$html = $this->get(
 | |
| 						$proxy,
 | |
| 						"https://www.startpage.com/sp/search",
 | |
| 						$params
 | |
| 					);
 | |
| 				}catch(Exception $error){
 | |
| 					
 | |
| 					throw new Exception("Failed to fetch search page");
 | |
| 				}
 | |
| 				//$html = file_get_contents("scraper/startpage.html");
 | |
| 				
 | |
| 			}catch(Exception $error){
 | |
| 				
 | |
| 				throw new Exception("Failed to fetch search page");
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		$this->detect_captcha($html);
 | |
| 		
 | |
| 		if(
 | |
| 			preg_match(
 | |
| 				'/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),?$/m',
 | |
| 				$html,
 | |
| 				$matches
 | |
| 			) === 0
 | |
| 		){
 | |
| 			
 | |
| 			throw new Exception("Failed to get JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		$json = json_decode($matches[1], true);
 | |
| 		
 | |
| 		if($json === null){
 | |
| 			
 | |
| 			throw new Exception("Failed to decode JSON object");
 | |
| 		}
 | |
| 		
 | |
| 		$out = [
 | |
| 			"status" => "ok",
 | |
| 			"npt" => null,
 | |
| 			"news" => []
 | |
| 		];
 | |
| 		
 | |
| 		// get npt
 | |
| 		$out["npt"] = $this->parse_npt($json, "news", $proxy);
 | |
| 		
 | |
| 		foreach($json["render"]["presenter"]["regions"]["mainline"] as $category){
 | |
| 			
 | |
| 			if($category["display_type"] != "news-bing"){
 | |
| 				
 | |
| 				// unsupported category
 | |
| 				continue;
 | |
| 			}
 | |
| 			
 | |
| 			foreach($category["results"] as $news){
 | |
| 				
 | |
| 				if(
 | |
| 					isset($news["thumbnailUrl"]) &&
 | |
| 					$news["thumbnailUrl"] !== null
 | |
| 				){
 | |
| 					
 | |
| 					$thumb = [
 | |
| 						"ratio" => "16:9",
 | |
| 						"url" => $this->unshitimage($news["thumbnailUrl"])
 | |
| 					];
 | |
| 				}else{
 | |
| 					
 | |
| 					$thumb = [
 | |
| 						"ratio" => null,
 | |
| 						"url" => null
 | |
| 					];
 | |
| 				}
 | |
| 				
 | |
| 				$out["news"][] = [
 | |
| 					"title" => $this->titledots($this->remove_penguins($news["title"])),
 | |
| 					"author" => $news["source"],
 | |
| 					"description" => $this->titledots($this->remove_penguins($news["description"])),
 | |
| 					"date" => (int)substr((string)$news["date"], 0, -3),
 | |
| 					"thumb" => $thumb,
 | |
| 					"url" => $news["clickUrl"]
 | |
| 				];
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		return $out;
 | |
| 	}
 | |
| 	
 | |
| 	private function parse_npt($json, $pagetype, $proxy){
 | |
| 		
 | |
| 		foreach($json["render"]["presenter"]["pagination"]["pages"] as $page){
 | |
| 			
 | |
| 			if($page["name"] == "Next"){
 | |
| 				
 | |
| 				parse_str(
 | |
| 					explode(
 | |
| 						"?",
 | |
| 						$page["url"],
 | |
| 						2
 | |
| 					)[1],
 | |
| 					$str
 | |
| 				);
 | |
| 				
 | |
| 				return
 | |
| 					$this->backend->store(
 | |
| 						http_build_query(
 | |
| 							[
 | |
| 								"lui" => "english",
 | |
| 								"language" => "english",
 | |
| 								"query" => $str["q"],
 | |
| 								"cat" => $pagetype,
 | |
| 								"sc" => $str["sc"],
 | |
| 								"t" => "device",
 | |
| 								"segment" => "startpage.udog",
 | |
| 								"page" => $str["page"]
 | |
| 							]
 | |
| 						),
 | |
| 						$pagetype,
 | |
| 						$proxy
 | |
| 					);
 | |
| 				
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 		
 | |
| 		return null;
 | |
| 	}
 | |
| 	
 | |
| 	private function unshitimage($url){
 | |
| 		
 | |
| 		$query = parse_url($url, PHP_URL_QUERY);
 | |
| 		parse_str($query, $query);
 | |
| 		
 | |
| 		if(isset($query["piurl"])){
 | |
| 			
 | |
| 			if(strpos($query["piurl"], "gstatic.com/")){
 | |
| 				
 | |
| 				return
 | |
| 					explode(
 | |
| 						"&",
 | |
| 						$query["piurl"],
 | |
| 						2
 | |
| 					)[0];
 | |
| 			}
 | |
| 			
 | |
| 			if(
 | |
| 				strpos($query["piurl"], "bing.net/") ||
 | |
| 				strpos($query["piurl"], "bing.com/")
 | |
| 			){
 | |
| 				
 | |
| 				return
 | |
| 					explode(
 | |
| 						"&",
 | |
| 						$query["piurl"],
 | |
| 						2
 | |
| 					)[0];
 | |
| 			}
 | |
| 			
 | |
| 			return $query["piurl"];
 | |
| 		}
 | |
| 		
 | |
| 		return $url;
 | |
| 	}
 | |
| 	
 | |
| 	private function limitstrlen($text){
 | |
| 		
 | |
| 		return
 | |
| 			explode(
 | |
| 				"\n",
 | |
| 				wordwrap(
 | |
| 					str_replace(
 | |
| 						["\n\r", "\r\n", "\n", "\r"],
 | |
| 						" ",
 | |
| 						$text
 | |
| 					),
 | |
| 					300,
 | |
| 					"\n"
 | |
| 				),
 | |
| 				2
 | |
| 			)[0];
 | |
| 	}
 | |
| 	
 | |
| 	private function titledots($title){
 | |
| 		
 | |
| 		return trim($title, " .\t\n\r\0\x0B…");
 | |
| 	}
 | |
| 	
 | |
| 	private function hms2int($time){
 | |
| 		
 | |
| 		$parts = explode(":", $time, 3);
 | |
| 		$time = 0;
 | |
| 		
 | |
| 		if(count($parts) === 3){
 | |
| 			
 | |
| 			// hours
 | |
| 			$time = $time + ((int)$parts[0] * 3600);
 | |
| 			array_shift($parts);
 | |
| 		}
 | |
| 		
 | |
| 		if(count($parts) === 2){
 | |
| 			
 | |
| 			// minutes
 | |
| 			$time = $time + ((int)$parts[0] * 60);
 | |
| 			array_shift($parts);
 | |
| 		}
 | |
| 		
 | |
| 		// seconds
 | |
| 		$time = $time + (int)$parts[0];
 | |
| 		
 | |
| 		return $time;
 | |
| 	}
 | |
| 	
 | |
| 	private function remove_penguins($text){
 | |
| 		
 | |
| 		return str_replace(
 | |
| 			["", ""],
 | |
| 			"",
 | |
| 			$text
 | |
| 		);
 | |
| 	}
 | |
| 	
 | |
| 	private function detect_captcha($html){
 | |
| 		
 | |
| 		$this->fuckhtml->load($html);
 | |
| 		
 | |
| 		$title =
 | |
| 			$this->fuckhtml
 | |
| 			->getElementsByTagName(
 | |
| 				"title"
 | |
| 			);
 | |
| 		
 | |
| 		if(
 | |
| 			count($title) !== 0 &&
 | |
| 			$title[0]["innerHTML"] == "Redirecting..."
 | |
| 		){
 | |
| 			
 | |
| 			// check if it's a captcha
 | |
| 			$as =
 | |
| 				$this->fuckhtml
 | |
| 				->getElementsByTagName(
 | |
| 					"a"
 | |
| 				);
 | |
| 			
 | |
| 			foreach($as as $a){
 | |
| 				
 | |
| 				if(
 | |
| 					strpos(
 | |
| 						$this->fuckhtml
 | |
| 						->getTextContent(
 | |
| 							$a["innerHTML"]
 | |
| 						),
 | |
| 						"https://www.startpage.com/sp/captcha"
 | |
| 					) !== false
 | |
| 				){
 | |
| 					
 | |
| 					throw new Exception("Startpage returned a captcha");
 | |
| 				}
 | |
| 			}
 | |
| 			
 | |
| 			throw new Exception("Startpage redirected the scraper to an unhandled page");
 | |
| 		}
 | |
| 	}
 | |
| }
 |