<?php

class startpage{
	
	/**
	 * Load the helper libraries this scraper depends on and create one
	 * instance of each.
	 *
	 * include_once is used instead of include so that constructing this class
	 * more than once, or after another component has already loaded the same
	 * library files, does not abort with a "cannot redeclare class" fatal error.
	 */
	public function __construct(){
		
		// request/proxy helper, created with the "startpage" identifier
		include_once "lib/backend.php";
		$this->backend = new backend("startpage");
		
		// HTML parsing helper used for result titles/descriptions and the
		// instant answer markup
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Describe the filters available for a search page.
	 *
	 * Only the "web" page is known. Each filter maps to an "option" table of
	 * value => label pairs and, when it should be rendered, a "display" label.
	 * Any other page yields null.
	 */
	public function getfilters($page){
		
		// loose comparison on purpose: same matching rule as a switch/case
		if($page != "web"){
			
			return null;
		}
		
		$regions = [
			"any" => "All Regions",
			"es_AR" => "Argentina",
			"en_AU" => "Australia",
			"de_AT" => "Austria",
			"ru_BY" => "Belarus",
			"fr_BE" => "Belgium (FR)",
			"nl_BE" => "Belgium (NL)",
			"bg_BG" => "Bulgaria",
			"en_CA" => "Canada (EN)",
			"fr_CA" => "Canada (FR)",
			"es_CL" => "Chile",
			"es_CO" => "Colombia",
			"cs_CZ" => "Czech Republic",
			"da_DK" => "Denmark",
			"ar_EG" => "Egypt",
			"et_EE" => "Estonia",
			"fi_FI" => "Finland",
			"fr_FR" => "France",
			"de_DE" => "Germany",
			"el_GR" => "Greece",
			"hu_HU" => "Hungary",
			"hi_IN" => "India (HI)",
			"en_IN" => "India (EN)",
			"id_ID" => "Indonesia (ID)",
			"en_ID" => "Indonesia (EN)",
			"en_IE" => "Ireland",
			"it_IT" => "Italy",
			"ja_JP" => "Japan",
			"ko_KR" => "Korea",
			"ms_MY" => "Malaysia (MS)",
			"en_MY" => "Malaysia (EN)",
			"es_MX" => "Mexico",
			"nl_NL" => "Netherlands",
			"en_NZ" => "New Zealand",
			"no_NO" => "Norway",
			"es_PE" => "Peru",
			"fil_PH" => "Philippines (FIL)",
			"en_PH" => "Philippines (EN)",
			"pl_PL" => "Poland",
			"pt_PT" => "Portugal",
			"ro_RO" => "Romania",
			"ru_RU" => "Russia",
			"ms_SG" => "Singapore (MS)",
			"en_SG" => "Singapore (EN)",
			"es_ES" => "Spain (ES)",
			"ca_ES" => "Spain (CA)",
			"sv_SE" => "Sweden",
			"de_CH" => "Switzerland (DE)",
			"fr_CH" => "Switzerland (FR)",
			"it_CH" => "Switzerland (IT)",
			"tr_TR" => "Turkey",
			"uk_UA" => "Ukraine",
			"en_US" => "US (EN)",
			"es_US" => "US (ES)",
			"es_UY" => "Uruguay",
			"es_VE" => "Venezuela",
			"vi_VN" => "Vietnam (VI)",
			"en_VN" => "Vietnam (EN)",
			"en_ZA" => "South Africa"
		];
		
		// sent upstream as "qadf": yes => qadf=none, no => qadf=heavy
		$nsfw_choices = [
			"yes" => "Yes",
			"no" => "No"
		];
		
		// sent upstream as "with_date"
		$time_ranges = [
			"any" => "Any time",
			"d" => "Past 24 hours",
			"w" => "Past week",
			"m" => "Past month",
			"y" => "Past year"
		];
		
		$extended_choices = [
			"yes" => "Yes",
			"no" => "No"
		];
		
		return [
			"country" => [
				"display" => "Country",
				"option" => $regions
			],
			"nsfw" => [
				"display" => "NSFW",
				"option" => $nsfw_choices
			],
			"time" => [
				"display" => "Time fetched",
				"option" => $time_ranges
			],
			// no "display" key here, so the frontend does not show this filter
			"extendedsearch" => [
				"option" => $extended_choices
			]
		];
	}
	
	/**
	 * Perform one HTTP request through the given proxy and return the
	 * response body.
	 *
	 * Three header profiles are used, picked from the flags:
	 *   - $is_xhr === true : JSON request headers (Content-Type application/json)
	 *   - $post === true   : form submission headers
	 *   - otherwise        : plain page navigation headers
	 *
	 * @param mixed        $proxy  Proxy descriptor, handed untouched to
	 *                             $this->backend->assign_proxy().
	 * @param string       $url    Target URL.
	 * @param array|string $get    For GET: array of query parameters appended
	 *                             to $url. For POST: the request body; an
	 *                             array is form-encoded before sending.
	 * @param bool         $post   Send as POST when strictly true.
	 * @param bool         $is_xhr Use the JSON header profile when strictly true.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the cURL error message when the transfer fails.
	 */
	private function get($proxy, $url, $get = [], $post = false, $is_xhr = false){
		
		if($post === true){
			
			// The body length is advertised via strlen() below, so the body
			// has to be a string. Passing an array straight to cURL would
			// also make it send multipart/form-data, which contradicts the
			// Content-Type header set for POST requests.
			if(is_array($get)){
				
				$get = http_build_query($get);
			}
			
		}elseif($get !== []){
			
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		// only meaningful for the profiles that send a body
		$content_length = is_string($get) ? strlen($get) : 0;
		
		// identical preference cookie for every profile
		$cookie = "Cookie: preferences=date_timeEEEworldN1Ndisable_family_filterEEE1N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE0N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fdevice%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE20N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius";
		
		// header order is kept exactly as it was for each profile
		if($is_xhr === true){
			
			$headers = [
				"User-Agent: " . config::USER_AGENT,
				"Accept: application/json",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://www.startpage.com/",
				"Content-Type: application/json",
				"Content-Length: " . $content_length,
				"Origin: https://www.startpage.com/",
				"DNT: 1",
				"Connection: keep-alive",
				$cookie,
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"TE: trailers"
			];
			
		}elseif($post === true){
			
			$headers = [
				"User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://www.startpage.com/",
				"Content-Type: application/x-www-form-urlencoded",
				"Content-Length: " . $content_length,
				"DNT: 1",
				"Connection: keep-alive",
				$cookie,
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"
			];
			
		}else{
			
			$headers = [
				"User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				$cookie,
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"
			];
		}
		
		$curlproc = curl_init();
		
		try{
			
			if($post === true){
				
				curl_setopt($curlproc, CURLOPT_POST, true);
				curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
			}
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			// http2 bypass
			curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$this->backend->assign_proxy($curlproc, $proxy);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				// the message is read here, before the handle is released
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on success and on every failure path
			curl_close($curlproc);
		}
	}
	
	/**
	 * Run a Startpage web search and convert the response into the scraper's
	 * result structure.
	 *
	 * When $get["npt"] is set, the stored next-page request is replayed as a
	 * POST. Otherwise a first-page GET is built from "s" (query), "nsfw",
	 * "country" and "time". The page embeds its data as a JSON argument to
	 * React.createElement(UIStartpage.AppSerpWeb, ...); that JSON is extracted
	 * and walked for web results, top image results and the next-page link.
	 *
	 * On a first page with nsfw filtering off and "extendedsearch" set to
	 * "yes", a second request fetches the knowledge panel. That step is best
	 * effort: any Exception raised while fetching or parsing it is discarded
	 * and the ordinary results are still returned.
	 *
	 * @param array $get Request parameters (npt, s, nsfw, country, time,
	 *                   extendedsearch).
	 * @return array Keys: status, spelling, npt, answer, web, image, video,
	 *               news, related.
	 * @throws Exception When the search page cannot be fetched, the embedded
	 *                   JSON cannot be located, or it cannot be decoded.
	 */
	public function web($get){
		
		if($get["npt"]){
			
			[$post, $proxy] = $this->backend->get($get["npt"], "web");
			
			try{
				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$post,
					true
				);
			}catch(Exception $error){
				
				// keep the transport error reachable via getPrevious()
				throw new Exception("Failed to fetch search page", 0, $error);
			}
			
			$get_instant_answer = false;
			
		}else{
			
			$proxy = $this->backend->get_ip();
			
			$params = [
				"query" => $get["s"],
				"cat" => "web",
				"pl" => "opensearch"
			];
			
			if($get["nsfw"] == "no"){
				
				$params["qadf"] = "heavy";
				$get_instant_answer = false;
			}else{
				
				$get_instant_answer = true;
			}
			
			if($get["country"] !== "any"){
				
				$params["qsr"] = $get["country"];
			}
			
			if($get["time"] !== "any"){
				
				$params["with_date"] = $get["time"];
			}
			
			try{
				$html = $this->get(
					$proxy,
					"https://www.startpage.com/sp/search",
					$params
				);
			}catch(Exception $error){
				
				// keep the transport error reachable via getPrevious()
				throw new Exception("Failed to fetch search page", 0, $error);
			}
			
			//$html = file_get_contents("scraper/startpage.html");
		}
		
		// preg_match() yields 1 on a match, 0 on no match and false on a PCRE
		// failure; anything other than 1 means $matches is not usable
		if(
			preg_match(
				'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),$/m',
				$html,
				$matches
			) !== 1
		){
			
			throw new Exception("Failed to grep JSON object");
		}
		
		$json = json_decode($matches[1], true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// get npt
		// the pagination branch is treated as optional: absent means no next page
		foreach(($json["render"]["presenter"]["pagination"]["pages"] ?? []) as $page){
			
			if(
				!isset($page["name"], $page["url"]) ||
				$page["name"] != "Next"
			){
				
				continue;
			}
			
			// everything after the first "?" is the query string; a URL
			// without one is parsed as an empty string
			parse_str(
				explode(
					"?",
					$page["url"],
					2
				)[1] ?? "",
				$str
			);
			
			// null values are dropped by http_build_query()
			$out["npt"] =
				$this->backend->store(
					http_build_query(
						[
							"lui" => "english",
							"language" => "english",
							"query" => $str["q"] ?? null,
							"cat" => "web",
							"sc" => $str["sc"] ?? null,
							"t" => "device",
							"segment" => "startpage.udog",
							"page" => $str["page"] ?? null
						]
					),
					"web",
					$proxy
				);
			
			break;
		}
		
		foreach(($json["render"]["presenter"]["regions"]["mainline"] ?? []) as $category){
			
			if(!isset($category["display_type"])){
				
				continue;
			}
			
			switch($category["display_type"]){
				
				case "web-google":
					foreach(($category["results"] ?? []) as $result){
						
						$sublinks = [];
						
						// not every result is assumed to carry site links
						foreach(($result["siteLinks"] ?? []) as $sublink){
							
							$sublinks[] = [
								"title" => $sublink["title"],
								"description" => null,
								"url" => $sublink["clickUrl"]
							];
						}
						
						// split off a possible leading "<date> ..." prefix
						$description =
							explode(
								"...",
								$this->titledots(
									html_entity_decode(
										$this->fuckhtml
										->getTextContent(
											$result["description"]
										)
									)
								),
								2
							);
						
						$date = strtotime(trim($description[0]));
						
						if(
							$date === false ||
							count($description) !== 2 ||
							strlen($description[0]) > 14
						){
							
							// no date found: put the text back together
							$description =
								implode(
									" ... ",
									$description
								);
							
							$date = null;
						}else{
							
							// date found: keep only the text after it
							$description = ltrim($description[1]);
						}
						
						$out["web"][] = [
							"title" =>
								$this->titledots(
									html_entity_decode(
										$this->fuckhtml
										->getTextContent(
											$result["title"]
										)
									)
								),
							"description" => $description,
							"url" => $result["clickUrl"],
							"date" => $date,
							"type" => "web",
							"thumb" => [
								"url" => null,
								"ratio" => null
							],
							"sublink" => $sublinks,
							"table" => []
						];
					}
					break;
				
				case "images-qi-top":
					foreach(($category["results"] ?? []) as $result){
						
						$out["image"][] = [
							"title" =>
								$this->titledots(
									html_entity_decode(
										$this->fuckhtml
										->getTextContent(
											$result["title"]
										)
									)
								),
							"source" => [
								[
									"url" => $result["rawImageUrl"],
									"width" => (int)$result["width"],
									"height" => (int)$result["height"]
								],
								[
									"url" => $this->unshitimage($result["mdThumbnailUrl"]),
									"width" => (int)$result["mdThumbnailWidth"],
									"height" => (int)$result["mdThumbnailHeight"]
								]
							],
							"url" =>
								$result["altClickUrl"]
						];
					}
					break;
			}
		}
		
		// parse instant answers
		if(
			$get["extendedsearch"] == "yes" &&
			$get_instant_answer === true
		){
			
			// https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
			try{
				$post = [
					"se" => "n0vze2y9dqwy",
					// fall back to the submitted query if the page did not echo it
					"q" => $json["render"]["query"] ?? $get["s"],
					"results" => [], // populate
					"enableKnowledgePanel" => true,
					"enableMediaThumbBar" => false,
					"enableSearchSuggestions" => false,
					"enableTripadvisorProperties" => [],
					"enableTripadvisorPlaces" => [],
					"enableTripadvisorPlacesForLocations" => [],
					"enableWebProducts" => false,
					"tripadvisorPartnerId" => null,
					"tripadvisorMapColorMode" => "light",
					"tripadvisorDisablesKnowledgePanel" => false,
					"instantAnswers" => [
						"smartAnswers",
						"youtube",
						"tripadvisor"
					],
					"iaType" => null,
					"forceEnhancedKnowledgePanel" => false,
					"shoppingOnly" => false,
					"allowAdultProducts" => true,
					"lang" => "en",
					"browserLang" => "en-US",
					"browserTimezone" => "America/New_York",
					"market" => null,
					"userLocation" => null,
					"userDate" => date("Y-m-d"),
					"userAgentType" => "unknown"
				];
				
				// the endpoint is given the web results already collected
				foreach($out["web"] as $result){
					
					$post["results"][] = [
						"url" => $result["url"],
						"title" => $result["title"]
					];
				}
				
				$post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
				
				$additional_data =
					$this->get(
						$proxy,
						"https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . ($json["render"]["callback_sc"] ?? "") . "&sr=1",
						$post,
						true,
						true
					);
				
				$additional_data = json_decode($additional_data, true);
				
				if($additional_data === null){
					
					throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
				}
				
				if(!isset($additional_data["knowledgePanel"])){
					
					throw new Exception("Response has missing data (knowledgePanel)");
				}
				
				$additional_data = $additional_data["knowledgePanel"];
				
				// individual meta fields are optional; absent ones become null
				$meta = $additional_data["meta"] ?? [];
				$panel_html = $additional_data["html"] ?? "";
				
				$answer = [
					"title" => $meta["title"] ?? null,
					"description" => [
						[
							"type" => "quote",
							"value" => $meta["description"] ?? null
						]
					],
					"url" => $meta["origWikiUrl"] ?? null,
					"thumb" => $meta["image"] ?? null,
					"table" => [],
					"sublink" => []
				];
				
				// parse html for instant answer
				$this->fuckhtml->load($panel_html);
				
				$div =
					$this->fuckhtml
					->getElementsByTagName(
						"div"
					);
				
				// get description
				$description =
					$this->fuckhtml
					->getElementsByClassName(
						"sx-kp-short-extract sx-kp-short-extract-complete",
						$div
					);
				
				if(count($description) !== 0){
					
					$answer["description"][] = [
						"type" => "text",
						"value" =>
							$this->fuckhtml
							->getTextContent(
								$description[0]
							)
					];
				}
				
				// get socials
				$socials =
					$this->fuckhtml
					->getElementsByClassName(
						"sx-wiki-social-link",
						"a"
					);
				
				foreach($socials as $social){
					
					$title =
						$this->fuckhtml
						->getTextContent(
							$social["attributes"]["title"]
						);
					
					$url =
						$this->fuckhtml
						->getTextContent(
							$social["attributes"]["href"]
						);
					
					// shorten the one label that is renamed
					if($title == "Official Website"){
						
						$title = "Website";
					}
					
					$answer["sublink"][$title] = $url;
				}
				
				// get videos
				$videos =
					$this->fuckhtml
					->getElementsByClassName(
						"sx-kp-video-grid-item",
						$div
					);
				
				foreach($videos as $video){
					
					// narrow the parser to this grid item
					$this->fuckhtml->load($video);
					
					$as =
						$this->fuckhtml
						->getElementsByTagName(
							"a"
						);
					
					if(count($as) === 0){
						
						// ?? invalid
						continue;
					}
					
					$image =
						$this->fuckhtml
						->getElementsByAttributeName(
							"data-sx-src",
							"img"
						);
					
					if(count($image) !== 0){
						
						$thumb = [
							"ratio" => "16:9",
							"url" =>
								$this->fuckhtml
								->getTextContent(
									$image[0]["attributes"]["data-sx-src"]
								)
						];
					}else{
						
						$thumb = [
							"ratio" => null,
							"url" => null
						];
					}
					
					$out["video"][] = [
						"title" =>
							$this->fuckhtml
							->getTextContent(
								$as[0]["attributes"]["title"]
							),
						"description" => null,
						"date" => null,
						"duration" => null,
						"views" => null,
						"thumb" => $thumb,
						"url" =>
							$this->fuckhtml
							->getTextContent(
								$as[0]["attributes"]["href"]
							)
					];
				}
				
				// reset the parser to the whole panel after the video loop
				$this->fuckhtml->load($panel_html);
				
				// get table elements
				$table =
					$this->fuckhtml
					->getElementsByClassName(
						"sx-infobox",
						"table"
					);
				
				if(count($table) !== 0){
					
					$trs =
						$this->fuckhtml
						->getElementsByTagName(
							"tr"
						);
					
					foreach($trs as $tr){
						
						$this->fuckhtml->load($tr);
						
						// rows are laid out with the label in a th and the
						// value in a td:
						// td = content
						// th = title
						$tds =
							$this->fuckhtml
							->getElementsByTagName(
								"td"
							);
						
						$ths =
							$this->fuckhtml
							->getElementsByTagName(
								"th"
							);
						
						// only plain one-label / one-value rows are used
						if(
							count($ths) === 1 &&
							count($tds) === 1
						){
							
							$title =
								$this->fuckhtml
								->getTextContent(
									$ths[0]
								);
							
							$description = [];
							
							$this->fuckhtml->load($tds[0]);
							
							$lis =
								$this->fuckhtml
								->getElementsByTagName(
									"li"
								);
							
							if(count($lis) !== 0){
								
								// list values are flattened to "a, b, c"
								foreach($lis as $li){
									
									$description[] =
										$this->fuckhtml
										->getTextContent(
											$li
										);
								}
								
								$description = implode(", ", $description);
							}else{
								
								$description =
									$this->fuckhtml
									->getTextContent(
										$tds[0]
									);
							}
							
							$answer["table"][$title] = $description;
						}
					}
				}
				
				$out["answer"][] = $answer;
				
			}catch(Exception $error){
				
				// deliberate best effort: the instant answer is optional, so
				// a failure here must not discard the regular results
				//echo "error!";
			}
		}
		
		return $out;
	}
	
	/**
	 * Recover the original image address from a proxied image URL.
	 *
	 * When the URL's query string carries a "piurl" parameter, that value is
	 * returned instead of the proxied URL. If the value refers to
	 * gstatic.com, everything from its first "&" onward is dropped.
	 * URLs without a usable "piurl" are returned unchanged.
	 *
	 * @param string $url Proxied (or already direct) image URL.
	 * @return string The unwrapped image URL, or $url itself.
	 */
	private function unshitimage($url){
		
		$query_string = parse_url($url, PHP_URL_QUERY);
		
		// no query string (or an unparsable URL): nothing to unwrap
		if(
			!is_string($query_string) ||
			$query_string === ""
		){
			
			return $url;
		}
		
		parse_str($query_string, $query);
		
		// "piurl[]=..." would produce an array, which is not a usable URL
		if(
			!isset($query["piurl"]) ||
			!is_string($query["piurl"])
		){
			
			return $url;
		}
		
		// strpos() returns 0 for a match at the very start of the string,
		// so it has to be compared against false rather than used as a boolean
		if(strpos($query["piurl"], "gstatic.com/") !== false){
			
			return
				explode(
					"&",
					$query["piurl"],
					2
				)[0];
		}
		
		return $query["piurl"];
	}
	
	/**
	 * Strip leading and trailing spaces, dots, ASCII control whitespace, NUL
	 * bytes and ellipsis characters from a title or description.
	 *
	 * trim() works on single bytes, so listing the multibyte ellipsis in its
	 * character mask makes it strip the individual bytes of that character
	 * from the edges of other multibyte characters, leaving broken UTF-8.
	 * The stripping is therefore done with a Unicode-aware expression.
	 *
	 * @param string $title Text to clean up.
	 * @return string The text without the surrounding filler characters.
	 */
	private function titledots($title){
		
		$trimmed =
			preg_replace(
				'/\A[ .\t\n\r\0\x0B…]+|[ .\t\n\r\0\x0B…]+\z/u',
				"",
				$title
			);
		
		if($trimmed === null){
			
			// input is not valid UTF-8: strip only the single-byte
			// characters, which is safe on any byte string
			return trim($title, " .\t\n\r\0\x0B");
		}
		
		return $trimmed;
	}
}