forked from lolcat/4get
		
	
		
			
				
	
	
		
			237 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			237 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
/**
 * Scraper for the Mwmbl search engine (https://beta.mwmbl.org/).
 *
 * Fetches the HTML result page for a query and converts the result list
 * into the array structure built in web().
 */
class mwmbl{
	
	/**
	 * Instance of the `backend` class from lib/backend.php, created with the
	 * name "mwmbl". Used here for get_ip() and assign_proxy().
	 *
	 * Declared explicitly (and public, as the former dynamic property was)
	 * because creating undeclared properties is deprecated since PHP 8.2.
	 */
	public $backend;
	
	/**
	 * Instance of the `fuckhtml` class from lib/fuckhtml.php, used to query
	 * the fetched HTML.
	 */
	public $fuckhtml;
	
	/**
	 * Loads the helper libraries and instantiates them.
	 */
	public function __construct(){
		
		// include_once: a second instantiation must not re-declare the classes
		include_once "lib/backend.php";
		$this->backend = new backend("mwmbl");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Returns the filters available for a page type.
	 *
	 * @param mixed $page Unused; this scraper exposes no filters.
	 * @return array Always an empty array.
	 */
	public function getfilters($page){
		
		return [];
	}
	
	/**
	 * Performs a GET request and returns the response body.
	 *
	 * @param mixed $proxy Proxy descriptor handed to backend::assign_proxy().
	 * @param string $url Base URL to request.
	 * @param array $get Query parameters; appended as a query string when not empty.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the cURL error message when the transfer failed.
	 */
	private function get($proxy, $url, $get = []){
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		// use http2
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt(
			$curlproc,
			CURLOPT_HTTPHEADER,
			[
				"User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://beta.mwmbl.org/",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Priority: u=0, i",
				"Sec-Fetch-User: ?1"
			]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); // @todo reset
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle before throwing
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Runs a web search and returns the parsed results.
	 *
	 * @param array $get Request parameters; "s" holds the search term.
	 * @return array Result structure with the keys "status", "spelling",
	 *               "npt", "answer", "web", "image", "video", "news" and
	 *               "related". Only "web" is populated by this scraper.
	 * @throws Exception When the search term is empty or the page could not
	 *                   be fetched.
	 */
	public function web($get){
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		try{
			$html = $this->get(
				$this->backend->get_ip(), // no next page!
				"https://beta.mwmbl.org/",
				[
					"q" => $search
				]
			);
		}catch(Exception $error){
			
			// keep the original failure reachable through getPrevious()
			throw new Exception(
				"Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.",
				0,
				$error
			);
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$this->fuckhtml->load($html);
		
		$results =
			$this->fuckhtml
			->getElementsByClassName(
				"result",
				"li"
			);
		
		foreach($results as $result){
			
			$this->fuckhtml->load($result);
			
			$p =
				$this->fuckhtml
				->getElementsByTagName("p");
			
			$mores =
				$this->fuckhtml
				->getElementsByClassName(
					"result-link-more",
					"div"
				);
			
			// loads each "more" block into the parser in turn
			$sublinks = $this->parse_sublinks($mores);
			
			// reset
			$this->fuckhtml->load($result);
			
			$links =
				$this->fuckhtml
				->getElementsByTagName("a");
			
			if(!isset($links[0]["attributes"]["href"])){
				
				// a result without a link cannot be presented; skip it
				continue;
			}
			
			$out["web"][] = [
				"title" =>
					$this->first_text(
						$this->fuckhtml
						->getElementsByClassName(
							"title",
							$p
						)
					),
				"description" =>
					$this->first_text(
						$this->fuckhtml
						->getElementsByClassName(
							"extract",
							$p
						)
					),
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$links[0]["attributes"]["href"]
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => $sublinks,
				"table" => []
			];
		}
		
		return $out;
	}
	
	/**
	 * Converts the "result-link-more" blocks of one result into sublinks.
	 *
	 * Loads every block into the parser, so the caller has to reload its own
	 * context afterwards.
	 *
	 * @param array $mores Elements matched for class "result-link-more".
	 * @return array List of ["title", "description", "url"] entries.
	 */
	private function parse_sublinks($mores){
		
		$sublinks = [];
		
		foreach($mores as $more){
			
			$this->fuckhtml->load($more);
			
			$as =
				$this->fuckhtml
				->getElementsByClassName(
					"more",
					"a"
				);
			
			if(!isset($as[0]["attributes"]["href"])){
				
				// ?? invalid
				continue;
			}
			
			$sublinks[] = [
				"title" =>
					$this->first_text(
						$this->fuckhtml
						->getElementsByClassName(
							"more-title",
							"span"
						)
					),
				"description" =>
					$this->first_text(
						$this->fuckhtml
						->getElementsByClassName(
							"more-extract",
							"span"
						)
					),
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$as[0]["attributes"]["href"]
					)
			];
		}
		
		return $sublinks;
	}
	
	/**
	 * Returns the text of the first matched element with trailing ellipses
	 * removed, or an empty string when nothing matched.
	 *
	 * @param array $elements Match list returned by the parser.
	 * @return string
	 */
	private function first_text($elements){
		
		if(!isset($elements[0])){
			
			return "";
		}
		
		return
			$this->titledots(
				$this->fuckhtml
				->getTextContent($elements[0])
			);
	}
	
	/**
	 * Removes every trailing "…" from a string.
	 *
	 * rtrim() is not used on purpose: its character list is a set of single
	 * bytes, so rtrim($title, "…") would also cut the last byte off other
	 * multi-byte characters sharing a byte with "…" (for example "æ").
	 *
	 * @param string $title
	 * @return string
	 */
	private function titledots($title){
		
		$title = (string)$title;
		$ellipsis = "…";
		$length = strlen($ellipsis);
		
		// compare the whole byte sequence, not individual bytes
		while(substr($title, -$length) === $ellipsis){
			
			$title = (string)substr($title, 0, -$length);
		}
		
		return $title;
	}
}
 |