<?php

class mojeek{
	/**
	 * HTML parser instance used by the scraping methods of this class.
	 * Declared explicitly: creating it dynamically is deprecated since PHP 8.2.
	 * Kept public because the previously dynamic property was public as well.
	 */
	public $fuckhtml;
	
	/**
	 * Backend helper created for the "mojeek" scope. This class uses it to
	 * pick a proxy (get_ip / assign_proxy) and to store / fetch next-page
	 * tokens (store / get).
	 */
	public $backend;
	
	/**
	 * Load the parser and backend libraries and instantiate both helpers.
	 *
	 * include_once is used so that constructing this scraper after another
	 * component already loaded the same library file does not trigger a
	 * "cannot redeclare class" fatal error.
	 */
	public function __construct(){
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
		
		include_once "lib/backend.php";
		$this->backend = new backend("mojeek");
	}
	
	/**
	 * Describe the filters available for a given search page.
	 *
	 * @param mixed $page Page identifier; compared loosely against "web" and
	 *                    "news", in that order.
	 * @return array|null For "web": a map of filter name => ["display" => label,
	 *                    "option" => [value => label]]. For "news": an empty
	 *                    array. For anything else: null.
	 */
	public function getfilters($page){
		
		// everything except the web page is handled up front
		if($page != "web"){
			
			return $page == "news" ? [] : null;
		}
		
		$focus = [
			"any" => "No focus",
			"blogs" => "Blogs",
			"Dictionary" => "Dictionary",
			"Recipes" => "Recipes",
			"Time" => "Time",
			"Weather" => "Weather"
		];
		
		$languages = [
			"any" => "Any language",
			"af" => "Afrikaans",
			"sq" => "Albanian",
			"an" => "Aragonese",
			"ay" => "Aymara",
			"bi" => "Bislama",
			"br" => "Breton",
			"ca" => "Catalan",
			"kw" => "Cornish",
			"co" => "Corsican",
			"hr" => "Croatian",
			"da" => "Danish",
			"nl" => "Dutch",
			"dz" => "Dzongkha",
			"en" => "English",
			"fj" => "Fijian",
			"fi" => "Finnish",
			"fr" => "French",
			"gd" => "Gaelic",
			"gl" => "Galician",
			"de" => "German",
			"ht" => "Haitian",
			"io" => "Ido",
			"id" => "Indonesian",
			"ia" => "Interlingua",
			"ie" => "Interlingue",
			"ga" => "Irish",
			"it" => "Italian",
			"rw" => "Kinyarwanda",
			"la" => "Latin",
			"li" => "Limburgish",
			"lb" => "Luxembourgish",
			"no" => "Norwegian",
			"nb" => "Norwegian Bokmål",
			"nn" => "Norwegian Nynorsk",
			"oc" => "Occitan (post 1500)",
			"pl" => "Polish",
			"pt" => "Portuguese",
			"rm" => "Romansh",
			"rn" => "Rundi",
			"sg" => "Sango",
			"so" => "Somali",
			"es" => "Spanish",
			"sw" => "Swahili",
			"ss" => "Swati",
			"sv" => "Swedish",
			"ty" => "Tahitian",
			"to" => "Tonga (Tonga Islands)",
			"ts" => "Tsonga",
			"vo" => "Volapük",
			"wa" => "Walloon",
			"cy" => "Welsh",
			"xh" => "Xhosa",
			"zu" => "Zulu"
		];
		
		$countries = [
			"any" => "No location bias",
			"af" => "Afghanistan",
			"ax" => "Åland Islands",
			"al" => "Albania",
			"dz" => "Algeria",
			"as" => "American Samoa",
			"ad" => "Andorra",
			"ao" => "Angola",
			"ai" => "Anguilla",
			"aq" => "Antarctica",
			"ag" => "Antigua and Barbuda",
			"ar" => "Argentina",
			"am" => "Armenia",
			"aw" => "Aruba",
			"au" => "Australia",
			"at" => "Austria",
			"az" => "Azerbaijan",
			"bs" => "Bahamas",
			"bh" => "Bahrain",
			"bd" => "Bangladesh",
			"bb" => "Barbados",
			"by" => "Belarus",
			"be" => "Belgium",
			"bz" => "Belize",
			"bj" => "Benin",
			"bm" => "Bermuda",
			"bt" => "Bhutan",
			"bo" => "Bolivia (Plurinational State of)",
			"bq" => "Bonaire, Sint Eustatius and Saba",
			"ba" => "Bosnia and Herzegovina",
			"bw" => "Botswana",
			"bv" => "Bouvet Island",
			"br" => "Brazil",
			"io" => "British Indian Ocean Territory",
			"bn" => "Brunei Darussalam",
			"bg" => "Bulgaria",
			"bf" => "Burkina Faso",
			"bi" => "Burundi",
			"cv" => "Cabo Verde",
			"kh" => "Cambodia",
			"cm" => "Cameroon",
			"ca" => "Canada",
			"ky" => "Cayman Islands",
			"cf" => "Central African Republic",
			"td" => "Chad",
			"cl" => "Chile",
			"cn" => "China",
			"cx" => "Christmas Island",
			"cc" => "Cocos (Keeling) Islands",
			"co" => "Colombia",
			"km" => "Comoros",
			"cg" => "Congo",
			"cd" => "Congo (Democratic Republic of the)",
			"ck" => "Cook Islands",
			"cr" => "Costa Rica",
			"ci" => "Côte d'Ivoire",
			"hr" => "Croatia",
			"cu" => "Cuba",
			"cw" => "Curaçao",
			"cy" => "Cyprus",
			"cz" => "Czechia",
			"dk" => "Denmark",
			"dj" => "Djibouti",
			"dm" => "Dominica",
			"do" => "Dominican Republic",
			"ec" => "Ecuador",
			"eg" => "Egypt",
			"sv" => "El Salvador",
			"gq" => "Equatorial Guinea",
			"er" => "Eritrea",
			"ee" => "Estonia",
			"et" => "Ethiopia",
			"fk" => "Falkland Islands (Malvinas)",
			"fo" => "Faroe Islands",
			"fj" => "Fiji",
			"fi" => "Finland",
			"fr" => "France",
			"gf" => "French Guiana",
			"pf" => "French Polynesia",
			"tf" => "French Southern Territories",
			"ga" => "Gabon",
			"gm" => "Gambia",
			"ge" => "Georgia",
			"de" => "Germany",
			"gh" => "Ghana",
			"gi" => "Gibraltar",
			"gr" => "Greece",
			"gl" => "Greenland",
			"gd" => "Grenada",
			"gp" => "Guadeloupe",
			"gu" => "Guam",
			"gt" => "Guatemala",
			"gg" => "Guernsey",
			"gn" => "Guinea",
			"gw" => "Guinea-Bissau",
			"gy" => "Guyana",
			"ht" => "Haiti",
			"hm" => "Heard Island and McDonald Islands",
			"va" => "Holy See",
			"hn" => "Honduras",
			"hk" => "Hong Kong",
			"hu" => "Hungary",
			"is" => "Iceland",
			"in" => "India",
			"id" => "Indonesia",
			"ir" => "Iran (Islamic Republic of)",
			"iq" => "Iraq",
			"ie" => "Ireland",
			"im" => "Isle of Man",
			"il" => "Israel",
			"it" => "Italy",
			"jm" => "Jamaica",
			"jp" => "Japan",
			"je" => "Jersey",
			"jo" => "Jordan",
			"kz" => "Kazakhstan",
			"ke" => "Kenya",
			"ki" => "Kiribati",
			"kp" => "Korea (Democratic People's Republic of)",
			"kr" => "Korea (Republic of)",
			"kw" => "Kuwait",
			"kg" => "Kyrgyzstan",
			"la" => "Lao People's Democratic Republic",
			"lv" => "Latvia",
			"lb" => "Lebanon",
			"ls" => "Lesotho",
			"lr" => "Liberia",
			"ly" => "Libya",
			"li" => "Liechtenstein",
			"lt" => "Lithuania",
			"lu" => "Luxembourg",
			"mo" => "Macao",
			"mk" => "Macedonia (the former Yugoslav Republic of)",
			"mg" => "Madagascar",
			"mw" => "Malawi",
			"my" => "Malaysia",
			"mv" => "Maldives",
			"ml" => "Mali",
			"mt" => "Malta",
			"mh" => "Marshall Islands",
			"mq" => "Martinique",
			"mr" => "Mauritania",
			"mu" => "Mauritius",
			"yt" => "Mayotte",
			"mx" => "Mexico",
			"fm" => "Micronesia (Federated States of)",
			"md" => "Moldova (Republic of)",
			"mc" => "Monaco",
			"mn" => "Mongolia",
			"me" => "Montenegro",
			"ms" => "Montserrat",
			"ma" => "Morocco",
			"mz" => "Mozambique",
			"mm" => "Myanmar",
			"na" => "Namibia",
			"nr" => "Nauru",
			"np" => "Nepal",
			"nl" => "Netherlands",
			"nc" => "New Caledonia",
			"nz" => "New Zealand",
			"ni" => "Nicaragua",
			"ne" => "Niger",
			"ng" => "Nigeria",
			"nu" => "Niue",
			"nf" => "Norfolk Island",
			"mp" => "Northern Mariana Islands",
			"no" => "Norway",
			"om" => "Oman",
			"pk" => "Pakistan",
			"pw" => "Palau",
			"ps" => "Palestine, State of",
			"pa" => "Panama",
			"pg" => "Papua New Guinea",
			"py" => "Paraguay",
			"pe" => "Peru",
			"ph" => "Philippines",
			"pn" => "Pitcairn",
			"pl" => "Poland",
			"pt" => "Portugal",
			"pr" => "Puerto Rico",
			"qa" => "Qatar",
			"re" => "Réunion",
			"ro" => "Romania",
			"ru" => "Russian Federation",
			"rw" => "Rwanda",
			"bl" => "Saint Barthélemy",
			"sh" => "Saint Helena, Ascension and Tristan da Cunha",
			"kn" => "Saint Kitts and Nevis",
			"lc" => "Saint Lucia",
			"mf" => "Saint Martin (French part)",
			"pm" => "Saint Pierre and Miquelon",
			"vc" => "Saint Vincent and the Grenadines",
			"ws" => "Samoa",
			"sm" => "San Marino",
			"st" => "Sao Tome and Principe",
			"sa" => "Saudi Arabia",
			"sn" => "Senegal",
			"rs" => "Serbia",
			"sc" => "Seychelles",
			"sl" => "Sierra Leone",
			"sg" => "Singapore",
			"sx" => "Sint Maarten (Dutch part)",
			"sk" => "Slovakia",
			"si" => "Slovenia",
			"sb" => "Solomon Islands",
			"so" => "Somalia",
			"za" => "South Africa",
			"gs" => "South Georgia and South Sandwich Islands",
			"ss" => "South Sudan",
			"es" => "Spain",
			"lk" => "Sri Lanka",
			"sd" => "Sudan",
			"sr" => "Suriname",
			"sj" => "Svalbard and Jan Mayen",
			"sz" => "Swaziland",
			"se" => "Sweden",
			"ch" => "Switzerland",
			"sy" => "Syrian Arab Republic",
			"tw" => "Taiwan",
			"tj" => "Tajikistan",
			"tz" => "Tanzania, United Republic of",
			"th" => "Thailand",
			"tl" => "Timor-Leste",
			"tg" => "Togo",
			"tk" => "Tokelau",
			"to" => "Tonga",
			"tt" => "Trinidad and Tobago",
			"tn" => "Tunisia",
			"tr" => "Turkey",
			"tm" => "Turkmenistan",
			"tc" => "Turks and Caicos Islands",
			"tv" => "Tuvalu",
			"ug" => "Uganda",
			"ua" => "Ukraine",
			"ae" => "United Arab Emirates",
			"gb" => "United Kingdom",
			"us" => "United States of America",
			"um" => "United States Minor Outlying Islands",
			"uy" => "Uruguay",
			"uz" => "Uzbekistan",
			"vu" => "Vanuatu",
			"ve" => "Venezuela (Bolivarian Republic of)",
			"vn" => "Viet Nam",
			"vg" => "Virgin Islands (British)",
			"vi" => "Virgin Islands (U.S.)",
			"wf" => "Wallis and Futuna",
			"eh" => "Western Sahara",
			"ye" => "Yemen",
			"zm" => "Zambia",
			"zw" => "Zimbabwe"
		];
		
		$regions = [
			"any" => "Any region",
			"eu" => "European Union",
			"de" => "Germany",
			"fr" => "France",
			"uk" => "United Kingdom"
		];
		
		// numeric-looking string keys; PHP stores them as integer keys
		$per_domain = [
			"1" => "1 result",
			"2" => "2 results",
			"3" => "3 results",
			"4" => "4 results",
			"5" => "5 results",
			"10" => "10 results",
			"0" => "Unlimited",
		];
		
		return [
			"focus" => ["display" => "Focus", "option" => $focus],
			"lang" => ["display" => "Language", "option" => $languages],
			"country" => ["display" => "Country", "option" => $countries],
			"region" => ["display" => "Region", "option" => $regions],
			"domain" => ["display" => "Results per domain", "option" => $per_domain]
		];
	}
	
	/**
	 * Perform an HTTP GET request through cURL and return the response body.
	 *
	 * @param mixed  $proxy Proxy descriptor, handed unchanged to
	 *                      $this->backend->assign_proxy().
	 * @param string $url   Target URL without a query string.
	 * @param array  $get   Query parameters; when non-empty they are encoded
	 *                      with http_build_query() and appended after "?".
	 * @return string|bool  Whatever curl_exec() returned (the body, since
	 *                      CURLOPT_RETURNTRANSFER is enabled).
	 * @throws Exception    With the cURL error message when the transfer
	 *                      reports a non-zero error code.
	 */
	private function get($proxy, $url, $get = []){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		try{
			
			$this->backend->assign_proxy($curlproc, $proxy);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on success AND on every error path;
			// previously a failed transfer threw before curl_close() ran
			curl_close($curlproc);
		}
	}
	
	/**
	 * Run a Mojeek web search (or fetch a follow-up page) and scrape it.
	 *
	 * When $get["npt"] is truthy, the stored token is resolved through the
	 * backend into a path + proxy and that page is fetched. Otherwise a new
	 * search is built from $get["s"] and the filter values.
	 *
	 * @param array $get Request parameters. Keys read here: "npt", "s",
	 *                   "lang", "country", "region", "domain", "focus".
	 * @return array     Result set with the keys "status", "spelling", "npt",
	 *                   "answer", "web", "image", "video", "news", "related".
	 *                   Only "web", "answer", "news" and "npt" are filled in
	 *                   by this method; if the page has no
	 *                   ul.results-standard the untouched skeleton is returned.
	 * @throws Exception "Search term is empty!" for an empty query, or
	 *                   "Failed to get HTML" when the request fails.
	 */
	public function web($get){
		
		if($get["npt"]){
			
			// follow-up page: the backend hands back the stored path and
			// the proxy that belongs to it
			[$token, $proxy] = $this->backend->get($get["npt"], "web");
			
			try{
				$html =
					$this->get(
						$proxy,
						"https://www.mojeek.com" . $token,
						[]
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to get HTML");
			}
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			$lang = $get["lang"];
			$country = $get["country"];
			$region = $get["region"];
			$domain = $get["domain"];
			$focus = $get["focus"];
			
			$params = [
				"q" => $search,
				"t" => 20, // number of results/page
				"tn" => 7, // number of news results/page
				"date" => 1, // show date
				"tlen" => 128, // max length of title
				"dlen" => 511, // max length of description
				"arc" => ($country == "any" ? "none" : $country) // location. don't use autodetect!
			];
			
			switch($focus){
				
				case "any": break;
				
				case "blogs":
					$params["fmt"] = "sst";
					$params["sst"] = "1";
					break;
				
				default:
					$params["foc_t"] = $focus;
					break;
			}
			
			if($lang != "any"){
				
				$params["lb"] = $lang;
			}
			
			if($region != "any"){
				
				$params["reg"] = $region;
			}
			
			// "1" is treated as the default and therefore not sent
			if($domain != "1"){
				
				$params["si"] = $domain;
			}
			
			try{
				$html =
					$this->get(
						$proxy,
						"https://www.mojeek.com/search",
						$params
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to get HTML");
			}
			/*
			$handle = fopen("scraper/mojeek.html", "r");
			$html = fread($handle, filesize("scraper/mojeek.html"));
			fclose($handle);*/
			
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$this->fuckhtml->load($html);
		
		$results =
			$this->fuckhtml
			->getElementsByClassName("results-standard", "ul");
		
		if(count($results) === 0){
			
			return $out;
		}
		
		/*
			Get all search result list items
		*/
		foreach($results as $container){
			
			$this->fuckhtml->load($container);
			
			// separate name so the list being iterated is not shadowed
			$items =
				$this->fuckhtml
				->getElementsByTagName("li");
			
			foreach($items as $result){
				
				$data = [
					"title" => null,
					"description" => null,
					"url" => null,
					"date" => null,
					"type" => "web",
					"thumb" => [
						"url" => null,
						"ratio" => null
					],
					"sublink" => [],
					"table" => []
				];
				
				$this->fuckhtml->load($result);
				
				$title =
					$this->fuckhtml
					->getElementsByClassName("title", "a")[0];
				
				$data["title"] =
					html_entity_decode(
						$this->fuckhtml
						->getTextContent(
							$title["innerHTML"]
						)
					);
				
				$data["url"] =
					html_entity_decode(
						$this->fuckhtml
						->getTextContent(
							$title["attributes"]["href"]
						)
					);
				
				$description =
					$this->fuckhtml
					->getElementsByClassName(
						"s", "p"
					);
				
				if(count($description) !== 0){
					
					$data["description"] =
						$this->titledots(
							html_entity_decode(
								$this->fuckhtml
								->getTextContent(
									$description[0]
								)
							)
						);
				}
				
				$date =
					$this->fuckhtml
					->getElementsByClassName(
						"mdate",
						"span"
					);
				
				if(count($date) !== 0){
					
					$data["date"] =
						strtotime(
							$this->fuckhtml
							->getTextContent(
								$date[0]
							)
						);
				}
				
				$out["web"][] = $data;
			}
		}
		
		/*
			Get instant answers
		*/
		$this->fuckhtml->load($html);
		
		$infoboxes =
			$this->fuckhtml
			->getElementsByClassName(
				"infobox infobox-top",
				"div"
			);
		
		foreach($infoboxes as $infobox){
			
			$answer = [
				"title" => null,
				"description" => [],
				"url" => null,
				"thumb" => null,
				"table" => [],
				"sublink" => []
			];
			
			// the infobox is split on <hr>: part 0 is read for title +
			// short definition, part 1 for thumbnail + body, part 2 for
			// the source link
			// NOTE(review): parts 1 and 2 are assumed to exist - confirm
			// against real markup
			$infobox_html =
				explode(
					"<hr>",
					$infobox["innerHTML"]
				);
			
			$this->fuckhtml->load($infobox_html[0]);
			
			// title
			$answer["title"] =
				$this->fuckhtml
				->getTextContent(
					$this->fuckhtml
					->getElementsByTagName("h1")[0]
				);
			
			// short definition
			$definition =
				$this->fuckhtml
				->getElementsByTagName(
					"p"
				);
			
			if(count($definition) !== 0){
				
				$answer["description"][] = [
					"type" => "quote",
					"value" =>
						$this->fuckhtml
						->getTextContent(
							$definition[0]
						)
				];
			}

			// get thumbnail, if it exists
			$this->fuckhtml->load($infobox_html[1]);
			
			$thumb =
				$this->fuckhtml
				->getElementsByClassName("float-right", "img");
			
			if(count($thumb) !== 0){
				
				// $thumb is reused as the match array: on success it holds
				// [full match, img parameter]
				preg_match(
					'/\/image\?img=([^&]+)/i',
					$thumb[0]["attributes"]["src"],
					$thumb
				);
				
				if(count($thumb) === 2){
					
					$answer["thumb"] =
						urldecode(
							$this->fuckhtml
							->getTextContent(
								$thumb[1]
							)
						);
				}
			}
			
			// get description
			$ps =
				$this->fuckhtml
				->getElementsByTagName("p");
			
			$first_tag = true;
			foreach($ps as $p){
				
				$this->fuckhtml->load($p);
				
				if(
					preg_match(
						'/^\s*<strong>/i',
						$p["innerHTML"]
					)
				){
					
					/*
						Parse table: <strong>Key:</strong> value
					*/
					
					$strong =
						$this->fuckhtml
						->getElementsByTagName("strong")[0];
					
					$p["innerHTML"] =
						str_replace($strong["innerHTML"], "", $p["innerHTML"]);
					
					// key: text of <strong>, first letter uppercased,
					// trailing colon removed
					$strong =
						preg_replace(
							'/:$/',
							"",
							ucfirst(
								$this->fuckhtml
								->getTextContent(
									$strong
								)
							)
						);
					
					$answer["table"][trim($strong)] =
						trim(
							$this->fuckhtml
							->getTextContent(
								$p
							)
						);
					
					continue;
				}
				
				$as =
					$this->fuckhtml
					->getElementsByClassName("svg-icon");
				
				if(count($as) !== 0){
					
					/*
						Parse websites: the sublink name is whatever follows
						the first space in the class attribute
					*/
					foreach($as as $a){
						
						$answer["sublink"][
							ucfirst(explode(" ", $a["attributes"]["class"], 2)[1])
						] =
							$this->fuckhtml
							->getTextContent(
								$a["attributes"]["href"]
							);
					}
					
					continue; 
				}
				
				/*
					Parse text content: walk the child tags in order, emit
					the text in front of each tag as a "text" node and every
					<a> as a "link" node
				*/
				$tags =
					$this->fuckhtml
					->getElementsByTagName("*");
				
				$i = 0;
				foreach($tags as $tag){
					
					$c = count($answer["description"]);
					
					// remove tag from innerHTML
					$p["innerHTML"] =
						explode($tag["outerHTML"], $p["innerHTML"], 2);
					
					if(count($p["innerHTML"]) === 2){
						
						// a new paragraph that follows a link gets a blank
						// line in front of it
						if(
							$i === 0 &&
							$c !== 0 &&
							$answer["description"][$c - 1]["type"] == "link"
						){
							
							$append = "\n\n";
						}else{
							
							$append = "";
						}
						
						if($p["innerHTML"][0] != ""){
							$answer["description"][] = [
								"type" => "text",
								"value" => $append . trim($p["innerHTML"][0])
							];
						}
						
						// continue with what comes after the tag
						$p["innerHTML"] = $p["innerHTML"][1];
					}else{
						
						// tag not found in the remaining HTML: keep it as is
						$p["innerHTML"] = $p["innerHTML"][0];
					}
					
					switch($tag["tagName"]){
						
						case "a":
							
							$value =
								$this->fuckhtml
								->getTextContent(
									$tag
								);
							
							// the "wikipedia" link is dropped; only the
							// whitespace in front of it is trimmed
							if(strtolower($value) == "wikipedia"){
								
								if($c !== 0){
									$answer["description"][$c - 1]["value"] =
										rtrim($answer["description"][$c - 1]["value"]);
								}
								break;
							}
							
							$answer["description"][] = [
								"type" => "link",
								"url" =>
									$this->fuckhtml
									->getTextContent(
										$tag["attributes"]["href"]
									),
								"value" =>
									$this->fuckhtml
									->getTextContent(
										$tag
									)
							];
							break;
					}
					
					$i++;
				}
			}
			
			// get URL
			$this->fuckhtml->load($infobox_html[2]);
			
			$answer["url"] =
				$this->fuckhtml
				->getTextContent(
					$this->fuckhtml
					->getElementsByTagName(
						"a"
					)[0]
					["attributes"]
					["href"]
				);
			
			// append answer
			$out["answer"][] = $answer;
		}
		
		/*
			Get news
		*/
		$this->fuckhtml->load($html);
		
		$news =
			$this->fuckhtml
			->getElementsByClassName(
				"results news-results",
				"div"
			);
		
		if(count($news) !== 0){
			
			$this->fuckhtml->load($news[0]);
			
			$lis =
				$this->fuckhtml
				->getElementsByTagName("li");
			
			foreach($lis as $li){
				
				$this->fuckhtml->load($li);
				
				$a =
					$this->fuckhtml
					->getElementsByClassName(
						"ob",
						"a"
					);
				
				if(count($a) === 0){
					
					continue;
				}
				
				$a = $a[0];
				
				// the first <span> is split on " - "; the last piece is
				// parsed as the date
				$date =
					explode(
						" - ",
						$this->fuckhtml
						->getTextContent(
							$this->fuckhtml
							->getElementsByTagName(
								"span"
							)[0]
						)
					);
				
				$date =
					strtotime(
						$date[count($date) - 1]
					);
				
				$out["news"][] = [
					"title" =>
						html_entity_decode(
							$this->fuckhtml
							->getTextContent(
								$a
							)
						),
					"description" => null,
					"date" => $date,
					"thumb" => [
						"url" => null,
						"ratio" => null
					],
					"url" =>
						$this->fuckhtml
						->getTextContent(
							$a["attributes"]["href"]
						)
				];
			}
		}
		
		/*
			Get next page
		*/
		$this->fuckhtml->load($html);
		
		$pagination =
			$this->fuckhtml
			->getElementsByClassName("pagination");
		
		// count() returns an int, so the previous "!== false" test was
		// always true and $pagination[0] was read even when absent
		if(count($pagination) !== 0){
			
			$this->fuckhtml->load($pagination[0]);
			$as =
				$this->fuckhtml
				->getElementsByTagName("a");
			
			foreach($as as $a){
				
				if($a["innerHTML"] == "Next"){
					
					// keep the link target together with the proxy that
					// was used for this page
					$out["npt"] = $this->backend->store(
						$this->fuckhtml
						->getTextContent(
							$a["attributes"]["href"]
						),
						"web",
						$proxy
					);
				}
			}
		}
		
		return $out;
	}
	
	/**
	 * Query Mojeek's news results (fmt=news) for $get["s"] and scrape every
	 * <article> element of the response.
	 *
	 * @param array $get Request parameters; only "s" (the query) is read.
	 * @return array     ["status" => "ok", "npt" => null, "news" => list of
	 *                   entries with the keys "title", "author",
	 *                   "description", "date", "thumb" and "url"].
	 * @throws Exception "Search term is empty!" for an empty query, or
	 *                   "Failed to get HTML" when the request fails.
	 */
	public function news($get){
		
		$query = $get["s"];
		
		if(strlen($query) === 0){
			throw new Exception("Search term is empty!");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"news" => []
		];
		
		try{
			$proxy = $this->backend->get_ip();
			$html = $this->get(
				$proxy,
				"https://www.mojeek.com/search",
				["q" => $query, "fmt" => "news"]
			);
		}catch(Exception $error){
			throw new Exception("Failed to get HTML");
		}
		/*
		$handle = fopen("scraper/mojeek.html", "r");
		$html = fread($handle, filesize("scraper/mojeek.html"));
		fclose($handle);
		*/
		
		// the parser keeps the last loaded document as its context, so the
		// order of load() / lookup calls below matters
		$parser = $this->fuckhtml;
		$parser->load($html);
		
		foreach($parser->getElementsByTagName("article") as $article){
			
			$parser->load($article);
			
			$entry = [
				"title" => null,
				"author" => null,
				"description" => null,
				"date" => null,
				"thumb" => ["url" => null, "ratio" => null],
				"url" => null
			];
			
			// title and URL both come from the first link of the article
			$link = $parser->getElementsByTagName("a")[0];
			$entry["title"] = $parser->getTextContent($link["attributes"]["title"]);
			$entry["url"] = $parser->getTextContent($link["attributes"]["href"]);
			
			$paragraphs = $parser->getElementsByTagName("p");
			
			// description: first paragraph of class "s", stored as null
			// when it turns out empty
			$summary = $parser->getElementsByClassName("s", $paragraphs)[0];
			$entry["description"] = $this->titledots($parser->getTextContent($summary));
			
			if($entry["description"] == ""){
				$entry["description"] = null;
			}
			
			// big nodes carry their date in a paragraph of class "date"
			$dates = $parser->getElementsByClassName("date", $paragraphs);
			
			if(count($dates) !== 0){
				$entry["date"] = strtotime($parser->getTextContent($dates[0]));
			}
			
			// paragraph of class "i": author, and for small nodes the date
			$byline = $parser->getElementsByClassName("i", $paragraphs)[0];
			$parser->load($byline);
			
			$author_links = $parser->getElementsByTagName("a");
			
			if(count($author_links) === 0){
				
				// small node: read the date from <time>, then the author is
				// what is left once <time> and the bullet are removed
				$time = $parser->getElementsByTagName("time")[0];
				
				$entry["date"] = strtotime($parser->getTextContent($time));
				
				$byline["innerHTML"] =
					str_replace($time["outerHTML"], "", $byline["innerHTML"]);
				
				$entry["author"] =
					preg_replace('/ &bull; $/', "", $byline["innerHTML"]);
			}else{
				
				// big node: the author is the text of the first link
				$entry["author"] =
					htmlspecialchars_decode(
						$parser->getTextContent($author_links[0]["innerHTML"])
					);
			}
			
			$out["news"][] = $entry;
		}
		
		return $out;
	}
	
	/**
	 * Strip dots and whitespace from both ends of a string.
	 *
	 * @param string $title Text to clean up.
	 * @return string       $title without leading/trailing ".", space, tab,
	 *                      newline, carriage return, NUL or vertical tab.
	 */
	private function titledots($title){
		
		// "." plus the characters trim() removes by default
		$strip = ". \t\n\r\0\x0B";
		
		return rtrim(ltrim($title, $strip), $strip);
	}
}