backend = new backend("qwant"); } public function getfilters($page){ $base = [ "nsfw" => [ "display" => "NSFW", "option" => [ "yes" => "Yes", "maybe" => "Maybe", "no" => "No" ] ], "country" => [ "display" => "Country", "option" => [ "en_US" => "United States", "fr_FR" => "France", "en_GB" => "Great Britain", "de_DE" => "Germany", "it_IT" => "Italy", "es_AR" => "Argentina", "en_AU" => "Australia", "es_ES" => "Spain (es)", "ca_ES" => "Spain (ca)", "cs_CZ" => "Czech Republic", "ro_RO" => "Romania", "el_GR" => "Greece", "zh_CN" => "China", "zh_HK" => "Hong Kong", "en_NZ" => "New Zealand", "fr_FR" => "France", "th_TH" => "Thailand", "ko_KR" => "South Korea", "sv_SE" => "Sweden", "nb_NO" => "Norway", "da_DK" => "Denmark", "hu_HU" => "Hungary", "et_EE" => "Estonia", "es_MX" => "Mexico", "es_CL" => "Chile", "en_CA" => "Canada (en)", "fr_CA" => "Canada (fr)", "en_MY" => "Malaysia", "bg_BG" => "Bulgaria", "fi_FI" => "Finland", "pl_PL" => "Poland", "nl_NL" => "Netherlands", "pt_PT" => "Portugal", "de_CH" => "Switzerland (de)", "fr_CH" => "Switzerland (fr)", "it_CH" => "Switzerland (it)", "de_AT" => "Austria", "fr_BE" => "Belgium (fr)", "nl_BE" => "Belgium (nl)", "en_IE" => "Ireland", "he_IL" => "Israel" ] ] ]; switch($page){ case "web": $base = array_merge( $base, [ "time" => [ "display" => "Time posted", "option" => [ "any" => "Any time", "day" => "Past 24 hours", "week" => "Past week", "month" => "Past month" ] ], "extendedsearch" => [ // no display, wont show in interface "option" => [ "yes" => "Yes", "no" => "No" ] ] ] ); break; case "images": $base = array_merge( $base, [ "time" => [ "display" => "Time posted", "option" => [ "any" => "Any time", "day" => "Past 24 hours", "week" => "Past week", "month" => "Past month" ] ], "size" => [ "display" => "Size", "option" => [ "any" => "Any size", "large" => "Large", "medium" => "Medium", "small" => "Small" ] ], "color" => [ "display" => "Color", "option" => [ "any" => "Any color", "coloronly" => "Color only", "monochrome" => "Monochrome", "black" => "Black", "brown" => "Brown", "gray" => "Gray", "white" => "White", "yellow" => "Yellow", "orange" => "Orange", "red" => "Red", "pink" => "Pink", "purple" => "Purple", "blue" => "Blue", "teal" => "Teal", "green" => "Green" ] ], "imagetype" => [ "display" => "Type", "option" => [ "any" => "Any type", "animatedgif" => "Animated GIF", "photo" => "Photograph", "transparent" => "Transparent" ] ], "license" => [ "display" => "License", "option" => [ "any" => "Any license", "share" => "Non-commercial reproduction and sharing", "sharecommercially" => "Reproduction and sharing", "modify" => "Non-commercial reproduction, sharing and modification", "modifycommercially" => "Reproduction, sharing and modification", "public" => "Public domain" ] ] ] ); break; case "videos": $base = array_merge( $base, [ "order" => [ "display" => "Order by", "option" => [ "relevance" => "Relevance", "views" => "Views", "date" => "Most recent", ] ], "source" => [ "display" => "Source", "option" => [ "any" => "Any source", "youtube" => "YouTube", "dailymotion" => "Dailymotion", ] ] ] ); break; case "news": $base = array_merge( $base, [ "time" => [ "display" => "Time posted", "option" => [ "any" => "Any time", "hour" => "Less than 1 hour ago", "day" => "Past 24 hours", "week" => "Past week", "month" => "Past month" ] ], "order" => [ "display" => "Order by", "option" => [ "relevance" => "Relevance", "date" => "Most recent" ] ] ] ); break; } return $base; } private function get($proxy, $url, $get = []){ $headers = [ "User-Agent: " . config::USER_AGENT, "Accept: application/json, text/plain, */*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip", "DNT: 1", "Connection: keep-alive", "Origin: https://www.qwant.com", "Referer: https://www.qwant.com/", "Sec-Fetch-Dest: empty", "Sec-Fetch-Mode: cors", "Sec-Fetch-Site: same-site", "TE: trailers" ]; $curlproc = curl_init(); if($get !== []){ $get = http_build_query($get); $url .= "?" . $get; } curl_setopt($curlproc, CURLOPT_URL, $url); curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers); // Bypass HTTP/2 check curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); $this->backend->assign_proxy($curlproc, $proxy); $data = curl_exec($curlproc); if(curl_errno($curlproc)){ throw new Exception(curl_error($curlproc)); } curl_close($curlproc); return $data; } public function web($get){ if($get["npt"]){ // get next page data [$params, $proxy] = $this->backend->get($get["npt"], "web"); $params = json_decode($params, true); }else{ // get _GET data instead $search = $get["s"]; if(strlen($search) === 0){ throw new Exception("Search term is empty!"); } if(strlen($search) > 2048){ throw new Exception("Search term is too long!"); } $proxy = $this->backend->get_ip(); $params = [ "q" => $search, "freshness" => $get["time"], "count" => 10, "locale" => $get["country"], "offset" => 0, "device" => "desktop", "tgp" => 3, "safesearch" => 0, "displayed" => "true" ]; switch($get["nsfw"]){ case "yes": $params["safesearch"] = 0; break; case "maybe": $params["safesearch"] = 1; break; case "no": $params["safesearch"] = 2; break; } } /* $handle = fopen("scraper/qwant_web.json", "r"); $json = fread($handle, filesize("scraper/qwant_web.json")); fclose($handle);*/ try{ $json = $this->get( $proxy, "https://fdn.qwant.com/v3/search/web", $params ); }catch(Exception $error){ throw new Exception("Could not fetch JSON"); } $json = json_decode($json, true); if($json === NULL){ throw new Exception("Failed to decode JSON"); } if(isset($json["data"]["message"][0])){ throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]); } $out = [ "status" => "ok", "spelling" => [ "type" => "no_correction", "using" => null, "correction" => null ], "npt" => null, "answer" => [], "web" => [], "image" => [], "video" => [], "news" => [], "related" => [] ]; if( $json["status"] != "success" && $json["data"]["error_code"] === 5 ){ // no results return $out; } $this->detect_errors($json); if(!isset($json["data"]["result"]["items"]["mainline"])){ throw new Exception("Server did not return a result object"); } // data is OK, parse // get instant answer if( $get["extendedsearch"] == "yes" && isset($json["data"]["result"]["items"]["sidebar"][0]["endpoint"]) ){ try{ $answer = $this->get( $proxy, "https://api.qwant.com/v3" . $json["data"]["result"]["items"]["sidebar"][0]["endpoint"], [] ); $answer = json_decode($answer, true); if( $answer === null || $answer["status"] != "success" || $answer["data"]["result"] === null ){ throw new Exception(); } // parse answer $out["answer"][] = [ "title" => $answer["data"]["result"]["title"], "description" => [ [ "type" => "text", "value" => $this->trimdots($answer["data"]["result"]["description"]) ] ], "url" => $answer["data"]["result"]["url"], "thumb" => $answer["data"]["result"]["thumbnail"]["landscape"] == null ? null : $this->unshitimage( $answer["data"]["result"]["thumbnail"]["landscape"], false ), "table" => [], "sublink" => [] ]; }catch(Exception $error){ // do nothing in case of failure } } // get word correction if(isset($json["data"]["query"]["queryContext"]["alteredQuery"])){ $out["spelling"] = [ "type" => "including", "using" => $json["data"]["query"]["queryContext"]["alteredQuery"], "correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"] ]; } // check for next page if($json["data"]["result"]["lastPage"] === false){ $params["offset"] = $params["offset"] + 10; $out["npt"] = $this->backend->store( json_encode($params), "web", $proxy ); } // parse results foreach($json["data"]["result"]["items"]["mainline"] as $item){ switch($item["type"]){ // ignores ads case "web": $first_iteration = true; foreach($item["items"] as $result){ if(isset($result["thumbnailUrl"])){ $thumb = [ "url" => $this->unshitimage($result["thumbnailUrl"]), "ratio" => "16:9" ]; }else{ $thumb = [ "url" => null, "ratio" => null ]; } $sublinks = []; if(isset($result["links"])){ foreach($result["links"] as $link){ $sublinks[] = [ "title" => $this->trimdots($link["title"]), "date" => null, "description" => isset($link["desc"]) ? $this->trimdots($link["desc"]) : null, "url" => $link["url"] ]; } } // detect gibberish results if( $first_iteration && !isset($result["urlPingSuffix"]) ){ throw new Exception("Qwant returned gibberish results"); } $out["web"][] = [ "title" => $this->trimdots($result["title"]), "description" => $this->trimdots($result["desc"]), "url" => $result["url"], "date" => null, "type" => "web", "thumb" => $thumb, "sublink" => $sublinks, "table" => [] ]; $first_iteration = false; } break; case "images": foreach($item["items"] as $image){ $out["image"][] = [ "title" => $image["title"], "source" => [ [ "url" => $image["media"], "width" => (int)$image["width"], "height" => (int)$image["height"] ], [ "url" => $this->unshitimage($image["thumbnail"]), "width" => $image["thumb_width"], "height" => $image["thumb_height"] ] ], "url" => $image["url"] ]; } break; case "videos": foreach($item["items"] as $video){ $out["video"][] = [ "title" => $video["title"], "description" => null, "date" => (int)$video["date"], "duration" => $video["duration"] === null ? null : $video["duration"] / 1000, "views" => null, "thumb" => $video["thumbnail"] === null ? [ "url" => null, "ratio" => null, ] : [ "url" => $this->unshitimage($video["thumbnail"]), "ratio" => "16:9", ], "url" => $video["url"] ]; } break; case "related_searches": foreach($item["items"] as $related){ $out["related"][] = $related["text"]; } break; } } return $out; } public function image($get){ if($get["npt"]){ [$params, $proxy] = $this->backend->get( $get["npt"], "images" ); $params = json_decode($params, true); }else{ $search = $get["s"]; if(strlen($search) === 0){ throw new Exception("Search term is empty!"); } $proxy = $this->backend->get_ip(); $params = [ "t" => "images", "q" => $search, "count" => 125, "locale" => $get["country"], "offset" => 0, // increment by 125 "device" => "desktop", "tgp" => 3 ]; if($get["time"] != "any"){ $params["freshness"] = $get["time"]; } foreach(["size", "color", "imagetype", "license"] as $p){ if($get[$p] != "any"){ $params[$p] = $get[$p]; } } switch($get["nsfw"]){ case "yes": $params["safesearch"] = 0; break; case "maybe": $params["safesearch"] = 1; break; case "no": $params["safesearch"] = 2; break; } } try{ $json = $this->get( $proxy, "https://api.qwant.com/v3/search/images", $params, ); }catch(Exception $err){ throw new Exception("Failed to get JSON"); } /* $handle = fopen("scraper/yandex.json", "r"); $json = fread($handle, filesize("scraper/yandex.json")); fclose($handle);*/ $json = json_decode($json, true); if($json === null){ throw new Exception("Failed to decode JSON"); } $this->detect_errors($json); if(isset($json["data"]["result"]["items"]["mainline"])){ throw new Exception("Qwant returned gibberish results"); } $out = [ "status" => "ok", "npt" => null, "image" => [] ]; if($json["data"]["result"]["lastPage"] === false){ $params["offset"] = $params["offset"] + 125; $out["npt"] = $this->backend->store( json_encode($params), "images", $proxy ); } foreach($json["data"]["result"]["items"] as $image){ $out["image"][] = [ "title" => $this->trimdots($image["title"]), "source" => [ [ "url" => $image["media"], "width" => $image["width"], "height" => $image["height"] ], [ "url" => $this->unshitimage($image["thumbnail"]), "width" => $image["thumb_width"], "height" => $image["thumb_height"] ] ], "url" => $image["url"] ]; } return $out; } public function video($get){ $search = $get["s"]; if(strlen($search) === 0){ throw new Exception("Search term is empty!"); } $params = [ "t" => "videos", "q" => $search, "count" => 50, "locale" => $get["country"], "offset" => 0, // dont implement pagination "device" => "desktop", "tgp" => 3 ]; switch($get["nsfw"]){ case "yes": $params["safesearch"] = 0; break; case "maybe": $params["safesearch"] = 1; break; case "no": $params["safesearch"] = 2; break; } try{ $json = $this->get( $this->backend->get_ip(), "https://api.qwant.com/v3/search/videos", $params ); }catch(Exception $error){ throw new Exception("Could not fetch JSON"); } /* $handle = fopen("scraper/yandex-video.json", "r"); $json = fread($handle, filesize("scraper/yandex-video.json")); fclose($handle); */ $json = json_decode($json, true); if($json === null){ throw new Exception("Could not parse JSON"); } $this->detect_errors($json); if(isset($json["data"]["result"]["items"]["mainline"])){ throw new Exception("Qwant returned gibberish results"); } $out = [ "status" => "ok", "npt" => null, "video" => [], "author" => [], "livestream" => [], "playlist" => [], "reel" => [] ]; foreach($json["data"]["result"]["items"] as $video){ if(empty($video["thumbnail"])){ $thumb = [ "url" => null, "ratio" => null ]; }else{ $thumb = [ "url" => $this->unshitimage($video["thumbnail"], false), "ratio" => "16:9" ]; } $duration = (int)$video["duration"]; $out["video"][] = [ "title" => $video["title"], "description" => $this->limitstrlen($video["desc"]), "author" => [ "name" => $video["channel"], "url" => null, "avatar" => null ], "date" => (int)$video["date"], "duration" => $duration === 0 ? null : $duration, "views" => null, "thumb" => $thumb, "url" => preg_replace("/\?syndication=.+/", "", $video["url"]) ]; } return $out; } public function news($get){ $search = $get["s"]; if(strlen($search) === 0){ throw new Exception("Search term is empty!"); } $params = [ "t" => "news", "q" => $search, "count" => 50, "locale" => $get["country"], "offset" => 0, // dont implement pagination "device" => "desktop", "tgp" => 3 ]; switch($get["nsfw"]){ case "yes": $params["safesearch"] = 0; break; case "maybe": $params["safesearch"] = 1; break; case "no": $params["safesearch"] = 2; break; } try{ $json = $this->get( $this->backend->get_ip(), "https://api.qwant.com/v3/search/news", $params ); }catch(Exception $error){ throw new Exception("Could not fetch JSON"); } /* $handle = fopen("scraper/yandex-video.json", "r"); $json = fread($handle, filesize("scraper/yandex-video.json")); fclose($handle); */ $json = json_decode($json, true); if($json === null){ throw new Exception("Could not parse JSON"); } $this->detect_errors($json); if(isset($json["data"]["result"]["items"]["mainline"])){ throw new Exception("Qwant returned gibberish results"); } $out = [ "status" => "ok", "npt" => null, "news" => [] ]; foreach($json["data"]["result"]["items"] as $news){ if(empty($news["media"][0]["pict_big"]["url"])){ $thumb = [ "url" => null, "ratio" => null ]; }else{ $thumb = [ "url" => $this->unshitimage($news["media"][0]["pict_big"]["url"], false), "ratio" => "16:9" ]; } $out["news"][] = [ "title" => $news["title"], "author" => $news["press_name"], "description" => $this->trimdots($news["desc"]), "date" => (int)$news["date"], "thumb" => $thumb, "url" => $news["url"] ]; } return $out; } private function detect_errors($json){ if( isset($json["status"]) && $json["status"] == "error" ){ if(isset($json["data"]["error_data"]["captchaUrl"])){ throw new Exception("Qwant returned a captcha"); }elseif(isset($json["data"]["error_data"]["error_code"])){ throw new Exception( "Qwant returned an API error: " . $json["data"]["error_data"]["error_code"] ); } throw new Exception("Qwant returned an API error"); } } private function limitstrlen($text){ return explode("\n", wordwrap($text, 300, "\n"))[0]; } private function trimdots($text){ return trim($text, ". "); } private function unshitimage($url, $is_bing = true){ // https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0 parse_str(parse_url($url)["query"], $parts); if($is_bing){ $parse = parse_url($parts["u"]); parse_str($parse["query"], $parts); return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]); } return $parts["u"]; } }