bypasscaptcha($html, "yes", "ca");*/ class brave{ public function __construct(){ include "lib/fuckhtml.php"; $this->fuckhtml = new fuckhtml(); include "lib/nextpage.php"; $this->nextpage = new nextpage("brave"); } public function getfilters($page){ switch($page){ case "web": return [ "country" => [ "display" => "Country", "option" => [ "all" => "All Regions", "ar" => "Argentina", "au" => "Australia", "at" => "Austria", "be" => "Belgium", "br" => "Brazil", "ca" => "Canada", "cl" => "Chile", "cn" => "China", "dk" => "Denmark", "fi" => "Finland", "fr" => "France", "de" => "Germany", "hk" => "Hong Kong", "in" => "India", "id" => "Indonesia", "it" => "Italy", "jp" => "Japan", "kr" => "Korea", "my" => "Malaysia", "mx" => "Mexico", "nl" => "Netherlands", "nz" => "New Zealand", "no" => "Norway", "pl" => "Poland", "pt" => "Portugal", "ph" => "Philippines", "ru" => "Russia", "sa" => "Saudi Arabia", "za" => "South Africa", "es" => "Spain", "se" => "Sweden", "ch" => "Switzerland", "tw" => "Taiwan", "tr" => "Turkey", "gb" => "United Kingdom", "us" => "United States" ] ], "nsfw" => [ "display" => "NSFW", "option" => [ "yes" => "Yes", "maybe" => "Maybe", "no" => "No" ] ], "newer" => [ "display" => "Newer than", "option" => "_DATE" ], "older" => [ "display" => "Older than", "option" => "_DATE" ] ]; break; case "news": return [ "country" => [ "display" => "Country", "option" => [ "all" => "All regions", "ar" => "Argentina", "au" => "Australia", "at" => "Austria", "be" => "Belgium", "br" => "Brazil", "ca" => "Canada", "cl" => "Chile", "cn" => "China", "dk" => "Denmark", "fi" => "Finland", "fr" => "France", "de" => "Germany", "hk" => "Hong Kong", "in" => "India", "id" => "Indonesia", "it" => "Italy", "jp" => "Japan", "kr" => "Korea", "my" => "Malaysia", "mx" => "Mexico", "nl" => "Netherlands", "nz" => "New Zealand", "no" => "Norway", "pl" => "Poland", "pt" => "Portugal", "ph" => "Philippines", "ru" => "Russia", "sa" => "Saudi Arabia", "za" => "South Africa", "es" => "Spain", "se" => "Sweden", "ch" => "Switzerland", "tw" => "Taiwan", "tr" => "Turkey", "gb" => "United Kingdom", "us" => "United States" ] ], "nsfw" => [ "display" => "NSFW", "option" => [ "yes" => "Yes", "maybe" => "Maybe", "no" => "No" ] ] ]; break; } } private function get($url, $get = [], $nsfw, $country/*, $is_post = false, $additional_cookies = null*/){ switch($nsfw){ case "yes": $nsfw = "off"; break; case "maybe": $nsfw = "moderate"; break; case "no": $nsfw = "strict"; break; } //$cookie = "safesearch={$nsfw}; country={$country}; useLocation=0"; /* if($additional_cookies !== null){ $cookie = $additional_cookies . "; " . $cookie; }*/ $headers = [ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip", "Cookie: safesearch={$nsfw}; country={$country}; useLocation=0; summarizer=0", "DNT: 1", "Connection: keep-alive", "Upgrade-Insecure-Requests: 1", "Sec-Fetch-Dest: document", "Sec-Fetch-Mode: navigate", "Sec-Fetch-Site: none", "Sec-Fetch-User: ?1"//, //"Content-Type: application/json" ]; if($country == "any"){ $country = "all"; } $curlproc = curl_init(); /*if($is_post){ curl_setopt($curlproc, CURLOPT_POST, true); curl_setopt( $curlproc, CURLOPT_POSTFIELDS, json_encode($get) ); }else{ */ if($get !== []){ $get = http_build_query($get); $url .= "?" . $get; } //} curl_setopt($curlproc, CURLOPT_URL, $url); curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers); curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); $data = curl_exec($curlproc); if(curl_errno($curlproc)){ throw new Exception(curl_error($curlproc)); } curl_close($curlproc); return $data; } public function web($get){ if($get["npt"]){ // get next page data $q = json_decode($this->nextpage->get($get["npt"], "web"), true); $search = $q["q"]; $q["spellcheck"] = 0; $nsfw = $q["nsfw"]; unset($q["nsfw"]); $country = $q["country"]; unset($q["country"]); }else{ // get _GET data instead $search = $get["s"]; if(strlen($search) === 0){ throw new Exception("Search term is empty!"); } if(strlen($search) > 2048){ throw new Exception("Search query is too long!"); } $nsfw = $get["nsfw"]; $country = $get["country"]; $older = $get["older"]; $newer = $get["newer"]; $q = [ "q" => $search ]; /* Pass older/newer filters to brave */ if($newer !== false){ $newer = date("Y-m-d", $newer); if($older === false){ $older = date("Y-m-d", time()); } } if( is_string($older) === false && $older !== false ){ $older = date("Y-m-d", $older); if($newer === false){ $newer = "1970-01-02"; } } if($older !== false){ $q["tf"] = "{$newer}to{$older}"; } } /* $handle = fopen("scraper/brave.html", "r"); $html = fread($handle, filesize("scraper/brave.html")); fclose($handle); */ try{ $html = $this->get( "https://search.brave.com/search", $q, $nsfw, $country ); }catch(Exception $error){ throw new Exception("Could not fetch search page"); } $out = [ "status" => "ok", "spelling" => [ "type" => "no_correction", "using" => null, "correction" => null ], "npt" => null, "answer" => [], "web" => [], "image" => [], "video" => [], "news" => [], "related" => [] ]; // load html $this->fuckhtml->load($html); /* Get next page "token" */ $nextpage = $this->fuckhtml ->getElementsByClassName( "btn ml-15", "a" ); if(count($nextpage) !== 0){ preg_match( '/offset=([0-9]+)/', $this->fuckhtml->getTextContent($nextpage[0]["attributes"]["href"]), $nextpage ); $q["offset"] = (int)$nextpage[1]; $q["nsfw"] = $nsfw; $q["country"] = $country; $out["npt"] = $this->nextpage->store( json_encode($q), "web" ); } /* Get discussions (and append them to web results) */ // they're loaded using javascript!! $discussion = $this->fuckhtml ->getElementById( "js-discussions", "script" ); if( $discussion && isset($discussion["attributes"]["data"]) ){ $discussion = json_decode( $this->fuckhtml ->getTextContent( $discussion["attributes"]["data"] ), true ); foreach($discussion["results"] as $result){ $data = [ "title" => $this->titledots($result["title"]), "description" => null, "url" => $result["url"], "date" => null, "type" => "web", "thumb" => [ "url" => null, "ratio" => null ], "sublink" => [], "table" => [] ]; // description $data["description"] = $this->limitstrlen( $this->limitwhitespace( $this->titledots( $this->fuckhtml->getTextContent( $result["description"] ) ) ) ); if($result["age"] != ""){ $data["date"] = strtotime($result["age"]); } // populate table if($result["data"]["num_answers"] != ""){ $data["table"]["Replies"] = (int)$result["data"]["num_answers"]; } if($result["data"]["score"] != ""){ $score = explode("|", $result["data"]["score"]); if(count($score) === 2){ $score = ((int)$score[1]) . " (" . trim($score[0]) . ")"; }else{ $score = (int)$score[0]; } $data["table"]["Votes"] = $score; } if($result["thumbnail"] != ""){ $data["thumb"]["url"] = $result["thumbnail"]; $data["thumb"]["ratio"] = "16:9"; } $out["web"][] = $data; } } /* Get related searches */ $faq = $this->fuckhtml ->getElementById("js-faq", "script"); if( $faq && isset($faq["attributes"]["data"]) ){ $faq = json_decode( $this->fuckhtml ->getTextContent( $faq["attributes"]["data"] ), true ); foreach($faq["items"] as $related){ $out["related"][] = $related["question"]; } } /* Get spelling autocorrect */ $altered = $this->fuckhtml ->getElementById("altered-query", "div"); if($altered){ $this->fuckhtml->load($altered); $altered = $this->fuckhtml ->getElementsByTagName("a"); if(count($altered) === 2){ $out["spelling"] = [ "type" => "including", "using" => $this->fuckhtml ->getTextContent($altered[0]), "correction" => $this->fuckhtml ->getTextContent($altered[1]) ]; } $this->fuckhtml->load($html); } /* Get web results */ $resulthtml = $this->fuckhtml ->getElementById( "results", "div" ); $this->fuckhtml->load($resulthtml); $items = 0; foreach( $this->fuckhtml ->getElementsByClassName("snippet fdb") as $result ){ $data = [ "title" => null, "description" => null, "url" => null, "date" => null, "type" => "web", "thumb" => [ "url" => null, "ratio" => null ], "sublink" => [], "table" => [] ]; if( isset($result["attributes"]["data-type"]) && $result["attributes"]["data-type"] == "ad" ){ // is an ad, skip continue; } $this->fuckhtml->load($result); /* Get title */ $title = $this->fuckhtml ->getElementsByClassName( "snippet-title", "span" ); if(count($title) === 0){ // encountered AI summarizer // or misspelling indicator @TODO continue; } if(isset($title[0]["attributes"]["title"])){ $data["title"] = $this->titledots( $this->fuckhtml ->getTextContent( $title[0]["attributes"]["title"] ) ); }else{ $data["title"] = $this->titledots( $this->fuckhtml ->getTextContent( $title[0] ) ); } /* Get description */ $description = $this->fuckhtml ->getElementsByClassName( "snippet-description", "p" ); if(count($description) !== 0){ $data["description"] = $this->titledots( $this->fuckhtml ->getTextContent( $description[0] ) ); // also check for thumbnail in here $img = $this->fuckhtml ->getElementsByClassName( "thumb", "img" ); if(count($img) !== 0){ $data["thumb"] = [ "url" => $this->unshiturl($img[0]["attributes"]["src"]), "ratio" => "16:9" ]; }else{ // might be a video thumbnail wrapper? $wrapper = $this->fuckhtml ->getElementsByClassName( "video-thumb", "a" ); if(count($wrapper) !== 0){ // we found a video $this->fuckhtml->load($wrapper[0]); $img = $this->fuckhtml ->getElementsByTagName("img"); $data["thumb"] = [ "url" => $this->unshiturl($img[0]["attributes"]["src"]), "ratio" => "16:9" ]; // get the video length, if its there $duration = $this->fuckhtml ->getElementsByClassName( "duration", "div" ); if(count($duration) !== 0){ $data["table"]["Duration"] = $duration[0]["innerHTML"]; } // reset html load $this->fuckhtml->load($result); } } }else{ // is a steam/shop listing $description_alt = $this->fuckhtml ->getElementsByClassName( "text-sm", "div" ); if(count($description_alt) !== 0){ switch($description_alt[0]["attributes"]["class"]){ case "text-sm text-gray": case "description text-sm": $data["description"] = $this->titledots( $this->fuckhtml ->getTextContent( $description_alt[0] ) ); break; } // get table sublink $sublink = $this->fuckhtml ->getElementsByClassName( "r-attr text-sm", "div" ); if(count($sublink) !== 0){ $this->tablesublink($sublink, $data); } // check for thumb element $data["thumb"] = $this->getimagelinkfromstyle("thumb"); }else{ // ok... finally... // maybe its the instant answer thingy $answer = $this->fuckhtml ->getElementsByClassName("answer"); if(count($answer) !== 0){ $data["description"] = $this->titledots( $this->fuckhtml ->getTextContent($answer[0]) ); } } } // finally, fix brave's date format sucking balls $data["description"] = explode(" - ", $data["description"], 2); if(count($data["description"]) === 0){ // nothing to do $data["description"] = $data["description"][0]; }else{ // attempt to parse $time = strtotime($data["description"][0]); if($time !== false){ // got response $data["date"] = $time; array_shift($data["description"]); } // merge back $data["description"] = implode(" - ", $data["description"]); } /* Check content type */ $content_type = $this->fuckhtml ->getElementsByClassName( "content-type", "span" ); if(count($content_type) !== 0){ $data["type"] = strtolower($this->fuckhtml->getTextContent($content_type[0])); } /* Check subtext table thingy */ $table_items = array_merge( $this->fuckhtml ->getElementsByClassName( "item-attributes", "div" ), $this->fuckhtml ->getElementsByClassName( "r", "div" ) ); /* DIV: item-attributes */ if(count($table_items) !== 0){ foreach($table_items as $table){ $this->fuckhtml->load($table); $span = $this->fuckhtml ->getElementsByClassName( "text-sm", "*" ); foreach($span as $item){ $item = explode( ":", $this->fuckhtml->getTextContent(preg_replace('/\n/', " ", $item["innerHTML"])), 2 ); if(count($item) === 2){ $data["table"][trim($item[0])] = trim($this->limitwhitespace($item[1])); } } } $this->fuckhtml->load($result); } // get video sublinks $table_items = $this->fuckhtml ->getElementsByClassName( "snippet-description published-time", "p" ); if(count($table_items) !== 0){ $table_items = explode( '', $table_items[0]["innerHTML"], 2 ); if(count($table_items) === 2){ $item2 = []; $item2[] = explode(":", $this->fuckhtml->getTextContent($table_items[0])); if(trim($table_items[1]) != ""){ $item2[] = explode(":", $this->fuckhtml->getTextContent($table_items[1])); } foreach($item2 as $it){ $data["table"][trim($it[0])] = trim($it[1]); } } } /* Get URL */ $data["url"] = $this->fuckhtml->getTextContent( $this->fuckhtml ->getElementsByTagName("a") [0] ["attributes"] ["href"] ); /* Get sublinks */ $sublinks_elems = $this->fuckhtml ->getElementsByClassName( "snippet", "div" ); $sublinks = []; foreach($sublinks_elems as $sublink){ $this->fuckhtml->load($sublink); $a = $this->fuckhtml ->getElementsByTagName("a")[0]; $title = $this->fuckhtml ->getTextContent($a); $url = $a["attributes"]["href"]; $description = $this->titledots( $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByTagName("p")[0] ) ); $sublinks[] = [ "title" => $title, "date" => null, "description" => $description, "url" => $url ]; } /* Get smaller sublinks */ $sublinks_elems = $this->fuckhtml ->getElementsByClassName( "deep-link", "a" ); foreach($sublinks_elems as $sublink){ $sublinks[] = [ "title" => $this->fuckhtml->getTextContent($sublink), "date" => null, "description" => null, "url" => $sublink["attributes"]["href"] ]; } // append sublinks to $data !! $data["sublink"] = $sublinks; // append first result to start of $out["web"] // other results are after if($items === 0){ $out["web"] = [$data, ...$out["web"]]; }else{ $out["web"][] = $data; } $items++; } /* Get news */ $this->fuckhtml->load($resulthtml); $news_carousel = $this->fuckhtml->getElementById("news-carousel"); $this->fuckhtml->load($news_carousel); if($news_carousel){ $a = $this->fuckhtml ->getElementsByClassName( "card fdb", "a" ); foreach($a as $news){ $this->fuckhtml->load($news); $out["news"][] = [ "title" => $this->titledots( $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "title", "div" )[0] ) ), "description" => null, "date" => strtotime( $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "card-footer__timestamp", "span" )[0] ) ), "thumb" => $this->getimagelinkfromstyle("img-bg"), "url" => $this->fuckhtml->getTextContent($news["attributes"]["href"]) ]; } } /* Get videos */ $this->fuckhtml->load($resulthtml); $news_carousel = $this->fuckhtml->getElementById("video-carousel"); $this->fuckhtml->load($news_carousel); if($news_carousel){ $a = $this->fuckhtml ->getElementsByClassName( "card fdb", "a" ); foreach($a as $video){ $this->fuckhtml->load($video); $date = null; $date_o = $this->fuckhtml ->getElementsByClassName( "text-gray text-xs", "span" ); if(count($date_o) !== 0){ $date = strtotime( $this->fuckhtml ->getTextContent( $date_o[0] ) ); } $out["video"][] = [ "title" => $this->titledots( $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "title", "div" )[0] ) ), "description" => null, "date" => $date, "duration" => null, "views" => null, "thumb" => $this->getimagelinkfromstyle("img-bg"), "url" => $this->fuckhtml->getTextContent($video["attributes"]["href"]) ]; } } /* Get DEFINITION snippet */ $this->fuckhtml->load($html); $infobox = $this->fuckhtml->getElementById("rh-definitions", "div"); if($infobox !== false){ $answer = [ "title" => null, "description" => [], "url" => null, "thumb" => null, "table" => [], "sublink" => [] ]; $this->fuckhtml->load($infobox); $answer["title"] = $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "header", "h5" )[0] ); $sections = $this->fuckhtml ->getElementsByTagName("section"); $i = -1; foreach($sections as $section){ $this->fuckhtml->load($section); $items = $this->fuckhtml ->getElementsByTagName("*"); $li = 1; $pronounce = false; foreach($items as $item){ switch($item["tagName"]){ case "h6": if( isset($item["attributes"]["class"]) && $item["attributes"]["class"] == "h6 pronunciation" ){ if($pronounce){ break; } $answer["description"][] = [ "type" => "quote", "value" => $this->fuckhtml ->getTextContent( $item ) ]; $answer["description"][] = [ "type" => "audio", "url" => "https://search.brave.com/api/rhfetch?rhtype=definitions&word={$answer["title"]}&source=ahd-5" ]; $pronounce = true; $i = $i + 2; break; } $answer["description"][] = [ "type" => "title", "value" => $this->fuckhtml ->getTextContent( $item ) ]; $i++; break; case "li": if( $i !== -1 && $answer["description"][$i]["type"] == "text" ){ $answer["description"][$i]["value"] .= "\n" . $li . ". " . $this->fuckhtml ->getTextContent( $item ); }else{ $answer["description"][] = [ "type" => "text", "value" => $li . ". " . $this->fuckhtml ->getTextContent( $item ) ]; $i++; } $li++; break; case "a": $answer["url"] = $this->fuckhtml ->getTextContent( $item["attributes"]["href"] ); break; } } } $out["answer"][] = $answer; } /* Get instant answer */ $this->fuckhtml->load($html); $infobox = $this->fuckhtml->getElementById("infobox", "div"); if($infobox !== false){ $answer = [ "title" => null, "description" => [], "url" => null, "thumb" => null, "table" => [], "sublink" => [] ]; $this->fuckhtml->load($infobox); $div = $this->fuckhtml->getElementsByTagName("div"); /* Get title + url */ $title = $this->fuckhtml ->getElementsByClassName("infobox-title", "a"); if(count($title) !== 0){ $answer["title"] = $this->fuckhtml ->getTextContent( $title[0] ); $answer["url"] = $this->fuckhtml ->getTextContent( $title[0]["attributes"]["href"] ); } /* Get thumbnail */ $thumb = $this->getimagelinkfromstyle("thumb"); if($thumb["url"] !== null){ $answer["thumb"] = $thumb["url"]; } /* Get table */ $title = $this->fuckhtml ->getElementsByClassName( "infobox-attr-header", "div" ); $rowhtml = $infobox; if(count($title) >= 2){ $rowhtml = explode( $title[1]["outerHTML"], $infobox["innerHTML"], 2 )[0]; } $this->fuckhtml->load($rowhtml); $rows = $this->fuckhtml ->getElementsByClassName("infobox-attr", "div"); foreach($rows as $row){ if(!isset($row["innerHTML"])){ continue; } $this->fuckhtml->load($row); $span = $this->fuckhtml ->getElementsByTagName("span"); if(count($span) === 2){ $answer["table"][ $this->fuckhtml->getTextContent($span[0]) ] = str_replace("\n", ", ", $this->fuckhtml->getTextContent($span[1], true)); } } $this->fuckhtml->load($infobox); /* Parse stackoverflow answers */ $code = $this->fuckhtml ->getElementById("codebox-answer", $div); if($code){ // this might be standalone text with no paragraphs, check for that $author = $this->fuckhtml ->getElementById("author"); $desc_tmp = str_replace( $author["outerHTML"], "", $code["innerHTML"] ); $this->fuckhtml->load($desc_tmp); $code = $this->fuckhtml ->getElementsByTagName("*"); if(count($code) === 0){ $answer["description"] = [ [ "type" => "text", "value" => $this->fuckhtml ->getTextContent( $desc_tmp ) ], [ "type" => "quote", "value" => $this->fuckhtml ->getTextContent( $author ) ] ]; }else{ $text = []; $i = 0; foreach($code as $snippet){ switch($snippet["tagName"]){ case "p": $this->fuckhtml->load($snippet["innerHTML"]); $codetags = $this->fuckhtml ->getElementsByTagName("*"); $tmphtml = $snippet["innerHTML"]; foreach($codetags as $tag){ if(!isset($tag["outerHTML"])){ continue; } $tmphtml = explode( $tag["outerHTML"], $tmphtml, 2 ); $value = $this->fuckhtml->getTextContent($tmphtml[0], false, false); $this->appendtext($value, $text, $i); $type = null; switch($tag["tagName"]){ case "code": $type = "inline_code"; break; case "em": $type = "italic"; break; case "blockquote": $type = "quote"; break; default: $type = "text"; } if($type !== null){ $value = $this->fuckhtml->getTextContent($tag, false, true); if(trim($value) != ""){ if( $i !== 0 && $type == "title" ){ $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]); } $text[] = [ "type" => $type, "value" => $value ]; $i++; } } if(count($tmphtml) === 2){ $tmphtml = $tmphtml[1]; }else{ break; } } if(is_array($tmphtml)){ $tmphtml = $tmphtml[0]; } if(strlen($tmphtml) !== 0){ $value = $this->fuckhtml->getTextContent($tmphtml, false, false); $this->appendtext($value, $text, $i); } break; case "pre": switch($text[$i - 1]["type"]){ case "text": case "italic": $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]); break; } $text[] = [ "type" => "code", "value" => rtrim( $this->fuckhtml ->getTextContent( $snippet, true, false ) ) ]; $i++; break; case "ol": $o = 0; $this->fuckhtml->load($snippet); $li = $this->fuckhtml ->getElementsByTagName("li"); foreach($li as $elem){ $o++; $this->appendtext( $o . ". " . $this->fuckhtml ->getTextContent( $elem ), $text, $i ); } break; } } if( $i !== 0 && $text[$i - 1]["type"] == "text" ){ $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]); } if($author){ $text[] = [ "type" => "quote", "value" => $this->fuckhtml->getTextContent($author) ]; } $answer["description"] = $text; } }else{ /* Get normal description */ $description = $this->fuckhtml ->getElementsByClassName( "mb-6", "div" ); if(count($description) !== 0){ $description = [ [ "type" => "text", "value" => $this->titledots( preg_replace( '/ Wikipedia$/', "", $this->fuckhtml ->getTextContent( $description[0] ) ) ) ] ]; $ratings = $this->fuckhtml ->getElementById("ratings"); if($ratings){ $this->fuckhtml->load($ratings); $ratings = $this->fuckhtml ->getElementsByClassName( "flex-hcenter mb-10", "div" ); $description[] = [ "type" => "title", "value" => "Ratings" ]; foreach($ratings as $rating){ $this->fuckhtml->load($rating); $num = $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "r-num", "div" )[0] ); $href = $this->fuckhtml ->getElementsByClassName( "mr-10", "a" )[0]; $votes = $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "text-sm", "span" )[0] ); $c = count($description) - 1; if( $c !== -1 && $description[$c]["type"] == "text" ){ $description[$c]["value"] .= $num . " "; }else{ $description[] = [ "type" => "text", "value" => $num . " " ]; } $description[] = [ "type" => "link", "value" => $this->fuckhtml->getTextContent($href), "url" => $this->fuckhtml->getTextContent($href["attributes"]["href"]) ]; $description[] = [ "type" => "text", "value" => " (" . $votes . ")\n" ]; } } $answer["description"] = $description; } } /* Get sublinks */ $this->fuckhtml->load($infobox); $profiles = $this->fuckhtml ->getElementById("profiles"); if($profiles){ $profiles = $this->fuckhtml ->getElementsByClassName( "chip", "a" ); foreach($profiles as $profile){ $name = $this->fuckhtml->getTextContent($profile["attributes"]["title"]); if(strtolower($name) == "steampowered"){ $name = "Steam"; } $answer["sublink"][$name] = $this->fuckhtml->getTextContent($profile["attributes"]["href"]); } } $actors = $this->fuckhtml ->getElementById("panel-movie-cast"); if($actors){ $this->fuckhtml->load($actors); $actors = $this->fuckhtml ->getElementsByClassName("card"); $answer["description"][] = [ "type" => "title", "value" => "Cast" ]; foreach($actors as $actor){ $this->fuckhtml->load($actor); $answer["description"][] = [ "type" => "text", "value" => $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName("card-body") [0] ) ]; $answer["description"][] = [ "type" => "image", "url" => $this->getimagelinkfromstyle("person-thumb")["url"] ]; } } $out["answer"][] = $answer; } /* Get actor standalone thingy */ $this->fuckhtml->load($resulthtml); $actors = $this->fuckhtml ->getElementById("predicate-entity"); if($actors){ $this->fuckhtml->load($actors); $cards = $this->fuckhtml ->getElementsByClassName("card"); $url = $this->fuckhtml ->getElementsByClassName( "disclaimer", "div" )[0]; $this->fuckhtml->load($url); $url = $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByTagName("a") [0] ["attributes"] ["href"] ); $this->fuckhtml->load($actors); $answer = [ "title" => $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "entity", "span" )[0] ) . " (Cast)", "description" => [], "url" => $url, "sublink" => [], "thumb" => null, "table" => [] ]; foreach($cards as $card){ $this->fuckhtml->load($card); $answer["description"][] = [ "type" => "title", "value" => $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "title" )[0] ) ]; $answer["description"][] = [ "type" => "text", "value" => $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "text-xs desc" )[0] ) ]; $answer["description"][] = [ "type" => "image", "url" => $this->getimagelinkfromstyle("img-bg")["url"] ]; } $out["answer"][] = $answer; } return $out; } public function news($get){ $search = $get["s"]; if(strlen($search) === 0){ throw new Exception("Search term is empty!"); } $nsfw = $get["nsfw"]; $country = $get["country"]; if(strlen($search) > 2048){ throw new Exception("Search query is too long!"); } /* $handle = fopen("scraper/brave-news.html", "r"); $html = fread($handle, filesize("scraper/brave-news.html")); fclose($handle);*/ try{ $html = $this->get( "https://search.brave.com/news", [ "q" => $search ], $nsfw, $country ); }catch(Exception $error){ throw new Exception("Could not fetch search page"); } $out = [ "status" => "ok", "npt" => null, "news" => [] ]; // load html $this->fuckhtml->load($html); $news = $this->fuckhtml ->getElementsByClassName( "snippet inline gap-standard", "div" ); foreach($news as $article){ $data = [ "title" => null, "author" => null, "description" => null, "date" => null, "thumb" => [ "url" => null, "ratio" => null ], "url" => null ]; $this->fuckhtml->load($article); $elems = $this->fuckhtml ->getElementsByTagName("*"); // get title $data["title"] = $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "snippet-title", $elems ) [0] ["innerHTML"] ); // get description $data["description"] = $this->titledots( $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "snippet-description", $elems ) [0] ["innerHTML"] ) ); // get date $date = explode( "•", $this->fuckhtml ->getTextContent( $this->fuckhtml ->getElementsByClassName( "snippet-url", $elems )[0] ) ); if( count($date) !== 1 && trim($date[1]) != "" ){ $data["date"] = strtotime( $date[1] ); } // get URL $data["url"] = $this->fuckhtml->getTextContent( $this->unshiturl( $this->fuckhtml ->getElementsByClassName( "result-header", $elems ) [0] ["attributes"] ["href"] ) ); // get thumbnail $thumb = $this->fuckhtml ->getElementsByTagName( "img" ); if( count($thumb) === 2 && trim( $thumb[1] ["attributes"] ["src"] ) != "" ){ $data["thumb"] = [ "url" => $this->fuckhtml->getTextContent( $this->unshiturl( $thumb[1] ["attributes"] ["src"] ) ), "ratio" => "16:9" ]; } $out["news"][] = $data; } return $out; } /* public function bypasscaptcha($html, $nsfw, $country){ // @TODO figure out why I still cant go trough // the captcha wall even after breaking it try{ $html = $this->get( "https://search.brave.com/goggles", [ "q" => "site:dailymotion.com my bloody valentine" ], $nsfw, $country ); }catch(Exception $error){ throw new Exception("Could not fetch html"); } // Bypass brave search captcha // this captcha only appears on the goggles page preg_match( '/this\.img\.src = "(.*)"/', $html, $image ); $image = base64_decode( explode( "data:image/png;base64,", $image[1] )[1] ); $im = new Imagick(); $im->readImageBlob($image); $im->blurImage(20, 20); $im->posterizeImage(2, imagick::IMGTYPE_COLORSEPARATION); // if we encounter a white line thats longer than 45px // we found the circle position $iterator = $im->getPixelRegionIterator(0, 77, 310, 1); $found = null; foreach( $iterator as $row ){ $whitecount = 0; $count = 0; foreach($row as $pixel){ if($pixel->getColor()["r"] === 255){ $whitecount++; $pixel->setColor("rgba(255,0,0,0)"); if($whitecount === 45){ $found = $count - 45; break 2; } }else{ $whitecount = 0; } $count++; $iterator->syncIterator(); } } $found = $found + 10; //header("Content-Type: image/png"); //echo $im; //die(); if($found === null){ throw new Exception("Could not bypass captcha"); } preg_match( '/data="{"captcha_id":"([0-9A-z-]+)"}"/', $html, $key ); $key = $key[1]; // we bypassed captcha, send POST data $order = $this->get( "https://search.brave.com/api/captcha?brave=0&captcha_id={$key}", [ "solution" => (string)$found ], $nsfw, $country, true ); $order = json_decode($order, true)["orderId"]; $orderpayload = $this->get( "https://search.brave.com/api/rewards/v1/orders/{$order}", [], $nsfw, $country ); $orderpayload = json_decode($orderpayload, true); $creds = $this->get( "https://search.brave.com/api/rewards/v1/orders/{$order}/credentials", [ "itemId" => $orderpayload["items"][0]["id"], "blindedCreds" => [ "fuYAVcB/m7BU66vf3wkNGxJCSaRhshB9o+8km3F1h2c=", "uswvcWJuPK/1qFlVdzBP3eQd0+V1EQgfAtnEoMIK+Uk=", "fJWKGLBxl3Gyn4n9FjTLq1PjupfABT7Ni8MeB+iGzUs=", "Aq9enJ/VZP9GxQIza3n65ZK7xQhY4VwDxv53BCb/Txg=", "FMJA9eSLHq71K+Pcwgm4gIQOmdR/6KMy5cMgXhpd5Ro=", "2NVhIAbvI317SP9/xXbVe/U57eWgvHyqVbHL/5+Gdmw=", "6mpjsjSCmYEzK2xlbL8DI2P4LuhWUOxjTLvsTAL9l24=", "kAn4wuHvIlKWhfuFfPTSfD4tZ5le9t7/61YbdEc/L3k=", "BjjUyG16aTfd1c0h4oBzgQQOekrH1f+a5CmcXqMPTR4=", "SBNgpCt4/V44yaQTfh+D027Yv1GJFHkjUEpPw6rAwRI=", "XDENAtdQ7PyYx+Qx1wQGQtDWgg8WpIMgWGmd4RDOVWE=", "tF7rB4sqamsiUk3K7fojdQSI0Q6iip72yKyhnvg/bC0=", "VsAqflirAd/u4VsLdfRS2UvnH24ZNkFh6YN3DctLjzQ=", "MntLbXkoI0LdcisCbNazmooiHXJyX91L1KERDAu1JRU=", "TH6Zs8JBvFDbTDWgKbfGE4M5/cSwCtHD8ms5Y/U8zHQ=", "jsZg0Z+qDPHymrbhdnesodhLNJ26QdunyMko1aVe4So=", "rpKsyj6/vdnuMgLI2BApeijtGq9g5USRDL0w6X2bnlQ=", "vCzliGT8A9vcLXj2sFf2kavOuYw69d70NpfgA22B4lI=", "7OWoxSCtYXWcaBSifF7AXNBif/sjcuO0IelzXG/3PFk=", "iiXtByNlT6nDMN9De5B58Jl8J0p6LCjnZ9aS3w2FEQU=", "zDhd7gsJ4h4JkDeGK0Y0mfFd8IBdkLhMOANzwO+4Dig=", "qANZ+AikwFReEA61JF009d/c3IHM/aSfIYwljckhJWE=", "nNC30pDLxtXvUr+WDwfDSrAInNBpfSZkPsV2JlpheWI=", "kGXE1pkt25P71kdJzmKIg4+yMR1VA5wNmbpBb/FhJQ8=", "aLqPsY1Qiz2UCa2Jx3YNNt8r4JINMphks/43EiyZfXU=", "bHGYZoQARZEM5LdFF6B74PkRqNd9EKxzuTvGYxjq+hk=", "JOsYQjfE/9Y1u29hR+GvEkNyxUI8blgLhX1iJI/aGRQ=", "yKjHjH5j600TJD/3WPsA1N3OmItDLifdjlysq4H6NV0=", "9lTnUbsPp7BJ7XVN5/T4yGfzD9DJdqWB7xk72s19MAA=", "5KHG8iY45em7zDhO/HlI0ydcZ0Ubn+XSyjifMmy7qXM=" ] ], $nsfw, $country, true ); var_dump($creds); sleep(2); $test = $this->get( "https://search.brave.com/api/rewards/v1/orders/{$order}/credentials", [], $nsfw, $country ); var_dump($test); $html = $this->get( "https://search.brave.com/goggles", [ "q" => "site:dailymotion.com my bloody valentine" ], $nsfw, $country, false, "__Secure-sku#brave-search-captcha=eyJ0eXBlIjoic2luZ2xlLXVzZSIsInZlcnNpb24iOjEsInNrdSI6ImJyYXZlLXNlYXJjaC1jYXB0Y2hhIiwicHJlc2VudGF0aW9uIjoiZXlKcGMzTjFaWElpT2lKaWNtRjJaUzVqYjIwL2MydDFQV0p5WVhabExYTmxZWEpqYUMxallYQjBZMmhoSWl3aWMybG5ibUYwZFhKbElqb2lNRzl0VDBneWQxZ3dTazkzU0VFMVJ6QTJaR1V5WjFOQ1dDdGhSM3B2Y2xsTVQwVTJZVVJtTUc5a1IweG1Wa3RhZEd0cU4xbHdia3BPT0VOVGNGbE5lVWR2YmpGRlNTOUhhMlZYU1RWNGQxTjJPWGxJTTNjOVBTSXNJblFpT2lKWlJWWldaVzR5TTJwQ01tSnZkakJ2U1hGNGJtSndUMGxEUW5Kd1drRjBRbWQxVnpoRlNURTNVREY2UVRaQlpUTXJSVGRFYm5NeVFqUmhka0pGYTFWM2FGY3JWRVZJVjNWcE9TdFllRU1yYlVSTVkyMTBRVDA5SW4wPSJ9" ); var_dump($html); }*/ private function appendtext($payload, &$text, &$index){ if(trim($payload) == ""){ return; } if( $index !== 0 && $text[$index - 1]["type"] == "text" ){ $text[$index - 1]["value"] .= "\n\n" . preg_replace('/ $/', " ", $payload); }else{ $text[] = [ "type" => "text", "value" => preg_replace('/ $/', " ", $payload) ]; $index++; } } private function tablesublink($html_collection, &$data){ foreach($html_collection as $html){ $html["innerHTML"] = preg_replace( '/