forked from lolcat/4get
1529 lines
28 KiB
PHP
1529 lines
28 KiB
PHP
<?php
|
|
|
|
class brave{
|
|
|
|
/**
 * Set up the two helpers every scraper method relies on: the HTML parser
 * (stored in $this->fuckhtml) and the next-page token store
 * (stored in $this->nextpage, namespaced as "brave").
 *
 * include_once is used instead of include so that creating a second
 * instance, or loading this class after another file already pulled in the
 * same library, does not execute the library file a second time.
 */
public function __construct(){
	
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
	
	include_once "lib/nextpage.php";
	$this->nextpage = new nextpage("brave");
}
|
|
|
|
/**
 * Describe the filters offered for a given result page.
 *
 * @param string $page "web", "images", "videos" or "news".
 * @return array|null Map of filter name => ["display" => label, "option" => choices].
 *                    "web" offers country, nsfw, newer, older and spellcheck;
 *                    the other three pages offer country and nsfw only.
 *                    Any other page name yields null.
 */
public function getfilters($page){
	
	// region codes shared by every page; the "all" entry is prepended per page
	// below because its label is capitalised differently for "web"
	$regions = [
		"ar" => "Argentina",
		"au" => "Australia",
		"at" => "Austria",
		"be" => "Belgium",
		"br" => "Brazil",
		"ca" => "Canada",
		"cl" => "Chile",
		"cn" => "China",
		"dk" => "Denmark",
		"fi" => "Finland",
		"fr" => "France",
		"de" => "Germany",
		"hk" => "Hong Kong",
		"in" => "India",
		"id" => "Indonesia",
		"it" => "Italy",
		"jp" => "Japan",
		"kr" => "Korea",
		"my" => "Malaysia",
		"mx" => "Mexico",
		"nl" => "Netherlands",
		"nz" => "New Zealand",
		"no" => "Norway",
		"pl" => "Poland",
		"pt" => "Portugal",
		"ph" => "Philippines",
		"ru" => "Russia",
		"sa" => "Saudi Arabia",
		"za" => "South Africa",
		"es" => "Spain",
		"se" => "Sweden",
		"ch" => "Switzerland",
		"tw" => "Taiwan",
		"tr" => "Turkey",
		"gb" => "United Kingdom",
		"us" => "United States"
	];
	
	$nsfw = [
		"display" => "NSFW",
		"option" => [
			"yes" => "Yes",
			"maybe" => "Maybe",
			"no" => "No"
		]
	];
	
	if($page == "web"){
		
		return [
			"country" => [
				"display" => "Country",
				"option" => ["all" => "All Regions"] + $regions
			],
			"nsfw" => $nsfw,
			"newer" => [
				"display" => "Newer than",
				"option" => "_DATE"
			],
			"older" => [
				"display" => "Older than",
				"option" => "_DATE"
			],
			"spellcheck" => [
				"display" => "Spellcheck",
				"option" => [
					"no" => "No",
					"yes" => "Yes"
				]
			]
		];
	}
	
	if(
		$page == "images" ||
		$page == "videos" ||
		$page == "news"
	){
		
		return [
			"country" => [
				"display" => "Country",
				"option" => ["all" => "All regions"] + $regions
			],
			"nsfw" => $nsfw
		];
	}
	
	// unknown page: fall through and return null, as before
}
|
|
|
|
/**
 * Fetch a Brave Search page and return the response body.
 *
 * The NSFW level and country are sent as cookies, not as query parameters.
 *
 * @param string $url     Endpoint to request.
 * @param array  $get     Query-string parameters; an empty array sends none.
 *                        (The former "= []" default sat before required
 *                        parameters, so it could never be omitted; it has
 *                        been dropped to avoid the PHP 8 deprecation.)
 * @param string $nsfw    "yes", "maybe" or "no"; mapped to the safesearch
 *                        cookie values "off", "moderate" and "strict". Any
 *                        other value is sent unchanged.
 * @param string $country Region code for the country cookie; "any" is sent
 *                        as "all".
 * @return string|bool The value returned by curl_exec().
 * @throws Exception carrying the cURL error message when the transfer fails.
 */
private function get($url, $get, $nsfw, $country){
	
	switch($nsfw){
		
		case "yes": $nsfw = "off"; break;
		case "maybe": $nsfw = "moderate"; break;
		case "no": $nsfw = "strict"; break;
	}
	
	if($country == "any"){
		
		$country = "all";
	}
	
	$headers = [
		"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip",
		"Cookie: safesearch={$nsfw}; country={$country}; useLocation=0; summarizer=0",
		"DNT: 1",
		"Connection: keep-alive",
		"Upgrade-Insecure-Requests: 1",
		"Sec-Fetch-Dest: document",
		"Sec-Fetch-Mode: navigate",
		"Sec-Fetch-Site: none",
		"Sec-Fetch-User: ?1"
	];
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	$curlproc = curl_init();
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$data = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		// read the message before releasing the handle, then release it on
		// the failure path too (it used to be closed only on success)
		$error = curl_error($curlproc);
		curl_close($curlproc);
		
		throw new Exception($error);
	}
	
	curl_close($curlproc);
	return $data;
}
|
|
|
|
/**
 * Scrape one page of Brave web search results.
 *
 * @param array $get Request parameters. When $get["npt"] is truthy it is
 *                   treated as a next-page token and the stored query is
 *                   resumed; otherwise "s", "nsfw", "country", "older",
 *                   "newer" and "spellcheck" are read. The date filters are
 *                   compared against false, so an unset filter is expected
 *                   to arrive as false.
 * @return array Result set with the keys "status", "spelling", "npt",
 *               "answer", "web", "image", "video", "news" and "related".
 * @throws Exception when the search term is empty or longer than 2048 bytes,
 *                   when the page cannot be fetched, or when no script on the
 *                   page contains the "const data" payload.
 */
public function web($get){
	
	if($get["npt"]){
		
		// resume a previous search from the stored token
		$q = json_decode($this->nextpage->get($get["npt"], "web"), true);
		
		$search = $q["q"];
		$q["spellcheck"] = "0";
		
		// nsfw and country are sent as cookies, so take them out of the query
		$nsfw = $q["nsfw"];
		unset($q["nsfw"]);
		
		$country = $q["country"];
		unset($q["country"]);
		
	}else{
		
		// fresh search: read the request parameters
		$search = $get["s"];
		
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		if(strlen($search) > 2048){
			
			throw new Exception("Search query is too long!");
		}
		
		$nsfw = $get["nsfw"];
		$country = $get["country"];
		$older = $get["older"];
		$newer = $get["newer"];
		$spellcheck = $get["spellcheck"];
		
		$q = [
			"q" => $search
		];
		
		/*
			Pass older/newer filters to brave
			The "tf" parameter needs both ends of the range, so a missing end
			is filled in with today (older) or 1970-01-02 (newer).
		*/
		if($newer !== false){
			
			$newer = date("Y-m-d", $newer);
			
			if($older === false){
				
				$older = date("Y-m-d", time());
			}
		}
		
		// $older is already a string here if it was filled in just above
		if(
			is_string($older) === false &&
			$older !== false
		){
			
			$older = date("Y-m-d", $older);
			
			if($newer === false){
				
				$newer = "1970-01-02";
			}
		}
		
		if($older !== false){
			
			$q["tf"] = "{$newer}to{$older}";
		}
		
		// spellcheck
		// NOTE(review): "no" sends spellcheck=1 while pagination sends "0";
		// kept exactly as found, confirm against Brave's parameter meaning
		if($spellcheck == "no"){
			
			$q["spellcheck"] = "1";
		}
	}
	
	try{
		$html =
			$this->get(
				"https://search.brave.com/search",
				$q,
				$nsfw,
				$country
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch search page");
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	// load html
	$this->fuckhtml->load($html);
	
	/*
		Get next page "token"
	*/
	$pagination =
		$this->fuckhtml
		->getElementById(
			"pagination",
			"div"
		);
	
	if($pagination){
		
		$this->fuckhtml->load($pagination);
		
		$buttons =
			$this->fuckhtml
			->getElementsByClassName("btn", "a");
		
		if(count($buttons) !== 0){
			
			// the "Next" link, when present, is the last button
			$last = $buttons[count($buttons) - 1];
			
			if(
				strtolower(
					$this->fuckhtml
					->getTextContent(
						$last
					)
				) == "next"
			){
				
				$matched =
					preg_match(
						'/offset=([0-9]+)/',
						$this->fuckhtml->getTextContent($last["attributes"]["href"]),
						$offset
					);
				
				// only hand out a token when the link really carries an
				// offset; otherwise the token would point back at page 0
				if($matched === 1){
					
					$q["offset"] = (int)$offset[1];
					$q["nsfw"] = $nsfw;
					$q["country"] = $country;
					
					$out["npt"] =
						$this->nextpage->store(
							json_encode($q),
							"web"
						);
				}
			}
		}
	}
	
	// back to the full document: the pagination lookup replaced it
	$this->fuckhtml->load($html);
	
	$scripts =
		$this->fuckhtml
		->getElementsByTagName(
			"script"
		);
	
	// find the script that assigns the "data" payload
	$grep = [];
	foreach($scripts as $script){
		
		preg_match(
			'/const data ?= ?(\[{.*}]);/',
			$script["innerHTML"],
			$grep
		);
		
		if(isset($grep[1])){
			
			break;
		}
	}
	
	if(!isset($grep[1])){
		
		throw new Exception("Could not get data JS");
	}
	
	$data =
		$this->fuckhtml
		->parseJsObject(
			$grep[1]
		);
	unset($grep);
	
	// a payload without this path simply means "no results"
	$data = $data[1]["data"]["body"]["response"] ?? null;
	
	/*
		Get web results
	*/
	if(!isset($data["web"]["results"])){
		
		return $out;
	}
	
	foreach($data["web"]["results"] as $result){
		
		if(isset($result["thumbnail"])){
			
			// literals in the payload are compared as strings ("false")
			$thumb = [
				"ratio" => $result["thumbnail"]["logo"] == "false" ? "16:9" : "1:1",
				"url" => $result["thumbnail"]["original"]
			];
		}else{
			
			$thumb = [
				"ratio" => null,
				"url" => null
			];
		}
		
		$sublink = [];
		if(isset($result["cluster"])){
			
			foreach($result["cluster"] as $cluster){
				
				$sublink[] = [
					"title" => $this->titledots($cluster["title"]),
					"description" =>
						$this->titledots(
							$this->fuckhtml
							->getTextContent(
								$cluster["description"]
							)
						),
					"url" => $cluster["url"],
					"date" => null
				];
			}
		}
		
		// parse table elements
		$table = [];
		
		// product: fall back to creative_work when there is no product entry
		$ref = $result["product"] ?? $result["creative_work"] ?? null;
		
		if($ref !== null){
			
			if(isset($ref["offers"])){
				
				// every offer overwrites "Price", so the last one wins
				foreach($ref["offers"] as $offer){
					
					$price = null;
					
					if(isset($offer["price"])){
						
						$price = (float)$offer["price"] == 0 ? "Free" : $offer["price"];
					}
					
					if(
						$price !== "Free" &&
						isset($offer["priceCurrency"])
					){
						
						$price .= " " . $offer["priceCurrency"];
					}
					
					if($price !== null){
						
						$table["Price"] = trim($price);
					}
				}
			}
			
			if(isset($ref["rating"])){
				
				$rating = null;
				if(isset($ref["rating"]["ratingValue"])){
					
					$rating = $ref["rating"]["ratingValue"];
					
					if(isset($ref["rating"]["bestRating"])){
						
						$rating .= "/" . $ref["rating"]["bestRating"];
					}
				}
				
				if(isset($ref["rating"]["reviewCount"])){
					
					$hits = number_format($ref["rating"]["reviewCount"]) . " hits";
					
					// "4/5 (1,234 hits)" with a score, "1,234 hits" without
					$rating = $rating === null ? $hits : $rating . " (" . $hits . ")";
				}
				
				if($rating !== null){
					
					$table["Rating"] = $rating;
				}
			}
		}
		
		// video: copy every string-valued field into the table
		if(isset($result["video"])){
			
			foreach($result["video"] as $key => $value){
				
				if(is_string($value) === false){
					
					continue;
				}
				
				$table[ucfirst($key)] = $value;
			}
		}
		
		// The "qa" (StackOverflow answer) entry of a result is deliberately
		// not turned into an answer: it also returns a lot of garbage.
		
		$out["web"][] = [
			"title" =>
				$this->titledots(
					$result["title"]
				),
			"description" =>
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$result["description"]
					)
				),
			"url" => $result["url"],
			"date" => isset($result["age"]) ? strtotime($result["age"]) : null,
			"type" => "web",
			"thumb" => $thumb,
			"sublink" => $sublink,
			"table" => $table
		];
	}
	
	/*
		Get spelling autocorrect
	*/
	if(
		isset($data["query"]["bo_altered_diff"][0][0]) &&
		$data["query"]["bo_altered_diff"][0][0] == "true"
	){
		
		$out["spelling"] = [
			"type" => "including",
			"using" => $data["query"]["bo_altered_diff"][0][1],
			// $search is the term actually sent; $get["s"] is not the
			// source of the term when the request came from a token
			"correction" => $search
		];
	}
	
	/*
		Get wikipedia heads
	*/
	if(isset($data["infobox"]["results"][0])){
		
		foreach($data["infobox"]["results"] as $info){
			
			if($info["subtype"] == "code"){
				
				$description =
					$this->stackoverflow_parse($info["data"]["answer"]["text"]);
				
				if(isset($info["data"]["answer"]["author"])){
					
					$description[] = [
						"type" => "quote",
						"value" => "Answer from " . $info["data"]["answer"]["author"]
					];
				}
			}else{
				
				$description = [];
				
				if(
					isset($info["description"]) &&
					$info["description"] != ""
				){
					$description[] = [
						"type" => "quote",
						"value" => $info["description"]
					];
				}
				
				if(
					isset($info["long_desc"]) &&
					$info["long_desc"] != ""
				){
					$description[] = [
						"type" => "text",
						"value" => $this->titledots($info["long_desc"])
					];
				}
			}
			
			$table = [];
			if(isset($info["attributes"])){
				
				foreach($info["attributes"] as $row){
					
					if($row[1] == "null"){
						
						// skip leading "null" rows, stop at the first one
						// that follows real data
						if(count($table) !== 0){
							
							break;
						}
						
						continue;
					}
					
					$table[
						$this->fuckhtml->getTextContent($row[0])
					] =
						$this->fuckhtml->getTextContent($row[1]);
				}
			}
			
			$sublink = [];
			if(isset($info["profiles"])){
				
				foreach($info["profiles"] as $row){
					
					$name = $this->fuckhtml->getTextContent($row["name"]);
					
					if(strtolower($name) == "steampowered"){
						
						$name = "Steam";
					}
					
					$sublink[
						$this->fuckhtml->getTextContent($name)
					] =
						$this->fuckhtml->getTextContent($row["url"]);
				}
			}
			
			$out["answer"][] = [
				"title" => $this->fuckhtml->getTextContent($info["title"]),
				"description" => $description,
				"url" => $info["url"],
				"thumb" => isset($info["images"][0]["original"]) ? $info["images"][0]["original"] : null,
				"table" => $table,
				"sublink" => $sublink
			];
			
			break; // only iterate once, we get garbage most of the time
		}
	}
	
	/*
		Get videos
	*/
	if(isset($data["videos"]["results"])){
		
		foreach($data["videos"]["results"] as $video){
			
			$out["video"][] = [
				"title" => $this->titledots($video["title"]),
				"description" => $this->titledots($video["description"]),
				"date" => isset($video["age"]) ? strtotime($video["age"]) : null,
				"duration" => isset($video["video"]["duration"]) ? $this->hms2int($video["video"]["duration"]) : null,
				"views" => null,
				"thumb" =>
					isset($video["thumbnail"]["src"]) ?
					[
						"ratio" => "16:9",
						"url" => $this->unshiturl($video["thumbnail"]["src"])
					] :
					[
						"ratio" => null,
						"url" => null
					],
				"url" => $video["url"]
			];
		}
	}
	
	/*
		Get news
	*/
	if(isset($data["news"]["results"])){
		
		foreach($data["news"]["results"] as $news){
			
			$out["news"][] = [
				"title" => $this->titledots($news["title"]),
				"description" => $this->titledots($news["description"]),
				"date" => isset($news["age"]) ? strtotime($news["age"]) : null,
				// read the thumbnail from the news item itself; this used to
				// read $video, the leftover variable of the videos loop
				// NOTE(review): assumes news items expose thumbnail.src like
				// video items do, confirm against a live payload
				"thumb" =>
					isset($news["thumbnail"]["src"]) ?
					[
						"ratio" => "16:9",
						"url" => $this->unshiturl($news["thumbnail"]["src"])
					] :
					[
						"ratio" => null,
						"url" => null
					],
				"url" => $news["url"]
			];
		}
	}
	
	/*
		Get discussions
	*/
	$disc_out = [];
	
	if(isset($data["discussions"]["results"])){
		
		foreach($data["discussions"]["results"] as $disc){
			
			$table = [];
			
			if(isset($disc["data"]["num_votes"])){
				
				$table["Votes"] = (int)$disc["data"]["num_votes"];
			}
			
			if(isset($disc["data"]["num_answers"])){
				
				$table["Comments"] = (int)$disc["data"]["num_answers"];
			}
			
			$disc_out[] = [
				"title" =>
					$this->titledots(
						$disc["title"]
					),
				"description" =>
					$this->limitstrlen(
						$this->titledots(
							$this->fuckhtml
							->getTextContent(
								$disc["description"]
							)
						)
					),
				"url" => $disc["url"],
				"date" => isset($disc["age"]) ? strtotime($disc["age"]) : null,
				"type" => "web",
				"thumb" => [
					"ratio" => null,
					"url" => null
				],
				"sublink" => [],
				"table" => $table
			];
		}
	}
	
	// append discussions at position 2
	array_splice($out["web"], 1, 0, $disc_out);
	
	return $out;
}
|
|
|
|
/**
 * Scrape Brave news results for a search term.
 *
 * @param array $get Request parameters; "s" (search term), "nsfw" and
 *                   "country" are read.
 * @return array ["status" => "ok", "npt" => null, "news" => list of articles],
 *               each article carrying "title", "author", "description",
 *               "date", "thumb" and "url".
 * @throws Exception when the term is empty or longer than 2048 bytes, or
 *                   when the page cannot be fetched.
 */
public function news($get){
	
	$search = $get["s"];
	
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$nsfw = $get["nsfw"];
	$country = $get["country"];
	
	if(strlen($search) > 2048){
		
		throw new Exception("Search query is too long!");
	}
	
	try{
		$html =
			$this->get(
				"https://search.brave.com/news",
				["q" => $search],
				$nsfw,
				$country
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch search page");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];
	
	// every article lives in its own snippet container
	$this->fuckhtml->load($html);
	
	$articles =
		$this->fuckhtml
		->getElementsByClassName(
			"snippet inline gap-standard",
			"div"
		);
	
	foreach($articles as $article){
		
		// scope all following lookups to this article
		$this->fuckhtml->load($article);
		$nodes =
			$this->fuckhtml
			->getElementsByTagName("*");
		
		// title
		$title_node =
			$this->fuckhtml
			->getElementsByClassName("snippet-title", $nodes)[0];
		
		$title =
			$this->fuckhtml
			->getTextContent($title_node["innerHTML"]);
		
		// description
		$desc_node =
			$this->fuckhtml
			->getElementsByClassName("snippet-description", $nodes)[0];
		
		$description =
			$this->titledots(
				$this->fuckhtml
				->getTextContent($desc_node["innerHTML"])
			);
		
		// date: second "•"-separated field of the snippet-url text, if any
		$url_node =
			$this->fuckhtml
			->getElementsByClassName("snippet-url", $nodes)[0];
		
		$fields =
			explode(
				"•",
				$this->fuckhtml->getTextContent($url_node)
			);
		
		$date = null;
		if(
			count($fields) !== 1 &&
			trim($fields[1]) != ""
		){
			
			$date = strtotime($fields[1]);
		}
		
		// article link
		$header_node =
			$this->fuckhtml
			->getElementsByClassName("result-header", $nodes)[0];
		
		$url =
			$this->fuckhtml->getTextContent(
				$this->unshiturl(
					$header_node["attributes"]["href"]
				)
			);
		
		// thumbnail: only taken when the article has exactly two images,
		// in which case the second one is used
		$thumb = [
			"url" => null,
			"ratio" => null
		];
		
		$images =
			$this->fuckhtml
			->getElementsByTagName("img");
		
		if(
			count($images) === 2 &&
			trim($images[1]["attributes"]["src"]) != ""
		){
			
			$thumb = [
				"url" =>
					$this->fuckhtml->getTextContent(
						$this->unshiturl(
							$images[1]["attributes"]["src"]
						)
					),
				"ratio" => "16:9"
			];
		}
		
		$out["news"][] = [
			"title" => $title,
			"author" => null,
			"description" => $description,
			"date" => $date,
			"thumb" => $thumb,
			"url" => $url
		];
	}
	
	return $out;
}
|
|
|
|
/**
 * Scrape Brave image results for a search term.
 *
 * @param array $get Request parameters; "s" (search term), "country" and
 *                   "nsfw" are read.
 * @return array ["status" => "ok", "npt" => null, "image" => list of images].
 *               Each image has a "title", a "url" and two "source" entries
 *               (the full image, then the thumbnail) with unknown dimensions.
 * @throws Exception when the term is empty or longer than 2048 bytes (the
 *                   same checks web() and news() perform), when the page
 *                   cannot be fetched, or when the "const data" payload is
 *                   missing from the page.
 */
public function image($get){
	
	$search = $get["s"];
	
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	if(strlen($search) > 2048){
		
		throw new Exception("Search query is too long!");
	}
	
	$country = $get["country"];
	$nsfw = $get["nsfw"];
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	try{
		$html =
			$this->get(
				"https://search.brave.com/images",
				[
					"q" => $search
				],
				$nsfw,
				$country
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch search page");
	}
	
	preg_match(
		'/const data = (\[{.*}\]);/',
		$html,
		$json
	);
	
	if(!isset($json[1])){
		
		throw new Exception("Failed to get data object");
	}
	
	$json =
		$this->fuckhtml
		->parseJsObject(
			$json[1]
		);
	
	// a payload without a result list means "nothing found", not an error
	$results = $json[1]["data"]["body"]["response"]["results"] ?? null;
	
	if(!is_array($results)){
		
		return $out;
	}
	
	foreach($results as $result){
		
		$out["image"][] = [
			"title" => $result["title"],
			"source" => [
				[
					"url" => $result["properties"]["url"],
					"width" => null,
					"height" => null
				],
				[
					"url" => $result["thumbnail"]["src"],
					"width" => null,
					"height" => null
				]
			],
			"url" => $result["url"]
		];
	}
	
	return $out;
}
|
|
|
|
/**
 * Scrape Brave video results for a search term.
 *
 * @param array $get Request parameters; "s" (search term), "country" and
 *                   "nsfw" are read.
 * @return array Result set with "status", "npt", "video", "author",
 *               "livestream", "playlist" and "reel"; only "video" is filled.
 * @throws Exception when the term is empty or longer than 2048 bytes (the
 *                   same checks web() and news() perform), when the page
 *                   cannot be fetched, or when the "const data" payload is
 *                   missing from the page.
 */
public function video($get){
	
	$search = $get["s"];
	
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	if(strlen($search) > 2048){
		
		throw new Exception("Search query is too long!");
	}
	
	$country = $get["country"];
	$nsfw = $get["nsfw"];
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	try{
		$html =
			$this->get(
				"https://search.brave.com/videos",
				[
					"q" => $search
				],
				$nsfw,
				$country
			);
		
	}catch(Exception $error){
		
		throw new Exception("Could not fetch search page");
	}
	
	preg_match(
		'/const data = (\[{.*}\]);/',
		$html,
		$json
	);
	
	if(!isset($json[1])){
		
		throw new Exception("Failed to get data object");
	}
	
	$json =
		$this->fuckhtml
		->parseJsObject(
			$json[1]
		);
	
	// a payload without a result list means "nothing found", not an error
	$results = $json[1]["data"]["body"]["response"]["results"] ?? null;
	
	if(!is_array($results)){
		
		return $out;
	}
	
	foreach($results as $result){
		
		// absent values are compared against the string "null" throughout
		if($result["video"]["author"] != "null"){
			
			$author = [
				"name" => $result["video"]["author"]["name"] == "null" ? null : $result["video"]["author"]["name"],
				"url" => $result["video"]["author"]["url"] == "null" ? null : $result["video"]["author"]["url"],
				"avatar" => $result["video"]["author"]["img"] == "null" ? null : $result["video"]["author"]["img"]
			];
		}else{
			
			$author = [
				"name" => null,
				"url" => null,
				"avatar" => null
			];
		}
		
		if($result["thumbnail"] != "null"){
			
			$thumb = [
				"url" => $result["thumbnail"]["original"],
				"ratio" => "16:9"
			];
		}else{
			
			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}
		
		$out["video"][] = [
			"title" => $result["title"],
			"description" => $result["description"] == "null" ? null : $this->titledots($result["description"]),
			"author" => $author,
			"date" => $result["age"] == "null" ? null : strtotime($result["age"]),
			"duration" => $result["video"]["duration"] == "null" ? null : $this->hms2int($result["video"]["duration"]),
			"views" => $result["video"]["views"] == "null" ? null : (int)$result["video"]["views"],
			"thumb" => $thumb,
			"url" => $result["url"]
		];
	}
	
	return $out;
}
|
|
|
|
/**
 * Convert the HTML of a StackOverflow-style answer into a flat list of
 * typed fragments.
 *
 * Handled elements: <p> (split into text, inline_code, italic and quote
 * fragments), <img>, <pre> (code) and <ol> (numbered text lines). Every
 * other element is ignored at the top level.
 *
 * @param string $html Answer markup.
 * @return array List of fragments: ["type" => ..., "value" => ...], or
 *               ["type" => "image", "url" => ...] for images.
 */
private function stackoverflow_parse($html){
	
	// $i always equals count($answer); appendtext() advances it itself
	$i = 0;
	$answer = [];
	
	$this->fuckhtml->load($html);
	
	foreach(
		$this->fuckhtml->getElementsByTagName("*")
		as $snippet
	){
		
		switch($snippet["tagName"]){
			
			case "p":
				$this->fuckhtml->load($snippet["innerHTML"]);
				
				$codetags =
					$this->fuckhtml
					->getElementsByTagName("*");
				
				// walk the paragraph left to right: text before each child
				// tag, then the tag itself, then continue with the remainder
				$tmphtml = $snippet["innerHTML"];
				
				foreach($codetags as $tag){
					
					if(!isset($tag["outerHTML"])){
						
						continue;
					}
					
					$tmphtml =
						explode(
							$tag["outerHTML"],
							$tmphtml,
							2
						);
					
					$value = $this->fuckhtml->getTextContent($tmphtml[0], false, false);
					$this->appendtext($value, $answer, $i);
					
					switch($tag["tagName"]){
						
						case "code": $type = "inline_code"; break;
						case "em": $type = "italic"; break;
						case "blockquote": $type = "quote"; break;
						default: $type = "text";
					}
					
					$value = $this->fuckhtml->getTextContent($tag, false, true);
					
					if(trim($value) != ""){
						
						$answer[] = [
							"type" => $type,
							"value" => $value
						];
						$i++;
					}
					
					if(count($tmphtml) === 2){
						
						$tmphtml = $tmphtml[1];
					}else{
						
						// tag markup not found in the remainder: stop here
						break;
					}
				}
				
				if(is_array($tmphtml)){
					
					$tmphtml = $tmphtml[0];
				}
				
				// trailing text after the last child tag
				if(strlen($tmphtml) !== 0){
					
					$value = $this->fuckhtml->getTextContent($tmphtml, false, false);
					$this->appendtext($value, $answer, $i);
				}
				break;
			
			case "img":
				// read the src from the <img> element itself; this used to
				// read $tag, which is unset or left over from an earlier <p>
				if(isset($snippet["attributes"]["src"])){
					
					$answer[] = [
						"type" => "image",
						"url" =>
							$this->fuckhtml
							->getTextContent(
								$snippet["attributes"]["src"]
							)
					];
					$i++;
				}
				break;
			
			case "pre":
				
				// trim the text right before a code block; guarded because
				// a <pre> can be the very first fragment
				if(
					$i !== 0 &&
					in_array($answer[$i - 1]["type"], ["text", "italic"], true)
				){
					
					$answer[$i - 1]["value"] = rtrim($answer[$i - 1]["value"]);
				}
				
				$answer[] =
					[
						"type" => "code",
						"value" =>
							rtrim(
								$this->fuckhtml
								->getTextContent(
									$snippet,
									true,
									false
								)
							)
					];
				$i++;
				
				break;
			
			case "ol":
				$o = 0;
				
				$this->fuckhtml->load($snippet);
				$li =
					$this->fuckhtml
					->getElementsByTagName("li");
				
				// emit "1. item", "2. item", ... as text
				foreach($li as $elem){
					$o++;
					
					$this->appendtext(
						$o . ". " .
						$this->fuckhtml
						->getTextContent(
							$elem
						),
						$answer,
						$i
					);
				}
				break;
		}
	}
	
	// no trailing whitespace on a closing text fragment
	if(
		$i !== 0 &&
		$answer[$i - 1]["type"] == "text"
	){
		
		$answer[$i - 1]["value"] = rtrim($answer[$i - 1]["value"]);
	}
	
	return $answer;
}
|
|
|
|
/**
 * Convert a "H:M:S", "M:S" or "S" duration string into seconds.
 *
 * The string is split on ":" into at most three fields, so anything after a
 * third colon stays attached to the seconds field and is subject to the
 * usual integer cast.
 *
 * @param string $time Duration text.
 * @return int Total number of seconds.
 */
private function hms2int($time){
	
	$seconds = 0;
	
	// each field to the left is worth 60 times the one after it
	foreach(explode(":", $time, 3) as $field){
		
		$seconds = ($seconds * 60) + (int)$field;
	}
	
	return $seconds;
}
|
|
|
|
/**
 * Add a piece of plain text to a fragment list.
 *
 * Blank payloads are ignored. When the last fragment is already of type
 * "text" the payload is joined onto it after a blank line; otherwise a new
 * "text" fragment is appended and $index is advanced.
 *
 * @param string $payload Text to add.
 * @param array  $text    Fragment list, modified in place.
 * @param int    $index   Number of fragments in $text, modified in place.
 */
private function appendtext($payload, &$text, &$index){
	
	if(trim($payload) === ""){
		
		return;
	}
	
	// NOTE(review): pattern and replacement look identical as received;
	// possibly a non-breaking space was lost in transit, confirm upstream
	$clean = preg_replace('/ $/', " ", $payload);
	
	$last = $index - 1;
	
	if(
		$index === 0 ||
		$text[$last]["type"] != "text"
	){
		
		$text[] = [
			"type" => "text",
			"value" => $clean
		];
		$index++;
		
		return;
	}
	
	$text[$last]["value"] .= "\n\n" . $clean;
}
|
|
|
|
/**
 * Turn a collection of "Label: value" elements into rows of $data["table"].
 *
 * An element whose text has no colon is stored under the label "Rating".
 *
 * @param array $html_collection Elements to read.
 * @param array $data            Receives the rows in $data["table"]; modified in place.
 */
private function tablesublink($html_collection, &$data){
	
	foreach($html_collection as $node){
		
		// remove <style> blocks from the markup before the text is extracted
		$node["innerHTML"] = preg_replace(
			'/<style>[\S\s]*<\/style>/i',
			"",
			$node["innerHTML"]
		);
		
		// split on the first colon only, the value may contain more of them
		$pair =
			explode(
				":",
				$this->fuckhtml->getTextContent($node),
				2
			);
		
		if(count($pair) === 1){
			
			$label = "Rating";
			$value = $pair[0];
		}else{
			
			$label = $pair[0];
			$value = $pair[1];
		}
		
		$data["table"][trim($label)] = trim($value);
	}
}
|
|
|
|
/**
 * Extract an image URL from the CSS background-image of a <div>.
 *
 * Looks up the first <div> with the given class in the currently loaded
 * document and reads the url(...) of its inline style.
 *
 * @param string $thumb Class name of the <div> carrying the style.
 * @return array ["url" => string, "ratio" => "16:9"], or both null when the
 *               element, its style attribute or the background-image is
 *               missing, or when the image is hosted on cdn.search.brave.com.
 */
private function getimagelinkfromstyle($thumb){
	
	$none = [
		"url" => null,
		"ratio" => null
	];
	
	$divs =
		$this->fuckhtml
		->getElementsByClassName(
			$thumb,
			"div"
		);
	
	if(count($divs) === 0){
		
		return $none;
	}
	
	// an element without an inline style has no image to offer
	if(!isset($divs[0]["attributes"]["style"])){
		
		return $none;
	}
	
	$found =
		preg_match(
			'/background-image: ?url\((\'[^\']+\'|"[^"]+"|[^\)]+)\)/',
			$divs[0]["attributes"]["style"],
			$match
		);
	
	// the style may exist without a background-image declaration
	if($found !== 1){
		
		return $none;
	}
	
	// strip the optional quotes around the url() argument
	$url = $this->fuckhtml->getTextContent($this->unshiturl(trim($match[1], '"\' ')));
	
	if(parse_url($url, PHP_URL_HOST) == "cdn.search.brave.com"){
		
		return $none;
	}
	
	return [
		"url" => $url,
		"ratio" => "16:9"
	];
}
|
|
|
|
/**
 * Keep only the first line of the text after wrapping it at 300 characters.
 *
 * @param string $text Text to shorten.
 * @return string Everything before the first line break of the wrapped text.
 */
private function limitstrlen($text){
	
	$wrapped = wordwrap($text, 300, "\n");
	$cut = strpos($wrapped, "\n");
	
	if($cut === false){
		
		// single line, nothing to cut
		return $wrapped;
	}
	
	return substr($wrapped, 0, $cut);
}
|
|
|
|
/**
 * Collapse every run of whitespace into a single space.
 *
 * @param string $text Text to normalise.
 * @return string Text with each whitespace run replaced by " ".
 */
private function limitwhitespace($text){
	
	$collapsed = preg_replace('/[\s]+/', " ", $text);
	
	return $collapsed;
}
|
|
|
|
/**
 * Trim a title and drop a trailing ellipsis.
 *
 * Both "..." and "…" are recognised; each is three bytes long, so the same
 * three-byte cut removes either.
 *
 * @param string $title Raw title.
 * @return string Trimmed title without the trailing ellipsis.
 */
private function titledots($title){
	
	$tail = substr($title, -3);
	
	$truncated = in_array($tail, ["...", "…"], true);
	
	return trim($truncated ? substr($title, 0, -3) : $title);
}
|
|
|
|
/**
 * Recover the original image URL from a Brave image-proxy URL.
 *
 * The proxy URL carries the original address base64-encoded and split into
 * path segments, e.g.
 * https://imgs.search.brave.com/XFnbR8Sl7ge82MBDEH7ju0UHImRovMVmQ2qnDvgNTuA/rs:fit:844:225:1/g:ce/aHR0cHM6Ly90c2U0/Lm1tLmJpbmcubmV0/L3RoP2lkPU9JUC54/UWotQXU5N2ozVndT/RDJnNG9BNVhnSGFF/SyZwaWQ9QXBp.jpeg
 * "aHR0" is the base64 form of "htt", which is how the payload is located.
 *
 * @param string $url Proxy URL, or any other URL.
 * @return string The decoded URL, or $url unchanged when it does not contain
 *                exactly one "aHR0" marker.
 */
private function unshiturl($url){
	
	$tmp = explode("aHR0", $url);
	
	if(count($tmp) !== 2){
		
		// nothing to do
		return $url;
	}
	
	// everything up to the file extension is payload
	$payload = explode(".", $tmp[1])[0];
	
	// Undo the URL-safe alphabet: "/" only separates path segments, "_"
	// stands for "/" and "-" stands for "+". The "-" mapping was missing,
	// and base64_decode() silently skips unknown characters, so URLs whose
	// encoding contained "-" came out corrupted.
	$payload = str_replace(["/", "_", "-"], ["", "/", "+"], $payload);
	
	return base64_decode("aHR0" . $payload);
}
|
|
}
|