forked from lolcat/4get
v8
This commit is contained in:
@@ -3,78 +3,103 @@
|
||||
class marginalia{
|
||||
/**
 * Loads the two library files this scraper relies on and stores one
 * instance of each class on the object:
 *  - fuckhtml: used by this class to load markup and query elements
 *  - backend:  constructed with the scraper name "marginalia"
 *
 * include_once (instead of include) keeps a second construction of this
 * class, or a library file already loaded elsewhere in the same request,
 * from re-running the class declarations.
 */
public function __construct(){
	
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
	
	include_once "lib/backend.php";
	$this->backend = new backend("marginalia");
}
|
||||
|
||||
/**
 * Builds the list of filters offered for this scraper.
 *
 * When no API key is configured (config::MARGINALIA_API_KEY === null) the
 * "adtech", "recent" and "intitle" yes/no toggles are listed first; with a
 * key they are omitted. The "format", "file", "javascript", "trackers",
 * "cookies" and "affiliate" filters are always returned.
 *
 * @param mixed $page Not consulted by this method.
 * @return array Filter name => ["display" => label, "option" => [value => label]].
 */
public function getfilters($page){
	
	if(config::MARGINALIA_API_KEY === null){
		
		// Toggles only offered when no API key is set
		$base = [
			"adtech" => [
				"display" => "Reduce adtech",
				"option" => [
					"no" => "No",
					"yes" => "Yes"
				]
			],
			"recent" => [
				"display" => "Recent results",
				"option" => [
					"no" => "No",
					"yes" => "Yes"
				]
			],
			"intitle" => [
				"display" => "Search in title",
				"option" => [
					"no" => "No",
					"yes" => "Yes"
				]
			]
		];
	}else{
		
		$base = [];
	}
	
	// Keys are strings, so array_merge keeps $base entries first and
	// appends the shared filters after them.
	return array_merge(
		$base,
		[
			"format" => [
				"display" => "Format",
				"option" => [
					"any" => "Any format",
					"html5" => "html5",
					"xhtml" => "xhtml",
					"html123" => "html123"
				]
			],
			"file" => [
				"display" => "Filetype",
				"option" => [
					"any" => "Any filetype",
					"nomedia" => "Deny media",
					"media" => "Contains media",
					"audio" => "Contains audio",
					"video" => "Contains video",
					"archive" => "Contains archive",
					"document" => "Contains document"
				]
			],
			"javascript" => [
				"display" => "Javascript",
				"option" => [
					"any" => "Allow JS",
					"deny" => "Deny JS",
					"require" => "Require JS"
				]
			],
			"trackers" => [
				"display" => "Trackers",
				"option" => [
					"any" => "Allow trackers",
					"deny" => "Deny trackers",
					"require" => "Require trackers"
				]
			],
			"cookies" => [
				"display" => "Cookies",
				"option" => [
					"any" => "Allow cookies",
					"deny" => "Deny cookies",
					"require" => "Require cookies"
				]
			],
			"affiliate" => [
				"display" => "Affiliate links in body",
				"option" => [
					"any" => "Allow affiliate links",
					"deny" => "Deny affiliate links",
					"require" => "Require affiliate links"
				]
			]
		]
	);
}
|
||||
|
||||
private function get($proxy, $url, $get = []){
|
||||
@@ -132,7 +157,6 @@ class marginalia{
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$profile = $get["profile"];
|
||||
$format = $get["format"];
|
||||
$file = $get["file"];
|
||||
|
||||
@@ -180,38 +204,6 @@ class marginalia{
|
||||
|
||||
$search = implode(" ", $search);
|
||||
|
||||
$params = [
|
||||
"count" => 20
|
||||
];
|
||||
|
||||
if($profile == "modern"){
|
||||
|
||||
$params["index"] = 1;
|
||||
}
|
||||
|
||||
try{
|
||||
$json =
|
||||
$this->get(
|
||||
$this->backend->get_ip(), // no nextpage
|
||||
"https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
|
||||
$params
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to get JSON");
|
||||
}
|
||||
|
||||
if($json == "Slow down"){
|
||||
|
||||
throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
|
||||
}
|
||||
|
||||
$json = json_decode($json, true);
|
||||
/*
|
||||
$handle = fopen("scraper/marginalia.json", "r");
|
||||
$json = json_decode(fread($handle, filesize("scraper/marginalia.json")), true);
|
||||
fclose($handle);*/
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"spelling" => [
|
||||
@@ -228,19 +220,169 @@ class marginalia{
|
||||
"related" => []
|
||||
];
|
||||
|
||||
// With an API key configured, query the JSON API and return right away;
// the statements after this branch only run when no key is set.
if(config::MARGINALIA_API_KEY !== null){
	
	try{
		$json =
			$this->get(
				$this->backend->get_ip(), // no nextpage
				"https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
				[
					"count" => 20
				]
			);
	}catch(Exception $error){
		
		throw new Exception("Failed to get JSON");
	}
	
	// The raw body is compared before decoding: this literal reply is
	// reported as a rate-limit error instead of being parsed as JSON.
	if($json == "Slow down"){
		
		throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
	}
	
	$json = json_decode($json, true);
	
	foreach($json["results"] as $result){
		
		$out["web"][] = [
			"title" => $result["title"],
			// newlines in the description are flattened to spaces
			"description" => str_replace("\n", " ", $result["description"]),
			"url" => $result["url"],
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
|
||||
|
||||
// no more cloudflare!! Parse html by default
|
||||
$params = [
|
||||
"query" => $search
|
||||
];
|
||||
|
||||
// Translate each yes/no filter that is switched on into the query
// parameter sent with the search request.
foreach(["adtech", "recent", "intitle"] as $v){
	
	if($get[$v] == "yes"){
		
		switch($v){
			
			case "adtech": $params["adtech"] = "reduce"; break;
			case "recent": $params["recent"] = "recent"; break;
			// was a second `case "adtech"`, which made this arm unreachable
			// and left the "intitle" filter without any effect
			case "intitle": $params["searchTitle"] = "title"; break;
		}
	}
}
|
||||
|
||||
try{
|
||||
$html =
|
||||
$this->get(
|
||||
$this->backend->get_ip(),
|
||||
"https://search.marginalia.nu/search",
|
||||
$params
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to get HTML");
|
||||
}
|
||||
|
||||
$this->fuckhtml->load($html);
|
||||
|
||||
$sections =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"card search-result",
|
||||
"section"
|
||||
);
|
||||
|
||||
foreach($sections as $section){
|
||||
|
||||
$this->fuckhtml->load($section);
|
||||
|
||||
$title =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"title",
|
||||
"a"
|
||||
)[0];
|
||||
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"description",
|
||||
"p"
|
||||
);
|
||||
|
||||
if(count($description) !== 0){
|
||||
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$description[0]
|
||||
);
|
||||
}else{
|
||||
|
||||
$description = null;
|
||||
}
|
||||
|
||||
$sublinks = [];
|
||||
$sublink_html =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName("additional-results");
|
||||
|
||||
if(count($sublink_html) !== 0){
|
||||
|
||||
$this->fuckhtml->load($sublink_html[0]);
|
||||
|
||||
$links =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName("a");
|
||||
|
||||
foreach($links as $link){
|
||||
|
||||
$sublinks[] = [
|
||||
"title" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$link
|
||||
),
|
||||
"date" => null,
|
||||
"description" => null,
|
||||
"url" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$link["attributes"]["href"]
|
||||
)
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// Append one result entry built from the parsed card: $title is the
// anchor element (its text is the title, its href the URL), $description
// is the extracted text or null, $sublinks the additional-result links.
// The $result[...] entries and the empty "sublink" that used to sit here
// were removed: $result is not assigned on this path and each of those
// keys was overwritten by the entries below anyway.
$out["web"][] = [
	"title" =>
		$this->fuckhtml
		->getTextContent(
			$title
		),
	"description" => $description,
	"url" =>
		$this->fuckhtml
		->getTextContent(
			$title["attributes"]["href"]
		),
	"date" => null,
	"type" => "web",
	"thumb" => [
		"url" => null,
		"ratio" => null
	],
	"sublink" => $sublinks,
	"table" => []
];
|
||||
}
|
||||
|
Reference in New Issue
Block a user