Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

13 changed files with 2276 additions and 2761 deletions

21
api.txt
View File

@ -1,17 +1,10 @@
44
4444444 44
44444444 44444 444
44444444 444444 444444444
44444 44444444 444444444
444444444 4444444
4444444444 444444
4444444444444
444444444444444444
444444444444444
44444444
4444
44
__ __ __
/ // / ____ ____ / /_
/ // /_/ __ `/ _ \/ __/
/__ __/ /_/ / __/ /_
/_/ \__, /\___/\__/
/____/
+ Welcome to the 4get API documentation +
+ Terms of use

View File

@ -119,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0";
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
@ -129,7 +129,6 @@ class config{
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
@ -158,9 +157,6 @@ class config{
// Scraper-specific parameters
//
// GOOGLE CSE
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA
// Use "null" to default out to HTML scraping OR specify a string to
// use the API (Eg: "public"). API has less filters.

View File

@ -75,7 +75,6 @@ class backend{
break;
case "socks5_hostname":
case "socks5h":
case "socks5a":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);

View File

@ -838,10 +838,10 @@ class frontend{
}
$payload .=
'<a href="https://webcache.googleusercontent.com/search?q=cache:' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://google.com" alt="go">Google cache</a>' .
'<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' .
'<a href="https://archive.ph/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' .
'<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' .
'<a href="https://arquivo.pt/wayback/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://arquivo.pt" alt="ar">Arquivo.pt</a>' .
'<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' .
'<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' .
'</div>';
@ -939,7 +939,6 @@ class frontend{
"brave" => "Brave",
"yandex" => "Yandex",
"google" => "Google",
"google_cse" => "Google CSE",
"startpage" => "Startpage",
"qwant" => "Qwant",
"ghostery" => "Ghostery",
@ -964,7 +963,6 @@ class frontend{
"yandex" => "Yandex",
"brave" => "Brave",
"google" => "Google",
"google_cse" => "Google CSE",
"startpage" => "Startpage",
"qwant" => "Qwant",
"yep" => "Yep",

View File

@ -381,8 +381,6 @@ class fuckhtml{
$json_out = null;
$last_char = null;
$keyword_check = null;
for($i=0; $i<strlen($json); $i++){
switch($json[$i]){
@ -398,7 +396,6 @@ class fuckhtml{
$bracket = false;
$is_close_bracket = true;
}else{
if($bracket === false){
@ -432,31 +429,6 @@ class fuckhtml{
$is_close_bracket === false
){
// do keyword check
$keyword_check .= $json[$i];
if(in_array($json[$i], [":", "{"])){
$keyword_check = substr($keyword_check, 0, -1);
if(
preg_match(
'/function|array|return/i',
$keyword_check
)
){
$json_out =
preg_replace(
'/[{"]*' . preg_quote($keyword_check, "/") . '$/',
"",
$json_out
);
}
$keyword_check = null;
}
// here we know we're not iterating over a quoted string
switch($json[$i]){
@ -526,85 +498,4 @@ class fuckhtml{
$string
);
}
/**
 * Extract the first balanced array/object literal embedded in a string.
 *
 * Scans $json from the left, remembering the offset of the first "[" or
 * "{" found outside of a quoted string, and returns the substring that
 * runs from that offset up to the character where every opened array
 * and object has been closed again. Brackets inside single- or
 * double-quoted strings are ignored.
 *
 * @param string $json Text that contains a JSON / JavaScript literal
 *                     somewhere inside of it.
 * @return string|null The extracted literal, or null when no balanced
 *                     literal could be found.
 */
public function extract_json($json){
	
	$len = strlen($json);
	$array_level = 0;
	$object_level = 0;
	$in_quote = null;
	$start = null;
	
	for($i=0; $i<$len; $i++){
		
		$char = $json[$i];
		
		switch($char){
			
			case "[":
			case "{":
				if($in_quote === null){
					
					if($char === "["){
						
						$array_level++;
					}else{
						
						$object_level++;
					}
					
					if($start === null){
						
						$start = $i;
					}
				}
				break;
			
			case "]":
			case "}":
				// Closers seen before the literal has started are stray
				// text: counting them would push a level below zero and
				// the balance check further down could never succeed.
				if(
					$in_quote === null &&
					$start !== null
				){
					
					if($char === "]"){
						
						$array_level--;
					}else{
						
						$object_level--;
					}
				}
				break;
			
			case "\"":
			case "'":
				// A quote is escaped only when it is preceded by an odd
				// number of backslashes: in "a\\" the backslash itself is
				// escaped, so the quote that follows really closes the
				// string. Counting also handles a quote at offset 0.
				$backslashes = 0;
				
				for($k = $i - 1; $k >= 0 && $json[$k] === "\\"; $k--){
					
					$backslashes++;
				}
				
				if($backslashes % 2 === 0){
					
					// found a non-escaped quote
					if($in_quote === null){
						
						// open quote
						$in_quote = $char;
					}elseif($in_quote === $char){
						
						// close quote
						$in_quote = null;
					}
				}
				break;
		}
		
		if(
			$start !== null &&
			$array_level === 0 &&
			$object_level === 0
		){
			
			// everything opened since $start has been closed again
			return substr($json, $start, $i - $start + 1);
		}
	}
	
	// no balanced literal found
	return null;
}
}

View File

@ -293,8 +293,8 @@ class brave{
/*
$handle = fopen("scraper/brave.html", "r");
$html = fread($handle, filesize("scraper/brave.html"));
fclose($handle);*/
fclose($handle);
*/
try{
$html =
@ -410,20 +410,10 @@ class brave{
throw new Exception("Could not grep JavaScript object");
}
$data =
rtrim(
preg_replace(
'/\(Array\(0\)\)\).*$/',
"",
$grep[1]
),
" ]"
) . "]";
$data =
$this->fuckhtml
->parseJsObject(
$data
$grep[1]
);
unset($grep);
@ -673,10 +663,7 @@ class brave{
$table["Address"] = $result["location"]["postal_address"]["displayAddress"];
}
if(
isset($result["location"]["rating"]) &&
$result["location"]["rating"] != "void 0"
){
if(isset($result["location"]["rating"])){
$table["Rating"] =
$result["location"]["rating"]["ratingValue"] . "/" .
@ -684,19 +671,13 @@ class brave{
number_format($result["location"]["rating"]["reviewCount"]) . " votes)";
}
if(
isset($result["location"]["contact"]["telephone"]) &&
$result["location"]["contact"]["telephone"] != "void 0"
){
if(isset($result["location"]["contact"]["telephone"])){
$table["Phone number"] =
$result["location"]["contact"]["telephone"];
}
if(
isset($result["location"]["price_range"]) &&
$result["location"]["price_range"] != "void 0"
){
if(isset($result["location"]["price_range"])){
$table["Price"] =
$result["location"]["price_range"];

File diff suppressed because it is too large Load Diff

View File

@ -136,7 +136,7 @@ class ftm{
"source" => [
[
"url" =>
"https://s3.thehackerblog.com/findthatmeme/" .
"https://findthatmeme.us-southeast-1.linodeobjects.com/" .
$thumb,
"width" => null,
"height" => null

File diff suppressed because it is too large Load Diff

View File

@ -220,7 +220,6 @@ class marginalia{
"related" => []
];
// API scraper
if(config::MARGINALIA_API_KEY !== null){
try{
@ -264,57 +263,34 @@ class marginalia{
return $out;
}
// HTML parser
$proxy = $this->backend->get_ip();
// no more cloudflare!! Parse html by default
$params = [
"query" => $search
];
if($get["npt"]){
foreach(["adtech", "recent", "intitle"] as $v){
[$params, $proxy] =
$this->backend->get(
$get["npt"],
"web"
);
try{
$html =
$this->get(
$proxy,
"https://search.marginalia.nu/search?" . $params
);
}catch(Exception $error){
if($get[$v] == "yes"){
throw new Exception("Failed to get HTML");
}
}else{
$params = [
"query" => $search
];
foreach(["adtech", "recent", "intitle"] as $v){
if($get[$v] == "yes"){
switch($v){
switch($v){
case "adtech": $params["adtech"] = "reduce"; break;
case "recent": $params["recent"] = "recent"; break;
case "adtech": $params["searchTitle"] = "title"; break;
}
case "adtech": $params["adtech"] = "reduce"; break;
case "recent": $params["recent"] = "recent"; break;
case "adtech": $params["searchTitle"] = "title"; break;
}
}
}
try{
$html =
$this->get(
$this->backend->get_ip(),
"https://search.marginalia.nu/search",
$params
);
}catch(Exception $error){
try{
$html =
$this->get(
$proxy,
"https://search.marginalia.nu/search",
$params
);
}catch(Exception $error){
throw new Exception("Failed to get HTML");
}
throw new Exception("Failed to get HTML");
}
$this->fuckhtml->load($html);
@ -411,65 +387,6 @@ class marginalia{
];
}
// get next page
$this->fuckhtml->load($html);
$pagination =
$this->fuckhtml
->getElementsByAttributeValue(
"aria-label",
"pagination",
"nav"
);
if(count($pagination) === 0){
// no pagination
return $out;
}
$this->fuckhtml->load($pagination[0]);
$pages =
$this->fuckhtml
->getElementsByClassName(
"page-link",
"a"
);
$found_current_page = false;
foreach($pages as $page){
if(
stripos(
$page["attributes"]["class"],
"active"
) !== false
){
$found_current_page = true;
continue;
}
if($found_current_page){
// we found current page index, and we iterated over
// the next page <a>
$out["npt"] =
$this->backend->store(
parse_url(
$page["attributes"]["href"],
PHP_URL_QUERY
),
"web",
$proxy
);
break;
}
}
return $out;
}
}

View File

@ -701,11 +701,9 @@ class mojeek{
if(count($thumb) === 2){
$answer["thumb"] =
urldecode(
$this->fuckhtml
->getTextContent(
$thumb[1]
)
$this->fuckhtml
->getTextContent(
$thumb[1]
);
}
}

View File

@ -133,10 +133,6 @@ $settings = [
"value" => "google",
"text" => "Google"
],
[
"value" => "google_cse",
"text" => "Google CSE"
],
[
"value" => "startpage",
"text" => "Startpage"
@ -207,10 +203,6 @@ $settings = [
"value" => "google",
"text" => "Google"
],
[
"value" => "google_cse",
"text" => "Google CSE"
],
[
"value" => "startpage",
"text" => "Startpage"

View File

@ -16,7 +16,6 @@
body{
padding:15px 4% 40px;
margin:unset;
}
h1,h2,h3,h4,h5,h6{