forked from lolcat/4get
1
0
Fork 0

google web, videos and news, various other fixes

This commit is contained in:
lolcat 2023-11-27 01:01:56 -05:00
parent 9fd993b47b
commit 2519666e1c
22 changed files with 2913 additions and 1118 deletions

View File

@ -23,6 +23,7 @@ https://4get.ca
- DuckDuckGo - DuckDuckGo
- Brave - Brave
- Yandex - Yandex
- Google
- Mojeek - Mojeek
- Marginalia - Marginalia
- wiby - wiby
@ -41,10 +42,12 @@ https://4get.ca
- DuckDuckGo - DuckDuckGo
- Brave - Brave
- Yandex - Yandex
- Google
4. News 4. News
- DuckDuckGo - DuckDuckGo
- Brave - Brave
- Google
- Mojeek - Mojeek
5. Music 5. Music
@ -61,7 +64,7 @@ https://4get.ca
- YouTube - YouTube
- SoundCloud - SoundCloud
More scrapers are coming soon. I currently want to add Google web/video/news search, HackerNews (durr orange site!!) and Qwant. A shopping and files tab is also in my todo list. More scrapers are coming soon. I currently want to add HackerNews (durr orange site!!), Qwant, Yep and other garbage. A shopping tab, a files tab and more music scrapers are also on my todo list.
# Installation # Installation
This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK. This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK.
@ -190,6 +193,41 @@ services:
Replace relevant values and start with `docker-compose up -d` Replace relevant values and start with `docker-compose up -d`
## Install on Caddy
1. Install dependencies:
`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git`
2. Clone this repository where you want to host this from:
`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get`
3. Set permission on the `icons` directory inside `4get`
`cd /var/www/4get/ && sudo chmod 777 -R icons/`
4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile`
```sh
4get.konakona.moe {
root * /var/www/4get
file_server
encode gzip
php_fastcgi unix//var/run/php/php8.2-fpm.sock {
index index.php
}
redir /{path}.php{query} 301
try_files {path} {path}.php
}
```
Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile).
5. Restart Caddy
`sudo systemctl restart caddy`
# Encryption setup # Encryption setup
I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour. I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour.

View File

@ -5,8 +5,8 @@ header("Access-Control-Allow-Origin: *");
include "data/config.php"; include "data/config.php";
$bot_requests = apcu_fetch("captcha");
$real_requests = apcu_fetch("real_requests"); $real_requests = apcu_fetch("real_requests");
$bot_requests = apcu_fetch("captcha_gen");
echo json_encode( echo json_encode(
[ [

View File

@ -68,8 +68,8 @@
+ Get the next page of results + Get the next page of results
All API responses come with an array index named "nextpage". To get All API responses come with an array index named "npt". To get the
the next page of results, you must make another API call with &npt. next page of results, you must make another API call with &npt.
Example :: Example ::

View File

@ -39,10 +39,12 @@ if(
} }
try{ try{
echo json_encode(
$scraper->web($get), echo
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES json_encode(
); $scraper->web($get),
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
}catch(Exception $e){ }catch(Exception $e){

View File

@ -5,7 +5,7 @@ class config{
// any parameters. // any parameters.
// 4get version. Please keep this updated // 4get version. Please keep this updated
const VERSION = 5; const VERSION = 6;
// Will be shown pretty much everywhere. // Will be shown pretty much everywhere.
const SERVER_NAME = "4get"; const SERVER_NAME = "4get";
@ -56,14 +56,22 @@ class config{
const INSTANCES = [ const INSTANCES = [
"https://4get.ca", "https://4get.ca",
"https://4get.zzls.xyz", "https://4get.zzls.xyz",
"https://4getus.zzls.xyz",
"https://4get.silly.computer", "https://4get.silly.computer",
"https://4g.opnxng.com", "https://4g.opnxng.com",
"https://4get.konakona.moe" "https://4get.konakona.moe",
"https://4get.lvkaszus.pl",
"https://4g.ggtyler.dev",
"https://4get.perennialte.ch",
"https://4get.sihj.net",
"https://4get.hbubli.cc",
"https://4get.plunked.party",
"https://4get.seitan-ayoub.lol"
]; ];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things. // Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0"; const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0";
// Proxy pool assignments for each scraper // Proxy pool assignments for each scraper
// false = Use server's raw IP // false = Use server's raw IP

View File

@ -7,6 +7,7 @@ class captcha{
// check if we want captcha // check if we want captcha
if(config::BOT_PROTECTION !== 1){ if(config::BOT_PROTECTION !== 1){
apcu_inc("real_requests");
if($output === true){ if($output === true){
$frontend->loadheader( $frontend->loadheader(
$get, $get,
@ -45,6 +46,8 @@ class captcha{
}else{ }else{
// the cookie is OK! dont die() and give results // the cookie is OK! dont die() and give results
apcu_inc("real_requests");
if($output === true){ if($output === true){
$frontend->loadheader( $frontend->loadheader(
$get, $get,
@ -175,6 +178,8 @@ class captcha{
apcu_inc($key, 1, $stupid, 86400); apcu_inc($key, 1, $stupid, 86400);
apcu_inc("real_requests");
setcookie( setcookie(
"pass", "pass",
$key, $key,

View File

@ -25,7 +25,7 @@ class frontend{
if($theme != "Dark"){ if($theme != "Dark"){
$replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">'; $replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . rawurlencode($theme) . '.css?v' . config::VERSION . '">';
}else{ }else{
$replacements["style"] = ""; $replacements["style"] = "";
@ -84,6 +84,8 @@ class frontend{
){ ){
// bot detected !! // bot detected !!
apcu_inc("captcha_gen");
$this->drawerror( $this->drawerror(
"Tshh, blocked!", "Tshh, blocked!",
'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.', 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
@ -889,7 +891,7 @@ class frontend{
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex", "yandex" => "Yandex",
//"google" => "Google", "google" => "Google",
"mojeek" => "Mojeek", "mojeek" => "Mojeek",
"marginalia" => "Marginalia", "marginalia" => "Marginalia",
"wiby" => "wiby" "wiby" => "wiby"
@ -921,8 +923,8 @@ class frontend{
//"fb" => "Facebook videos", //"fb" => "Facebook videos",
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex" "yandex" => "Yandex",
//"google" => "Google" "google" => "Google"
] ]
]; ];
break; break;
@ -933,7 +935,7 @@ class frontend{
"option" => [ "option" => [
"ddg" => "DuckDuckGo", "ddg" => "DuckDuckGo",
"brave" => "Brave", "brave" => "Brave",
//"google" => "Google", "google" => "Google",
"mojeek" => "Mojeek" "mojeek" => "Mojeek"
] ]
]; ];

View File

@ -15,7 +15,7 @@ class fuckhtml{
if(!isset($html["innerHTML"])){ if(!isset($html["innerHTML"])){
throw new Exception("(load) Supplied array doesn't contain a innerHTML index"); throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
} }
$html = $html["innerHTML"]; $html = $html["innerHTML"];
} }
@ -35,6 +35,11 @@ class fuckhtml{
$this->strlen = strlen($this->html); $this->strlen = strlen($this->html);
} }
// Accessor: returns the current value of $this->html without modifying it.
public function getloadedhtml(){
return $this->html;
}
public function getElementsByTagName(string $tagname){ public function getElementsByTagName(string $tagname){
$out = []; $out = [];
@ -46,7 +51,7 @@ class fuckhtml{
if($tagname == "*"){ if($tagname == "*"){
$tagname = '[^\/<>\s]+'; $tagname = '[A-Za-z0-9._-]+';
}else{ }else{
$tagname = preg_quote(strtolower($tagname)); $tagname = preg_quote(strtolower($tagname));
@ -126,7 +131,7 @@ class fuckhtml{
} }
); );
// computer the indent level for each element // compute the indent level for each element
$level = []; $level = [];
$count = count($out); $count = count($out);
@ -314,7 +319,7 @@ class fuckhtml{
if(!isset($html["innerHTML"])){ if(!isset($html["innerHTML"])){
throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index"); throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
} }
$html = $html["innerHTML"]; $html = $html["innerHTML"];
} }
@ -441,4 +446,27 @@ class fuckhtml{
return json_decode($json_out, true); return json_decode($json_out, true);
} }
/**
 * Decode JavaScript-style escape sequences found in $string.
 *
 * Two escape forms are handled, everything else is left untouched:
 *  - "\uXXXX": decoded through json_decode(). Two consecutive escapes
 *    forming a UTF-16 surrogate pair are decoded together as a single
 *    character.
 *  - "\xXX": turned into a single byte by stripcslashes(), then
 *    converted from windows-1252 to utf-8.
 *
 * @param string $string Text that may contain \uXXXX or \xXX escapes
 * @return string|null Decoded text, or null if preg_replace_callback() fails
 */
public function parseJsString($string){
	
	return
		preg_replace_callback(
			// surrogate pairs must be tried first so both halves are consumed as one match
			'/\\\u[Dd][89ABab][A-Fa-f0-9]{2}\\\u[Dd][C-Fc-f][A-Fa-f0-9]{2}|\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}/',
			function($match){
				
				// $match[0][0] is the backslash, $match[0][1] is "u" or "x"
				if($match[0][1] === "u"){
					
					$decoded = json_decode('"' . $match[0] . '"');
					
					// json_decode() returns null on an unpaired surrogate:
					// keep the escape as-is instead of silently deleting it
					return $decoded === null ? $match[0] : $decoded;
				}
				
				return
					mb_convert_encoding(
						stripcslashes($match[0]),
						"utf-8",
						"windows-1252"
					);
			},
			$string
		);
}
} }

View File

@ -24,5 +24,5 @@
User-agent: * User-agent: *
Disallow: Disallow:
host: 4get.ca Host: 4get.ca
sitemap: https://4get.ca/sitemap.xml Sitemap: https://4get.ca/sitemap

View File

@ -857,7 +857,9 @@ class brave{
// parse ratings // parse ratings
if( if(
isset($info["ratings"]) && isset($info["ratings"]) &&
$info["ratings"] != "void 0" $info["ratings"] != "void 0" &&
is_array($info["ratings"]) &&
count($info["ratings"]) !== 0
){ ){
$description[] = [ $description[] = [
@ -1183,7 +1185,7 @@ class brave{
"title" => $news["title"], "title" => $news["title"],
"author" => null, "author" => null,
"description" => $news["description"], "description" => $news["description"],
"date" => !isset($news["age"]) || $news["age"] == "void 0" ? null : strtotime($news["age"]), "date" => !isset($news["age"]) || $news["age"] == "void 0" || $news["age"] == "null" ? null : strtotime($news["age"]),
"thumb" => $thumb, "thumb" => $thumb,
"url" => $news["url"] "url" => $news["url"]
]; ];

View File

@ -545,8 +545,6 @@ class ddg{
public function web($get){ public function web($get){
$proxy = null;
if($get["npt"]){ if($get["npt"]){
[$jsgrep, $proxy] = $this->backend->get($get["npt"], "web"); [$jsgrep, $proxy] = $this->backend->get($get["npt"], "web");

File diff suppressed because it is too large Load Diff

View File

@ -608,7 +608,7 @@ class mojeek{
$this->fuckhtml $this->fuckhtml
->getTextContent( ->getTextContent(
$this->fuckhtml $this->fuckhtml
->getElementsByClassName("i", "p")[1] ->getElementsByClassName("i", "p")[0]
) )
); );

View File

@ -229,7 +229,7 @@ class sc{
if($json === null){ if($json === null){
throw new Exception("Failed to decode JSON"); throw new Exception("Failed to decode JSON. Did the keys set in data/config.php expire?");
} }
$out = [ $out = [

View File

@ -117,10 +117,10 @@ $settings = [
"value" => "yandex", "value" => "yandex",
"text" => "Yandex" "text" => "Yandex"
], ],
/*[ [
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
],*/ ],
[ [
"value" => "mojeek", "value" => "mojeek",
"text" => "Mojeek" "text" => "Mojeek"
@ -192,11 +192,11 @@ $settings = [
[ [
"value" => "yandex", "value" => "yandex",
"text" => "Yandex" "text" => "Yandex"
]/*, ],
[ [
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
]*/ ]
] ]
], ],
[ [
@ -211,10 +211,10 @@ $settings = [
"value" => "brave", "value" => "brave",
"text" => "Brave" "text" => "Brave"
], ],
/*[ [
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
],*/ ],
[ [
"value" => "mojeek", "value" => "mojeek",
"text" => "Mojeek" "text" => "Mojeek"
@ -434,20 +434,33 @@ $left .=
'</div>' . '</div>' .
'<div class="settings-submit">' . '<div class="settings-submit">' .
'<input type="submit" value="Update settings!">' . '<input type="submit" value="Update settings!">' .
'<a href="../">&lt; Return to front page</a>' . '<a href="../">&lt; Go back</a>' .
'</div>' . '</div>' .
'</form>'; '</form>';
if(count($_GET) === 0){ if(count($_GET) === 0){
$code = [];
foreach($_COOKIE as $key => $value){
$code[] = rawurlencode($key) . "=" . rawurlencode($value);
}
$code = implode("&", $code);
if($code != ""){
$code = "?" . $code;
}
echo echo
$frontend->load( $frontend->load(
"search.html", "search.html",
[ [
"class" => "", "class" => "",
"right-left" => "right-left" =>
'<div class="infobox"><h2>Preference link</h2>Follow this link to auto-apply all cookies. Useful if your browser clears out cookies after a browsing session. Following this link will redirect you to the front page, unless no settings are set.<br><br>' . '<div class="infobox"><h2>Preference link</h2>Following this link will re-apply all cookies configured here and will redirect you to the front page. Useful if your browser clears out cookies after a browsing session.<br><br>' .
'<a href="settings' . rtrim("?" . str_replace("; ", "&", $code), "?") . '">Bookmark me!</a>' . '<a href="settings' . $code . '">Bookmark me!</a>' .
'</div>', '</div>',
"right-right" => "", "right-right" => "",
"left" => $left "left" => $left

BIN
static/icon/call.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

BIN
static/icon/directions.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

@ -160,6 +160,16 @@ function number_format(int){
return new Intl.NumberFormat().format(int); return new Intl.NumberFormat().format(int);
} }
// Replace window.fetch with a wrapper that forwards the call unchanged to
// the previous window.fetch and, once the promise resolves, stores the
// elapsed milliseconds on the response object as `ping`.
window.fetch = (function(nativeFetch){
	
	return function(resource, options){
		
		const startedAt = Date.now();
		
		// forward the caller's `this` and every argument as-is
		const pending = nativeFetch.apply(this, arguments);
		
		return pending.then(function(res){
			
			res.ping = Date.now() - startedAt;
			return res;
		});
	};
})(window.fetch);
// parse initial server list // parse initial server list
fetch_server(window.location.origin); fetch_server(window.location.origin);
@ -187,25 +197,24 @@ async function fetch_server(server){
list.push(server); list.push(server);
var data = null; var data = null;
var ping = new Date().getTime();
try{ try{
data = await fetch( var payload = await fetch(
server + "/ami4get" server + "/ami4get"
); );
if(data.status !== 200){ if(payload.status !== 200){
// endpoint is not available // endpoint is not available
errors++; errors++;
div_failedreqs.textContent = number_format(errors); div_failedreqs.textContent = number_format(errors);
console.warn(server + ": Invalid HTTP code " + data.status); console.warn(server + ": Invalid HTTP code " + payload.status);
return; return;
} }
data = await data.json(); data = await payload.json();
data.server.ping = new Date().getTime() - ping; data.server.ping = payload.ping;
}catch(error){ }catch(error){

View File

@ -499,6 +499,7 @@ h3,h4,h5,h6{
text-align:center; text-align:center;
display:block; display:block;
text-align:left; text-align:left;
white-space:nowrap;
} }
.favicon-dropdown img{ .favicon-dropdown img{
@ -1247,6 +1248,11 @@ table tr a:last-child{
padding-left:20px; padding-left:20px;
} }
.instances .go-back{
margin-top:17px;
display:inline-block;
}
/* /*
Responsive image Responsive image

View File

@ -27,6 +27,7 @@
</tbody> </tbody>
</table> </table>
</noscript> </noscript>
<a href="../" class="go-back">&lt; Go back</a>
<div id="popup-bg"></div> <div id="popup-bg"></div>
<div class="popup-wrapper"> <div class="popup-wrapper">
<div class="popup"></div> <div class="popup"></div>

14
web.php
View File

@ -146,9 +146,17 @@ if(count($results["image"]) !== 0){
$right["image"] .= $right["image"] .=
'<a class="image" href="' . htmlspecialchars($image["url"]) . '" rel="noreferrer nofollow" title="' . htmlspecialchars($image["title"]) . '" data-json="' . htmlspecialchars(json_encode($image["source"])) . '" tabindex="-1">' . '<a class="image" href="' . htmlspecialchars($image["url"]) . '" rel="noreferrer nofollow" title="' . htmlspecialchars($image["title"]) . '" data-json="' . htmlspecialchars(json_encode($image["source"])) . '" tabindex="-1">' .
'<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">' . '<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">';
'<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>' .
'</a>'; if(
$image["source"][0]["width"] !== null &&
$image["source"][0]["height"] !== null
){
$right["image"] .= '<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>';
}
$right["image"] .= '</a>';
} }
$right["image"] .= $right["image"] .=