google web, videos and news, various other fixes
This commit is contained in:
parent
9fd993b47b
commit
2519666e1c
40
README.md
40
README.md
|
@ -23,6 +23,7 @@ https://4get.ca
|
|||
- DuckDuckGo
|
||||
- Brave
|
||||
- Yandex
|
||||
- Google
|
||||
- Mojeek
|
||||
- Marginalia
|
||||
- wiby
|
||||
|
@ -41,10 +42,12 @@ https://4get.ca
|
|||
- DuckDuckGo
|
||||
- Brave
|
||||
- Yandex
|
||||
- Google
|
||||
|
||||
4. News
|
||||
- DuckDuckGo
|
||||
- Brave
|
||||
- Google
|
||||
- Mojeek
|
||||
|
||||
5. Music
|
||||
|
@ -61,7 +64,7 @@ https://4get.ca
|
|||
- YouTube
|
||||
- SoundCloud
|
||||
|
||||
More scrapers are coming soon. I currently want to add Google web/video/news search, HackerNews (durr orange site!!) and Qwant. A shopping and files tab is also in my todo list.
|
||||
More scrapers are coming soon. I currently want to add HackerNews (durr orange site!!), Qwant, Yep and other garbage. A shopping tab, a files tab and more music scrapers are also on my todo list.
|
||||
|
||||
# Installation
|
||||
This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK.
|
||||
|
@ -190,6 +193,41 @@ services:
|
|||
|
||||
Replace relevant values and start with `docker-compose up -d`
|
||||
|
||||
## Install on Caddy
|
||||
|
||||
1. Install dependencies:
|
||||
|
||||
`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git`
|
||||
|
||||
2. Clone this repository where you want to host this from:
|
||||
|
||||
`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get`
|
||||
|
||||
3. Set permission on the `icons` directory inside `4get`
|
||||
|
||||
`cd /var/www/4get/ && sudo chmod 777 -R icons/`
|
||||
|
||||
4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile`
|
||||
|
||||
```sh
|
||||
4get.konakona.moe {
|
||||
root * /var/www/4get
|
||||
file_server
|
||||
encode gzip
|
||||
php_fastcgi unix//var/run/php/php8.2-fpm.sock {
|
||||
index index.php
|
||||
}
|
||||
redir /{path}.php{query} 301
|
||||
try_files {path} {path}.php
|
||||
}
|
||||
```
|
||||
|
||||
Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile).
|
||||
|
||||
5. Restart Caddy
|
||||
|
||||
`sudo systemctl restart caddy`
|
||||
|
||||
# Encryption setup
|
||||
I'm schizoid (as you should be) so I'm gonna set up 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts within an hour.
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@ header("Access-Control-Allow-Origin: *");
|
|||
|
||||
include "data/config.php";
|
||||
|
||||
$bot_requests = apcu_fetch("captcha");
|
||||
$real_requests = apcu_fetch("real_requests");
|
||||
$bot_requests = apcu_fetch("captcha_gen");
|
||||
|
||||
echo json_encode(
|
||||
[
|
||||
|
|
4
api.txt
4
api.txt
|
@ -68,8 +68,8 @@
|
|||
|
||||
|
||||
+ Get the next page of results
|
||||
All API responses come with an array index named "nextpage". To get
|
||||
the next page of results, you must make another API call with &npt.
|
||||
All API responses come with an array index named "npt". To get the
|
||||
next page of results, you must make another API call with &npt.
|
||||
|
||||
Example ::
|
||||
|
||||
|
|
|
@ -39,7 +39,9 @@ if(
|
|||
}
|
||||
|
||||
try{
|
||||
echo json_encode(
|
||||
|
||||
echo
|
||||
json_encode(
|
||||
$scraper->web($get),
|
||||
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
|
||||
);
|
||||
|
|
|
@ -5,7 +5,7 @@ class config{
|
|||
// any parameters.
|
||||
|
||||
// 4get version. Please keep this updated
|
||||
const VERSION = 5;
|
||||
const VERSION = 6;
|
||||
|
||||
// Will be shown pretty much everywhere.
|
||||
const SERVER_NAME = "4get";
|
||||
|
@ -56,14 +56,22 @@ class config{
|
|||
const INSTANCES = [
|
||||
"https://4get.ca",
|
||||
"https://4get.zzls.xyz",
|
||||
"https://4getus.zzls.xyz",
|
||||
"https://4get.silly.computer",
|
||||
"https://4g.opnxng.com",
|
||||
"https://4get.konakona.moe"
|
||||
"https://4get.konakona.moe",
|
||||
"https://4get.lvkaszus.pl",
|
||||
"https://4g.ggtyler.dev",
|
||||
"https://4get.perennialte.ch",
|
||||
"https://4get.sihj.net",
|
||||
"https://4get.hbubli.cc",
|
||||
"https://4get.plunked.party",
|
||||
"https://4get.seitan-ayoub.lol"
|
||||
];
|
||||
|
||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||
// Changing this might break things.
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0";
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0";
|
||||
|
||||
// Proxy pool assignments for each scraper
|
||||
// false = Use server's raw IP
|
||||
|
|
|
@ -7,6 +7,7 @@ class captcha{
|
|||
// check if we want captcha
|
||||
if(config::BOT_PROTECTION !== 1){
|
||||
|
||||
apcu_inc("real_requests");
|
||||
if($output === true){
|
||||
$frontend->loadheader(
|
||||
$get,
|
||||
|
@ -45,6 +46,8 @@ class captcha{
|
|||
}else{
|
||||
|
||||
// the cookie is OK! dont die() and give results
|
||||
apcu_inc("real_requests");
|
||||
|
||||
if($output === true){
|
||||
$frontend->loadheader(
|
||||
$get,
|
||||
|
@ -175,6 +178,8 @@ class captcha{
|
|||
|
||||
apcu_inc($key, 1, $stupid, 86400);
|
||||
|
||||
apcu_inc("real_requests");
|
||||
|
||||
setcookie(
|
||||
"pass",
|
||||
$key,
|
||||
|
|
|
@ -25,7 +25,7 @@ class frontend{
|
|||
|
||||
if($theme != "Dark"){
|
||||
|
||||
$replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">';
|
||||
$replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . rawurlencode($theme) . '.css?v' . config::VERSION . '">';
|
||||
}else{
|
||||
|
||||
$replacements["style"] = "";
|
||||
|
@ -84,6 +84,8 @@ class frontend{
|
|||
){
|
||||
|
||||
// bot detected !!
|
||||
apcu_inc("captcha_gen");
|
||||
|
||||
$this->drawerror(
|
||||
"Tshh, blocked!",
|
||||
'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
|
||||
|
@ -889,7 +891,7 @@ class frontend{
|
|||
"ddg" => "DuckDuckGo",
|
||||
"brave" => "Brave",
|
||||
"yandex" => "Yandex",
|
||||
//"google" => "Google",
|
||||
"google" => "Google",
|
||||
"mojeek" => "Mojeek",
|
||||
"marginalia" => "Marginalia",
|
||||
"wiby" => "wiby"
|
||||
|
@ -921,8 +923,8 @@ class frontend{
|
|||
//"fb" => "Facebook videos",
|
||||
"ddg" => "DuckDuckGo",
|
||||
"brave" => "Brave",
|
||||
"yandex" => "Yandex"
|
||||
//"google" => "Google"
|
||||
"yandex" => "Yandex",
|
||||
"google" => "Google"
|
||||
]
|
||||
];
|
||||
break;
|
||||
|
@ -933,7 +935,7 @@ class frontend{
|
|||
"option" => [
|
||||
"ddg" => "DuckDuckGo",
|
||||
"brave" => "Brave",
|
||||
//"google" => "Google",
|
||||
"google" => "Google",
|
||||
"mojeek" => "Mojeek"
|
||||
]
|
||||
];
|
||||
|
|
|
@ -15,7 +15,7 @@ class fuckhtml{
|
|||
|
||||
if(!isset($html["innerHTML"])){
|
||||
|
||||
throw new Exception("(load) Supplied array doesn't contain a innerHTML index");
|
||||
throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
|
||||
}
|
||||
$html = $html["innerHTML"];
|
||||
}
|
||||
|
@ -35,6 +35,11 @@ class fuckhtml{
|
|||
$this->strlen = strlen($this->html);
|
||||
}
|
||||
|
||||
// Accessor: returns the value currently stored in $this->html.
public function getloadedhtml(){
|
||||
|
||||
return $this->html;
|
||||
}
|
||||
|
||||
public function getElementsByTagName(string $tagname){
|
||||
|
||||
$out = [];
|
||||
|
@ -46,7 +51,7 @@ class fuckhtml{
|
|||
|
||||
if($tagname == "*"){
|
||||
|
||||
$tagname = '[^\/<>\s]+';
|
||||
$tagname = '[A-Za-z0-9._-]+';
|
||||
}else{
|
||||
|
||||
$tagname = preg_quote(strtolower($tagname));
|
||||
|
@ -126,7 +131,7 @@ class fuckhtml{
|
|||
}
|
||||
);
|
||||
|
||||
// computer the indent level for each element
|
||||
// compute the indent level for each element
|
||||
$level = [];
|
||||
$count = count($out);
|
||||
|
||||
|
@ -314,7 +319,7 @@ class fuckhtml{
|
|||
|
||||
if(!isset($html["innerHTML"])){
|
||||
|
||||
throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index");
|
||||
throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
|
||||
}
|
||||
$html = $html["innerHTML"];
|
||||
}
|
||||
|
@ -441,4 +446,27 @@ class fuckhtml{
|
|||
|
||||
return json_decode($json_out, true);
|
||||
}
|
||||
|
||||
// Replaces JavaScript-style escape sequences found in $string with the
// characters they stand for and returns the resulting string.
// Two escape forms are handled: \uXXXX (4 hex digits) and \xXX (2 hex digits).
// NOTE(review): every \uXXXX escape is matched and decoded on its own, so the
// two halves of a surrogate pair reach json_decode() separately -- confirm
// that input containing such pairs is decoded as intended.
public function parseJsString($string){
|
||||
|
||||
return
|
||||
preg_replace_callback(
|
||||
// matches a literal backslash followed by uXXXX or xXX (hex digits)
'/\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}/',
|
||||
function($match){
|
||||
|
||||
// $match[0][1] is the character right after the backslash
if($match[0][1] == "u"){
|
||||
|
||||
// wrap the \uXXXX escape in quotes and decode it as a JSON string
return json_decode('"' . $match[0] . '"');
|
||||
}else{
|
||||
|
||||
// stripcslashes() expands \xXX; the result is then converted
// from windows-1252 to utf-8
return mb_convert_encoding(
|
||||
stripcslashes($match[0]),
|
||||
"utf-8",
|
||||
"windows-1252"
|
||||
);
|
||||
}
|
||||
},
|
||||
$string
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,5 +24,5 @@
|
|||
|
||||
User-agent: *
|
||||
Disallow:
|
||||
host: 4get.ca
|
||||
sitemap: https://4get.ca/sitemap.xml
|
||||
Host: 4get.ca
|
||||
Sitemap: https://4get.ca/sitemap
|
||||
|
|
|
@ -857,7 +857,9 @@ class brave{
|
|||
// parse ratings
|
||||
if(
|
||||
isset($info["ratings"]) &&
|
||||
$info["ratings"] != "void 0"
|
||||
$info["ratings"] != "void 0" &&
|
||||
is_array($info["ratings"]) &&
|
||||
count($info["ratings"]) !== 0
|
||||
){
|
||||
|
||||
$description[] = [
|
||||
|
@ -1183,7 +1185,7 @@ class brave{
|
|||
"title" => $news["title"],
|
||||
"author" => null,
|
||||
"description" => $news["description"],
|
||||
"date" => !isset($news["age"]) || $news["age"] == "void 0" ? null : strtotime($news["age"]),
|
||||
"date" => !isset($news["age"]) || $news["age"] == "void 0" || $news["age"] == "null" ? null : strtotime($news["age"]),
|
||||
"thumb" => $thumb,
|
||||
"url" => $news["url"]
|
||||
];
|
||||
|
|
|
@ -545,8 +545,6 @@ class ddg{
|
|||
|
||||
public function web($get){
|
||||
|
||||
$proxy = null;
|
||||
|
||||
if($get["npt"]){
|
||||
|
||||
[$jsgrep, $proxy] = $this->backend->get($get["npt"], "web");
|
||||
|
|
2665
scraper/google.php
2665
scraper/google.php
File diff suppressed because it is too large
Load Diff
|
@ -608,7 +608,7 @@ class mojeek{
|
|||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName("i", "p")[1]
|
||||
->getElementsByClassName("i", "p")[0]
|
||||
)
|
||||
);
|
||||
|
||||
|
|
|
@ -229,7 +229,7 @@ class sc{
|
|||
|
||||
if($json === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON");
|
||||
throw new Exception("Failed to decode JSON. Did the keys set in data/config.php expire?");
|
||||
}
|
||||
|
||||
$out = [
|
||||
|
|
31
settings.php
31
settings.php
|
@ -117,10 +117,10 @@ $settings = [
|
|||
"value" => "yandex",
|
||||
"text" => "Yandex"
|
||||
],
|
||||
/*[
|
||||
[
|
||||
"value" => "google",
|
||||
"text" => "Google"
|
||||
],*/
|
||||
],
|
||||
[
|
||||
"value" => "mojeek",
|
||||
"text" => "Mojeek"
|
||||
|
@ -192,11 +192,11 @@ $settings = [
|
|||
[
|
||||
"value" => "yandex",
|
||||
"text" => "Yandex"
|
||||
]/*,
|
||||
],
|
||||
[
|
||||
"value" => "google",
|
||||
"text" => "Google"
|
||||
]*/
|
||||
]
|
||||
]
|
||||
],
|
||||
[
|
||||
|
@ -211,10 +211,10 @@ $settings = [
|
|||
"value" => "brave",
|
||||
"text" => "Brave"
|
||||
],
|
||||
/*[
|
||||
[
|
||||
"value" => "google",
|
||||
"text" => "Google"
|
||||
],*/
|
||||
],
|
||||
[
|
||||
"value" => "mojeek",
|
||||
"text" => "Mojeek"
|
||||
|
@ -434,20 +434,33 @@ $left .=
|
|||
'</div>' .
|
||||
'<div class="settings-submit">' .
|
||||
'<input type="submit" value="Update settings!">' .
|
||||
'<a href="../">< Return to front page</a>' .
|
||||
'<a href="../">< Go back</a>' .
|
||||
'</div>' .
|
||||
'</form>';
|
||||
|
||||
if(count($_GET) === 0){
|
||||
|
||||
$code = [];
|
||||
foreach($_COOKIE as $key => $value){
|
||||
|
||||
$code[] = rawurlencode($key) . "=" . rawurlencode($value);
|
||||
}
|
||||
|
||||
$code = implode("&", $code);
|
||||
|
||||
if($code != ""){
|
||||
|
||||
$code = "?" . $code;
|
||||
}
|
||||
|
||||
echo
|
||||
$frontend->load(
|
||||
"search.html",
|
||||
[
|
||||
"class" => "",
|
||||
"right-left" =>
|
||||
'<div class="infobox"><h2>Preference link</h2>Follow this link to auto-apply all cookies. Useful if your browser clears out cookies after a browsing session. Following this link will redirect you to the front page, unless no settings are set.<br><br>' .
|
||||
'<a href="settings' . rtrim("?" . str_replace("; ", "&", $code), "?") . '">Bookmark me!</a>' .
|
||||
'<div class="infobox"><h2>Preference link</h2>Following this link will re-apply all cookies configured here and will redirect you to the front page. Useful if your browser clears out cookies after a browsing session.<br><br>' .
|
||||
'<a href="settings' . $code . '">Bookmark me!</a>' .
|
||||
'</div>',
|
||||
"right-right" => "",
|
||||
"left" => $left
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 2.1 KiB |
Binary file not shown.
After Width: | Height: | Size: 1.6 KiB |
|
@ -160,6 +160,16 @@ function number_format(int){
|
|||
return new Intl.NumberFormat().format(int);
|
||||
}
|
||||
|
||||
// Wraps window.fetch: the original function is captured in the closure and
// every call is forwarded to it unchanged. When the returned promise resolves,
// the response object gets an extra `ping` property holding the number of
// milliseconds elapsed since the call was made.
window.fetch = (function(fetch) {
|
||||
// fn and t are not read directly; `arguments` is forwarded as-is
return function(fn, t){
|
||||
// timestamp taken just before the request is issued
const begin = Date.now();
|
||||
return fetch.apply(this, arguments).then(function(response) {
|
||||
response.ping = Date.now() - begin;
|
||||
return response;
|
||||
});
|
||||
};
|
||||
})(window.fetch);
|
||||
|
||||
// parse initial server list
|
||||
fetch_server(window.location.origin);
|
||||
|
||||
|
@ -187,25 +197,24 @@ async function fetch_server(server){
|
|||
list.push(server);
|
||||
|
||||
var data = null;
|
||||
var ping = new Date().getTime();
|
||||
|
||||
try{
|
||||
|
||||
data = await fetch(
|
||||
var payload = await fetch(
|
||||
server + "/ami4get"
|
||||
);
|
||||
|
||||
if(data.status !== 200){
|
||||
if(payload.status !== 200){
|
||||
|
||||
// endpoint is not available
|
||||
errors++;
|
||||
div_failedreqs.textContent = number_format(errors);
|
||||
console.warn(server + ": Invalid HTTP code " + data.status);
|
||||
console.warn(server + ": Invalid HTTP code " + payload.status);
|
||||
return;
|
||||
}
|
||||
|
||||
data = await data.json();
|
||||
data.server.ping = new Date().getTime() - ping;
|
||||
data = await payload.json();
|
||||
data.server.ping = payload.ping;
|
||||
|
||||
}catch(error){
|
||||
|
||||
|
|
|
@ -499,6 +499,7 @@ h3,h4,h5,h6{
|
|||
text-align:center;
|
||||
display:block;
|
||||
text-align:left;
|
||||
white-space:nowrap;
|
||||
}
|
||||
|
||||
.favicon-dropdown img{
|
||||
|
@ -1247,6 +1248,11 @@ table tr a:last-child{
|
|||
padding-left:20px;
|
||||
}
|
||||
|
||||
.instances .go-back{
|
||||
margin-top:17px;
|
||||
display:inline-block;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Responsive image
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
</tbody>
|
||||
</table>
|
||||
</noscript>
|
||||
<a href="../" class="go-back">< Go back</a>
|
||||
<div id="popup-bg"></div>
|
||||
<div class="popup-wrapper">
|
||||
<div class="popup"></div>
|
||||
|
|
14
web.php
14
web.php
|
@ -146,9 +146,17 @@ if(count($results["image"]) !== 0){
|
|||
|
||||
$right["image"] .=
|
||||
'<a class="image" href="' . htmlspecialchars($image["url"]) . '" rel="noreferrer nofollow" title="' . htmlspecialchars($image["title"]) . '" data-json="' . htmlspecialchars(json_encode($image["source"])) . '" tabindex="-1">' .
|
||||
'<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">' .
|
||||
'<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>' .
|
||||
'</a>';
|
||||
'<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">';
|
||||
|
||||
if(
|
||||
$image["source"][0]["width"] !== null &&
|
||||
$image["source"][0]["height"] !== null
|
||||
){
|
||||
|
||||
$right["image"] .= '<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>';
|
||||
}
|
||||
|
||||
$right["image"] .= '</a>';
|
||||
}
|
||||
|
||||
$right["image"] .=
|
||||
|
|
Loading…
Reference in New Issue