forked from lolcat/4get
1
0
Fork 0

Compare commits

...

24 Commits

Author SHA1 Message Date
lolcat 9ca93f34c6 ddg hotfix 2024-12-17 21:01:36 -05:00
lolcat 0a43b9c849 added arquivo.pt 2024-12-17 10:11:53 -05:00
lolcat b636fec319 fucking git is so shit 2024-12-17 00:35:15 -05:00
lolcat 774f7113df duckduckgo scraper rewrite 2024-12-17 00:31:15 -05:00
lolcat 0b3bbe0f15 gore's shitty theme fix 2024-12-02 15:19:10 -05:00
lolcat 5f0b0a7b83 findthatmeme fix 2024-12-01 15:59:03 -05:00
lolcat 920b9d5b3f brave crash fix 2024-11-19 09:22:58 -05:00
lolcat 9cd369ac08 http2 on ddg 2024-11-07 23:37:43 -05:00
lolcat e83865be49 added pagination 2024-11-07 00:12:06 -05:00
lolcat 68dd7f29f6 mojeek thumbnail fix 2024-11-06 23:43:54 -05:00
lolcat aaa30c79f5 fix google cse image crash + added word autocorrect 2024-10-31 20:31:23 -04:00
lolcat 070f9d442b brave fix 2024-10-29 21:29:17 -04:00
lolcat 9c18753ec3 yes of course i need to fucking forget the .php again AAAAAAAAAAA 2024-10-24 21:46:54 -04:00
lolcat d8a729796e fix crash on google cse, added settings 2024-10-22 20:15:00 -04:00
lolcat 2bbe5a29a9 Merge branch 'master' of https://git.lolcat.ca/lolcat/4get 2024-10-22 11:34:06 -04:00
lolcat 9ac195ac3b added google CSE 2024-10-22 11:33:14 -04:00
lolcat d427a48ed4 Merge pull request 'nginx documentation but better' (#41) from bread/4get:master into master
Reviewed-on: lolcat/4get#41
2024-10-21 14:17:07 +00:00
lolcat 12d5b4ade8 Merge branch 'master' into master 2024-10-21 14:16:54 +00:00
Pano c422abbdc6 add css via copy and paste (slightly edited to not require a lot from my shithole) 2024-10-21 14:15:34 +00:00
Pano 85246cc7ec 194 lines of mark(down) 2024-10-19 01:12:41 +00:00
Pano d709d12111 fix html making md look bad 2024-10-10 04:45:06 +00:00
Pano 19f82a8536 even more little things I missed (polish!) 2024-10-08 19:22:38 +00:00
Pano 155a38d454 things I missed 2024-10-08 19:19:01 +00:00
Pano 6926e374af Upgrade nginx configuration to a better state 2024-10-07 23:48:24 +00:00
15 changed files with 2964 additions and 2348 deletions

21
api.txt
View File

@ -1,10 +1,17 @@
__ __ __ 44
/ // / ____ ____ / /_ 4444444 44
/ // /_/ __ `/ _ \/ __/ 44444444 44444 444
/__ __/ /_/ / __/ /_ 44444444 444444 444444444
/_/ \__, /\___/\__/ 44444 44444444 444444444
/____/ 444444444 4444444
4444444444 444444
4444444444444
444444444444444444
444444444444444
44444444
4444
44
+ Welcome to the 4get API documentation + + Welcome to the 4get API documentation +
+ Terms of use + Terms of use

View File

@ -119,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things. // Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0"; const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0";
// Proxy pool assignments for each scraper // Proxy pool assignments for each scraper
// false = Use server's raw IP // false = Use server's raw IP
@ -129,6 +129,7 @@ class config{
const PROXY_BRAVE = false; const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false; const PROXY_GOOGLE = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false; const PROXY_STARTPAGE = false;
const PROXY_QWANT = false; const PROXY_QWANT = false;
const PROXY_GHOSTERY = false; const PROXY_GHOSTERY = false;
@ -157,6 +158,9 @@ class config{
// Scraper-specific parameters // Scraper-specific parameters
// //
// GOOGLE CSE
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA // MARGINALIA
// Use "null" to default out to HTML scraping OR specify a string to // Use "null" to default out to HTML scraping OR specify a string to
// use the API (Eg: "public"). API has less filters. // use the API (Eg: "public"). API has less filters.

View File

@ -1,103 +1,194 @@
# Install on NGINX <h1 align=center>Installation of 4get in NGINX</h1>
>I do NOT recommend following this guide, only follow this if you *really* need to use nginx. I recommend you use the apache2 steps instead. <div align=right>
Login as root. > NOTE: As the previous version stated, it is better to follow the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">Apache2 guide</a> instead of the Nginx one.
Create a file in `/etc/nginx/sites-avaliable/` called `4get.conf` or any name you want and put this into the file: > NOTE: This is going to guess that you're using either a <abbr title="(Arch Linux, Artix Linux, Endeavouros, etc...) ">Arch-based system</abbr> or a <abbr title="(Debian, Ubuntu, Devuan, etc...)">Debian-based system</abbr>, although you can still follow it with minor issues.
``` </div>
server {
# DO YOU REALLY NEED TO LOG SEARCHES?
access_log /dev/null;
error_log /dev/null;
# Change this if you have 4get in other folder.
root /var/www/4get;
# Change yourdomain by your domain lol
server_name www.yourdomain.com yourdomain.com;
location @php { 1. Login as root.
try_files $uri.php $uri/index.php =404; 2. Upgrade your system:
# Change the unix socket address if it's different for you. * On Arch-based, run `pacman -Syu`.
fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock; * On Debian-based, run `apt update`, then `apt upgrade`.
fastcgi_index index.php; 3. Install the following dependencies:
# Change this to `fastcgi_params` if you use a debian based distro. * `git`: So you can clone <a href="https://git.lolcat.ca/lolcat/4get">this</a> repository.
include fastcgi.conf; * `nginx`: So you can run Nginx.
fastcgi_intercept_errors on; * `php-fpm`: This is what allows Nginx to run *(and show)* PHP files.
} * `php-imagick`, `imagemagick`: Image manipulation.
* `php-apcu`: Caching module.
* `php-curl`, `curl`: Transferring data with URLs.
* `php-mbstring`: String utils.
* `certbot`, `certbot-nginx`: ACME client. Used to create SSL certificates.
* In Arch-based distributions:
* `pacman -S nginx certbot php-imagick certbot-nginx imagemagick curl php-apcu git`
* In Debian-based distributions:
* `apt install php-mbstring nginx certbot-nginx certbot php-imagick imagemagick php-curl curl php-apcu git`
location / { <div align=right>
try_files $uri @php;
}
location ~* ^(.*)\.php$ { > IMPORTANT: `php-curl`, `php-mbstring` might be a Debian-only package, but this needs further fact checking.
return 301 $1;
}
> IMPORTANT: If having issues with `php-apcu` or `libsodium`, go to [^1].
</div>
4. `cd` to `/etc/nginx` and make the `conf.d/` directory if it doesn't exist:
* Again, this guesses you're logged in as root.
```sh
cd /etc/nginx
ls -l conf.d/ # If ls shows conf.d, then it means it exists.
# If it does not, run:
mkdir conf.d
```
5. Make a file inside `conf.d/` called `4get.conf` and place the following content:
* First run `touch conf.d/4get.conf` then `nano conf.d/4get.conf` to open the nano editor: *(Install it if it is not, or use another editor.)*
```sh
server {
access_log /dev/null; # Search log file. Do you really need to?
error_log /dev/null; # Error log file.
# Change this if you have 4get in another folder.
root /var/www/4get;
# Change 'yourdomain' to your domain.
server_name www.yourdomain.com yourdomain.com;
# Port to listen to.
listen 80; listen 80;
}
```
That is a very basic config so you will need to adapt it to your needs in case you have a more complicated nginx configuration. Anyways, you can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf) location @php {
try_files $uri.php $uri/index.php =404;
# Change the unix socket address if it's different for you.
fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
fastcgi_index index.php;
# Change this to `fastcgi_params` if you use a debian based distribution.
include fastcgi.conf;
fastcgi_intercept_errors on;
}
After you save the file you will need to do a symlink of the `4get.conf` file to `/etc/nignx/sites-enabled/`, you can do it with this command: location / {
try_files $uri @php;
}
```sh location ~* ^(.*)\.php$ {
ln -s /etc/nginx/sites-available/4get.conf /etc/nginx/sites-available/4get.conf return 301 $1;
``` }
Now test the nginx config with `nginx -t`, if it says that everything is good, restart nginx using `systemctl restart nginx`
# Encryption setup
Generate a certificate for the domain using:
```sh
certbot --nginx --key-type ecdsa -d www.yourdomain.com -d yourdomain.com
```
(Remember to install the nginx certbot plugin!!!)
After doing that certbot should deploy the certificate automatically into your 4get nginx config file. It should be ready to use at that point.
# Tor setup on NGINX
Important Note: Tor onion addresses are significantly longer than traditional domain names. Before proceeding with Nginx configuration, ensure you increase the `server_names_hash_bucket_size` value in your `nginx.conf` file. This setting in your Nginx configuration controls the internal data structure used to manage multiple server names (hostnames) associated with your web server. Each hostname requires a certain amount of memory within this structure. If the size is insufficient, Nginx will encounter errors.
1. Open your `nginx.conf` file (that is under `/etc/nginx/nginx.conf`).
2. Find the line containing `# server_names_hash_bucket_size 64;`.
3. Uncomment the line and adjust the value. Start with 64, but if you encounter issues, incrementally increase it (e.g., 128, 256) until it accommodates your configuration.
Open your current 4get NGINX config (that is under `/etc/nginx/sites-available/`) and append this to the end of the file:
```
server {
access_log /dev/null;
error_log /dev/null;
listen 80;
server_name <youronionaddress>;
root /var/www/4get;
location @php {
try_files $uri.php $uri/index.php =404;
# Change the unix socket address if it's different for you.
fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
fastcgi_index index.php;
# Change this to `fastcgi_params` if you use a debian based distro.
include fastcgi.conf;
fastcgi_intercept_errors on;
} }
```
* The above is a very basic configuration and thus will need tweaking to your personal needs. It should still work as-is, though. A 'real world' example is present in [^2].
* After saving the file, check that the `nginx.conf` file inside the main directory includes files inside `conf.d/`:
* It should be inside the http block: *(The following is an example! Don't just Copy and Paste it!)*
```sh
http {
include mime.types;
include conf.d/*.conf;
types_hash_max_size 4096;
# ...
}
```
* Now, test your configuration with `nginx -t`, if it says that everything is good, restart *(or start)* the Nginx daemon:
* This depends on the init manager, most distributions use `systemd`, but it's better practice to include most.
```sh
# systemd
systemctl stop nginx
systemctl start nginx
# or
systemctl restart nginx
# openrc
rc-service nginx stop
rc-service nginx start
# or
rc-service nginx restart
# runit
sv down nginx
sv up nginx
# or
sv restart nginx
# s6
s6-rc -d change nginx
s6-rc -u change nginx
# or
s6-svc -r /run/service/nginx
# dinit
dinitctl stop nginx
dinitctl start nginx
# or
dinitctl restart nginx
```
6. Clone the repository to `/var/www`:
* `git clone --depth 1 https://git.lolcat.ca/lolcat/4get 4get` - It clones the repository with the depth of one commit *(so it takes less time to download)* and saves the cloned repository as '4get'.
7. That should be it! There are some extra steps you can take, but it really just depends on you.
<h2 align=center>Encryption setup</h2>
1. Generate a certificate for the domain you're using with:
* Note that `certbot-nginx` is needed.
```sh
certbot --nginx --key-type ecdsa -d www.yourdomain.com -d yourdomain.com
```
2. After that, certbot will deploy the certificate automatically to your 4get conf file; It should be ready to use from there.
<h2 align=center>Tor Setup</h2>
<div align=right>
> IMPORTANT: Tor onion addresses are very long compared to traditional domains, so before doing anything, edit `nginx.conf` and increase <abbr title="This setting in your Nginx configuration controls the internal data structure used to manage multiple server names (hostnames) associated with your web server. Each hostname requires a certain amount of memory within this structure. If the size is insufficient, Nginx will encounter errors."><code>server_names_hash_bucket_size</code></abbr> to suit your needs.
</div>
1. `cd` to `/etc/nginx` *(if you haven't)* and open your `nginx.conf` file.
2. Find the line containing `# server_names_hash_bucket_size 64;` inside said file.
3. Uncomment the line and adjust the value; start with 64, but if you encounter issues, incrementally increase it *(e.g., 128, 256)* until it accommodates your configuration.
4. Open *(or duplicate the configuration)* and edit it:
* Example configuration, again:
```sh
server {
access_log /dev/null; # Search log file. Do you really need to?
error_log /dev/null; # Error log file.
# Change this if you have 4get in another folder.
root /var/www/4get;
# Change 'onionaddress.onion' to your onion link.
server_name onionaddress.onion;
# Port to listen to.
listen 80;
location @php {
try_files $uri.php $uri/index.php =404;
# Change the unix socket address if it's different for you.
fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
fastcgi_index index.php;
# Change this to `fastcgi_params` if you use a debian based distribution.
include fastcgi.conf;
fastcgi_intercept_errors on;
}
location / {
try_files $uri @php;
}
location ~* ^(.*)\.php$ {
return 301 $1;
}
location / {
try_files $uri @php;
} }
```
A real world example is present in [^2].
5. Once done, check the configuration with `nginx -t`. If everything's fine and dandy, refer to <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/tor.md">the Tor guide</a> to set up your onion site.
location ~* ^(.*)\.php$ { <h2 align=center>Other important things</h2>
return 301 $1;
}
}
```
Obviously replace `<youronionaddress>` by the onion address of `/var/lib/tor/4get/hostname` and then check if the nginx config is valid with `nginx -t` if yes, then restart the nginx service and try opening the onion address into the Tor Browser. You can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf) 1. <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/configure.md">Configuration guide</a>: Things to do after setup.
2. <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">Apache2 guide</a>: Fallback to this if you couldn't get something to work, or you don't know something.
Once you did the above, refer to <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/tor.md">this tor guide</a> to setup your onionsite. <h2 align=center>Known issues</h2>
1. https://git.lolcat.ca/lolcat/4get/issues
[^1]: lolcat/4get#40, If having issues with `libsodium`, or `php-apcu`.
[^2]: <a href="https://git.nadeko.net/Fijxu/etc-configs/src/branch/selfhost/nginx/conf.d/4get.conf">git.nadeko.net</a> nadeko.net's 4get instance configuration.

View File

@ -75,6 +75,7 @@ class backend{
break; break;
case "socks5_hostname": case "socks5_hostname":
case "socks5h":
case "socks5a": case "socks5a":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME); curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);

View File

@ -838,10 +838,10 @@ class frontend{
} }
$payload .= $payload .=
'<a href="https://webcache.googleusercontent.com/search?q=cache:' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://google.com" alt="go">Google cache</a>' .
'<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' . '<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' .
'<a href="https://archive.ph/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' . '<a href="https://archive.ph/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' .
'<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' . '<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' .
'<a href="https://arquivo.pt/wayback/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://arquivo.pt" alt="ar">Arquivo.pt</a>' .
'<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' . '<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' .
'<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' . '<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' .
'</div>'; '</div>';
@ -939,6 +939,7 @@ class frontend{
"brave" => "Brave", "brave" => "Brave",
"yandex" => "Yandex", "yandex" => "Yandex",
"google" => "Google", "google" => "Google",
"google_cse" => "Google CSE",
"startpage" => "Startpage", "startpage" => "Startpage",
"qwant" => "Qwant", "qwant" => "Qwant",
"ghostery" => "Ghostery", "ghostery" => "Ghostery",
@ -963,6 +964,7 @@ class frontend{
"yandex" => "Yandex", "yandex" => "Yandex",
"brave" => "Brave", "brave" => "Brave",
"google" => "Google", "google" => "Google",
"google_cse" => "Google CSE",
"startpage" => "Startpage", "startpage" => "Startpage",
"qwant" => "Qwant", "qwant" => "Qwant",
"yep" => "Yep", "yep" => "Yep",

View File

@ -381,6 +381,8 @@ class fuckhtml{
$json_out = null; $json_out = null;
$last_char = null; $last_char = null;
$keyword_check = null;
for($i=0; $i<strlen($json); $i++){ for($i=0; $i<strlen($json); $i++){
switch($json[$i]){ switch($json[$i]){
@ -396,6 +398,7 @@ class fuckhtml{
$bracket = false; $bracket = false;
$is_close_bracket = true; $is_close_bracket = true;
}else{ }else{
if($bracket === false){ if($bracket === false){
@ -429,6 +432,31 @@ class fuckhtml{
$is_close_bracket === false $is_close_bracket === false
){ ){
// do keyword check
$keyword_check .= $json[$i];
if(in_array($json[$i], [":", "{"])){
$keyword_check = substr($keyword_check, 0, -1);
if(
preg_match(
'/function|array|return/i',
$keyword_check
)
){
$json_out =
preg_replace(
'/[{"]*' . preg_quote($keyword_check, "/") . '$/',
"",
$json_out
);
}
$keyword_check = null;
}
// here we know we're not iterating over a quoted string // here we know we're not iterating over a quoted string
switch($json[$i]){ switch($json[$i]){
@ -498,4 +526,85 @@ class fuckhtml{
$string $string
); );
} }
/**
 * Extract the first balanced array/object literal embedded in a string.
 *
 * Scans $json left to right for the first "[" or "{" found outside a
 * quoted string, then returns the substring that ends at the bracket
 * bringing both nesting counters back to zero. Brackets that appear
 * inside single- or double-quoted strings are ignored.
 *
 * @param string $json Text that contains an array/object literal.
 * @return string|null The extracted literal, or null when no balanced
 *                     literal could be found.
 */
public function extract_json($json){
	
	$len = strlen($json);
	$array_level = 0;
	$object_level = 0;
	$in_quote = null;
	$start = null;
	
	for($i=0; $i<$len; $i++){
		
		$char = $json[$i];
		
		if(
			$char === "\"" ||
			$char === "'"
		){
			
			// a quote is escaped only when it is preceded by an ODD
			// number of backslashes: in "b\\" the backslash itself is
			// escaped, so the quote that follows closes the string
			$backslashes = 0;
			
			for($k=$i-1; $k>=0 && $json[$k] === "\\"; $k--){
				
				$backslashes++;
			}
			
			if($backslashes % 2 === 0){
				
				// found a non-escaped quote
				if($in_quote === null){
					
					// open quote
					$in_quote = $char;
				}elseif($in_quote === $char){
					
					// close quote
					$in_quote = null;
				}
			}
			
			// quotes never change the nesting levels
			continue;
		}
		
		if($in_quote !== null){
			
			// brackets inside a quoted string are plain text
			continue;
		}
		
		switch($char){
			
			case "[":
				$array_level++;
				if($start === null){
					
					$start = $i;
				}
				break;
			
			case "]":
				// closers seen before the literal starts belong to the
				// surrounding text and must not skew the counters
				if($start !== null){
					
					$array_level--;
				}
				break;
			
			case "{":
				$object_level++;
				if($start === null){
					
					$start = $i;
				}
				break;
			
			case "}":
				if($start !== null){
					
					$object_level--;
				}
				break;
		}
		
		if(
			$start !== null &&
			$array_level === 0 &&
			$object_level === 0
		){
			
			// every bracket opened since $start has been closed
			return substr($json, $start, $i - $start + 1);
		}
	}
	
	// no opener found, or the literal never balanced
	return null;
}
} }

View File

@ -293,8 +293,8 @@ class brave{
/* /*
$handle = fopen("scraper/brave.html", "r"); $handle = fopen("scraper/brave.html", "r");
$html = fread($handle, filesize("scraper/brave.html")); $html = fread($handle, filesize("scraper/brave.html"));
fclose($handle); fclose($handle);*/
*/
try{ try{
$html = $html =
@ -410,10 +410,20 @@ class brave{
throw new Exception("Could not grep JavaScript object"); throw new Exception("Could not grep JavaScript object");
} }
$data =
rtrim(
preg_replace(
'/\(Array\(0\)\)\).*$/',
"",
$grep[1]
),
" ]"
) . "]";
$data = $data =
$this->fuckhtml $this->fuckhtml
->parseJsObject( ->parseJsObject(
$grep[1] $data
); );
unset($grep); unset($grep);
@ -663,7 +673,10 @@ class brave{
$table["Address"] = $result["location"]["postal_address"]["displayAddress"]; $table["Address"] = $result["location"]["postal_address"]["displayAddress"];
} }
if(isset($result["location"]["rating"])){ if(
isset($result["location"]["rating"]) &&
$result["location"]["rating"] != "void 0"
){
$table["Rating"] = $table["Rating"] =
$result["location"]["rating"]["ratingValue"] . "/" . $result["location"]["rating"]["ratingValue"] . "/" .
@ -671,13 +684,19 @@ class brave{
number_format($result["location"]["rating"]["reviewCount"]) . " votes)"; number_format($result["location"]["rating"]["reviewCount"]) . " votes)";
} }
if(isset($result["location"]["contact"]["telephone"])){ if(
isset($result["location"]["contact"]["telephone"]) &&
$result["location"]["contact"]["telephone"] != "void 0"
){
$table["Phone number"] = $table["Phone number"] =
$result["location"]["contact"]["telephone"]; $result["location"]["contact"]["telephone"];
} }
if(isset($result["location"]["price_range"])){ if(
isset($result["location"]["price_range"]) &&
$result["location"]["price_range"] != "void 0"
){
$table["Price"] = $table["Price"] =
$result["location"]["price_range"]; $result["location"]["price_range"];

File diff suppressed because it is too large Load Diff

View File

@ -136,7 +136,7 @@ class ftm{
"source" => [ "source" => [
[ [
"url" => "url" =>
"https://findthatmeme.us-southeast-1.linodeobjects.com/" . "https://s3.thehackerblog.com/findthatmeme/" .
$thumb, $thumb,
"width" => null, "width" => null,
"height" => null "height" => null

1054
scraper/google_cse.php Normal file

File diff suppressed because it is too large Load Diff

View File

@ -220,6 +220,7 @@ class marginalia{
"related" => [] "related" => []
]; ];
// API scraper
if(config::MARGINALIA_API_KEY !== null){ if(config::MARGINALIA_API_KEY !== null){
try{ try{
@ -263,34 +264,57 @@ class marginalia{
return $out; return $out;
} }
// no more cloudflare!! Parse html by default // HTML parser
$params = [ $proxy = $this->backend->get_ip();
"query" => $search
];
foreach(["adtech", "recent", "intitle"] as $v){ if($get["npt"]){
if($get[$v] == "yes"){ [$params, $proxy] =
$this->backend->get(
$get["npt"],
"web"
);
try{
$html =
$this->get(
$proxy,
"https://search.marginalia.nu/search?" . $params
);
}catch(Exception $error){
switch($v){ throw new Exception("Failed to get HTML");
}
}else{
$params = [
"query" => $search
];
foreach(["adtech", "recent", "intitle"] as $v){
if($get[$v] == "yes"){
case "adtech": $params["adtech"] = "reduce"; break; switch($v){
case "recent": $params["recent"] = "recent"; break;
case "adtech": $params["searchTitle"] = "title"; break; case "adtech": $params["adtech"] = "reduce"; break;
case "recent": $params["recent"] = "recent"; break;
case "adtech": $params["searchTitle"] = "title"; break;
}
} }
} }
}
try{
$html =
$this->get(
$this->backend->get_ip(),
"https://search.marginalia.nu/search",
$params
);
}catch(Exception $error){
throw new Exception("Failed to get HTML"); try{
$html =
$this->get(
$proxy,
"https://search.marginalia.nu/search",
$params
);
}catch(Exception $error){
throw new Exception("Failed to get HTML");
}
} }
$this->fuckhtml->load($html); $this->fuckhtml->load($html);
@ -387,6 +411,65 @@ class marginalia{
]; ];
} }
// get next page
$this->fuckhtml->load($html);
$pagination =
$this->fuckhtml
->getElementsByAttributeValue(
"aria-label",
"pagination",
"nav"
);
if(count($pagination) === 0){
// no pagination
return $out;
}
$this->fuckhtml->load($pagination[0]);
$pages =
$this->fuckhtml
->getElementsByClassName(
"page-link",
"a"
);
$found_current_page = false;
foreach($pages as $page){
if(
stripos(
$page["attributes"]["class"],
"active"
) !== false
){
$found_current_page = true;
continue;
}
if($found_current_page){
// we found current page index, and we iterated over
// the next page <a>
$out["npt"] =
$this->backend->store(
parse_url(
$page["attributes"]["href"],
PHP_URL_QUERY
),
"web",
$proxy
);
break;
}
}
return $out; return $out;
} }
} }

View File

@ -701,9 +701,11 @@ class mojeek{
if(count($thumb) === 2){ if(count($thumb) === 2){
$answer["thumb"] = $answer["thumb"] =
$this->fuckhtml urldecode(
->getTextContent( $this->fuckhtml
$thumb[1] ->getTextContent(
$thumb[1]
)
); );
} }
} }

View File

@ -133,6 +133,10 @@ $settings = [
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
], ],
[
"value" => "google_cse",
"text" => "Google CSE"
],
[ [
"value" => "startpage", "value" => "startpage",
"text" => "Startpage" "text" => "Startpage"
@ -203,6 +207,10 @@ $settings = [
"value" => "google", "value" => "google",
"text" => "Google" "text" => "Google"
], ],
[
"value" => "google_cse",
"text" => "Google CSE"
],
[ [
"value" => "startpage", "value" => "startpage",
"text" => "Startpage" "text" => "Startpage"

View File

@ -16,6 +16,7 @@
body{ body{
padding:15px 4% 40px; padding:15px 4% 40px;
margin:unset;
} }
h1,h2,h3,h4,h5,h6{ h1,h2,h3,h4,h5,h6{

40
static/themes/Wine.css Normal file
View File

@ -0,0 +1,40 @@
/* Wine theme: redefines the :root colour variables and restyles a few selectors. */
:root
{
/* accent colour, reused below through var(--accent) */
--accent : #f79e98;
/* variables named after hex codes: presumably palette slots of the base
   stylesheet being remapped here; confirm against the default theme */
--1d2021 : #180d0c;
--282828 : #180d0c;
--3c3836 : #251615;
--504945 : #251615;
--928374 : var(--accent);
--a89984 : #d8c5c4;
--bdae93 : #d8c5c4;
--8ec07c : var(--accent);
--ebdbb2 : #d8c5c4;
/* named colours; by their names these look like syntax-highlighting roles (assumption, TODO confirm) */
--comment: #928374;
--default: #DCC9BC;
--keyword: #F07342;
--string : var(--accent);
--green : #959A6B;
--yellow : #E39C45;
--red : #CF223E;
/* aliases onto the palette slots defined above */
--white : var(--a89984);
--black : var(--1d2021);
/* colour applied to a.link:hover further down */
--hover : #b18884
}
/* all links use the accent colour and drop the underline */
a.link, a { color: var(--accent); text-decoration: none; }
.searchbox { width: 23%; }
/* NOTE(review): "filter" is written as a type selector (no leading dot);
   confirm this is intended and not the class selector ".filter" */
.filters filter select { color: #E39C45; }
.web .separator::before { color: var(--white) }
.searchbox input[type="text"]::placeholder { color: var(--white); }
/* hovered links switch to the hover colour and gain a glow of the same colour */
a.link:hover
{
color: var(--hover);
text-shadow: 0 0 .2rem var(--hover);
}
.code-inline
{ border-color: var(--default); font-family: monospace;}
/* .home rules: accent-coloured links inside #center, --white subtext */
.home #center a
{ color: var(--accent); }
.home .subtext
{ color: var(--white); }