From a2bc1e6190bab561b7244e2e9bbda994ab0d0d31 Mon Sep 17 00:00:00 2001
From: lolcat <will@lolcat.ca>
Date: Fri, 20 Jun 2025 01:18:57 -0400
Subject: [PATCH] bypass anubis bullshit on marginalia

---
 lib/anubis.php         | 100 ++++++++++++++++++++++++++++++++++++
 scraper/marginalia.php | 113 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 206 insertions(+), 7 deletions(-)
 create mode 100644 lib/anubis.php

diff --git a/lib/anubis.php b/lib/anubis.php
new file mode 100644
index 0000000..ab075ff
--- /dev/null
+++ b/lib/anubis.php
@@ -0,0 +1,100 @@
+<?php
+
+//
+// Reference
+// https://github.com/TecharoHQ/anubis/blob/ecc716940e34ebe7249974f2789a99a2c7115e4e/web/js/proof-of-work.mjs
+//
+
+class anubis{
+	
+	// attempt cap so a hostile difficulty cannot keep the solver spinning forever
+	const MAX_ITERATIONS = 50000000;
+	
+	// declared explicitly: dynamic properties are deprecated since PHP 8.2
+	public $fuckhtml;
+	
+	public function __construct(){
+		
+		include_once "fuckhtml.php";
+		$this->fuckhtml = new fuckhtml();
+	}
+	
+	// Pulls the challenge JSON out of an anubis interstitial page and solves it.
+	// Returns ["response" => hex digest, "nonce" => int]; throws Exception when
+	// the challenge is missing, malformed or cannot be solved.
+	public function scrape($html){
+		
+		$this->fuckhtml->load($html);
+		
+		$script = $this->fuckhtml->getElementById("anubis_challenge", "script");
+		
+		// a miss may not be an array (e.g. false); count() on that is a TypeError
+		// in PHP 8, which callers catching Exception would never see
+		if(!is_array($script) || count($script) === 0){
+			
+			throw new Exception("Failed to scrape anubis challenge data");
+		}
+		
+		$script = json_decode($this->fuckhtml->getTextContent($script), true);
+		
+		if($script === null){
+			
+			throw new Exception("Failed to decode anubis challenge data");
+		}
+		
+		if(
+			!isset($script["challenge"]) ||
+			!isset($script["rules"]["difficulty"]) ||
+			!is_int($script["rules"]["difficulty"]) ||
+			!is_string($script["challenge"])
+		){
+			
+			throw new Exception("Found invalid challenge data");
+		}
+		
+		return $this->rape($script["challenge"], $script["rules"]["difficulty"]);
+	}
+	
+	// True when the first $difficulty hex digits of the raw digest are all zero
+	private function is_valid_hash($hash, $difficulty){
+		
+		for($i=0; $i<$difficulty; $i++){
+			
+			// even $i is the high nibble of byte $i/2, odd $i the low nibble
+			$byte = ord($hash[$i >> 1]);
+			
+			if((($i % 2 === 0 ? $byte >> 4 : $byte) & 0x0f) !== 0){
+				return false;
+			}
+		}
+		
+		return true;
+	}
+	
+	// Brute-forces the smallest nonce for which sha256($data . $nonce) begins
+	// with $difficulty zero hex digits. Throws Exception instead of looping
+	// forever when no nonce is found within MAX_ITERATIONS attempts.
+	public function rape($data, $difficulty = 5){
+		
+		// a sha256 digest only has 64 hex digits
+		if($difficulty > 64){
+			
+			throw new Exception("Invalid anubis difficulty");
+		}
+		
+		for($nonce=0; $nonce<self::MAX_ITERATIONS; $nonce++){
+			
+			$hash_binary = hash("sha256", $data . $nonce, true);
+			
+			if($this->is_valid_hash($hash_binary, $difficulty)){
+				
+				return [
+					"response" => bin2hex($hash_binary),
+					"nonce" => $nonce
+				];
+			}
+		}
+		
+		throw new Exception("Failed to solve anubis challenge");
+	}
+}
diff --git a/scraper/marginalia.php b/scraper/marginalia.php
index b9d555a..e62a485 100644
--- a/scraper/marginalia.php
+++ b/scraper/marginalia.php
@@ -3,7 +3,10 @@
 class marginalia{
 	public function __construct(){
 		
-		include "lib/fuckhtml.php";
+		include "lib/anubis.php";
+		$this->anubis = new anubis();
+		
+		include_once "lib/fuckhtml.php";
 		$this->fuckhtml = new fuckhtml();
 		
 		include "lib/backend.php";
@@ -102,7 +105,40 @@ class marginalia{
 		);
 	}
 	
-	private function get($proxy, $url, $get = []){
+	// $get_cookies: 1 = plain request, 0 = return the response's Set-Cookie
+	// pairs instead of the body, any string = send it as the Cookie header
+	private function get($proxy, $url, $get = [], $get_cookies = 1){
+		
+		$curlproc = curl_init();
+		
+		$cookies = "";
+		$cookies_tmp = [];
+		
+		// strict checks: switch() compares loosely, and before PHP 8 every
+		// non-numeric cookie string == 0, which silently dropped the cookie
+		if($get_cookies === 0){
+			
+			curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+				
+				$length = strlen($header);
+				
+				$header = explode(":", $header, 2);
+				
+				// ignore Set-Cookie lines without a value or a name=value pair
+				if(
+					isset($header[1]) &&
+					strtolower(trim($header[0])) === "set-cookie" &&
+					count($pair = explode("=", trim($header[1]), 2)) === 2
+				){
+					$cookies_tmp[trim($pair[0])] = explode(";", $pair[1], 2)[0];
+				}
+				
+				return $length;
+			});
+		}elseif($get_cookies !== 1){
+			
+			$cookies = "Cookie: " . $get_cookies;
+		}
 		
 		$headers = [
 			"User-Agent: " . config::USER_AGENT,
@@ -110,6 +146,7 @@ class marginalia{
 			"Accept-Language: en-US,en;q=0.5",
 			"Accept-Encoding: gzip",
 			"DNT: 1",
+			$cookies,
 			"Connection: keep-alive",
 			"Upgrade-Insecure-Requests: 1",
 			"Sec-Fetch-Dest: document",
@@ -118,8 +155,6 @@ class marginalia{
 			"Sec-Fetch-User: ?1"
 		];
 		
-		$curlproc = curl_init();
-		
 		if($get !== []){
 			$get = http_build_query($get);
 			$url .= "?" . $get;
@@ -145,7 +180,19 @@ class marginalia{
 			throw new Exception(curl_error($curlproc));
 		}
 		
-		curl_close($curlproc);
+		// release the handle on every successful path, not only the cookie one
+		curl_close($curlproc);
+		
+		if($get_cookies === 0){
+			
+			$cookie = [];
+			foreach($cookies_tmp as $key => $value){
+				$cookie[] = $key . "=" . $value;
+			}
+			
+			return implode(";", $cookie);
+		}
+		
 		return $data;
 	}
 	
@@ -267,6 +314,55 @@ class marginalia{
 		// HTML parser
 		$proxy = $this->backend->get_ip();
 		
+		//
+		// Bypass anubis check
+		//
+		if(($anubis_key = apcu_fetch("marginalia_cookie")) === false){
+			
+			try{
+				$html =
+					$this->get(
+						$proxy,
+						"https://old-search.marginalia.nu/"
+					);
+			}catch(Exception $error){
+				
+				throw new Exception("Failed to get anubis challenge");
+			}
+			
+			// let scrape()'s Exception bubble up: new Exception($error) leaked its trace
+			$anubis_data = $this->anubis->scrape($html);
+			
+			// send anubis response & get cookies
+			// https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge?response=0000018966b086834f738bacba6031028adb5aa875974ead197a8b75778baf3a&nonce=39947&redir=https%3A%2F%2Fold-search.marginalia.nu%2F&elapsedTime=1164
+			
+			try{
+				$anubis_key =
+					$this->get(
+						$proxy,
+						"https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge",
+						[
+							"response" => $anubis_data["response"],
+							"nonce" => $anubis_data["nonce"],
+							"redir" => "https://old-search.marginalia.nu/",
+							"elapsedTime" => random_int(1000, 2000)
+						],
+						0
+					);
+			}catch(Exception $error){
+				
+				throw new Exception("Failed to submit anubis challenge");
+			}
+			
+			// an empty string means the response set no cookies: never cache that
+			if($anubis_key === ""){
+				throw new Exception("Failed to obtain anubis cookie");
+			}
+			
+			// server-side cookie lifetime is unknown here, so re-solve hourly
+			apcu_store("marginalia_cookie", $anubis_key, 3600);
+		}
+		
 		if($get["npt"]){
 			
 			[$params, $proxy] =
@@ -279,7 +375,9 @@ class marginalia{
 				$html =
 					$this->get(
 						$proxy,
-						"https://old-search.marginalia.nu/search?" . $params
+						"https://old-search.marginalia.nu/search?" . $params,
+						[],
+						$anubis_key
 					);
 			}catch(Exception $error){
 				
@@ -309,7 +407,8 @@ class marginalia{
 					$this->get(
 						$proxy,
 						"https://old-search.marginalia.nu/search",
-						$params
+						$params,
+						$anubis_key
 					);
 			}catch(Exception $error){