greppr fix

This commit is contained in:
lolcat 2025-08-09 11:00:48 -04:00
parent ad535a1609
commit 319640cd77
1 changed file with 169 additions and 169 deletions

View File

@ -16,49 +16,82 @@ class greppr{
return []; return [];
} }
private function get($proxy, $url, $get = [], $cookie = false){ private function get($proxy, $url, $get = [], $cookie = false, $post){
$curlproc = curl_init(); $curlproc = curl_init();
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
}
curl_setopt($curlproc, CURLOPT_URL, $url); curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
if($cookie === false){ if($post === false){
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
}
curl_setopt($curlproc, CURLOPT_HTTPHEADER, if($cookie === false){
["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", curl_setopt($curlproc, CURLOPT_HTTPHEADER,
"Accept-Language: en-US,en;q=0.5", ["User-Agent: " . config::USER_AGENT,
"Accept-Encoding: gzip", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"DNT: 1", "Accept-Language: en-US,en;q=0.5",
"Connection: keep-alive", "Accept-Encoding: gzip",
"Upgrade-Insecure-Requests: 1", "DNT: 1",
"Sec-Fetch-Dest: document", "Connection: keep-alive",
"Sec-Fetch-Mode: navigate", "Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Site: none", "Sec-Fetch-Dest: document",
"Sec-Fetch-User: ?1"] "Sec-Fetch-Mode: navigate",
); "Sec-Fetch-Site: none",
"Sec-Fetch-User: ?1"]
);
}else{
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br, zstd",
"DNT: 1",
"Sec-GPC: 1",
"Connection: keep-alive",
"Referer: https://greppr.org/search",
"Cookie: PHPSESSID=$cookie",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: same-origin",
"Sec-Fetch-User: ?1",
"Priority: u=0, i"]
);
}
}else{ }else{
$get = http_build_query($get);
curl_setopt($curlproc, CURLOPT_POST, true);
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
curl_setopt($curlproc, CURLOPT_HTTPHEADER, curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT, ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5", "Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip", "Accept-Encoding: gzip, deflate, br, zstd",
"Cookie: PHPSESSID=" . $cookie, "Content-Type: application/x-www-form-urlencoded",
"Content-Length: " . strlen($get),
"Origin: https://greppr.org",
"DNT: 1", "DNT: 1",
"Sec-GPC: 1",
"Connection: keep-alive", "Connection: keep-alive",
"Referer: https://greppr.org/",
"Cookie: PHPSESSID=$cookie",
"Upgrade-Insecure-Requests: 1", "Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document", "Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate", "Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: none", "Sec-Fetch-Site: same-origin",
"Sec-Fetch-User: ?1"] "Sec-Fetch-User: ?1",
"Priority: u=0, i"]
); );
} }
@ -113,7 +146,24 @@ class greppr{
[$q, $proxy] = $this->backend->get($get["npt"], "web"); [$q, $proxy] = $this->backend->get($get["npt"], "web");
$q = json_decode($q, true); $tokens = json_decode($q, true);
//
// Get paginated page
//
try{
$html = $this->get(
$proxy,
"https://greppr.org" . $tokens["get"],
[],
$tokens["cookie"],
false
);
}catch(Exception $error){
throw new Exception("Failed to fetch search page");
}
}else{ }else{
@ -124,88 +174,114 @@ class greppr{
} }
$proxy = $this->backend->get_ip(); $proxy = $this->backend->get_ip();
}
// get token
// token[0] = static token that changes once a day
// token[1] = dynamic token that changes on every request
// token[2] = PHPSESSID cookie
$tokens = apcu_fetch("greppr_token");
if(
$tokens === false ||
$first_attempt === false // force token fetch
){
// we haven't gotten the token yet, get it //
// get token
//
try{ try{
$response = $html =
$this->get( $this->get(
$proxy, $proxy,
"https://greppr.org", "https://greppr.org",
[] [],
false,
false
); );
}catch(Exception $error){ }catch(Exception $error){
throw new Exception("Failed to fetch search tokens"); throw new Exception("Failed to fetch search tokens");
} }
$tokens = $this->parse_token($response); //
// Parse token
//
$this->fuckhtml->load($html["data"]);
$tokens = [];
$inputs =
$this->fuckhtml
->getElementsByTagName(
"input"
);
foreach($inputs as $input){
if(!isset($input["attributes"]["name"])){
continue;
}
switch($input["attributes"]["name"]){
case "var1":
case "var2":
case "n":
$tokens[$input["attributes"]["name"]] =
$this->fuckhtml
->getTextContent(
$input["attributes"]["value"]
);
break;
default:
$tokens["req"] =
$this->fuckhtml
->getTextContent(
$input["attributes"]["name"]
);
break;
}
}
// get cookie
preg_match(
'/PHPSESSID=([^;]+)/',
$html["headers"]["set-cookie"],
$cookie
);
if(!isset($cookie[1])){
// server sent an unexpected cookie
throw new Exception("Got malformed cookie");
}
$tokens["cookie"] = $cookie[1];
if($tokens === false){ if($tokens === false){
throw new Exception("Failed to grep search tokens"); throw new Exception("Failed to grep search tokens");
} }
}
try{
if($get["npt"]){ //
// Get initial search page
//
try{
$html = $this->get(
$proxy,
"https://greppr.org/search",
[
"var1" => $tokens["var1"],
"var2" => $tokens["var2"],
$tokens["req"] => $search,
"n" => $tokens["n"]
],
$tokens["cookie"],
true
);
}catch(Exception $error){
$params = [ throw new Exception("Failed to fetch search page");
$tokens[0] => $q["q"],
"s" => $q["s"],
"l" => 30,
"n" => $tokens[1]
];
}else{
$params = [
$tokens[0] => $search,
"n" => $tokens[1]
];
} }
$searchresults = $this->get(
$proxy,
"https://greppr.org/search",
$params,
$tokens[2]
);
}catch(Exception $error){
throw new Exception("Failed to fetch search page");
} }
if(strlen($searchresults["data"]) === 0){ //$html = file_get_contents("scraper/greppr.html");
//$this->fuckhtml->load($html);
// redirected to main page, which means we got old token $this->fuckhtml->load($html["data"]);
// generate a new one
// ... unless we just tried to do that
if($first_attempt === false){
throw new Exception("Failed to get a new search token");
}
return $this->web($get, false);
}
// refresh the token with new data (this also triggers fuckhtml load)
$this->parse_token($searchresults, $tokens[2]);
// response object
$out = [ $out = [
"status" => "ok", "status" => "ok",
"spelling" => [ "spelling" => [
@ -254,24 +330,16 @@ class greppr{
if($break === true){ if($break === true){
parse_str(
$this->fuckhtml
->getTextContent(
$a["attributes"]["href"]
),
$values
);
$values = array_values($values);
$out["npt"] = $out["npt"] =
$this->backend->store( $this->backend->store(
json_encode( json_encode([
[ "get" =>
"q" => $values[0], $this->fuckhtml
"s" => $values[1] ->getTextContent(
] $a["attributes"]["href"]
), ),
"cookie" => $tokens["cookie"]
]),
"web", "web",
$proxy $proxy
); );
@ -360,74 +428,6 @@ class greppr{
return $out; return $out;
} }
/*
 * Extract the search tokens from a fetched page and cache them in APCu.
 *
 * Loads $response["data"] into $this->fuckhtml, then scans every <script>
 * element for a `window.location = '/search?<name>=...&n=<digits>'`
 * redirect. The query parameter name and the numeric "n" value become the
 * first two tokens; the PHPSESSID value becomes the third.
 *
 * @param array $response       Response array; "data" is read as the HTML
 *                              body and "headers"["set-cookie"] as the
 *                              cookie header when no $cookie is supplied.
 * @param string|false $cookie  Session ID to reuse, or false to take it
 *                              from the response's set-cookie header.
 * @return array|false          [param name, n value, session ID] on
 *                              success (also stored under the APCu key
 *                              "greppr_token"), or false when no script
 *                              matches or no PHPSESSID cookie is found.
 */
private function parse_token($response, $cookie = false){
// parse the page so the <script> elements can be enumerated
$this->fuckhtml->load($response["data"]);
$scripts =
$this->fuckhtml
->getElementsByTagName("script");
$found = false;
foreach($scripts as $script){
// group 1: everything between "/search?" and the first "=" (parameter name)
// group 2: the digits following "&n="
preg_match(
'/window\.location ?= ?\'\/search\?([^=]+).*&n=([0-9]+)/',
$script["innerHTML"],
$tokens
);
// preg_match overwrites $tokens on every call, so this only reflects the current script
if(isset($tokens[1])){
$found = true;
break;
}
}
if($found === false){
// no script contained the redirect pattern
return false;
}
// drop the full match (index 0) and keep only the two captured groups
$tokens = [
$tokens[1],
$tokens[2]
];
if($cookie !== false){
// caller supplied a session ID: reuse it instead of reading the response headers
$tokens[] = $cookie;
apcu_store("greppr_token", $tokens);
return $tokens;
}
if(!isset($response["headers"]["set-cookie"])){
// server didn't send a cookie
return false;
}
// pull the PHPSESSID value (up to the next ";") out of the set-cookie header
// NOTE(review): assumes the header value is a single string, not an array -- confirm
preg_match(
'/PHPSESSID=([^;]+)/',
$response["headers"]["set-cookie"],
$cookie
);
if(!isset($cookie[1])){
// server sent a cookie, but it carries no PHPSESSID
return false;
}
// append the session ID as the third token and cache the full triple
$tokens[] = $cookie[1];
apcu_store("greppr_token", $tokens);
return $tokens;
}
private function limitstrlen($text){ private function limitstrlen($text){
return explode("\n", wordwrap($text, 300, "\n"))[0]; return explode("\n", wordwrap($text, 300, "\n"))[0];