Compare commits
18 Commits
60d6f649ee
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 95819bfe52 | |||
| e1e92d715e | |||
| 394f401921 | |||
| 25e8095d0d | |||
| cf3c77ed04 | |||
| c45f8b1e12 | |||
| 6086c63148 | |||
| d2b0a414ad | |||
| c713d52b5f | |||
| 0861450b8a | |||
| 88012f6ae2 | |||
| 0dabcea0aa | |||
| a8022d22a7 | |||
| 9ea0372bb7 | |||
| a54f212550 | |||
| b1f5974e40 | |||
| e63a17d6db | |||
| 4349bf232d |
11
Dockerfile
11
Dockerfile
@@ -1,8 +1,17 @@
|
|||||||
|
FROM lwthiker/curl-impersonate:0.6.1-ff-alpine AS curl-impersonate
|
||||||
|
|
||||||
FROM alpine:3.21
|
FROM alpine:3.21
|
||||||
WORKDIR /var/www/html/4get
|
WORKDIR /var/www/html/4get
|
||||||
|
|
||||||
RUN apk update && apk upgrade
|
RUN apk update && apk upgrade
|
||||||
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg
|
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg nss ca-certificates
|
||||||
|
|
||||||
|
COPY --from=curl-impersonate /usr/local/bin /usr/local/bin
|
||||||
|
COPY --from=curl-impersonate /usr/local/lib /usr/local/lib
|
||||||
|
|
||||||
|
ENV LD_PRELOAD=/usr/local/lib/libcurl-impersonate-ff.so
|
||||||
|
ENV CURL_IMPERSONATE=ff117
|
||||||
|
ENV CURL_IMPERSONATE_HEADERS=no
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
|||||||
52
README.md
52
README.md
@@ -13,11 +13,11 @@ _NOT to be confused with 4get.ch, 4get.lol and friends! I **don't** host these._
|
|||||||
|
|
||||||
## Totally unbiased comparison between alternatives
|
## Totally unbiased comparison between alternatives
|
||||||
|
|
||||||
| | 4get | searx(ng) | libreY | araa | hearch.co |
|
| | 4get | searx(ng) | whoogle | degoog |
|
||||||
|----------------------------|-------------------------|-----------|-------------|-----------|-------------------|
|
|----------------------------|-------------------------|-----------|------------|--------------------------------------|
|
||||||
| RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | idk |
|
| RAM usage | 100-400mb~ | 400mb-1GB | 100mb | 200mb-1GB |
|
||||||
| Does it suck | no (debunked by snopes) | yes | yes | a little | better than searx |
|
| Does it suck | no (debunked by snopes) | yes | kind of? | its kinda cool but no search filters |
|
||||||
| Does it work | ye | sometimes | sometimes | sometimes | yes |
|
| Does it work | ye | lmao | shits dead | works right now... |
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
1. Rotating proxies on a per-scraper basis
|
1. Rotating proxies on a per-scraper basis
|
||||||
@@ -31,25 +31,29 @@ tl;dr 4get is the best way to browse for shit.
|
|||||||
|
|
||||||
# Supported websites
|
# Supported websites
|
||||||
|
|
||||||
| Web | Images | Videos | News | Music | Autocompleter |
|
| web | images | videos | news | music | autocomplete |
|
||||||
|------------|--------------|--------------|------------|------------|---------------|
|
|--------------|--------------|--------------|--------------|------------|--------------|
|
||||||
| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | Soundcloud | Brave |
|
| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | SoundCloud | Brave |
|
||||||
| Brave | Brave | Sepia Search | Brave | | DuckDuckGo |
|
| Brave | Yandex | Vimeo | Brave | Swisscows | DuckDuckGo |
|
||||||
| Yandex | Yandex | DuckDuckGo | Google | | Yandex |
|
| Yandex | Brave | Sepia Search | Google | | Yandex |
|
||||||
| Google | Google | Brave | Startpage | | Google |
|
| Google | Google | DuckDuckGo | Yahoo! JAPAN | | Google |
|
||||||
| Startpage | Startpage | Yandex | Qwant | | Startpage |
|
| Google API | Google API | Brave | Startpage | | Startpage |
|
||||||
| Qwant | Qwant | Google | Mojeek | | Kagi |
|
| Google CSE | Google CSE | Yandex | Qwant | | Kagi |
|
||||||
| Ghostery | Yep | Startpage | Baidu | | Qwant |
|
| Yahoo! JAPAN | Yahoo! JAPAN | Google | Mojeek | | Qwant |
|
||||||
| Yep | Baidu | Qwant | | | Ghostery |
|
| Startpage | Startpage | Yahoo! JAPAN | Baidu | | Ghostery |
|
||||||
| Greppr | Pinterest | Baidu | | | Yep |
|
| Qwant | Qwant | Startpage | | | Yep |
|
||||||
| Crowdview | 500px | Coc Coc | | | Marginalia |
|
| Ghostery | Baidu | Qwant | | | Marginalia |
|
||||||
| Mwmbl | VSCO | | | | YouTube |
|
| Yep | Solofield | Baidu | | | YouTube |
|
||||||
| Mojeek | Imgur | | | | Soundcloud |
|
| Mwmbl | Pinterest | Coc Coc | | | SoundCloud |
|
||||||
| Baidu | FindThatMeme | | | | |
|
| Mojeek | Cara | Solofield | | | |
|
||||||
| Coc Coc | | | | | |
|
| Baidu | Flickr | | | | |
|
||||||
| Marginalia | | | | | |
|
| Coc Coc | Pexels | | | | |
|
||||||
| wiby | | | | | |
|
| Solofield | Pixabay | | | | |
|
||||||
| Curlie | | | | | |
|
| Marginalia | Unsplash | | | | |
|
||||||
|
| wiby | 500px | | | | |
|
||||||
|
| | VSCO | | | | |
|
||||||
|
| | Imgur | | | | |
|
||||||
|
| | FindThatMeme | | | | |
|
||||||
|
|
||||||
# Installation
|
# Installation
|
||||||
Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>.
|
Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>.
|
||||||
|
|||||||
1
data/api_keys/yep.txt
Normal file
1
data/api_keys/yep.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Paste Yep API keys here
|
||||||
@@ -23,6 +23,16 @@ class config{
|
|||||||
// Enable the API?
|
// Enable the API?
|
||||||
const API_ENABLED = true;
|
const API_ENABLED = true;
|
||||||
|
|
||||||
|
//
|
||||||
|
// 4play (session provider)
|
||||||
|
//
|
||||||
|
// Enable 4play API?
|
||||||
|
const FPLAY_ENABLE_API = true;
|
||||||
|
|
||||||
|
// 4play password. Please set this to something secure if you enable the 4play API.
|
||||||
|
// This password is used to POST sessions to /api/v2/provide_sesh
|
||||||
|
const FPLAY_PASSWORD = "1234";
|
||||||
|
|
||||||
//
|
//
|
||||||
// BOT PROTECTION
|
// BOT PROTECTION
|
||||||
//
|
//
|
||||||
@@ -118,10 +128,10 @@ class config{
|
|||||||
|
|
||||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||||
// Changing this might break things.
|
// Changing this might break things.
|
||||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0";
|
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
|
||||||
|
|
||||||
// User agent to use with 4get-friendly APIs
|
// User agent to use with 4get-friendly APIs
|
||||||
const USER_AGENT_FRIENDLY = "4get-scrapist";
|
const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
|
||||||
|
|
||||||
// Proxy pool assignments for each scraper
|
// Proxy pool assignments for each scraper
|
||||||
// false = Use server's raw IP
|
// false = Use server's raw IP
|
||||||
@@ -131,7 +141,6 @@ class config{
|
|||||||
const PROXY_YAHOO = false;
|
const PROXY_YAHOO = false;
|
||||||
const PROXY_YAHOO_JAPAN = false;
|
const PROXY_YAHOO_JAPAN = false;
|
||||||
const PROXY_BRAVE = false;
|
const PROXY_BRAVE = false;
|
||||||
const PROXY_FB = false; // facebook
|
|
||||||
const PROXY_GOOGLE = false;
|
const PROXY_GOOGLE = false;
|
||||||
const PROXY_GOOGLE_API = false;
|
const PROXY_GOOGLE_API = false;
|
||||||
const PROXY_GOOGLE_CSE = false;
|
const PROXY_GOOGLE_CSE = false;
|
||||||
@@ -155,7 +164,6 @@ class config{
|
|||||||
const PROXY_VIMEO = false;
|
const PROXY_VIMEO = false;
|
||||||
const PROXY_YEP = false;
|
const PROXY_YEP = false;
|
||||||
const PROXY_PINTEREST = false;
|
const PROXY_PINTEREST = false;
|
||||||
const PROXY_SANKAKUCOMPLEX = false;
|
|
||||||
const PROXY_FLICKR = false;
|
const PROXY_FLICKR = false;
|
||||||
const PROXY_PIXABAY = false;
|
const PROXY_PIXABAY = false;
|
||||||
const PROXY_UNSPLASH = false;
|
const PROXY_UNSPLASH = false;
|
||||||
@@ -164,8 +172,6 @@ class config{
|
|||||||
const PROXY_VSCO = false;
|
const PROXY_VSCO = false;
|
||||||
const PROXY_SEZNAM = false;
|
const PROXY_SEZNAM = false;
|
||||||
const PROXY_NAVER = false;
|
const PROXY_NAVER = false;
|
||||||
const PROXY_GREPPR = false;
|
|
||||||
const PROXY_CROWDVIEW = false;
|
|
||||||
const PROXY_MWMBL = false;
|
const PROXY_MWMBL = false;
|
||||||
const PROXY_FTM = false; // findthatmeme
|
const PROXY_FTM = false; // findthatmeme
|
||||||
const PROXY_IMGUR = false;
|
const PROXY_IMGUR = false;
|
||||||
@@ -173,6 +179,11 @@ class config{
|
|||||||
const PROXY_YANDEX_W = false; // yandex web
|
const PROXY_YANDEX_W = false; // yandex web
|
||||||
const PROXY_YANDEX_I = false; // yandex images
|
const PROXY_YANDEX_I = false; // yandex images
|
||||||
const PROXY_YANDEX_V = false; // yandex videos
|
const PROXY_YANDEX_V = false; // yandex videos
|
||||||
|
const PROXY_SAFEBOORU = false;
|
||||||
|
const PROXY_KONACHAN = false;
|
||||||
|
const PROXY_YANDERE = false;
|
||||||
|
const PROXY_TBIB = false;
|
||||||
|
const PROXY_GELBOORU = false;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Scraper-specific parameters
|
// Scraper-specific parameters
|
||||||
@@ -185,4 +196,7 @@ class config{
|
|||||||
// Use "null" to default out to HTML scraping OR specify a string to
|
// Use "null" to default out to HTML scraping OR specify a string to
|
||||||
// use the API (Eg: "public"). API has less filters.
|
// use the API (Eg: "public"). API has less filters.
|
||||||
const MARGINALIA_API_KEY = null;
|
const MARGINALIA_API_KEY = null;
|
||||||
|
|
||||||
|
// Yep
|
||||||
|
const YEP_USE_API = false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -133,6 +133,9 @@ class bot_protection{
|
|||||||
$answers[] = $regex;
|
$answers[] = $regex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dedup
|
||||||
|
$answers = array_unique($answers);
|
||||||
|
|
||||||
if(
|
if(
|
||||||
!$invalid &&
|
!$invalid &&
|
||||||
$key !== false // has captcha been gen'd?
|
$key !== false // has captcha been gen'd?
|
||||||
|
|||||||
@@ -2,6 +2,52 @@
|
|||||||
|
|
||||||
class frontend{
|
class frontend{
|
||||||
|
|
||||||
|
/**
 * Check that $url is an absolute http(s) URL and, optionally, that its
 * host does not point at a private or reserved network address.
 *
 * @param mixed $url          URL to validate. Anything that is not a string is rejected.
 * @param bool  $net_validate When true, the host is additionally checked: IP literals
 *                            are tested directly, hostnames are resolved first, and the
 *                            URL is rejected if any resulting address is private/reserved
 *                            or if the hostname does not resolve.
 * @return bool True when the URL passed every requested check, false otherwise.
 */
public function validateurl($url, $net_validate = false){
	
	if(!is_string($url)){
		
		return false;
	}
	
	$url_parts = parse_url($url);
	
	// check if required parts are there
	if(
		$url_parts === false ||
		!isset($url_parts["scheme"]) ||
		!isset($url_parts["host"])
	){
		
		return false;
	}
	
	// parse_url() keeps the scheme exactly as typed, and schemes are
	// case-insensitive, so normalise before comparing
	$scheme = strtolower($url_parts["scheme"]);
	
	if(
		$scheme !== "http" &&
		$scheme !== "https"
	){
		
		return false;
	}
	
	if(!$net_validate){
		
		return true;
	}
	
	$host =
		str_replace(
			["[", "]"], // handle ipv6
			"",
			$url_parts["host"]
		);
	
	if(filter_var($host, FILTER_VALIDATE_IP) !== false){
		
		// host is already an IP literal
		$ips = [$host];
	}else{
		
		// resolve every A record of the domain. The single trailing dot
		// marks the name as fully qualified; strip an existing one first
		// so we never ask for "host.."
		$ips = gethostbynamel(rtrim($url_parts["host"], ".") . ".");
		
		if(
			$ips === false ||
			count($ips) === 0
		){
			
			// unresolvable host: fail closed
			return false;
		}
	}
	
	// NOTE(review): only A records are looked up here; AAAA records and
	// DNS answers that change between this check and the actual request
	// are not covered -- confirm whether callers need that.
	foreach($ips as $ip){
		
		// reject localhost, private and reserved ranges
		if(
			filter_var(
				$ip,
				FILTER_VALIDATE_IP,
				FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
			) === false
		){
			
			return false;
		}
	}
	
	return true;
}
|
||||||
|
|
||||||
public function load($template, $replacements = []){
|
public function load($template, $replacements = []){
|
||||||
|
|
||||||
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
|
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
|
||||||
@@ -600,16 +646,13 @@ class frontend{
|
|||||||
"qwant" => "Qwant",
|
"qwant" => "Qwant",
|
||||||
"ghostery" => "Ghostery",
|
"ghostery" => "Ghostery",
|
||||||
"yep" => "Yep",
|
"yep" => "Yep",
|
||||||
"greppr" => "Greppr",
|
|
||||||
"crowdview" => "Crowdview",
|
|
||||||
"mwmbl" => "Mwmbl",
|
"mwmbl" => "Mwmbl",
|
||||||
"mojeek" => "Mojeek",
|
"mojeek" => "Mojeek",
|
||||||
"baidu" => "Baidu",
|
"baidu" => "Baidu",
|
||||||
"coccoc" => "Cốc Cốc",
|
"coccoc" => "Cốc Cốc",
|
||||||
"solofield" => "Solofield",
|
"solofield" => "Solofield",
|
||||||
"marginalia" => "Marginalia",
|
"marginalia" => "Marginalia",
|
||||||
"wiby" => "wiby",
|
"wiby" => "wiby"
|
||||||
"curlie" => "Curlie"
|
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
break;
|
break;
|
||||||
@@ -622,11 +665,11 @@ class frontend{
|
|||||||
"yandex" => "Yandex",
|
"yandex" => "Yandex",
|
||||||
"brave" => "Brave",
|
"brave" => "Brave",
|
||||||
"google" => "Google",
|
"google" => "Google",
|
||||||
|
"google_api" => "Google API",
|
||||||
"google_cse" => "Google CSE",
|
"google_cse" => "Google CSE",
|
||||||
"yahoo_japan" => "Yahoo! JAPAN",
|
"yahoo_japan" => "Yahoo! JAPAN",
|
||||||
"startpage" => "Startpage",
|
"startpage" => "Startpage",
|
||||||
"qwant" => "Qwant",
|
"qwant" => "Qwant",
|
||||||
"yep" => "Yep",
|
|
||||||
"baidu" => "Baidu",
|
"baidu" => "Baidu",
|
||||||
"solofield" => "Solofield",
|
"solofield" => "Solofield",
|
||||||
"pinterest" => "Pinterest",
|
"pinterest" => "Pinterest",
|
||||||
@@ -638,8 +681,7 @@ class frontend{
|
|||||||
"fivehpx" => "500px",
|
"fivehpx" => "500px",
|
||||||
"vsco" => "VSCO",
|
"vsco" => "VSCO",
|
||||||
"imgur" => "Imgur",
|
"imgur" => "Imgur",
|
||||||
"ftm" => "FindThatMeme",
|
"ftm" => "FindThatMeme"
|
||||||
//"sankakucomplex" => "SankakuComplex"
|
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
break;
|
break;
|
||||||
@@ -678,7 +720,6 @@ class frontend{
|
|||||||
"yahoo_japan" => "Yahoo! JAPAN",
|
"yahoo_japan" => "Yahoo! JAPAN",
|
||||||
"startpage" => "Startpage",
|
"startpage" => "Startpage",
|
||||||
"qwant" => "Qwant",
|
"qwant" => "Qwant",
|
||||||
"yep" => "Yep",
|
|
||||||
"mojeek" => "Mojeek",
|
"mojeek" => "Mojeek",
|
||||||
"baidu" => "Baidu"
|
"baidu" => "Baidu"
|
||||||
]
|
]
|
||||||
@@ -695,6 +736,22 @@ class frontend{
|
|||||||
]
|
]
|
||||||
];
|
];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "booru":
	// scraper choices offered for the "booru" page.
	// Each key must appear only once: the "tbib" entry was previously
	// listed twice with the same label.
	$filters["scraper"] = [
		"display" => "Scraper",
		"option" => [
			"safebooru" => "Safebooru",
			"konachan" => "Konachan",
			"tbib" => "The Big Imageboard",
			"gelbooru" => "Gelbooru",
			"yandere" => "Yande.re",
			"sankakucomplex" => "SankakuComplex",
			"soybooru" => "SoyBooru"
		]
	];
	break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// get scraper name from user input, or default out to preferred scraper
|
// get scraper name from user input, or default out to preferred scraper
|
||||||
@@ -871,6 +928,7 @@ class frontend{
|
|||||||
|
|
||||||
$html = null;
|
$html = null;
|
||||||
|
|
||||||
|
//foreach(["web", "images", "videos", "news", "music", "booru"] as $type){
|
||||||
foreach(["web", "images", "videos", "news", "music"] as $type){
|
foreach(["web", "images", "videos", "news", "music"] as $type){
|
||||||
|
|
||||||
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
|
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
|
||||||
|
|||||||
@@ -553,28 +553,21 @@ class fuckhtml{
|
|||||||
|
|
||||||
case "\"":
|
case "\"":
|
||||||
case "'":
|
case "'":
|
||||||
if(
|
// count preceding backslashes
|
||||||
$i !== 0 && // only check if a quote could be there
|
$bsCount = 0;
|
||||||
(
|
$j = $i - 1;
|
||||||
(
|
|
||||||
$json[$i - 1] === "\\" &&
|
|
||||||
(
|
|
||||||
$i === 2 ||
|
|
||||||
$json[$i - 2] === "\\"
|
|
||||||
)
|
|
||||||
) ||
|
|
||||||
$json[$i - 1] !== "\\"
|
|
||||||
)
|
|
||||||
){
|
|
||||||
// found a non-escaped quote
|
|
||||||
|
|
||||||
|
while($j >= 0 && $json[$j] === "\\"){
|
||||||
|
$bsCount++;
|
||||||
|
$j--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// quote is NOT escaped if even number of backslashes
|
||||||
|
if($bsCount % 2 === 0){
|
||||||
if($in_quote === null){
|
if($in_quote === null){
|
||||||
|
|
||||||
// open quote
|
// open quote
|
||||||
$in_quote = $json[$i];
|
$in_quote = $json[$i];
|
||||||
|
|
||||||
}elseif($in_quote === $json[$i]){
|
}elseif($in_quote === $json[$i]){
|
||||||
|
|
||||||
// close quote
|
// close quote
|
||||||
$in_quote = null;
|
$in_quote = null;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -347,11 +347,8 @@ class brave{
|
|||||||
$q["spellcheck"] = "0";
|
$q["spellcheck"] = "0";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
$handle = fopen("scraper/brave.html", "r");
|
|
||||||
$html = fread($handle, filesize("scraper/brave.html"));
|
|
||||||
fclose($handle);*/
|
|
||||||
|
|
||||||
|
//$html = file_get_contents("scraper/brave.html");
|
||||||
try{
|
try{
|
||||||
$html =
|
$html =
|
||||||
$this->get(
|
$this->get(
|
||||||
|
|||||||
@@ -1,145 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
class crowdview{
|
|
||||||
|
|
||||||
public function __construct(){
|
|
||||||
|
|
||||||
include "lib/backend.php";
|
|
||||||
$this->backend = new backend("crowdview");
|
|
||||||
|
|
||||||
include "lib/fuckhtml.php";
|
|
||||||
$this->fuckhtml = new fuckhtml();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getfilters($page){
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
private function get($proxy, $url, $get = []){
|
|
||||||
|
|
||||||
$curlproc = curl_init();
|
|
||||||
|
|
||||||
if($get !== []){
|
|
||||||
$get = http_build_query($get);
|
|
||||||
$url .= "?" . $get;
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip",
|
|
||||||
"DNT: 1",
|
|
||||||
"Connection: keep-alive",
|
|
||||||
"Upgrade-Insecure-Requests: 1",
|
|
||||||
"Sec-Fetch-Dest: document",
|
|
||||||
"Sec-Fetch-Mode: navigate",
|
|
||||||
"Sec-Fetch-Site: none",
|
|
||||||
"Sec-Fetch-User: ?1"]
|
|
||||||
);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
|
|
||||||
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
|
|
||||||
|
|
||||||
$this->backend->assign_proxy($curlproc, $proxy);
|
|
||||||
|
|
||||||
$data = curl_exec($curlproc);
|
|
||||||
|
|
||||||
if(curl_errno($curlproc)){
|
|
||||||
|
|
||||||
throw new Exception(curl_error($curlproc));
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_close($curlproc);
|
|
||||||
return $data;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function web($get){
|
|
||||||
|
|
||||||
$search = $get["s"];
|
|
||||||
if(strlen($search) === 0){
|
|
||||||
|
|
||||||
throw new Exception("Search term is empty!");
|
|
||||||
}
|
|
||||||
|
|
||||||
$proxy = $this->backend->get_ip();
|
|
||||||
|
|
||||||
try{
|
|
||||||
$json = $this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://crowdview-next-js.onrender.com/api/search-v3",
|
|
||||||
[
|
|
||||||
"query" => $search
|
|
||||||
]
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch JSON");
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [
|
|
||||||
"status" => "ok",
|
|
||||||
"spelling" => [
|
|
||||||
"type" => "no_correction",
|
|
||||||
"using" => null,
|
|
||||||
"correction" => null
|
|
||||||
],
|
|
||||||
"npt" => null,
|
|
||||||
"answer" => [],
|
|
||||||
"web" => [],
|
|
||||||
"image" => [],
|
|
||||||
"video" => [],
|
|
||||||
"news" => [],
|
|
||||||
"related" => []
|
|
||||||
];
|
|
||||||
|
|
||||||
$json = json_decode($json, true);
|
|
||||||
|
|
||||||
if($json === NULL){
|
|
||||||
|
|
||||||
throw new Exception("Failed to decode JSON");
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach($json["results"] as $item){
|
|
||||||
|
|
||||||
$description = explode("<b>", $item["snippet"], 2);
|
|
||||||
|
|
||||||
$out["web"][] = [
|
|
||||||
"title" => $this->sanitize($item["title"]),
|
|
||||||
"description" => $this->sanitize($description[1]),
|
|
||||||
"url" => $item["link"],
|
|
||||||
"date" => strtotime($description[0]),
|
|
||||||
"type" => "web",
|
|
||||||
"thumb" => [
|
|
||||||
"url" => null,
|
|
||||||
"ratio" => null
|
|
||||||
],
|
|
||||||
"sublink" => [],
|
|
||||||
"table" => []
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function sanitize($html){
|
|
||||||
|
|
||||||
return
|
|
||||||
trim(
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
html_entity_decode(
|
|
||||||
$html
|
|
||||||
)
|
|
||||||
),
|
|
||||||
". "
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,309 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
class curlie{
|
|
||||||
|
|
||||||
public function __construct(){
|
|
||||||
|
|
||||||
include "lib/backend.php";
|
|
||||||
$this->backend = new backend("curlie");
|
|
||||||
|
|
||||||
include "lib/fuckhtml.php";
|
|
||||||
$this->fuckhtml = new fuckhtml();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getfilters($page){
|
|
||||||
|
|
||||||
if($page != "web"){
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
return [
|
|
||||||
"lang" => [
|
|
||||||
"display" => "Language",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any language",
|
|
||||||
"en" => "English",
|
|
||||||
"de" => "German",
|
|
||||||
"fr" => "French",
|
|
||||||
"ja" => "Japanese",
|
|
||||||
"it" => "Italian",
|
|
||||||
"es" => "Spanish",
|
|
||||||
"ru" => "Russian",
|
|
||||||
"nl" => "Dutch",
|
|
||||||
"pl" => "Polish",
|
|
||||||
"tr" => "Turkish",
|
|
||||||
"da" => "Danish",
|
|
||||||
"sv" => "Swedish",
|
|
||||||
"no" => "Norwegian",
|
|
||||||
"is" => "Icelandic",
|
|
||||||
"fo" => "Faroese",
|
|
||||||
"fi" => "Finnish",
|
|
||||||
"et" => "Estonian",
|
|
||||||
"lt" => "Lithuanian",
|
|
||||||
"lv" => "Latvian",
|
|
||||||
"cy" => "Welsh",
|
|
||||||
"ga" => "Irish",
|
|
||||||
"gd" => "Scottish Gaelic",
|
|
||||||
"br" => "Breton",
|
|
||||||
"fy" => "Frisian",
|
|
||||||
"frr" => "North Frisian",
|
|
||||||
"gem" => "Saterland Frisian",
|
|
||||||
"lb" => "Luxembourgish",
|
|
||||||
"rm" => "Romansh",
|
|
||||||
"pt" => "Portuguese",
|
|
||||||
"ca" => "Catalan",
|
|
||||||
"gl" => "Galician",
|
|
||||||
"eu" => "Basque",
|
|
||||||
"ast" => "Asturian",
|
|
||||||
"an" => "Aragonese",
|
|
||||||
"fur" => "Friulan",
|
|
||||||
"sc" => "Sardinian",
|
|
||||||
"scn" => "Sicilian",
|
|
||||||
"oc" => "Occitan",
|
|
||||||
"be" => "Belarusian",
|
|
||||||
"cs" => "Czech",
|
|
||||||
"hu" => "Hungarian",
|
|
||||||
"sk" => "Slovak",
|
|
||||||
"uk" => "Ukrainian",
|
|
||||||
"csb" => "Kashubian",
|
|
||||||
"tt" => "Tatar",
|
|
||||||
"ba" => "Bashkir",
|
|
||||||
"os" => "Ossetian",
|
|
||||||
"sl" => "Slovene",
|
|
||||||
"sr" => "Serbian",
|
|
||||||
"hr" => "Croatian",
|
|
||||||
"bs" => "Bosnian",
|
|
||||||
"bg" => "Bulgarian",
|
|
||||||
"sq" => "Albanian",
|
|
||||||
"ro" => "Romanian",
|
|
||||||
"mk" => "Macedonian",
|
|
||||||
"el" => "Greek",
|
|
||||||
"iw" => "Hebrew",
|
|
||||||
"fa" => "Persian",
|
|
||||||
"ar" => "Arabic",
|
|
||||||
"ku" => "Kurdish",
|
|
||||||
"az" => "Azerbaijani",
|
|
||||||
"hy" => "Armenian",
|
|
||||||
"af" => "Afrikaans",
|
|
||||||
"sw" => "Kiswahili",
|
|
||||||
"uz" => "Uzbek",
|
|
||||||
"kk" => "Kazakh",
|
|
||||||
"ky" => "Kyrgyz",
|
|
||||||
"tg" => "Tajik",
|
|
||||||
"tk" => "Turkmen",
|
|
||||||
"ug" => "Uyghurche",
|
|
||||||
"hi" => "Hindi",
|
|
||||||
"si" => "Sinhalese",
|
|
||||||
"gu" => "Gujarati",
|
|
||||||
"ur" => "Urdu",
|
|
||||||
"mr" => "Marathi",
|
|
||||||
"pa" => "Punjabi",
|
|
||||||
"bn" => "Bengali",
|
|
||||||
"ta" => "Tamil",
|
|
||||||
"te" => "Telugu",
|
|
||||||
"kn" => "Kannada",
|
|
||||||
"zh_CN" => "Chinese Simplified",
|
|
||||||
"zh_TW" => "Chinese Traditional",
|
|
||||||
"ko" => "Korean",
|
|
||||||
"cfr" => "Taiwanese",
|
|
||||||
"th" => "Thai",
|
|
||||||
"vi" => "Vietnamese",
|
|
||||||
"in" => "Indonesian",
|
|
||||||
"ms" => "Malay",
|
|
||||||
"tl" => "Tagalog",
|
|
||||||
"eo" => "Esperanto",
|
|
||||||
"ia" => "Interlingua",
|
|
||||||
"la" => "Latin"
|
|
||||||
]
|
|
||||||
]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
private function get($proxy, $url, $get = []){
|
|
||||||
|
|
||||||
$curlproc = curl_init();
|
|
||||||
|
|
||||||
if($get !== []){
|
|
||||||
$get = http_build_query($get);
|
|
||||||
$url .= "?" . $get;
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip",
|
|
||||||
"DNT: 1",
|
|
||||||
"Connection: keep-alive",
|
|
||||||
"Upgrade-Insecure-Requests: 1",
|
|
||||||
"Sec-Fetch-Dest: document",
|
|
||||||
"Sec-Fetch-Mode: navigate",
|
|
||||||
"Sec-Fetch-Site: none",
|
|
||||||
"Sec-Fetch-User: ?1"]
|
|
||||||
);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
|
|
||||||
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
|
|
||||||
|
|
||||||
$this->backend->assign_proxy($curlproc, $proxy);
|
|
||||||
|
|
||||||
$data = curl_exec($curlproc);
|
|
||||||
|
|
||||||
if(curl_errno($curlproc)){
|
|
||||||
|
|
||||||
throw new Exception(curl_error($curlproc));
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_close($curlproc);
|
|
||||||
return $data;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function web($get){
|
|
||||||
|
|
||||||
if($get["npt"]){
|
|
||||||
|
|
||||||
[$query, $proxy] = $this->backend->get($get["npt"], "web");
|
|
||||||
|
|
||||||
try{
|
|
||||||
$html = $this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://curlie.org/" . $query,
|
|
||||||
[]
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch search page");
|
|
||||||
}
|
|
||||||
|
|
||||||
}else{
|
|
||||||
$proxy = $this->backend->get_ip();
|
|
||||||
|
|
||||||
$query = [
|
|
||||||
"q" => $get["s"],
|
|
||||||
"start" => 0,
|
|
||||||
"stime" => 92452189 // ?
|
|
||||||
];
|
|
||||||
|
|
||||||
if($get["lang"] !== "any"){
|
|
||||||
|
|
||||||
$query["lang"] = $get["lang"];
|
|
||||||
}
|
|
||||||
|
|
||||||
try{
|
|
||||||
$html = $this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://curlie.org/search",
|
|
||||||
$query
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch search page");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->fuckhtml->load($html);
|
|
||||||
|
|
||||||
$nextpage =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"next-page",
|
|
||||||
"a"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($nextpage) !== 0){
|
|
||||||
|
|
||||||
$nextpage =
|
|
||||||
$this->backend->store(
|
|
||||||
$nextpage[0]["attributes"]["href"],
|
|
||||||
"web",
|
|
||||||
$proxy
|
|
||||||
);
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$nextpage = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [
|
|
||||||
"status" => "ok",
|
|
||||||
"spelling" => [
|
|
||||||
"type" => "no_correction",
|
|
||||||
"using" => null,
|
|
||||||
"correction" => null
|
|
||||||
],
|
|
||||||
"npt" => $nextpage,
|
|
||||||
"answer" => [],
|
|
||||||
"web" => [],
|
|
||||||
"image" => [],
|
|
||||||
"video" => [],
|
|
||||||
"news" => [],
|
|
||||||
"related" => []
|
|
||||||
];
|
|
||||||
|
|
||||||
$items =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"site-item",
|
|
||||||
"div"
|
|
||||||
);
|
|
||||||
|
|
||||||
foreach($items as $item){
|
|
||||||
|
|
||||||
$this->fuckhtml->load($item);
|
|
||||||
|
|
||||||
$a =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByAttributeValue(
|
|
||||||
"target",
|
|
||||||
"_blank",
|
|
||||||
"a"
|
|
||||||
)[0];
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName("site-descr");
|
|
||||||
|
|
||||||
if(count($description) !== 0){
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$description[0]
|
|
||||||
);
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$description = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out["web"][] = [
|
|
||||||
"title" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$a
|
|
||||||
),
|
|
||||||
"description" => $description,
|
|
||||||
"url" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$a["attributes"]["href"]
|
|
||||||
),
|
|
||||||
"date" => null,
|
|
||||||
"type" => "web",
|
|
||||||
"thumb" => [
|
|
||||||
"url" => null,
|
|
||||||
"ratio" => null
|
|
||||||
],
|
|
||||||
"sublink" => [],
|
|
||||||
"table" => []
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -12,6 +12,8 @@ class google{
|
|||||||
|
|
||||||
include "lib/backend.php";
|
include "lib/backend.php";
|
||||||
$this->backend = new backend("google");
|
$this->backend = new backend("google");
|
||||||
|
|
||||||
|
$this->message = "Still working on a Google scraper that uses a headful browser. It will require Firefox + a webExtension running on a dedicated server. Waiting for my EDID adapter and we can get the show going. In the meantime, use the Google CSE/API or Yahoo JP/Startpage scrapers. They're all crippled in their own special ways but they're serviceable I guess.";
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getfilters($page){
|
public function getfilters($page){
|
||||||
@@ -505,7 +507,7 @@ class google{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = [], $alt_ua = false){
|
private function get($proxy, $url, $get = []){
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
@@ -518,35 +520,22 @@ class google{
|
|||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||||
|
|
||||||
if($alt_ua === true){
|
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||||
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER, [
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER, [
|
"User-Agent: " . config::USER_AGENT,
|
||||||
"User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 26_0_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
"Accept: text/html, application/xml;q=0.9, */*;q=0.8",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"Accept-Language: en-US,en;q=0.8",
|
"Accept-Encoding: gzip",
|
||||||
"Accept-Encoding: gzip, deflate",
|
"DNT: 1",
|
||||||
"Connection: Keep-Alive",
|
"Connection: keep-alive",
|
||||||
"Cache-Control: no-cache"
|
"Upgrade-Insecure-Requests: 1",
|
||||||
]);
|
"Sec-Fetch-Dest: document",
|
||||||
}else{
|
"Sec-Fetch-Mode: navigate",
|
||||||
|
"Sec-Fetch-Site: none",
|
||||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
"Sec-Fetch-User: ?1",
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER, [
|
"Priority: u=1",
|
||||||
"User-Agent: " . config::USER_AGENT,
|
"TE: trailers"
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
]);
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip",
|
|
||||||
"DNT: 1",
|
|
||||||
"Connection: keep-alive",
|
|
||||||
"Upgrade-Insecure-Requests: 1",
|
|
||||||
"Sec-Fetch-Dest: document",
|
|
||||||
"Sec-Fetch-Mode: navigate",
|
|
||||||
"Sec-Fetch-Site: none",
|
|
||||||
"Sec-Fetch-User: ?1",
|
|
||||||
"Priority: u=1",
|
|
||||||
"TE: trailers"
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||||
@@ -574,228 +563,22 @@ class google{
|
|||||||
|
|
||||||
public function web($get){
|
public function web($get){
|
||||||
|
|
||||||
throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
|
throw new Exception($this->message);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public function video($get){
|
|
||||||
throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public function news($get){
|
|
||||||
throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public function image($get){
|
public function image($get){
|
||||||
|
throw new Exception($this->message);
|
||||||
|
}
|
||||||
|
|
||||||
// generate parameters
|
|
||||||
if($get["npt"]){
|
|
||||||
|
|
||||||
[$params, $proxy] =
|
public function video($get){
|
||||||
$this->backend->get(
|
throw new Exception($this->message);
|
||||||
$get["npt"],
|
}
|
||||||
"images"
|
|
||||||
);
|
|
||||||
|
|
||||||
$params = json_decode($params, true);
|
|
||||||
|
|
||||||
$page = $params["page"] + 1;
|
public function news($get){
|
||||||
$params = $params["params"];
|
throw new Exception($this->message);
|
||||||
$params["async"] = "_fmt:json,p:1,ijn:{$page}";
|
|
||||||
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$search = $get["s"];
|
|
||||||
if(strlen($search) === 0){
|
|
||||||
|
|
||||||
throw new Exception("Search term is empty!");
|
|
||||||
}
|
|
||||||
|
|
||||||
$proxy = $this->backend->get_ip();
|
|
||||||
$country = $get["country"];
|
|
||||||
$nsfw = $get["nsfw"];
|
|
||||||
$time = $get["time"];
|
|
||||||
$size = $get["size"];
|
|
||||||
$ratio = $get["ratio"];
|
|
||||||
$color = $get["color"];
|
|
||||||
$type = $get["type"];
|
|
||||||
$format = $get["format"];
|
|
||||||
$rights = $get["rights"];
|
|
||||||
|
|
||||||
$page = 0;
|
|
||||||
|
|
||||||
$params = [
|
|
||||||
"q" => $search,
|
|
||||||
"tbm" => "isch",
|
|
||||||
"asearch" => "isch",
|
|
||||||
"async" => "_fmt:json,p:0,ijn:{$page}", // ijn:0 = page 1
|
|
||||||
];
|
|
||||||
|
|
||||||
// country (image search uses cr instead of gl)
|
|
||||||
if($country != "any"){
|
|
||||||
|
|
||||||
$params["cr"] = "country" . strtoupper($country);
|
|
||||||
}
|
|
||||||
|
|
||||||
// nsfw
|
|
||||||
$params["safe"] = $nsfw == "yes" ? "off" : "active";
|
|
||||||
|
|
||||||
// generate tbs
|
|
||||||
$tbs = [];
|
|
||||||
|
|
||||||
// time
|
|
||||||
if($time != "any"){
|
|
||||||
|
|
||||||
$tbs["qdr"] = $time;
|
|
||||||
}
|
|
||||||
|
|
||||||
// size
|
|
||||||
if($size != "any"){
|
|
||||||
|
|
||||||
$params["imgsz"] = $size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ratio
|
|
||||||
if($ratio != "any"){
|
|
||||||
|
|
||||||
$params["imgar"] = $ratio;
|
|
||||||
}
|
|
||||||
|
|
||||||
// color
|
|
||||||
if($color != "any"){
|
|
||||||
|
|
||||||
if(
|
|
||||||
$color == "color" ||
|
|
||||||
$color == "trans"
|
|
||||||
){
|
|
||||||
|
|
||||||
$params["imgc"] = $color;
|
|
||||||
}elseif($color == "bnw"){
|
|
||||||
|
|
||||||
$params["imgc"] = "gray";
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$tbs["ic"] = "specific";
|
|
||||||
$tbs["isc"] = $color;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// type
|
|
||||||
if($type != "any"){
|
|
||||||
|
|
||||||
$tbs["itp"] = $type;
|
|
||||||
}
|
|
||||||
|
|
||||||
// format
|
|
||||||
if($format != "any"){
|
|
||||||
|
|
||||||
$params["as_filetype"] = $format;
|
|
||||||
}
|
|
||||||
|
|
||||||
// rights (tbs)
|
|
||||||
if($rights != "any"){
|
|
||||||
|
|
||||||
$tbs["sur"] = $rights;
|
|
||||||
}
|
|
||||||
|
|
||||||
// append tbs
|
|
||||||
if(count($tbs) !== 0){
|
|
||||||
|
|
||||||
$params["tbs"] = "";
|
|
||||||
|
|
||||||
foreach($tbs as $key => $value){
|
|
||||||
|
|
||||||
$params["tbs"] .= $key . ":" . $value . ",";
|
|
||||||
}
|
|
||||||
|
|
||||||
$params["tbs"] = rtrim($params["tbs"], ",");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try{
|
|
||||||
$json =
|
|
||||||
$this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://www.google.com/search",
|
|
||||||
$params
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to get search page");
|
|
||||||
}
|
|
||||||
|
|
||||||
unset($params["async"]);
|
|
||||||
|
|
||||||
//$json = file_get_contents("scraper/google.json");
|
|
||||||
|
|
||||||
// detect captcha
|
|
||||||
$this->fuckhtml->load($json);
|
|
||||||
$this->detect_sorry();
|
|
||||||
|
|
||||||
// remove xssi
|
|
||||||
$json =
|
|
||||||
preg_replace(
|
|
||||||
'/^[^{]*/',
|
|
||||||
"",
|
|
||||||
$json
|
|
||||||
);
|
|
||||||
|
|
||||||
$json = json_decode($json, true);
|
|
||||||
|
|
||||||
if($json === null){
|
|
||||||
|
|
||||||
throw new Exception("Failed to decode JSON");
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [
|
|
||||||
"status" => "ok",
|
|
||||||
"npt" => null,
|
|
||||||
"image" => []
|
|
||||||
];
|
|
||||||
|
|
||||||
if(!isset($json["ischj"]["metadata"])){
|
|
||||||
|
|
||||||
throw new Exception("Google did not return an image array");
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach($json["ischj"]["metadata"] as $image){
|
|
||||||
|
|
||||||
$out["image"][] = [
|
|
||||||
"title" => $this->titledots($image["result"]["page_title"]),
|
|
||||||
"source" => [
|
|
||||||
[
|
|
||||||
"url" => $image["original_image"]["url"],
|
|
||||||
"width" => (int)$image["original_image"]["width"],
|
|
||||||
"height" => (int)$image["original_image"]["height"]
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"url" => $image["thumbnail"]["url"],
|
|
||||||
"width" => (int)$image["thumbnail"]["width"],
|
|
||||||
"height" => (int)$image["thumbnail"]["height"]
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"url" => $image["result"]["referrer_url"]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
$page++;
|
|
||||||
|
|
||||||
if(count($out["image"]) === 10){
|
|
||||||
|
|
||||||
$out["npt"] =
|
|
||||||
$this->backend->store(
|
|
||||||
json_encode([
|
|
||||||
"params" => $params,
|
|
||||||
"page" => $page
|
|
||||||
]),
|
|
||||||
"images",
|
|
||||||
$proxy
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -264,6 +264,25 @@ class google_api{
|
|||||||
"yes" => "Yes", // safe=active
|
"yes" => "Yes", // safe=active
|
||||||
"no" => "No" // safe=off
|
"no" => "No" // safe=off
|
||||||
]
|
]
|
||||||
|
],
|
||||||
|
"sort" => [ // sort
|
||||||
|
"display" => "Sort by",
|
||||||
|
"option" => [
|
||||||
|
"any" => "Any order",
|
||||||
|
"date:d" => "Oldest",
|
||||||
|
"date:a" => "Newest"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"newer" => [
|
||||||
|
"display" => "Newer than",
|
||||||
|
"option" => "_DATE"
|
||||||
|
],
|
||||||
|
"rm_dupes" => [ // filter
|
||||||
|
"display" => "Remove duplicates",
|
||||||
|
"option" => [
|
||||||
|
"yes" => "Yes", // 1
|
||||||
|
"no" => "No" // 0
|
||||||
|
]
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -313,109 +332,29 @@ class google_api{
|
|||||||
"zh-CN" => "Chinese (Simplified)",
|
"zh-CN" => "Chinese (Simplified)",
|
||||||
"zh-TW" => "Chinese (Traditional)"
|
"zh-TW" => "Chinese (Traditional)"
|
||||||
]
|
]
|
||||||
],
|
|
||||||
"sort" => [
|
|
||||||
"display" => "Sort by",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any order",
|
|
||||||
"date:d" => "Oldest",
|
|
||||||
"date:a" => "Newest"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"newer" => [
|
|
||||||
"display" => "Newer than",
|
|
||||||
"option" => "_DATE"
|
|
||||||
],
|
|
||||||
"rm_dupes" => [
|
|
||||||
"display" => "Remove duplicates",
|
|
||||||
"option" => [
|
|
||||||
"yes" => "Yes",
|
|
||||||
"no" => "No"
|
|
||||||
]
|
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
/*
|
|
||||||
case "images":
|
case "images":
|
||||||
return array_merge(
|
return array_merge(
|
||||||
$base,
|
$base,
|
||||||
[
|
[
|
||||||
"time" => [ // tbs=qdr:<time>
|
"size" => [ // imgSize
|
||||||
"display" => "Time posted",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any time",
|
|
||||||
"d" => "Past 24 hours",
|
|
||||||
"w" => "Past week",
|
|
||||||
"m" => "Past month",
|
|
||||||
"y" => "Past year"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"size" => [ // imgsz
|
|
||||||
"display" => "Size",
|
"display" => "Size",
|
||||||
"option" => [
|
"option" => [
|
||||||
"any" => "Any size",
|
"any" => "Any size",
|
||||||
"l" => "Large",
|
"icon" => "Icon",
|
||||||
"m" => "Medium",
|
"small" => "Small",
|
||||||
"i" => "Icon",
|
"medium" => "Medium",
|
||||||
"qsvga" => "Larger than 400x300",
|
"large" => "Large",
|
||||||
"vga" => "Larger than 640x480",
|
"xlarge" => "X-Large",
|
||||||
"svga" => "Larger than 800x600",
|
"xxlarge" => "XX-Large",
|
||||||
"xga" => "Larger than 1024x768",
|
"huge" => "Huge"
|
||||||
"2mp" => "Larger than 2MP",
|
|
||||||
"4mp" => "Larger than 4MP",
|
|
||||||
"6mp" => "Larger than 6MP",
|
|
||||||
"8mp" => "Larger than 8MP",
|
|
||||||
"10mp" => "Larger than 10MP",
|
|
||||||
"12mp" => "Larger than 12MP",
|
|
||||||
"15mp" => "Larger than 15MP",
|
|
||||||
"20mp" => "Larger than 20MP",
|
|
||||||
"40mp" => "Larger than 40MP",
|
|
||||||
"70mp" => "Larger than 70MP"
|
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"ratio" => [ // imgar
|
"format" => [ // fileType
|
||||||
"display" => "Aspect ratio",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any ratio",
|
|
||||||
"t|xt" => "Tall",
|
|
||||||
"s" => "Square",
|
|
||||||
"w" => "Wide",
|
|
||||||
"xw" => "Panoramic"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"color" => [ // imgc
|
|
||||||
"display" => "Color",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any color",
|
|
||||||
"color" => "Full color",
|
|
||||||
"bnw" => "Black & white",
|
|
||||||
"trans" => "Transparent",
|
|
||||||
// from here, imgcolor
|
|
||||||
"red" => "Red",
|
|
||||||
"orange" => "Orange",
|
|
||||||
"yellow" => "Yellow",
|
|
||||||
"green" => "Green",
|
|
||||||
"teal" => "Teal",
|
|
||||||
"blue" => "Blue",
|
|
||||||
"purple" => "Purple",
|
|
||||||
"pink" => "Pink",
|
|
||||||
"white" => "White",
|
|
||||||
"gray" => "Gray",
|
|
||||||
"black" => "Black",
|
|
||||||
"brown" => "Brown"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"type" => [ // tbs=itp:<type>
|
|
||||||
"display" => "Type",
|
|
||||||
"option" => [
|
|
||||||
"any" => "Any type",
|
|
||||||
"clipart" => "Clip Art",
|
|
||||||
"lineart" => "Line Drawing",
|
|
||||||
"animated" => "Animated"
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"format" => [ // as_filetype
|
|
||||||
"display" => "Format",
|
"display" => "Format",
|
||||||
"option" => [
|
"option" => [
|
||||||
"any" => "Any format",
|
"any" => "Any format",
|
||||||
@@ -429,17 +368,55 @@ class google_api{
|
|||||||
"craw" => "RAW"
|
"craw" => "RAW"
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"rights" => [ // tbs=sur:<rights>
|
"color" => [
|
||||||
|
"display" => "Color",
|
||||||
|
"option" => [
|
||||||
|
"any" => "Any color",
|
||||||
|
|
||||||
|
"color" => "Full color", // imgColorType
|
||||||
|
"mono" => "Black & White",
|
||||||
|
"trans" => "Transparent background",
|
||||||
|
|
||||||
|
"red" => "Red", // imgDominantColor
|
||||||
|
"orange" => "Orange",
|
||||||
|
"yellow" => "Yellow",
|
||||||
|
"green" => "Green",
|
||||||
|
"teal" => "Teal",
|
||||||
|
"blue" => "Blue",
|
||||||
|
"purple" => "Purple",
|
||||||
|
"pink" => "Pink",
|
||||||
|
"white" => "White",
|
||||||
|
"gray" => "Gray",
|
||||||
|
"black" => "Black",
|
||||||
|
"brown" => "Brown"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"type" => [ // imgType
|
||||||
|
"display" => "Type",
|
||||||
|
"option" => [
|
||||||
|
"any" => "Any type",
|
||||||
|
"clipart" => "Clip Art",
|
||||||
|
"face" => "Faces",
|
||||||
|
"lineart" => "Line Drawing",
|
||||||
|
"stock" => "Stock photos",
|
||||||
|
"photo" => "Photos",
|
||||||
|
"animated" => "Animated",
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"rights" => [ // rights
|
||||||
"display" => "Usage rights",
|
"display" => "Usage rights",
|
||||||
"option" => [
|
"option" => [
|
||||||
"any" => "Any license",
|
"any" => "Any license",
|
||||||
"cl" => "Creative Commons licenses",
|
"cc_publicdomain" => "Public domain",
|
||||||
"ol" => "Commercial & other licenses"
|
"cc_attribute" => "Attribution required",
|
||||||
|
"cc_sharealike" => "Sharealike",
|
||||||
|
"cc_noncommercial" => "Non-commercial use only",
|
||||||
|
"cc_nonderived" => "Original works"
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
break;*/
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -485,6 +462,7 @@ class google_api{
|
|||||||
return $data;
|
return $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public function web($get){
|
public function web($get){
|
||||||
|
|
||||||
// rotate proxy + key on EVERY request
|
// rotate proxy + key on EVERY request
|
||||||
@@ -731,6 +709,160 @@ class google_api{
|
|||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public function image($get){
|
||||||
|
|
||||||
|
// rotate proxy + key on EVERY request
|
||||||
|
$keydata = $this->backend->get_key();
|
||||||
|
$proxy = $this->backend->get_ip($keydata["increment"]);
|
||||||
|
|
||||||
|
if($get["npt"]){
|
||||||
|
|
||||||
|
// $p is never used
|
||||||
|
[$params, $p] = $this->backend->get(
|
||||||
|
$get["npt"],
|
||||||
|
"web"
|
||||||
|
);
|
||||||
|
|
||||||
|
$params = json_decode($params, true);
|
||||||
|
|
||||||
|
$params["key"] = $keydata["key"];
|
||||||
|
|
||||||
|
}else{
|
||||||
|
|
||||||
|
//$json = file_get_contents("scraper/google.json");
|
||||||
|
$params = [
|
||||||
|
"q" => $get["s"],
|
||||||
|
"cx" => config::GOOGLE_CX_ENDPOINT,
|
||||||
|
"num" => 10,
|
||||||
|
"start" => 1,
|
||||||
|
"searchType" => "image",
|
||||||
|
"key" => $keydata["key"]
|
||||||
|
];
|
||||||
|
|
||||||
|
//
|
||||||
|
// parse filters
|
||||||
|
//
|
||||||
|
if($get["newer"] !== false){
|
||||||
|
|
||||||
|
$params["dateRestrict"] = "d" . (round((time() - $get["newer"]) / 100000));
|
||||||
|
}
|
||||||
|
|
||||||
|
if($get["rm_dupes"] == "no"){ $params["filter"] = "0"; }
|
||||||
|
if($get["country"] != "any"){ $params["gl"] = $get["country"]; }
|
||||||
|
|
||||||
|
if($get["nsfw"] == "yes"){
|
||||||
|
|
||||||
|
$params["safe"] = "off";
|
||||||
|
}else{
|
||||||
|
|
||||||
|
$params["safe"] = "active";
|
||||||
|
}
|
||||||
|
|
||||||
|
if($get["sort"] != "any"){ $params["sort"] = $get["sort"]; }
|
||||||
|
|
||||||
|
// image filters
|
||||||
|
if($get["size"] != "any"){ $params["imgSize"] = $get["size"]; }
|
||||||
|
if($get["format"] != "any"){ $params["fileType"] = $get["format"]; }
|
||||||
|
|
||||||
|
switch($get["color"]){
|
||||||
|
|
||||||
|
case "any":
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "color":
|
||||||
|
case "mono":
|
||||||
|
case "trans":
|
||||||
|
$params["imgColorType"] = $get["color"];
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
$params["imgDominantColor"] = $get["color"];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if($get["type"] != "any"){ $params["imgType"] = $get["type"]; }
|
||||||
|
if($get["rights"] != "any"){ $params["rights"] = $get["rights"]; }
|
||||||
|
}
|
||||||
|
|
||||||
|
try{
|
||||||
|
$json =
|
||||||
|
$this->get(
|
||||||
|
$proxy,
|
||||||
|
"https://www.googleapis.com/customsearch/v1",
|
||||||
|
$params
|
||||||
|
);
|
||||||
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
throw new Exception("Failed to fetch JSON");
|
||||||
|
}
|
||||||
|
|
||||||
|
$json = json_decode($json, true);
|
||||||
|
|
||||||
|
if($json === null){
|
||||||
|
|
||||||
|
throw new Exception("Failed to decode JSON");
|
||||||
|
}
|
||||||
|
|
||||||
|
$out = [
|
||||||
|
"status" => "ok",
|
||||||
|
"npt" => null,
|
||||||
|
"image" => []
|
||||||
|
];
|
||||||
|
|
||||||
|
if(isset($json["error"]["message"])){
|
||||||
|
|
||||||
|
throw new Exception(
|
||||||
|
"API returned an error: " .
|
||||||
|
$json["error"]["message"] .
|
||||||
|
" (key #" . $keydata["increment"] . ")"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!isset($json["items"])){
|
||||||
|
|
||||||
|
// google just doesnt return items when theres no results
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach($json["items"] as $image){
|
||||||
|
|
||||||
|
$out["image"][] = [
|
||||||
|
"title" => $this->titledots($image["title"]),
|
||||||
|
"source" => [
|
||||||
|
[
|
||||||
|
"url" => $image["link"],
|
||||||
|
"width" => (int)$image["image"]["width"],
|
||||||
|
"height" => (int)$image["image"]["height"]
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"url" => $image["image"]["thumbnailLink"],
|
||||||
|
"width" => (int)$image["image"]["thumbnailWidth"],
|
||||||
|
"height" => (int)$image["image"]["thumbnailHeight"]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"url" => $image["image"]["contextLink"]
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
// get npt
|
||||||
|
if(isset($json["queries"]["nextPage"][0]["startIndex"])){
|
||||||
|
|
||||||
|
unset($params["key"]);
|
||||||
|
$params["start"] = (int)$json["queries"]["nextPage"][0]["startIndex"];
|
||||||
|
|
||||||
|
$out["npt"] =
|
||||||
|
$this->backend->store(
|
||||||
|
json_encode($params),
|
||||||
|
"web",
|
||||||
|
$proxy
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private function titledots($title){
|
private function titledots($title){
|
||||||
|
|
||||||
return trim($title, " .\t\n\r\0\x0B…");
|
return trim($title, " .\t\n\r\0\x0B…");
|
||||||
|
|||||||
@@ -1,452 +0,0 @@
|
|||||||
<?php
|
|
||||||
// greppr dev probably monitors 4get code, lol
|
|
||||||
// hello greppr dude, add an API you moron
|
|
||||||
|
|
||||||
class greppr{
|
|
||||||
|
|
||||||
public function __construct(){
|
|
||||||
|
|
||||||
include "lib/backend.php";
|
|
||||||
$this->backend = new backend("greppr");
|
|
||||||
|
|
||||||
include "lib/fuckhtml.php";
|
|
||||||
$this->fuckhtml = new fuckhtml();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getfilters($page){
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
private function get($proxy, $url, $get = [], $cookies = [], $post = false){
|
|
||||||
|
|
||||||
$curlproc = curl_init();
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
|
||||||
|
|
||||||
$cookie = [];
|
|
||||||
foreach($cookies as $k => $v){
|
|
||||||
|
|
||||||
$cookie[] = "{$k}={$v}";
|
|
||||||
}
|
|
||||||
|
|
||||||
$cookie = implode("; ", $cookie);
|
|
||||||
|
|
||||||
if($post === false){
|
|
||||||
|
|
||||||
if($get !== []){
|
|
||||||
$get = http_build_query($get);
|
|
||||||
$url .= "?" . $get;
|
|
||||||
}
|
|
||||||
|
|
||||||
if($cookie == ""){
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip",
|
|
||||||
"DNT: 1",
|
|
||||||
"Connection: keep-alive",
|
|
||||||
"Upgrade-Insecure-Requests: 1",
|
|
||||||
"Sec-Fetch-Dest: document",
|
|
||||||
"Sec-Fetch-Mode: navigate",
|
|
||||||
"Sec-Fetch-Site: none",
|
|
||||||
"Sec-Fetch-User: ?1"]
|
|
||||||
);
|
|
||||||
}else{
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
|
||||||
"DNT: 1",
|
|
||||||
"Sec-GPC: 1",
|
|
||||||
"Connection: keep-alive",
|
|
||||||
"Referer: https://greppr.org/search",
|
|
||||||
"Cookie: {$cookie}",
|
|
||||||
"Upgrade-Insecure-Requests: 1",
|
|
||||||
"Sec-Fetch-Dest: document",
|
|
||||||
"Sec-Fetch-Mode: navigate",
|
|
||||||
"Sec-Fetch-Site: same-origin",
|
|
||||||
"Sec-Fetch-User: ?1",
|
|
||||||
"Priority: u=0, i"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$get = http_build_query($get);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_POST, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
|
||||||
"Content-Type: application/x-www-form-urlencoded",
|
|
||||||
"Content-Length: " . strlen($get),
|
|
||||||
"Origin: https://greppr.org",
|
|
||||||
"DNT: 1",
|
|
||||||
"Sec-GPC: 1",
|
|
||||||
"Connection: keep-alive",
|
|
||||||
"Referer: https://greppr.org/",
|
|
||||||
"Cookie: {$cookie}",
|
|
||||||
"Upgrade-Insecure-Requests: 1",
|
|
||||||
"Sec-Fetch-Dest: document",
|
|
||||||
"Sec-Fetch-Mode: navigate",
|
|
||||||
"Sec-Fetch-Site: same-origin",
|
|
||||||
"Sec-Fetch-User: ?1",
|
|
||||||
"Priority: u=0, i"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
|
||||||
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
|
|
||||||
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
|
|
||||||
|
|
||||||
$this->backend->assign_proxy($curlproc, $proxy);
|
|
||||||
|
|
||||||
$headers = [];
|
|
||||||
|
|
||||||
curl_setopt(
|
|
||||||
$curlproc,
|
|
||||||
CURLOPT_HEADERFUNCTION,
|
|
||||||
function($curlproc, $header) use (&$headers){
|
|
||||||
|
|
||||||
$len = strlen($header);
|
|
||||||
$header = explode(':', $header, 2);
|
|
||||||
|
|
||||||
if(count($header) < 2){
|
|
||||||
|
|
||||||
// ignore invalid headers
|
|
||||||
return $len;
|
|
||||||
}
|
|
||||||
|
|
||||||
$headers[strtolower(trim($header[0]))][] = trim($header[1]);
|
|
||||||
|
|
||||||
return $len;
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
$data = curl_exec($curlproc);
|
|
||||||
|
|
||||||
if(curl_errno($curlproc)){
|
|
||||||
|
|
||||||
throw new Exception(curl_error($curlproc));
|
|
||||||
}
|
|
||||||
|
|
||||||
curl_close($curlproc);
|
|
||||||
|
|
||||||
return [
|
|
||||||
"headers" => $headers,
|
|
||||||
"data" => $data
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
public function web($get, $first_attempt = true){
|
|
||||||
|
|
||||||
if($get["npt"]){
|
|
||||||
|
|
||||||
[$q, $proxy] = $this->backend->get($get["npt"], "web");
|
|
||||||
|
|
||||||
$tokens = json_decode($q, true);
|
|
||||||
|
|
||||||
//
|
|
||||||
// Get paginated page
|
|
||||||
//
|
|
||||||
try{
|
|
||||||
|
|
||||||
$html = $this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://greppr.org" . $tokens["get"],
|
|
||||||
[],
|
|
||||||
$tokens["cookies"],
|
|
||||||
false
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch search page");
|
|
||||||
}
|
|
||||||
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$search = $get["s"];
|
|
||||||
if(strlen($search) === 0){
|
|
||||||
|
|
||||||
throw new Exception("Search term is empty!");
|
|
||||||
}
|
|
||||||
|
|
||||||
$proxy = $this->backend->get_ip();
|
|
||||||
|
|
||||||
//
|
|
||||||
// get token
|
|
||||||
//
|
|
||||||
try{
|
|
||||||
|
|
||||||
$html =
|
|
||||||
$this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://greppr.org",
|
|
||||||
[],
|
|
||||||
[],
|
|
||||||
false
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch homepage");
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Parse token
|
|
||||||
//
|
|
||||||
$this->fuckhtml->load($html["data"]);
|
|
||||||
|
|
||||||
$tokens = [
|
|
||||||
"req" => null,
|
|
||||||
"data" => null,
|
|
||||||
"cookies" => null
|
|
||||||
];
|
|
||||||
|
|
||||||
$inputs =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"input"
|
|
||||||
);
|
|
||||||
|
|
||||||
foreach($inputs as $input){
|
|
||||||
|
|
||||||
if(!isset($input["attributes"]["name"])){
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(
|
|
||||||
isset($input["attributes"]["value"]) &&
|
|
||||||
!empty($input["attributes"]["value"])
|
|
||||||
){
|
|
||||||
|
|
||||||
$tokens
|
|
||||||
["data"]
|
|
||||||
[$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$input["attributes"]["name"]
|
|
||||||
)] =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$input["attributes"]["value"]
|
|
||||||
);
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$tokens["req"] =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$input["attributes"]["name"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if($tokens["req"] === null){
|
|
||||||
|
|
||||||
throw new Exception("Failed to get request ID");
|
|
||||||
}
|
|
||||||
|
|
||||||
if(isset($html["headers"]["set-cookie"])){
|
|
||||||
|
|
||||||
foreach($html["headers"]["set-cookie"] as $cookie){
|
|
||||||
|
|
||||||
if(
|
|
||||||
preg_match(
|
|
||||||
'/([^=]+)=([^;]+)/',
|
|
||||||
$cookie,
|
|
||||||
$matches
|
|
||||||
)
|
|
||||||
){
|
|
||||||
|
|
||||||
$tokens["cookies"][$matches[1]] = $matches[2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Get initial search page
|
|
||||||
//
|
|
||||||
$tokens_req = $tokens["data"];
|
|
||||||
$tokens_req[$tokens["req"]] = $search;
|
|
||||||
|
|
||||||
try{
|
|
||||||
|
|
||||||
$html = $this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://greppr.org/search",
|
|
||||||
$tokens_req,
|
|
||||||
$tokens["cookies"],
|
|
||||||
true
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch search page");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//$html = file_get_contents("scraper/greppr.html");
|
|
||||||
//$this->fuckhtml->load($html);
|
|
||||||
$this->fuckhtml->load($html["data"]);
|
|
||||||
|
|
||||||
$out = [
|
|
||||||
"status" => "ok",
|
|
||||||
"spelling" => [
|
|
||||||
"type" => "no_correction",
|
|
||||||
"using" => null,
|
|
||||||
"correction" => null
|
|
||||||
],
|
|
||||||
"npt" => null,
|
|
||||||
"answer" => [],
|
|
||||||
"web" => [],
|
|
||||||
"image" => [],
|
|
||||||
"video" => [],
|
|
||||||
"news" => [],
|
|
||||||
"related" => []
|
|
||||||
];
|
|
||||||
|
|
||||||
// get results for later
|
|
||||||
$results =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"result",
|
|
||||||
"div"
|
|
||||||
);
|
|
||||||
|
|
||||||
// check for next page
|
|
||||||
$next_elem =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"pagination",
|
|
||||||
"ul"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($next_elem) !== 0){
|
|
||||||
|
|
||||||
$this->fuckhtml->load($next_elem[0]);
|
|
||||||
|
|
||||||
$as =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"page-link",
|
|
||||||
"a"
|
|
||||||
);
|
|
||||||
|
|
||||||
$break = false;
|
|
||||||
foreach($as as $a){
|
|
||||||
|
|
||||||
if($break === true){
|
|
||||||
|
|
||||||
$out["npt"] =
|
|
||||||
$this->backend->store(
|
|
||||||
json_encode([
|
|
||||||
"get" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$a["attributes"]["href"]
|
|
||||||
),
|
|
||||||
"cookies" => $tokens["cookies"]
|
|
||||||
]),
|
|
||||||
"web",
|
|
||||||
$proxy
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if($a["attributes"]["href"] == "#"){
|
|
||||||
|
|
||||||
$break = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// scrape results
|
|
||||||
foreach($results as $result){
|
|
||||||
|
|
||||||
$this->fuckhtml->load($result);
|
|
||||||
|
|
||||||
$a =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"a"
|
|
||||||
)[0];
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"highlightedDesc",
|
|
||||||
"p"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($description) === 0){
|
|
||||||
|
|
||||||
$description = null;
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->limitstrlen(
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$description[0]
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
$date =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"p"
|
|
||||||
);
|
|
||||||
|
|
||||||
$date =
|
|
||||||
strtotime(
|
|
||||||
explode(
|
|
||||||
":",
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$date[count($date) - 1]["innerHTML"]
|
|
||||||
)
|
|
||||||
)[1]
|
|
||||||
);
|
|
||||||
|
|
||||||
$out["web"][] = [
|
|
||||||
"title" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$a["innerHTML"]
|
|
||||||
),
|
|
||||||
"description" => $description,
|
|
||||||
"url" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$a["attributes"]["href"]
|
|
||||||
),
|
|
||||||
"date" => $date,
|
|
||||||
"type" => "web",
|
|
||||||
"thumb" => [
|
|
||||||
"url" => null,
|
|
||||||
"ratio" => null
|
|
||||||
],
|
|
||||||
"sublink" => [],
|
|
||||||
"table" => []
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function limitstrlen($text){
	
	// Word-wrap the text at 300 columns and keep only the first
	// wrapped line, so long descriptions are cut on a word boundary.
	$wrapped = wordwrap($text, 300, "\n");
	$lines = explode("\n", $wrapped);
	
	return $lines[0];
}
|
|
||||||
}
|
|
||||||
@@ -297,6 +297,14 @@ class pinterest{
|
|||||||
throw new Exception("Failed to decode JSON");
|
throw new Exception("Failed to decode JSON");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(
|
||||||
|
isset($json["client_context"]["is_bad_bot"]) &&
|
||||||
|
(int)$json["client_context"]["is_bad_bot"] === 1
|
||||||
|
){
|
||||||
|
|
||||||
|
throw new Exception("Pinterest blocked this instance or request proxy.");
|
||||||
|
}
|
||||||
|
|
||||||
$out = [
|
$out = [
|
||||||
"status" => "ok",
|
"status" => "ok",
|
||||||
"npt" => null,
|
"npt" => null,
|
||||||
@@ -426,7 +434,7 @@ class pinterest{
|
|||||||
]
|
]
|
||||||
],
|
],
|
||||||
"url" =>
|
"url" =>
|
||||||
$item["link"] === null ?
|
!isset($item["link"]) ?
|
||||||
"https://ca.pinterest.com/pin/" . $item["id"] :
|
"https://ca.pinterest.com/pin/" . $item["id"] :
|
||||||
$item["link"]
|
$item["link"]
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -564,12 +564,16 @@ class startpage{
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case "spellsuggest-google":
|
case "spellsuggest-google":
|
||||||
$out["spelling"] =
|
|
||||||
[
|
if(isset($category["results"][0]["query"])){
|
||||||
"type" => "including",
|
|
||||||
"using" => $json["render"]["query"],
|
$out["spelling"] =
|
||||||
"correction" => $category["results"][0]["query"]
|
[
|
||||||
];
|
"type" => "including",
|
||||||
|
"using" => $json["render"]["query"],
|
||||||
|
"correction" => urldecode($category["results"][0]["query"])
|
||||||
|
];
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "dictionary-qi":
|
case "dictionary-qi":
|
||||||
@@ -645,318 +649,6 @@ class startpage{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse instant answers
|
|
||||||
if(
|
|
||||||
$get["extendedsearch"] == "yes" &&
|
|
||||||
$get_instant_answer === true
|
|
||||||
){
|
|
||||||
|
|
||||||
// https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
|
|
||||||
try{
|
|
||||||
$post = [
|
|
||||||
"se" => "n0vze2y9dqwy",
|
|
||||||
"q" => $json["render"]["query"],
|
|
||||||
"results" => [], // populate
|
|
||||||
"enableKnowledgePanel" => true,
|
|
||||||
"enableMediaThumbBar" => false,
|
|
||||||
"enableSearchSuggestions" => false,
|
|
||||||
"enableTripadvisorProperties" => [],
|
|
||||||
"enableTripadvisorPlaces" => [],
|
|
||||||
"enableTripadvisorPlacesForLocations" => [],
|
|
||||||
"enableWebProducts" => false,
|
|
||||||
"tripadvisorPartnerId" => null,
|
|
||||||
"tripadvisorMapColorMode" => "light",
|
|
||||||
"tripadvisorDisablesKnowledgePanel" => false,
|
|
||||||
"instantAnswers" => [
|
|
||||||
"smartAnswers",
|
|
||||||
"youtube",
|
|
||||||
"tripadvisor"
|
|
||||||
],
|
|
||||||
"iaType" => null,
|
|
||||||
"forceEnhancedKnowledgePanel" => false,
|
|
||||||
"shoppingOnly" => false,
|
|
||||||
"allowAdultProducts" => true,
|
|
||||||
"lang" => "en",
|
|
||||||
"browserLang" => "en-US",
|
|
||||||
"browserTimezone" => "America/New_York",
|
|
||||||
"market" => null,
|
|
||||||
"userLocation" => null,
|
|
||||||
"userDate" => date("Y-m-d"),
|
|
||||||
"userAgentType" => "unknown"
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach($out["web"] as $result){
|
|
||||||
|
|
||||||
$post["results"][] = [
|
|
||||||
"url" => $result["url"],
|
|
||||||
"title" => $result["title"]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
$post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
|
|
||||||
|
|
||||||
$additional_data =
|
|
||||||
$this->get(
|
|
||||||
$proxy,
|
|
||||||
"https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . $json["render"]["callback_sc"] . "&sr=1",
|
|
||||||
$post,
|
|
||||||
true,
|
|
||||||
true
|
|
||||||
);
|
|
||||||
|
|
||||||
$additional_data = json_decode($additional_data, true);
|
|
||||||
|
|
||||||
if($additional_data === null){
|
|
||||||
|
|
||||||
throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!isset($additional_data["knowledgePanel"])){
|
|
||||||
|
|
||||||
throw new Exception("Response has missing data (knowledgePanel)");
|
|
||||||
}
|
|
||||||
|
|
||||||
$additional_data = $additional_data["knowledgePanel"];
|
|
||||||
|
|
||||||
$answer = [
|
|
||||||
"title" => $additional_data["meta"]["title"],
|
|
||||||
"description" => [
|
|
||||||
[
|
|
||||||
"type" => "quote",
|
|
||||||
"value" => $additional_data["meta"]["description"]
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"url" => $additional_data["meta"]["origWikiUrl"],
|
|
||||||
"thumb" => $additional_data["meta"]["image"],
|
|
||||||
"table" => [],
|
|
||||||
"sublink" => []
|
|
||||||
];
|
|
||||||
|
|
||||||
// parse html for instant answer
|
|
||||||
$this->fuckhtml->load($additional_data["html"]);
|
|
||||||
|
|
||||||
$div =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"div"
|
|
||||||
);
|
|
||||||
|
|
||||||
// get description
|
|
||||||
$description =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"sx-kp-short-extract sx-kp-short-extract-complete",
|
|
||||||
$div
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($description) !== 0){
|
|
||||||
|
|
||||||
$answer["description"][] = [
|
|
||||||
"type" => "text",
|
|
||||||
"value" =>
|
|
||||||
html_entity_decode(
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$description[0]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
// get socials
|
|
||||||
$socials =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"sx-wiki-social-link",
|
|
||||||
"a"
|
|
||||||
);
|
|
||||||
|
|
||||||
foreach($socials as $social){
|
|
||||||
|
|
||||||
$title =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$social["attributes"]["title"]
|
|
||||||
);
|
|
||||||
|
|
||||||
$url =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$social["attributes"]["href"]
|
|
||||||
);
|
|
||||||
|
|
||||||
switch($title){
|
|
||||||
|
|
||||||
case "Official Website":
|
|
||||||
$title = "Website";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
$answer["sublink"][$title] = $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get videos
|
|
||||||
$videos =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"sx-kp-video-grid-item",
|
|
||||||
$div
|
|
||||||
);
|
|
||||||
|
|
||||||
foreach($videos as $video){
|
|
||||||
|
|
||||||
$this->fuckhtml->load($video);
|
|
||||||
|
|
||||||
$as =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"a"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($as) === 0){
|
|
||||||
|
|
||||||
// ?? invalid
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$image =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByAttributeName(
|
|
||||||
"data-sx-src",
|
|
||||||
"img"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($image) !== 0){
|
|
||||||
|
|
||||||
$thumb = [
|
|
||||||
"ratio" => "16:9",
|
|
||||||
"url" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$image[0]["attributes"]["data-sx-src"]
|
|
||||||
)
|
|
||||||
];
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$thumb = [
|
|
||||||
"ratio" => null,
|
|
||||||
"url" => null
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
$out["video"][] = [
|
|
||||||
"title" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$as[0]["attributes"]["title"]
|
|
||||||
),
|
|
||||||
"description" => null,
|
|
||||||
"date" => null,
|
|
||||||
"duration" => null,
|
|
||||||
"views" => null,
|
|
||||||
"thumb" => $thumb,
|
|
||||||
"url" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$as[0]["attributes"]["href"]
|
|
||||||
)
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
// reset
|
|
||||||
$this->fuckhtml->load($additional_data["html"]);
|
|
||||||
|
|
||||||
// get table elements
|
|
||||||
$table =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"sx-infobox",
|
|
||||||
"table"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($table) !== 0){
|
|
||||||
|
|
||||||
$trs =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"tr"
|
|
||||||
);
|
|
||||||
|
|
||||||
foreach($trs as $tr){
|
|
||||||
|
|
||||||
$this->fuckhtml->load($tr);
|
|
||||||
|
|
||||||
// ok so startpage devs cant fucking code a table
|
|
||||||
// td = content
|
|
||||||
// th (AAAHH) = title
|
|
||||||
$tds =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"td"
|
|
||||||
);
|
|
||||||
|
|
||||||
$ths =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"th"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(
|
|
||||||
count($ths) === 1 &&
|
|
||||||
count($tds) === 1
|
|
||||||
){
|
|
||||||
|
|
||||||
$title =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$ths[0]
|
|
||||||
);
|
|
||||||
|
|
||||||
$description = [];
|
|
||||||
|
|
||||||
$this->fuckhtml->load($tds[0]);
|
|
||||||
|
|
||||||
$lis =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName(
|
|
||||||
"li"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($lis) !== 0){
|
|
||||||
|
|
||||||
foreach($lis as $li){
|
|
||||||
|
|
||||||
$description[] =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$li
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
$description = implode(", ", $description);
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$tds[0]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
$answer["table"][$title] = $description;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$out["answer"][] = $answer;
|
|
||||||
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
// do nothing
|
|
||||||
//echo "error!";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1428,12 +1120,16 @@ class startpage{
|
|||||||
[
|
[
|
||||||
"lui" => "english",
|
"lui" => "english",
|
||||||
"language" => "english",
|
"language" => "english",
|
||||||
"query" => $str["q"],
|
|
||||||
"cat" => $pagetype,
|
|
||||||
"sc" => $str["sc"],
|
"sc" => $str["sc"],
|
||||||
"t" => "device",
|
"t" => "device",
|
||||||
|
"cat" => $pagetype,
|
||||||
"segment" => "startpage.udog",
|
"segment" => "startpage.udog",
|
||||||
"page" => $str["page"]
|
"abd" => 0,
|
||||||
|
"abe" => 0,
|
||||||
|
"query" => $str["q"],
|
||||||
|
"page" => $str["page"],
|
||||||
|
"qsr" => "all",
|
||||||
|
"qadf" => "none" // @ todo fix (??)
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
$pagetype,
|
$pagetype,
|
||||||
|
|||||||
@@ -868,123 +868,71 @@ class yandex{
|
|||||||
|
|
||||||
if($get["npt"]){
|
if($get["npt"]){
|
||||||
|
|
||||||
[$params, $proxy] =
|
[$get, $proxy] =
|
||||||
$this->backend->get(
|
$this->backend->get(
|
||||||
$get["npt"],
|
$get["npt"],
|
||||||
"video"
|
"video"
|
||||||
);
|
);
|
||||||
|
|
||||||
$params = json_decode($params, true);
|
$get = json_decode($get, true);
|
||||||
|
|
||||||
$nsfw = $params["nsfw"];
|
|
||||||
unset($params["nsfw"]);
|
|
||||||
}else{
|
}else{
|
||||||
|
|
||||||
$search = $get["s"];
|
if(strlen($get["s"]) === 0){
|
||||||
if(strlen($search) === 0){
|
|
||||||
|
|
||||||
throw new Exception("Search term is empty!");
|
throw new Exception("Search term is empty!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$proxy = $this->backend->get_ip();
|
$proxy = $this->backend->get_ip();
|
||||||
$nsfw = $get["nsfw"];
|
|
||||||
$time = $get["time"];
|
|
||||||
$duration = $get["duration"];
|
|
||||||
|
|
||||||
// https://yandex.com/video/search
|
|
||||||
// ?tmpl_version=releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63
|
|
||||||
// &format=json
|
|
||||||
// &request=
|
|
||||||
// {
|
|
||||||
// "blocks":[
|
|
||||||
// {"block":"extra-content","params":{},"version":2},
|
|
||||||
// {"block":"i-global__params:ajax","params":{},"version":2},
|
|
||||||
// {"block":"search2:ajax","params":{},"version":2},
|
|
||||||
// {"block":"vital-incut","params":{},"version":2},
|
|
||||||
// {"block":"content_type_search","params":{},"version":2},
|
|
||||||
// {"block":"serp-controller","params":{},"version":2},
|
|
||||||
// {"block":"cookies_ajax","params":{},"version":2}
|
|
||||||
// ],
|
|
||||||
// "metadata":{
|
|
||||||
// "bundles":{"lb":"^G]!q<X120"},
|
|
||||||
// "assets":{"las":"react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"},
|
|
||||||
// "extraContent":{"names":["i-react-ajax-adapter"]}
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// &yu=4861394161661655015
|
|
||||||
// &from=tabbar
|
|
||||||
// &reqid=1693106278500184-6825210746979814879-balancer-l7leveler-kubr-yp-sas-7-BAL-4237
|
|
||||||
// &suggest_reqid=486139416166165501562797413447032
|
|
||||||
// &text=minecraft
|
|
||||||
|
|
||||||
$params = [
|
|
||||||
"tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
|
|
||||||
"format" => "json",
|
|
||||||
"request" => json_encode([
|
|
||||||
"blocks" => [
|
|
||||||
(object)[
|
|
||||||
"block" => "extra-content",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
],
|
|
||||||
(object)[
|
|
||||||
"block" => "i-global__params:ajax",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
],
|
|
||||||
(object)[
|
|
||||||
"block" => "search2:ajax",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
],
|
|
||||||
(object)[
|
|
||||||
"block" => "vital-incut",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
],
|
|
||||||
(object)[
|
|
||||||
"block" => "content_type_search",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
],
|
|
||||||
(object)[
|
|
||||||
"block" => "serp-controller",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
],
|
|
||||||
(object)[
|
|
||||||
"block" => "cookies_ajax",
|
|
||||||
"params" => (object)[],
|
|
||||||
"version" => 2
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"metadata" => (object)[
|
|
||||||
"bundles" => (object)[
|
|
||||||
"lb" => "^G]!q<X120"
|
|
||||||
],
|
|
||||||
"assets" => (object)[
|
|
||||||
"las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
|
|
||||||
],
|
|
||||||
"extraContent" => (object)[
|
|
||||||
"names" => [
|
|
||||||
"i-react-ajax-adapter"
|
|
||||||
]
|
|
||||||
]
|
|
||||||
]
|
|
||||||
]),
|
|
||||||
"text" => $search
|
|
||||||
];
|
|
||||||
|
|
||||||
if($duration != "any"){
|
|
||||||
|
|
||||||
$params["duration"] = $duration;
|
|
||||||
}
|
|
||||||
|
|
||||||
if($time != "any"){
|
|
||||||
|
|
||||||
$params["within"] = $time;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://yandex.com/video/search?text=skycamefalling&from=tabbar&format=json&ncrnd=7271&p=0&parent-reqid=&request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]}&serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL&yu=3091577281773194415&tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
|
||||||
|
// https://yandex.com/video/search
|
||||||
|
// ?text=skycamefalling
|
||||||
|
// &from=tabbar
|
||||||
|
// &format=json
|
||||||
|
// &ncrnd=7271
|
||||||
|
// &p=0
|
||||||
|
// &parent-reqid=
|
||||||
|
// &request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]} {"blocks":[{"block":"video-app","params":{}}]}
|
||||||
|
// &serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL
|
||||||
|
// &yu=3091577281773194415
|
||||||
|
// &tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
|
||||||
|
|
||||||
|
$params = [
|
||||||
|
"text" => $get["s"],
|
||||||
|
"from" => "tabbar",
|
||||||
|
"format" => "json",
|
||||||
|
"ncrnd" => 7271,
|
||||||
|
"p" => 0,
|
||||||
|
"parent-reqid" => "",
|
||||||
|
"request" => json_encode((object)[
|
||||||
|
"blocks" => [
|
||||||
|
(object)[
|
||||||
|
"block" => "video-app",
|
||||||
|
"params" => (object)[]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]),
|
||||||
|
"serpid" => "1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL",
|
||||||
|
"yu" => 3091577281773194415,
|
||||||
|
"tmpl_version" => "releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a"
|
||||||
|
];
|
||||||
|
|
||||||
|
if(isset($get["p"])){
|
||||||
|
|
||||||
|
$params["p"] = $get["p"];
|
||||||
|
}
|
||||||
|
|
||||||
|
if($get["duration"] != "any"){
|
||||||
|
|
||||||
|
$params["duration"] = $get["duration"];
|
||||||
|
}
|
||||||
|
|
||||||
|
if($get["time"] != "any"){
|
||||||
|
|
||||||
|
$params["within"] = $get["time"];
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
$handle = fopen("scraper/yandex-video.json", "r");
|
$handle = fopen("scraper/yandex-video.json", "r");
|
||||||
$json = fread($handle, filesize("scraper/yandex-video.json"));
|
$json = fread($handle, filesize("scraper/yandex-video.json"));
|
||||||
@@ -996,7 +944,7 @@ class yandex{
|
|||||||
$proxy,
|
$proxy,
|
||||||
"https://yandex.com/video/search",
|
"https://yandex.com/video/search",
|
||||||
$params,
|
$params,
|
||||||
$nsfw,
|
$get["nsfw"],
|
||||||
"yandex_v"
|
"yandex_v"
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
@@ -1011,7 +959,7 @@ class yandex{
|
|||||||
throw new Exception("Could not parse JSON");
|
throw new Exception("Could not parse JSON");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!isset($json["blocks"])){
|
if(!isset($json["results"]["clips"]["items"])){
|
||||||
|
|
||||||
throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
|
throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
|
||||||
}
|
}
|
||||||
@@ -1026,209 +974,120 @@ class yandex{
|
|||||||
"reel" => []
|
"reel" => []
|
||||||
];
|
];
|
||||||
|
|
||||||
$html = null;
|
foreach($json["results"]["clips"]["items"] as $k => $data){
|
||||||
foreach($json["blocks"] as $block){
|
|
||||||
|
|
||||||
if(isset($block["html"])){
|
if(isset($data["preview"]["posterSrc"])){
|
||||||
|
|
||||||
$html .= $block["html"];
|
$poster = $data["preview"]["posterSrc"];
|
||||||
|
|
||||||
|
if(
|
||||||
|
preg_match(
|
||||||
|
'/^\/\//',
|
||||||
|
$data["preview"]["posterSrc"]
|
||||||
|
)
|
||||||
|
){
|
||||||
|
|
||||||
|
$poster = "https:" . $poster;
|
||||||
|
}
|
||||||
|
|
||||||
|
$thumb = [
|
||||||
|
"ratio" => "16:9",
|
||||||
|
"url" => $poster
|
||||||
|
];
|
||||||
|
}else{
|
||||||
|
|
||||||
|
$thumb = [
|
||||||
|
"ratio" => null,
|
||||||
|
"url" => null
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$out["video"][] = [
|
||||||
|
"title" => $data["relatedParams"]["text"],
|
||||||
|
"description" => $this->titledots($data["description"]),
|
||||||
|
"author" => [
|
||||||
|
"name" =>
|
||||||
|
isset($json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["name"]) ?
|
||||||
|
$json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["name"] : null,
|
||||||
|
"url" =>
|
||||||
|
isset($json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["origUrl"]) ?
|
||||||
|
$json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["origUrl"] : null,
|
||||||
|
"avatar" => null
|
||||||
|
],
|
||||||
|
"date" =>
|
||||||
|
isset($json["results"]["clips"]["dups"][$k]["date"]) ?
|
||||||
|
strtotime($json["results"]["clips"]["dups"][$k]["date"]) : null,
|
||||||
|
"duration" =>
|
||||||
|
isset($json["results"]["clips"]["dups"][$k]["duration"]["value"]) ?
|
||||||
|
(int)$json["results"]["clips"]["dups"][$k]["duration"]["value"] : null,
|
||||||
|
"views" =>
|
||||||
|
isset($json["results"]["clips"]["dups"][$k]["views"]["text"]) ?
|
||||||
|
$this->parseviews($json["results"]["clips"]["dups"][$k]["views"]["text"]) : null,
|
||||||
|
"thumb" => $thumb,
|
||||||
|
"url" =>
|
||||||
|
preg_replace(
|
||||||
|
'/^http:\/\//',
|
||||||
|
"https://",
|
||||||
|
$data["relatedParams"]["related_url"]
|
||||||
|
)
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->fuckhtml->load($html);
|
// get npt
|
||||||
|
if($json["results"]["search"]["hasNextPage"]){
|
||||||
|
|
||||||
$div =
|
$get["p"] = (int)$json["results"]["search"]["currentPage"] + 1;
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByTagName("div");
|
|
||||||
|
|
||||||
/*
|
|
||||||
Get nextpage
|
|
||||||
*/
|
|
||||||
$npt =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"more more_direction_next i-bem",
|
|
||||||
$div
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($npt) !== 0){
|
|
||||||
|
|
||||||
$params["p"] = "1";
|
|
||||||
$params["nsfw"] = $nsfw;
|
|
||||||
$out["npt"] =
|
$out["npt"] =
|
||||||
$this->backend->store(
|
$this->backend->store(
|
||||||
json_encode($params),
|
json_encode($get),
|
||||||
"video",
|
"video",
|
||||||
$proxy
|
$proxy
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$items =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"serp-item",
|
|
||||||
$div
|
|
||||||
);
|
|
||||||
|
|
||||||
foreach($items as $item){
|
|
||||||
|
|
||||||
$data =
|
|
||||||
json_decode(
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$item["attributes"]["data-video"]
|
|
||||||
),
|
|
||||||
true
|
|
||||||
);
|
|
||||||
|
|
||||||
$this->fuckhtml->load($item);
|
|
||||||
|
|
||||||
$thumb =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"thumb-image__image",
|
|
||||||
"img"
|
|
||||||
);
|
|
||||||
|
|
||||||
$c = 1;
|
|
||||||
if(count($thumb) === 0){
|
|
||||||
|
|
||||||
$thumb = [
|
|
||||||
"url" => null,
|
|
||||||
"ratio" => null
|
|
||||||
];
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$thumb = [
|
|
||||||
"url" =>
|
|
||||||
str_replace(
|
|
||||||
"//",
|
|
||||||
"https://",
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$thumb
|
|
||||||
[0]
|
|
||||||
["attributes"]
|
|
||||||
["src"]
|
|
||||||
),
|
|
||||||
$c
|
|
||||||
),
|
|
||||||
"ratio" => "16:9"
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
$smallinfos =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"serp-item__sitelinks-item",
|
|
||||||
"div"
|
|
||||||
);
|
|
||||||
|
|
||||||
$date = null;
|
|
||||||
$views = null;
|
|
||||||
$first = true;
|
|
||||||
|
|
||||||
foreach($smallinfos as $info){
|
|
||||||
|
|
||||||
if($first){
|
|
||||||
|
|
||||||
$first = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$info =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$info
|
|
||||||
);
|
|
||||||
|
|
||||||
if($temp_date = strtotime($info)){
|
|
||||||
|
|
||||||
$date = $temp_date;
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$views = $this->parseviews($info);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->fuckhtml
|
|
||||||
->getElementsByClassName(
|
|
||||||
"serp-item__text serp-item__text_visibleText_always",
|
|
||||||
"div"
|
|
||||||
);
|
|
||||||
|
|
||||||
if(count($description) === 0){
|
|
||||||
|
|
||||||
$description = null;
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$description =
|
|
||||||
$this->titledots(
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$description[0]
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
$out["video"][] = [
|
|
||||||
"title" =>
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$this->titledots(
|
|
||||||
$data["title"]
|
|
||||||
)
|
|
||||||
),
|
|
||||||
"description" => $description,
|
|
||||||
"author" => [
|
|
||||||
"name" => null,
|
|
||||||
"url" => null,
|
|
||||||
"avatar" => null
|
|
||||||
],
|
|
||||||
"date" => $date,
|
|
||||||
"duration" =>
|
|
||||||
(int)$data
|
|
||||||
["counters"]
|
|
||||||
["toHostingLoaded"]
|
|
||||||
["stredParams"]
|
|
||||||
["duration"],
|
|
||||||
"views" => $views,
|
|
||||||
"thumb" => $thumb,
|
|
||||||
"url" =>
|
|
||||||
str_replace(
|
|
||||||
"http://",
|
|
||||||
"https://",
|
|
||||||
$this->fuckhtml
|
|
||||||
->getTextContent(
|
|
||||||
$data["counters"]
|
|
||||||
["toHostingLoaded"]
|
|
||||||
["postfix"]
|
|
||||||
["href"]
|
|
||||||
),
|
|
||||||
$c
|
|
||||||
)
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function parseviews($number){
	
	// Convert a human-readable view counter into an integer.
	//
	// Only the first space-separated token of $number is read, so
	// trailing words are ignored. The last character of that token
	// selects the multiplier: "k" = 1 000, "m" = 1 000 000,
	// "b" = 1 000 000 000 (case-insensitive), anything else = 1.
	// NOTE(review): assumes "." is the decimal separator and that no
	// thousands separators are present -- confirm against live data.
	//
	// @param string $number Counter text, eg. "1.5K views"
	// @return int Parsed view count, 0 when no number can be read
	$token = explode(" ", trim((string)$number), 2)[0];
	
	if($token === ""){
		
		// nothing to parse; also avoids indexing an empty string below
		return 0;
	}
	
	switch(strtolower($token[strlen($token) - 1])){
		
		case "k":
			$multiplier = 1000;
			break;
		
		case "m":
			$multiplier = 1000000;
			break;
		
		case "b":
			$multiplier = 1000000000;
			break;
		
		default:
			$multiplier = 1;
			break;
	}
	
	// The float cast reads the leading numeric part ("1.25M" => 1.25),
	// so any number of decimal digits scales correctly; round() removes
	// binary floating point noise before the integer cast.
	return (int)round((float)$token * $multiplier);
}
|
||||||
|
|
||||||
private function titledots($title){
|
private function titledots($title){
|
||||||
|
|||||||
787
scraper/yep.php
787
scraper/yep.php
@@ -14,234 +14,209 @@ class yep{
|
|||||||
public function getfilters($page){
|
public function getfilters($page){
|
||||||
|
|
||||||
return [
|
return [
|
||||||
"country" => [
|
"lang" => [
|
||||||
"display" => "Country",
|
"display" => "Language",
|
||||||
"option" => [
|
"option" => [
|
||||||
"all" => "All regions",
|
"any" => "Any language",
|
||||||
"af" => "Afghanistan",
|
"aa" => "Afar",
|
||||||
"al" => "Albania",
|
"ab" => "Abkhazian",
|
||||||
"dz" => "Algeria",
|
"ae" => "Avestan",
|
||||||
"as" => "American Samoa",
|
"af" => "Afrikaans",
|
||||||
"ad" => "Andorra",
|
"ak" => "Akan",
|
||||||
"ao" => "Angola",
|
"am" => "Amharic",
|
||||||
"ai" => "Anguilla",
|
"an" => "Aragonese",
|
||||||
"ag" => "Antigua and Barbuda",
|
"ar" => "Arabic",
|
||||||
"ar" => "Argentina",
|
"as" => "Assamese",
|
||||||
"am" => "Armenia",
|
"av" => "Avaric",
|
||||||
"aw" => "Aruba",
|
"ay" => "Aymara",
|
||||||
"au" => "Australia",
|
"az" => "Azerbaijani",
|
||||||
"at" => "Austria",
|
"ba" => "Bashkir",
|
||||||
"az" => "Azerbaijan",
|
"be" => "Belarusian",
|
||||||
"bs" => "Bahamas",
|
"bg" => "Bulgarian",
|
||||||
"bh" => "Bahrain",
|
"bh" => "Bihari",
|
||||||
"bd" => "Bangladesh",
|
"bi" => "Bislama",
|
||||||
"bb" => "Barbados",
|
"bm" => "Bambara",
|
||||||
"by" => "Belarus",
|
"bn" => "Bengali",
|
||||||
"be" => "Belgium",
|
"bo" => "Tibetan",
|
||||||
"bz" => "Belize",
|
"br" => "Breton",
|
||||||
"bj" => "Benin",
|
"bs" => "Bosnian",
|
||||||
"bt" => "Bhutan",
|
"ca" => "Catalan",
|
||||||
"bo" => "Bolivia",
|
"ce" => "Chechen",
|
||||||
"ba" => "Bosnia and Herzegovina",
|
"ch" => "Chamorro",
|
||||||
"bw" => "Botswana",
|
"co" => "Corsican",
|
||||||
"br" => "Brazil",
|
"cr" => "Cree",
|
||||||
"bn" => "Brunei Darussalam",
|
"cs" => "Czech",
|
||||||
"bg" => "Bulgaria",
|
"cu" => "Church Slavic",
|
||||||
"bf" => "Burkina Faso",
|
"cv" => "Chuvash",
|
||||||
"bi" => "Burundi",
|
"cy" => "Welsh",
|
||||||
"cv" => "Cabo Verde",
|
"da" => "Danish",
|
||||||
"kh" => "Cambodia",
|
"de" => "German",
|
||||||
"cm" => "Cameroon",
|
"dv" => "Divehi",
|
||||||
"ca" => "Canada",
|
"dz" => "Dzongkha",
|
||||||
"ky" => "Cayman Islands",
|
"ee" => "Ewe",
|
||||||
"cf" => "Central African Republic",
|
"el" => "Greek",
|
||||||
"td" => "Chad",
|
"en" => "English",
|
||||||
"cl" => "Chile",
|
"eo" => "Esperanto",
|
||||||
"cn" => "China",
|
"es" => "Spanish",
|
||||||
"co" => "Colombia",
|
"et" => "Estonian",
|
||||||
"cg" => "Congo",
|
"eu" => "Basque",
|
||||||
"cd" => "Congo, Democratic Republic",
|
"fa" => "Persian",
|
||||||
"ck" => "Cook Islands",
|
"ff" => "Fulah",
|
||||||
"cr" => "Costa Rica",
|
"fi" => "Finnish",
|
||||||
"hr" => "Croatia",
|
"fj" => "Fijian",
|
||||||
"cu" => "Cuba",
|
"fo" => "Faroese",
|
||||||
"cy" => "Cyprus",
|
"fr" => "French",
|
||||||
"cz" => "Czechia",
|
"fy" => "Western Frisian",
|
||||||
"ci" => "Côte d'Ivoire",
|
"ga" => "Irish",
|
||||||
"dk" => "Denmark",
|
"gd" => "Scottish Gaelic",
|
||||||
"dj" => "Djibouti",
|
"gl" => "Galician",
|
||||||
"dm" => "Dominica",
|
"gn" => "Guarani",
|
||||||
"do" => "Dominican Republic",
|
"gu" => "Gujarati",
|
||||||
"ec" => "Ecuador",
|
"gv" => "Manx",
|
||||||
"eg" => "Egypt",
|
"ha" => "Hausa",
|
||||||
"sv" => "El Salvador",
|
"he" => "Hebrew",
|
||||||
"gq" => "Equatorial Guinea",
|
"hi" => "Hindi",
|
||||||
"ee" => "Estonia",
|
"ho" => "Hiri Motu",
|
||||||
"et" => "Ethiopia",
|
"hr" => "Croatian",
|
||||||
"fo" => "Faroe Islands",
|
"ht" => "Haitian",
|
||||||
"fj" => "Fiji",
|
"hu" => "Hungarian",
|
||||||
"fi" => "Finland",
|
"hy" => "Armenian",
|
||||||
"fr" => "France",
|
"hz" => "Herero",
|
||||||
"gf" => "French Guiana",
|
"ia" => "Interlingua",
|
||||||
"pf" => "French Polynesia",
|
"id" => "Indonesian",
|
||||||
"ga" => "Gabon",
|
"ie" => "Interlingue",
|
||||||
"gm" => "Gambia",
|
"ig" => "Igbo",
|
||||||
"ge" => "Georgia",
|
"ii" => "Sichuan Yi",
|
||||||
"de" => "Germany",
|
"ik" => "Inupiaq",
|
||||||
"gh" => "Ghana",
|
"io" => "Ido",
|
||||||
"gi" => "Gibraltar",
|
"is" => "Icelandic",
|
||||||
"gr" => "Greece",
|
"it" => "Italian",
|
||||||
"gl" => "Greenland",
|
"iu" => "Inuktitut",
|
||||||
"gd" => "Grenada",
|
"ja" => "Japanese",
|
||||||
"gp" => "Guadeloupe",
|
"jv" => "Javanese",
|
||||||
"gu" => "Guam",
|
"ka" => "Georgian",
|
||||||
"gt" => "Guatemala",
|
"kg" => "Kongo",
|
||||||
"gg" => "Guernsey",
|
"ki" => "Kikuyu",
|
||||||
"gn" => "Guinea",
|
"kj" => "Kuanyama",
|
||||||
"gy" => "Guyana",
|
"kk" => "Kazakh",
|
||||||
"ht" => "Haiti",
|
"kl" => "Kalaallisut",
|
||||||
"hn" => "Honduras",
|
"km" => "Central Khmer",
|
||||||
"hk" => "Hong Kong",
|
"kn" => "Kannada",
|
||||||
"hu" => "Hungary",
|
"ko" => "Korean",
|
||||||
"is" => "Iceland",
|
"kr" => "Kanuri",
|
||||||
"in" => "India",
|
"ks" => "Kashmiri",
|
||||||
"id" => "Indonesia",
|
"ku" => "Kurdish",
|
||||||
"iq" => "Iraq",
|
"kv" => "Komi",
|
||||||
"ie" => "Ireland",
|
"kw" => "Cornish",
|
||||||
"im" => "Isle of Man",
|
"ky" => "Kyrgyz",
|
||||||
"il" => "Israel",
|
"la" => "Latin",
|
||||||
"it" => "Italy",
|
"lb" => "Luxembourgish",
|
||||||
"jm" => "Jamaica",
|
"lg" => "Ganda",
|
||||||
"jp" => "Japan",
|
"li" => "Limburgish",
|
||||||
"je" => "Jersey",
|
"ln" => "Lingala",
|
||||||
"jo" => "Jordan",
|
"lo" => "Lao",
|
||||||
"kz" => "Kazakhstan",
|
"lt" => "Lithuanian",
|
||||||
"ke" => "Kenya",
|
"lu" => "Luba-Katanga",
|
||||||
"ki" => "Kiribati",
|
"lv" => "Latvian",
|
||||||
"kw" => "Kuwait",
|
"mg" => "Malagasy",
|
||||||
"kg" => "Kyrgyzstan",
|
"mh" => "Marshallese",
|
||||||
"la" => "Lao People's Democratic Republic",
|
"mi" => "Maori",
|
||||||
"lv" => "Latvia",
|
"mk" => "Macedonian",
|
||||||
"lb" => "Lebanon",
|
"ml" => "Malayalam",
|
||||||
"ls" => "Lesotho",
|
"mn" => "Mongolian",
|
||||||
"ly" => "Libya",
|
"mr" => "Marathi",
|
||||||
"li" => "Liechtenstein",
|
"ms" => "Malay",
|
||||||
"lt" => "Lithuania",
|
"mt" => "Maltese",
|
||||||
"lu" => "Luxembourg",
|
"my" => "Burmese",
|
||||||
"mk" => "Macedonia",
|
"na" => "Nauru",
|
||||||
"mg" => "Madagascar",
|
"nb" => "Norwegian Bokmål",
|
||||||
"mw" => "Malawi",
|
"nd" => "North Ndebele",
|
||||||
"my" => "Malaysia",
|
"ne" => "Nepali",
|
||||||
"mv" => "Maldives",
|
"ng" => "Ndonga",
|
||||||
"ml" => "Mali",
|
"nl" => "Dutch",
|
||||||
"mt" => "Malta",
|
"nn" => "Norwegian Nynorsk",
|
||||||
"mq" => "Martinique",
|
"no" => "Norwegian",
|
||||||
"mr" => "Mauritania",
|
"nr" => "South Ndebele",
|
||||||
"mu" => "Mauritius",
|
"nv" => "Navajo",
|
||||||
"yt" => "Mayotte",
|
"ny" => "Chichewa",
|
||||||
"mx" => "Mexico",
|
"oc" => "Occitan",
|
||||||
"fm" => "Micronesia, Federated States of",
|
"oj" => "Ojibwa",
|
||||||
"md" => "Moldova",
|
"om" => "Oromo",
|
||||||
"mc" => "Monaco",
|
"or" => "Oriya",
|
||||||
"mn" => "Mongolia",
|
"os" => "Ossetian",
|
||||||
"me" => "Montenegro",
|
"pa" => "Punjabi",
|
||||||
"ms" => "Montserrat",
|
"pi" => "Pali",
|
||||||
"ma" => "Morocco",
|
"pl" => "Polish",
|
||||||
"mz" => "Mozambique",
|
"ps" => "Pashto",
|
||||||
"mm" => "Myanmar",
|
"pt" => "Portuguese",
|
||||||
"na" => "Namibia",
|
"qu" => "Quechua",
|
||||||
"nr" => "Nauru",
|
"rm" => "Romansh",
|
||||||
"np" => "Nepal",
|
"rn" => "Rundi",
|
||||||
"nl" => "Netherlands",
|
"ro" => "Romanian",
|
||||||
"nc" => "New Caledonia",
|
"ru" => "Russian",
|
||||||
"nz" => "New Zealand",
|
"rw" => "Kinyarwanda",
|
||||||
"ni" => "Nicaragua",
|
"sa" => "Sanskrit",
|
||||||
"ne" => "Niger",
|
"sc" => "Sardinian",
|
||||||
"ng" => "Nigeria",
|
"sd" => "Sindhi",
|
||||||
"nu" => "Niue",
|
"se" => "Northern Sami",
|
||||||
"no" => "Norway",
|
"sg" => "Sango",
|
||||||
"om" => "Oman",
|
"si" => "Sinhala",
|
||||||
"pk" => "Pakistan",
|
"sk" => "Slovak",
|
||||||
"ps" => "Palestine, State of",
|
"sl" => "Slovenian",
|
||||||
"pa" => "Panama",
|
"sm" => "Samoan",
|
||||||
"pg" => "Papua New Guinea",
|
"sn" => "Shona",
|
||||||
"py" => "Paraguay",
|
"so" => "Somali",
|
||||||
"pe" => "Peru",
|
"sq" => "Albanian",
|
||||||
"ph" => "Philippines",
|
"sr" => "Serbian",
|
||||||
"pn" => "Pitcairn",
|
"ss" => "Swati",
|
||||||
"pl" => "Poland",
|
"st" => "Southern Sotho",
|
||||||
"pt" => "Portugal",
|
"su" => "Sundanese",
|
||||||
"pr" => "Puerto Rico",
|
"sv" => "Swedish",
|
||||||
"qa" => "Qatar",
|
"sw" => "Swahili",
|
||||||
"ro" => "Romania",
|
"ta" => "Tamil",
|
||||||
"ru" => "Russian Federation",
|
"te" => "Telugu",
|
||||||
"rw" => "Rwanda",
|
"tg" => "Tajik",
|
||||||
"re" => "Réunion",
|
"th" => "Thai",
|
||||||
"sh" => "Saint Helena",
|
"ti" => "Tigrinya",
|
||||||
"kn" => "Saint Kitts and Nevis",
|
"tk" => "Turkmen",
|
||||||
"lc" => "Saint Lucia",
|
"tl" => "Tagalog",
|
||||||
"vc" => "Saint Vincent and the Grenadines",
|
"tn" => "Tswana",
|
||||||
"ws" => "Samoa",
|
|
||||||
"sm" => "San Marino",
|
|
||||||
"st" => "Sao Tome and Principe",
|
|
||||||
"sa" => "Saudi Arabia",
|
|
||||||
"sn" => "Senegal",
|
|
||||||
"rs" => "Serbia",
|
|
||||||
"sc" => "Seychelles",
|
|
||||||
"sl" => "Sierra Leone",
|
|
||||||
"sg" => "Singapore",
|
|
||||||
"sk" => "Slovakia",
|
|
||||||
"si" => "Slovenia",
|
|
||||||
"sb" => "Solomon Islands",
|
|
||||||
"so" => "Somalia",
|
|
||||||
"kr" => "Sourth Korea",
|
|
||||||
"za" => "South Africa",
|
|
||||||
"es" => "Spain",
|
|
||||||
"lk" => "Sri Lanka",
|
|
||||||
"sr" => "Suriname",
|
|
||||||
"se" => "Sweden",
|
|
||||||
"ch" => "Switzerland",
|
|
||||||
"tw" => "Taiwan",
|
|
||||||
"tj" => "Tajikistan",
|
|
||||||
"tz" => "Tanzania",
|
|
||||||
"th" => "Thailand",
|
|
||||||
"tl" => "Timor-Leste",
|
|
||||||
"tg" => "Togo",
|
|
||||||
"tk" => "Tokelau",
|
|
||||||
"to" => "Tonga",
|
"to" => "Tonga",
|
||||||
"tt" => "Trinidad and Tobago",
|
"tr" => "Turkish",
|
||||||
"tn" => "Tunisia",
|
"ts" => "Tsonga",
|
||||||
"tr" => "Turkey",
|
"tt" => "Tatar",
|
||||||
"tm" => "Turkmenistan",
|
"tw" => "Twi",
|
||||||
"ug" => "Uganda",
|
"ty" => "Tahitian",
|
||||||
"ua" => "Ukraine",
|
"ug" => "Uyghur",
|
||||||
"ae" => "United Arab Emirates",
|
"uk" => "Ukrainian",
|
||||||
"gb" => "United Kingdom",
|
"ur" => "Urdu",
|
||||||
"us" => "United States",
|
"uz" => "Uzbek",
|
||||||
"uy" => "Uruguay",
|
"ve" => "Venda",
|
||||||
"uz" => "Uzbekistan",
|
"vi" => "Vietnamese",
|
||||||
"vu" => "Vanuatu",
|
"vo" => "Volapük",
|
||||||
"ve" => "Venezuela",
|
"wa" => "Walloon",
|
||||||
"vn" => "Vietnam",
|
"wo" => "Wolof",
|
||||||
"vg" => "Virgin Islands, British",
|
"xh" => "Xhosa",
|
||||||
"vi" => "Virgin Islands, U.S.",
|
"yi" => "Yiddish",
|
||||||
"ye" => "Yemen",
|
"yo" => "Yoruba",
|
||||||
"zm" => "Zambia",
|
"za" => "Zhuang",
|
||||||
"zw" => "Zimbabwe"
|
"zh" => "Chinese",
|
||||||
|
"zh-cn" => "Chinese (Simplified)",
|
||||||
|
"zh-tw" => "Chinese (Traditional)",
|
||||||
|
"zu" => "Zulu"
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
"nsfw" => [
|
"nsfw" => [
|
||||||
"display" => "NSFW",
|
"display" => "NSFW",
|
||||||
"option" => [
|
"option" => [
|
||||||
"yes" => "Yes",
|
"yes" => "Yes",
|
||||||
"maybe" => "Maybe",
|
|
||||||
"no" => "No"
|
"no" => "No"
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = []){
|
private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
@@ -256,21 +231,37 @@ class yep{
|
|||||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
if($use_api){
|
||||||
"Accept: */*",
|
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
$post_data = json_encode($post_data);
|
||||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
|
||||||
"Referer: https://yep.com/",
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||||
"Origin: https://yep.com",
|
["Content-Type: application/json",
|
||||||
"DNT: 1",
|
"Authorization: Bearer $bearer",
|
||||||
"Connection: keep-alive",
|
"Content-Length: " . strlen($post_data)]
|
||||||
"Sec-Fetch-Dest: empty",
|
);
|
||||||
"Sec-Fetch-Mode: cors",
|
|
||||||
"Sec-Fetch-Site: same-site",
|
curl_setopt($curlproc, CURLOPT_POST, true);
|
||||||
"Priority: u=4",
|
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
|
||||||
"TE: trailers"]
|
}else{
|
||||||
);
|
|
||||||
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||||
|
["User-Agent: " . config::USER_AGENT,
|
||||||
|
"Accept: */*",
|
||||||
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
|
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||||
|
"Referer: https://yep.com/",
|
||||||
|
"Origin: https://yep.com",
|
||||||
|
"DNT: 1",
|
||||||
|
"Connection: keep-alive",
|
||||||
|
"Sec-Fetch-Dest: empty",
|
||||||
|
"Sec-Fetch-Mode: cors",
|
||||||
|
"Sec-Fetch-Site: same-site",
|
||||||
|
"Priority: u=4",
|
||||||
|
"TE: trailers"]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||||
@@ -295,22 +286,17 @@ class yep{
|
|||||||
|
|
||||||
public function web($get){
|
public function web($get){
|
||||||
|
|
||||||
|
if(config::YEP_USE_API){
|
||||||
|
|
||||||
|
return $this->web_api($get);
|
||||||
|
}
|
||||||
|
|
||||||
$search = $get["s"];
|
$search = $get["s"];
|
||||||
if(strlen($search) === 0){
|
if(strlen($search) === 0){
|
||||||
|
|
||||||
throw new Exception("Search term is empty!");
|
throw new Exception("Search term is empty!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$country = $get["country"];
|
|
||||||
$nsfw = $get["nsfw"];
|
|
||||||
|
|
||||||
switch($nsfw){
|
|
||||||
|
|
||||||
case "yes": $nsfw = "off"; break;
|
|
||||||
case "maybe": $nsfw = "moderate"; break;
|
|
||||||
case "no": $nsfw = "strict"; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [
|
$out = [
|
||||||
"status" => "ok",
|
"status" => "ok",
|
||||||
"spelling" => [
|
"spelling" => [
|
||||||
@@ -327,22 +313,23 @@ class yep{
|
|||||||
"related" => []
|
"related" => []
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// parse filters
|
||||||
|
$filters = [
|
||||||
|
"limit" => 100, // wwwwwwwwwwwwwww
|
||||||
|
"query" => $search,
|
||||||
|
];
|
||||||
|
|
||||||
|
if($get["nsfw"] == "no"){ $filters["safeSearch"] = "moderate"; }
|
||||||
|
if($get["lang"] != "any"){ $filters["hl"] = $get["lang"]; }
|
||||||
|
|
||||||
try{
|
try{
|
||||||
|
|
||||||
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
|
// https://api.yep.com/search?limit=20&query=asmr
|
||||||
$json =
|
$json =
|
||||||
$this->get(
|
$this->get(
|
||||||
$this->backend->get_ip(),
|
$this->backend->get_ip(),
|
||||||
"https://api.yep.com/fs/2/search",
|
"https://api.yep.com/search",
|
||||||
[
|
$filters
|
||||||
"client" => "web",
|
|
||||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
|
||||||
"limit" => "99999",
|
|
||||||
"no_correct" => "false",
|
|
||||||
"q" => $search,
|
|
||||||
"safeSearch" => $nsfw,
|
|
||||||
"type" => "web"
|
|
||||||
]
|
|
||||||
);
|
);
|
||||||
|
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
@@ -408,7 +395,7 @@ class yep{
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
"url" => $item["url"],
|
"url" => $item["url"],
|
||||||
"date" => strtotime($item["first_seen"]),
|
"date" => null,
|
||||||
"type" => "web",
|
"type" => "web",
|
||||||
"thumb" => [
|
"thumb" => [
|
||||||
"url" => null,
|
"url" => null,
|
||||||
@@ -422,83 +409,11 @@ class yep{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(isset($json[1]["featured_news"])){
|
|
||||||
|
|
||||||
foreach($json[1]["featured_news"] as $news){
|
|
||||||
|
|
||||||
$out["news"][] = [
|
|
||||||
"title" => $news["title"],
|
|
||||||
"description" =>
|
|
||||||
$this->titledots(
|
|
||||||
strip_tags(
|
|
||||||
html_entity_decode(
|
|
||||||
$news["snippet"]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
),
|
|
||||||
"date" => strtotime($news["first_seen"]),
|
|
||||||
"thumb" =>
|
|
||||||
isset($news["img"]) ?
|
|
||||||
[
|
|
||||||
"url" => $this->unshiturl($news["img"]),
|
|
||||||
"ratio" => "16:9"
|
|
||||||
] :
|
|
||||||
[
|
|
||||||
"url" => null,
|
|
||||||
"ratio" => null
|
|
||||||
],
|
|
||||||
"url" => $news["url"]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(isset($json[1]["featured_images"])){
|
|
||||||
|
|
||||||
foreach($json[1]["featured_images"] as $image){
|
|
||||||
|
|
||||||
if(
|
|
||||||
$image["width"] !== 0 &&
|
|
||||||
$image["height"] !== 0
|
|
||||||
){
|
|
||||||
|
|
||||||
$thumb_width = $image["width"] >= 260 ? 260 : $image["width"];
|
|
||||||
$thumb_height = ceil($image["height"] * ($thumb_width / $image["width"]));
|
|
||||||
|
|
||||||
$width = $image["width"];
|
|
||||||
$height = $image["height"];
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$thumb_width = null;
|
|
||||||
$thumb_height = null;
|
|
||||||
$width = null;
|
|
||||||
$height = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out["image"][] = [
|
|
||||||
"title" => $image["title"],
|
|
||||||
"source" => [
|
|
||||||
[
|
|
||||||
"url" => $image["image_id"],
|
|
||||||
"width" => $width,
|
|
||||||
"height" => $height
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"url" => $image["src"],
|
|
||||||
"width" => $thumb_width,
|
|
||||||
"height" => $thumb_height
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"url" => $image["host_page"]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private function web_api($get){
|
||||||
public function image($get){
|
|
||||||
|
|
||||||
$search = $get["s"];
|
$search = $get["s"];
|
||||||
if(strlen($search) === 0){
|
if(strlen($search) === 0){
|
||||||
@@ -506,142 +421,53 @@ class yep{
|
|||||||
throw new Exception("Search term is empty!");
|
throw new Exception("Search term is empty!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$country = $get["country"];
|
|
||||||
$nsfw = $get["nsfw"];
|
|
||||||
|
|
||||||
switch($nsfw){
|
|
||||||
|
|
||||||
case "yes": $nsfw = "off"; break;
|
|
||||||
case "maybe": $nsfw = "moderate"; break;
|
|
||||||
case "no": $nsfw = "strict"; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [
|
$out = [
|
||||||
"status" => "ok",
|
"status" => "ok",
|
||||||
|
"spelling" => [
|
||||||
|
"type" => "no_correction",
|
||||||
|
"using" => null,
|
||||||
|
"correction" => null
|
||||||
|
],
|
||||||
"npt" => null,
|
"npt" => null,
|
||||||
"image" => []
|
"answer" => [],
|
||||||
|
"web" => [],
|
||||||
|
"image" => [],
|
||||||
|
"video" => [],
|
||||||
|
"news" => [],
|
||||||
|
"related" => []
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// parse filters
|
||||||
|
$filters = [
|
||||||
|
"query" => $search,
|
||||||
|
"limit" => 100
|
||||||
|
];
|
||||||
|
|
||||||
|
if($get["nsfw"] == "no"){ $filters["safe_search"] = true; }
|
||||||
|
if($get["lang"] != "any"){ $filters["language"] = [ $get["lang"] ]; }
|
||||||
|
|
||||||
|
// add api key
|
||||||
|
$key_data = $this->backend->get_key();
|
||||||
|
|
||||||
try{
|
try{
|
||||||
|
|
||||||
$json =
|
$json =
|
||||||
$this->get(
|
$this->get(
|
||||||
$this->backend->get_ip(), // no nextpage!
|
$this->backend->get_ip($key_data["increment"]),
|
||||||
"https://api.yep.com/fs/2/search",
|
"https://platform.yep.com/api/search",
|
||||||
[
|
[],
|
||||||
"client" => "web",
|
true,
|
||||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
$filters,
|
||||||
"no_correct" => "false",
|
$key_data["key"]
|
||||||
"q" => $search,
|
|
||||||
"safeSearch" => $nsfw,
|
|
||||||
"type" => "images"
|
|
||||||
]
|
|
||||||
);
|
);
|
||||||
|
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
|
||||||
throw new Exception("Failed to fetch JSON");
|
throw new Exception("Failed to fetch JSON");
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->detect_cf($json);
|
// should never happen
|
||||||
|
//$this->detect_cf($json);
|
||||||
$json = json_decode($json, true);
|
|
||||||
|
|
||||||
if($json === null){
|
|
||||||
|
|
||||||
throw new Exception("Failed to decode JSON");
|
|
||||||
}
|
|
||||||
|
|
||||||
if(isset($json[1]["results"])){
|
|
||||||
foreach($json[1]["results"] as $item){
|
|
||||||
|
|
||||||
if(
|
|
||||||
$item["width"] !== 0 &&
|
|
||||||
$item["height"] !== 0
|
|
||||||
){
|
|
||||||
|
|
||||||
$thumb_width = $item["width"] >= 260 ? 260 : $item["width"];
|
|
||||||
$thumb_height = ceil($item["height"] * ($thumb_width / $item["width"]));
|
|
||||||
|
|
||||||
$width = $item["width"];
|
|
||||||
$height = $item["height"];
|
|
||||||
}else{
|
|
||||||
|
|
||||||
$thumb_width = null;
|
|
||||||
$thumb_height = null;
|
|
||||||
$width = null;
|
|
||||||
$height = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out["image"][] = [
|
|
||||||
"title" => $item["title"],
|
|
||||||
"source" => [
|
|
||||||
[
|
|
||||||
"url" => $item["image_id"],
|
|
||||||
"width" => $width,
|
|
||||||
"height" => $height
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"url" => $item["src"],
|
|
||||||
"width" => $thumb_width,
|
|
||||||
"height" => $thumb_height
|
|
||||||
]
|
|
||||||
],
|
|
||||||
"url" => $item["host_page"]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public function news($get){
|
|
||||||
|
|
||||||
$search = $get["s"];
|
|
||||||
if(strlen($search) === 0){
|
|
||||||
|
|
||||||
throw new Exception("Search term is empty!");
|
|
||||||
}
|
|
||||||
|
|
||||||
$country = $get["country"];
|
|
||||||
$nsfw = $get["nsfw"];
|
|
||||||
|
|
||||||
switch($nsfw){
|
|
||||||
|
|
||||||
case "yes": $nsfw = "off"; break;
|
|
||||||
case "maybe": $nsfw = "moderate"; break;
|
|
||||||
case "no": $nsfw = "strict"; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [
|
|
||||||
"status" => "ok",
|
|
||||||
"npt" => null,
|
|
||||||
"news" => []
|
|
||||||
];
|
|
||||||
|
|
||||||
try{
|
|
||||||
|
|
||||||
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
|
|
||||||
$json =
|
|
||||||
$this->get(
|
|
||||||
$this->backend->get_ip(),
|
|
||||||
"https://api.yep.com/fs/2/search",
|
|
||||||
[
|
|
||||||
"client" => "web",
|
|
||||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
|
||||||
"limit" => "99999",
|
|
||||||
"no_correct" => "false",
|
|
||||||
"q" => $search,
|
|
||||||
"safeSearch" => $nsfw,
|
|
||||||
"type" => "news"
|
|
||||||
]
|
|
||||||
);
|
|
||||||
}catch(Exception $error){
|
|
||||||
|
|
||||||
throw new Exception("Failed to fetch JSON");
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->detect_cf($json);
|
|
||||||
|
|
||||||
$json = json_decode($json, true);
|
$json = json_decode($json, true);
|
||||||
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
|
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
|
||||||
@@ -651,34 +477,53 @@ class yep{
|
|||||||
throw new Exception("Failed to decode JSON");
|
throw new Exception("Failed to decode JSON");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(isset($json[1]["results"])){
|
if(isset($json["error"])){
|
||||||
foreach($json[1]["results"] as $item){
|
|
||||||
|
|
||||||
$out["news"][] = [
|
throw new Exception("Yep API returned an error: " . $json["error"]);
|
||||||
"title" => $item["title"],
|
}
|
||||||
"author" => null,
|
|
||||||
"description" =>
|
if(isset($json["errors"])){
|
||||||
$this->titledots(
|
|
||||||
strip_tags(
|
throw new Exception("Yep API returned the following errors: {$json["message"]}");
|
||||||
html_entity_decode(
|
}
|
||||||
$item["snippet"]
|
|
||||||
)
|
if(
|
||||||
)
|
isset($json["success"]) &&
|
||||||
),
|
$json["success"] !== true
|
||||||
"date" => strtotime($item["first_seen"]),
|
){
|
||||||
"thumb" =>
|
|
||||||
isset($item["img"]) ?
|
throw new Exception("Yep API returned a false-y success value");
|
||||||
[
|
}
|
||||||
"url" => $this->unshiturl($item["img"]),
|
|
||||||
"ratio" => "16:9"
|
if(!isset($json["results"])){
|
||||||
] :
|
|
||||||
[
|
throw new Exception("Yep API did not return a results object");
|
||||||
"url" => null,
|
}
|
||||||
"ratio" => null
|
|
||||||
],
|
foreach($json["results"] as $item){
|
||||||
"url" => $item["url"]
|
|
||||||
];
|
if(
|
||||||
|
$item["url"] === null ||
|
||||||
|
$item["url"] == ""
|
||||||
|
){
|
||||||
|
|
||||||
|
// sometimes API fucks up
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$out["web"][] = [
|
||||||
|
"title" => $item["title"],
|
||||||
|
"description" => $item["description"],
|
||||||
|
"url" => $item["url"],
|
||||||
|
"date" => null,
|
||||||
|
"type" => "web",
|
||||||
|
"thumb" => [
|
||||||
|
"url" => null,
|
||||||
|
"ratio" => null
|
||||||
|
],
|
||||||
|
"sublink" => [],
|
||||||
|
"table" => []
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
return $out;
|
return $out;
|
||||||
|
|||||||
24
settings.php
24
settings.php
@@ -161,14 +161,6 @@ $settings = [
|
|||||||
"value" => "yep",
|
"value" => "yep",
|
||||||
"text" => "Yep"
|
"text" => "Yep"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
"value" => "greppr",
|
|
||||||
"text" => "Greppr"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"value" => "crowdview",
|
|
||||||
"text" => "Crowdview"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
"value" => "mwmbl",
|
"value" => "mwmbl",
|
||||||
"text" => "Mwmbl"
|
"text" => "Mwmbl"
|
||||||
@@ -196,10 +188,6 @@ $settings = [
|
|||||||
[
|
[
|
||||||
"value" => "wiby",
|
"value" => "wiby",
|
||||||
"text" => "wiby"
|
"text" => "wiby"
|
||||||
],
|
|
||||||
[
|
|
||||||
"value" => "curlie",
|
|
||||||
"text" => "Curlie"
|
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
@@ -223,6 +211,10 @@ $settings = [
|
|||||||
"value" => "google",
|
"value" => "google",
|
||||||
"text" => "Google"
|
"text" => "Google"
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
"value" => "google_api",
|
||||||
|
"text" => "Google API"
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"value" => "google_cse",
|
"value" => "google_cse",
|
||||||
"text" => "Google CSE"
|
"text" => "Google CSE"
|
||||||
@@ -239,10 +231,6 @@ $settings = [
|
|||||||
"value" => "qwant",
|
"value" => "qwant",
|
||||||
"text" => "Qwant"
|
"text" => "Qwant"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
"value" => "yep",
|
|
||||||
"text" => "Yep"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
"value" => "baidu",
|
"value" => "baidu",
|
||||||
"text" => "Baidu"
|
"text" => "Baidu"
|
||||||
@@ -379,10 +367,6 @@ $settings = [
|
|||||||
"value" => "qwant",
|
"value" => "qwant",
|
||||||
"text" => "Qwant"
|
"text" => "Qwant"
|
||||||
],
|
],
|
||||||
[
|
|
||||||
"value" => "yep",
|
|
||||||
"text" => "Yep"
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
"value" => "mojeek",
|
"value" => "mojeek",
|
||||||
"text" => "Mojeek"
|
"text" => "Mojeek"
|
||||||
|
|||||||
Reference in New Issue
Block a user