Compare commits
18 Commits
60d6f649ee
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 95819bfe52 | |||
| e1e92d715e | |||
| 394f401921 | |||
| 25e8095d0d | |||
| cf3c77ed04 | |||
| c45f8b1e12 | |||
| 6086c63148 | |||
| d2b0a414ad | |||
| c713d52b5f | |||
| 0861450b8a | |||
| 88012f6ae2 | |||
| 0dabcea0aa | |||
| a8022d22a7 | |||
| 9ea0372bb7 | |||
| a54f212550 | |||
| b1f5974e40 | |||
| e63a17d6db | |||
| 4349bf232d |
11
Dockerfile
11
Dockerfile
@@ -1,8 +1,17 @@
|
||||
FROM lwthiker/curl-impersonate:0.6.1-ff-alpine AS curl-impersonate
|
||||
|
||||
FROM alpine:3.21
|
||||
WORKDIR /var/www/html/4get
|
||||
|
||||
RUN apk update && apk upgrade
|
||||
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg
|
||||
RUN apk add php apache2-ssl php84-fileinfo php84-openssl php84-iconv php84-common php84-dom php84-sodium php84-curl curl php84-pecl-apcu php84-apache2 imagemagick php84-pecl-imagick php84-mbstring imagemagick-webp imagemagick-jpeg nss ca-certificates
|
||||
|
||||
COPY --from=curl-impersonate /usr/local/bin /usr/local/bin
|
||||
COPY --from=curl-impersonate /usr/local/lib /usr/local/lib
|
||||
|
||||
ENV LD_PRELOAD=/usr/local/lib/libcurl-impersonate-ff.so
|
||||
ENV CURL_IMPERSONATE=ff117
|
||||
ENV CURL_IMPERSONATE_HEADERS=no
|
||||
|
||||
COPY . .
|
||||
|
||||
|
||||
52
README.md
52
README.md
@@ -13,11 +13,11 @@ _NOT to be confused with 4get.ch, 4get.lol and friends! I **don't** host these._
|
||||
|
||||
## Totally unbiased comparison between alternatives
|
||||
|
||||
| | 4get | searx(ng) | libreY | araa | hearch.co |
|
||||
|----------------------------|-------------------------|-----------|-------------|-----------|-------------------|
|
||||
| RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | idk |
|
||||
| Does it suck | no (debunked by snopes) | yes | yes | a little | better than searx |
|
||||
| Does it work | ye | sometimes | sometimes | sometimes | yes |
|
||||
| | 4get | searx(ng) | whoogle | degoog |
|
||||
|----------------------------|-------------------------|-----------|------------|--------------------------------------|
|
||||
| RAM usage | 100-400mb~ | 400mb-1GB | 100mb | 200mb-1GB |
|
||||
| Does it suck | no (debunked by snopes) | yes | kind of? | it's kinda cool but no search filters |
|
||||
| Does it work | ye | lmao | shits dead | works right now... |
|
||||
|
||||
## Features
|
||||
1. Rotating proxies on a per-scraper basis
|
||||
@@ -31,25 +31,29 @@ tl;dr 4get is the best way to browse for shit.
|
||||
|
||||
# Supported websites
|
||||
|
||||
| Web | Images | Videos | News | Music | Autocompleter |
|
||||
|------------|--------------|--------------|------------|------------|---------------|
|
||||
| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | Soundcloud | Brave |
|
||||
| Brave | Brave | Sepia Search | Brave | | DuckDuckGo |
|
||||
| Yandex | Yandex | DuckDuckGo | Google | | Yandex |
|
||||
| Google | Google | Brave | Startpage | | Google |
|
||||
| Startpage | Startpage | Yandex | Qwant | | Startpage |
|
||||
| Qwant | Qwant | Google | Mojeek | | Kagi |
|
||||
| Ghostery | Yep | Startpage | Baidu | | Qwant |
|
||||
| Yep | Baidu | Qwant | | | Ghostery |
|
||||
| Greppr | Pinterest | Baidu | | | Yep |
|
||||
| Crowdview | 500px | Coc Coc | | | Marginalia |
|
||||
| Mwmbl | VSCO | | | | YouTube |
|
||||
| Mojeek | Imgur | | | | Soundcloud |
|
||||
| Baidu | FindThatMeme | | | | |
|
||||
| Coc Coc | | | | | |
|
||||
| Marginalia | | | | | |
|
||||
| wiby | | | | | |
|
||||
| Curlie | | | | | |
|
||||
| web | images | videos | news | music | autocomplete |
|
||||
|--------------|--------------|--------------|--------------|------------|--------------|
|
||||
| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | SoundCloud | Brave |
|
||||
| Brave | Yandex | Vimeo | Brave | Swisscows | DuckDuckGo |
|
||||
| Yandex | Brave | Sepia Search | Google | | Yandex |
|
||||
| Google | Google | DuckDuckGo | Yahoo! JAPAN | | Google |
|
||||
| Google API | Google API | Brave | Startpage | | Startpage |
|
||||
| Google CSE | Google CSE | Yandex | Qwant | | Kagi |
|
||||
| Yahoo! JAPAN | Yahoo! JAPAN | Google | Mojeek | | Qwant |
|
||||
| Startpage | Startpage | Yahoo! JAPAN | Baidu | | Ghostery |
|
||||
| Qwant | Qwant | Startpage | | | Yep |
|
||||
| Ghostery | Baidu | Qwant | | | Marginalia |
|
||||
| Yep | Solofield | Baidu | | | YouTube |
|
||||
| Mwmbl | Pinterest | Coc Coc | | | SoundCloud |
|
||||
| Mojeek | Cara | Solofield | | | |
|
||||
| Baidu | Flickr | | | | |
|
||||
| Coc Coc | Pexels | | | | |
|
||||
| Solofield | Pixabay | | | | |
|
||||
| Marginalia | Unsplash | | | | |
|
||||
| wiby | 500px | | | | |
|
||||
| | VSCO | | | | |
|
||||
| | Imgur | | | | |
|
||||
| | FindThatMeme | | | | |
|
||||
|
||||
# Installation
|
||||
Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>.
|
||||
|
||||
1
data/api_keys/yep.txt
Normal file
1
data/api_keys/yep.txt
Normal file
@@ -0,0 +1 @@
|
||||
# Paste Yep API keys here
|
||||
@@ -23,6 +23,16 @@ class config{
|
||||
// Enable the API?
|
||||
const API_ENABLED = true;
|
||||
|
||||
//
|
||||
// 4play (session provider)
|
||||
//
|
||||
// Enable 4play API?
|
||||
const FPLAY_ENABLE_API = true;
|
||||
|
||||
// 4play password. Please set this to something secure if you enable the 4play API.
|
||||
// This password is used to POST sessions to /api/v2/provide_sesh
|
||||
const FPLAY_PASSWORD = "1234";
|
||||
|
||||
//
|
||||
// BOT PROTECTION
|
||||
//
|
||||
@@ -118,10 +128,10 @@ class config{
|
||||
|
||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||
// Changing this might break things.
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0";
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
|
||||
|
||||
// User agent to use with 4get-friendly APIs
|
||||
const USER_AGENT_FRIENDLY = "4get-scrapist";
|
||||
const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
|
||||
|
||||
// Proxy pool assignments for each scraper
|
||||
// false = Use server's raw IP
|
||||
@@ -131,7 +141,6 @@ class config{
|
||||
const PROXY_YAHOO = false;
|
||||
const PROXY_YAHOO_JAPAN = false;
|
||||
const PROXY_BRAVE = false;
|
||||
const PROXY_FB = false; // facebook
|
||||
const PROXY_GOOGLE = false;
|
||||
const PROXY_GOOGLE_API = false;
|
||||
const PROXY_GOOGLE_CSE = false;
|
||||
@@ -155,7 +164,6 @@ class config{
|
||||
const PROXY_VIMEO = false;
|
||||
const PROXY_YEP = false;
|
||||
const PROXY_PINTEREST = false;
|
||||
const PROXY_SANKAKUCOMPLEX = false;
|
||||
const PROXY_FLICKR = false;
|
||||
const PROXY_PIXABAY = false;
|
||||
const PROXY_UNSPLASH = false;
|
||||
@@ -164,8 +172,6 @@ class config{
|
||||
const PROXY_VSCO = false;
|
||||
const PROXY_SEZNAM = false;
|
||||
const PROXY_NAVER = false;
|
||||
const PROXY_GREPPR = false;
|
||||
const PROXY_CROWDVIEW = false;
|
||||
const PROXY_MWMBL = false;
|
||||
const PROXY_FTM = false; // findthatmeme
|
||||
const PROXY_IMGUR = false;
|
||||
@@ -173,6 +179,11 @@ class config{
|
||||
const PROXY_YANDEX_W = false; // yandex web
|
||||
const PROXY_YANDEX_I = false; // yandex images
|
||||
const PROXY_YANDEX_V = false; // yandex videos
|
||||
const PROXY_SAFEBOORU = false;
|
||||
const PROXY_KONACHAN = false;
|
||||
const PROXY_YANDERE = false;
|
||||
const PROXY_TBIB = false;
|
||||
const PROXY_GELBOORU = false;
|
||||
|
||||
//
|
||||
// Scraper-specific parameters
|
||||
@@ -185,4 +196,7 @@ class config{
|
||||
// Use "null" to default out to HTML scraping OR specify a string to
|
||||
// use the API (Eg: "public"). API has less filters.
|
||||
const MARGINALIA_API_KEY = null;
|
||||
|
||||
// Yep
|
||||
const YEP_USE_API = false;
|
||||
}
|
||||
|
||||
@@ -133,6 +133,9 @@ class bot_protection{
|
||||
$answers[] = $regex;
|
||||
}
|
||||
|
||||
// dedup
|
||||
$answers = array_unique($answers);
|
||||
|
||||
if(
|
||||
!$invalid &&
|
||||
$key !== false // has captcha been gen'd?
|
||||
|
||||
@@ -2,6 +2,52 @@
|
||||
|
||||
class frontend{
|
||||
|
||||
/**
 * Check that $url is an absolute http(s) URL and, optionally, that its
 * host does not point at a private or reserved network address.
 *
 * @param string $url          URL to check.
 * @param bool   $net_validate When true, the host is resolved (unless it
 *                             already is an IP literal) and every address
 *                             found must be outside the private/reserved
 *                             ranges.
 * @return bool true when the URL passed every requested check.
 */
public function validateurl($url, $net_validate = false){
	
	if(!is_string($url)){
		
		return false;
	}
	
	$url_parts = parse_url($url);
	
	// parse_url() returns false for seriously malformed URLs
	if(
		!is_array($url_parts) ||
		!isset($url_parts["scheme"]) ||
		!isset($url_parts["host"])
	){
		
		return false;
	}
	
	// URI schemes are case-insensitive, so "HTTP://" must be accepted too
	if(
		!in_array(
			strtolower($url_parts["scheme"]),
			["http", "https"],
			true
		)
	){
		
		return false;
	}
	
	if(!$net_validate){
		
		return true;
	}
	
	// handle ipv6 literals, which parse_url() returns wrapped in brackets
	$host =
		str_replace(
			["[", "]"],
			"",
			$url_parts["host"]
		);
	
	if(filter_var($host, FILTER_VALIDATE_IP) !== false){
		
		$ips = [$host];
	}else{
		
		// Resolve the domain. The trailing dot is kept from the original
		// code (presumably to force a fully-qualified lookup); strip any
		// dot the caller already supplied so we never look up "host..".
		// gethostbynamel() returns every IPv4 address, not just the first.
		$ips = gethostbynamel(rtrim($host, ".") . ".");
		
		if(
			$ips === false ||
			count($ips) === 0
		){
			
			// unresolvable host
			return false;
		}
	}
	
	// reject if ANY address is local/private/reserved
	foreach($ips as $ip){
		
		if(
			filter_var(
				$ip,
				FILTER_VALIDATE_IP,
				FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
			) === false
		){
			
			return false;
		}
	}
	
	return true;
}
|
||||
|
||||
public function load($template, $replacements = []){
|
||||
|
||||
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
|
||||
@@ -600,16 +646,13 @@ class frontend{
|
||||
"qwant" => "Qwant",
|
||||
"ghostery" => "Ghostery",
|
||||
"yep" => "Yep",
|
||||
"greppr" => "Greppr",
|
||||
"crowdview" => "Crowdview",
|
||||
"mwmbl" => "Mwmbl",
|
||||
"mojeek" => "Mojeek",
|
||||
"baidu" => "Baidu",
|
||||
"coccoc" => "Cốc Cốc",
|
||||
"solofield" => "Solofield",
|
||||
"marginalia" => "Marginalia",
|
||||
"wiby" => "wiby",
|
||||
"curlie" => "Curlie"
|
||||
"wiby" => "wiby"
|
||||
]
|
||||
];
|
||||
break;
|
||||
@@ -622,11 +665,11 @@ class frontend{
|
||||
"yandex" => "Yandex",
|
||||
"brave" => "Brave",
|
||||
"google" => "Google",
|
||||
"google_api" => "Google API",
|
||||
"google_cse" => "Google CSE",
|
||||
"yahoo_japan" => "Yahoo! JAPAN",
|
||||
"startpage" => "Startpage",
|
||||
"qwant" => "Qwant",
|
||||
"yep" => "Yep",
|
||||
"baidu" => "Baidu",
|
||||
"solofield" => "Solofield",
|
||||
"pinterest" => "Pinterest",
|
||||
@@ -638,8 +681,7 @@ class frontend{
|
||||
"fivehpx" => "500px",
|
||||
"vsco" => "VSCO",
|
||||
"imgur" => "Imgur",
|
||||
"ftm" => "FindThatMeme",
|
||||
//"sankakucomplex" => "SankakuComplex"
|
||||
"ftm" => "FindThatMeme"
|
||||
]
|
||||
];
|
||||
break;
|
||||
@@ -678,7 +720,6 @@ class frontend{
|
||||
"yahoo_japan" => "Yahoo! JAPAN",
|
||||
"startpage" => "Startpage",
|
||||
"qwant" => "Qwant",
|
||||
"yep" => "Yep",
|
||||
"mojeek" => "Mojeek",
|
||||
"baidu" => "Baidu"
|
||||
]
|
||||
@@ -695,6 +736,22 @@ class frontend{
|
||||
]
|
||||
];
|
||||
break;
|
||||
|
||||
case "booru":
	// Scraper picker for the booru tab. Each key must be unique: the
	// original list declared "tbib" twice, and the second entry was dead.
	$filters["scraper"] = [
		"display" => "Scraper",
		"option" => [
			"safebooru" => "Safebooru",
			"konachan" => "Konachan",
			"tbib" => "The Big Imageboard",
			"gelbooru" => "Gelbooru",
			"yandere" => "Yande.re",
			"sankakucomplex" => "SankakuComplex",
			"soybooru" => "SoyBooru"
		]
	];
	break;
|
||||
}
|
||||
|
||||
// get scraper name from user input, or default out to preferred scraper
|
||||
@@ -871,6 +928,7 @@ class frontend{
|
||||
|
||||
$html = null;
|
||||
|
||||
//foreach(["web", "images", "videos", "news", "music", "booru"] as $type){
|
||||
foreach(["web", "images", "videos", "news", "music"] as $type){
|
||||
|
||||
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
|
||||
|
||||
@@ -553,28 +553,21 @@ class fuckhtml{
|
||||
|
||||
case "\"":
|
||||
case "'":
|
||||
if(
|
||||
$i !== 0 && // only check if a quote could be there
|
||||
(
|
||||
(
|
||||
$json[$i - 1] === "\\" &&
|
||||
(
|
||||
$i === 2 ||
|
||||
$json[$i - 2] === "\\"
|
||||
)
|
||||
) ||
|
||||
$json[$i - 1] !== "\\"
|
||||
)
|
||||
){
|
||||
// found a non-escaped quote
|
||||
// count preceding backslashes
|
||||
$bsCount = 0;
|
||||
$j = $i - 1;
|
||||
|
||||
while($j >= 0 && $json[$j] === "\\"){
|
||||
$bsCount++;
|
||||
$j--;
|
||||
}
|
||||
|
||||
// quote is NOT escaped if even number of backslashes
|
||||
if($bsCount % 2 === 0){
|
||||
if($in_quote === null){
|
||||
|
||||
// open quote
|
||||
$in_quote = $json[$i];
|
||||
|
||||
}elseif($in_quote === $json[$i]){
|
||||
|
||||
// close quote
|
||||
$in_quote = null;
|
||||
}
|
||||
|
||||
@@ -347,11 +347,8 @@ class brave{
|
||||
$q["spellcheck"] = "0";
|
||||
}
|
||||
}
|
||||
/*
|
||||
$handle = fopen("scraper/brave.html", "r");
|
||||
$html = fread($handle, filesize("scraper/brave.html"));
|
||||
fclose($handle);*/
|
||||
|
||||
//$html = file_get_contents("scraper/brave.html");
|
||||
try{
|
||||
$html =
|
||||
$this->get(
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
 * Web scraper backed by the CrowdView JSON search endpoint
 * (crowdview-next-js.onrender.com/api/search-v3).
 */
class crowdview{
	
	// Declared explicitly: dynamic properties are deprecated since PHP 8.2.
	// Kept public so anything that read them on the object still can.
	
	/** Helper from lib/backend.php; used for get_ip() and assign_proxy(). */
	public $backend;
	
	/** Helper from lib/fuckhtml.php; used to strip markup from result text. */
	public $fuckhtml;
	
	public function __construct(){
		
		// include_once: constructing the scraper twice must not redeclare
		// the helper classes
		include_once "lib/backend.php";
		$this->backend = new backend("crowdview");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * This scraper exposes no filters for any page type.
	 *
	 * @param string $page Page type (unused).
	 * @return array Always an empty array.
	 */
	public function getfilters($page){
		
		return [];
	}
	
	/**
	 * Perform a GET request with browser-like headers.
	 *
	 * @param mixed  $proxy Proxy descriptor handed to backend::assign_proxy().
	 * @param string $url   Base URL.
	 * @param array  $get   Query parameters appended to $url when non-empty.
	 * @return string|bool  Whatever curl_exec() returned.
	 * @throws Exception    With the curl error text when the transfer fails.
	 */
	private function get($proxy, $url, $get = []){
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		try{
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1"]
			);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$this->backend->assign_proxy($curlproc, $proxy);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on the error path too
			curl_close($curlproc);
		}
	}
	
	/**
	 * Run a web search.
	 *
	 * @param array $get Request parameters; "s" holds the search term.
	 * @return array Result structure with "status", "spelling", "npt",
	 *               "answer", "web", "image", "video", "news" and "related".
	 * @throws Exception On empty search term, fetch failure or bad JSON.
	 */
	public function web($get){
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$proxy = $this->backend->get_ip();
		
		try{
			$json = $this->get(
				$proxy,
				"https://crowdview-next-js.onrender.com/api/search-v3",
				[
					"query" => $search
				]
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON");
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$json = json_decode($json, true);
		
		if($json === NULL){
			
			throw new Exception("Failed to decode JSON");
		}
		
		// a payload without a result list yields an empty result page
		if(
			!isset($json["results"]) ||
			!is_array($json["results"])
		){
			
			return $out;
		}
		
		foreach($json["results"] as $item){
			
			if(!isset($item["link"])){
				
				// nothing to link to
				continue;
			}
			
			// the snippet is split at the first "<b>": the text before it
			// is parsed as the date, the text after it is the description
			$parts = explode("<b>", $item["snippet"] ?? "", 2);
			
			if(count($parts) === 2){
				
				$date = strtotime($parts[0]);
				$description = $parts[1];
			}else{
				
				// no "<b>" marker: the whole snippet is the description
				$date = false;
				$description = $parts[0];
			}
			
			$out["web"][] = [
				"title" => $this->sanitize($item["title"] ?? ""),
				"description" => $this->sanitize($description),
				"url" => $item["link"],
				"date" => $date === false ? null : $date,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		return $out;
	}
	
	/**
	 * Decode HTML entities, reduce the markup to text via fuckhtml, then
	 * trim leading/trailing dots and spaces.
	 *
	 * @param string $html Raw HTML fragment.
	 * @return string Cleaned text.
	 */
	private function sanitize($html){
		
		return
			trim(
				$this->fuckhtml
				->getTextContent(
					html_entity_decode(
						$html
					)
				),
				". "
			);
	}
}
|
||||
@@ -1,309 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
 * Web scraper for curlie.org search result pages.
 */
class curlie{
	
	// Declared explicitly: dynamic properties are deprecated since PHP 8.2.
	// Kept public so anything that read them on the object still can.
	
	/** Helper from lib/backend.php; proxy selection and next-page storage. */
	public $backend;
	
	/** Helper from lib/fuckhtml.php; HTML querying and text extraction. */
	public $fuckhtml;
	
	public function __construct(){
		
		// include_once: constructing the scraper twice must not redeclare
		// the helper classes
		include_once "lib/backend.php";
		$this->backend = new backend("curlie");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Filters offered for a page type. Only "web" has any (a language
	 * selector); every other page type gets an empty list.
	 *
	 * @param string $page Page type.
	 * @return array Filter definitions keyed by filter name.
	 */
	public function getfilters($page){
		
		if($page != "web"){
			
			return [];
		}
		
		return [
			"lang" => [
				"display" => "Language",
				"option" => [
					"any" => "Any language",
					"en" => "English",
					"de" => "German",
					"fr" => "French",
					"ja" => "Japanese",
					"it" => "Italian",
					"es" => "Spanish",
					"ru" => "Russian",
					"nl" => "Dutch",
					"pl" => "Polish",
					"tr" => "Turkish",
					"da" => "Danish",
					"sv" => "Swedish",
					"no" => "Norwegian",
					"is" => "Icelandic",
					"fo" => "Faroese",
					"fi" => "Finnish",
					"et" => "Estonian",
					"lt" => "Lithuanian",
					"lv" => "Latvian",
					"cy" => "Welsh",
					"ga" => "Irish",
					"gd" => "Scottish Gaelic",
					"br" => "Breton",
					"fy" => "Frisian",
					"frr" => "North Frisian",
					"gem" => "Saterland Frisian",
					"lb" => "Luxembourgish",
					"rm" => "Romansh",
					"pt" => "Portuguese",
					"ca" => "Catalan",
					"gl" => "Galician",
					"eu" => "Basque",
					"ast" => "Asturian",
					"an" => "Aragonese",
					"fur" => "Friulan",
					"sc" => "Sardinian",
					"scn" => "Sicilian",
					"oc" => "Occitan",
					"be" => "Belarusian",
					"cs" => "Czech",
					"hu" => "Hungarian",
					"sk" => "Slovak",
					"uk" => "Ukrainian",
					"csb" => "Kashubian",
					"tt" => "Tatar",
					"ba" => "Bashkir",
					"os" => "Ossetian",
					"sl" => "Slovene",
					"sr" => "Serbian",
					"hr" => "Croatian",
					"bs" => "Bosnian",
					"bg" => "Bulgarian",
					"sq" => "Albanian",
					"ro" => "Romanian",
					"mk" => "Macedonian",
					"el" => "Greek",
					"iw" => "Hebrew",
					"fa" => "Persian",
					"ar" => "Arabic",
					"ku" => "Kurdish",
					"az" => "Azerbaijani",
					"hy" => "Armenian",
					"af" => "Afrikaans",
					"sw" => "Kiswahili",
					"uz" => "Uzbek",
					"kk" => "Kazakh",
					"ky" => "Kyrgyz",
					"tg" => "Tajik",
					"tk" => "Turkmen",
					"ug" => "Uyghurche",
					"hi" => "Hindi",
					"si" => "Sinhalese",
					"gu" => "Gujarati",
					"ur" => "Urdu",
					"mr" => "Marathi",
					"pa" => "Punjabi",
					"bn" => "Bengali",
					"ta" => "Tamil",
					"te" => "Telugu",
					"kn" => "Kannada",
					"zh_CN" => "Chinese Simplified",
					"zh_TW" => "Chinese Traditional",
					"ko" => "Korean",
					"cfr" => "Taiwanese",
					"th" => "Thai",
					"vi" => "Vietnamese",
					"in" => "Indonesian",
					"ms" => "Malay",
					"tl" => "Tagalog",
					"eo" => "Esperanto",
					"ia" => "Interlingua",
					"la" => "Latin"
				]
			]
		];
	}
	
	/**
	 * Perform a GET request with browser-like headers.
	 *
	 * @param mixed  $proxy Proxy descriptor handed to backend::assign_proxy().
	 * @param string $url   Base URL.
	 * @param array  $get   Query parameters appended to $url when non-empty.
	 * @return string|bool  Whatever curl_exec() returned.
	 * @throws Exception    With the curl error text when the transfer fails.
	 */
	private function get($proxy, $url, $get = []){
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		try{
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1"]
			);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$this->backend->assign_proxy($curlproc, $proxy);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on the error path too
			curl_close($curlproc);
		}
	}
	
	/**
	 * Run a web search, or continue one from a stored next-page token.
	 *
	 * @param array $get Request parameters: "npt" (next-page token, falsy
	 *                   for a fresh search), "s" (search term) and "lang"
	 *                   (language filter, "any" to disable).
	 * @return array Result structure with "status", "spelling", "npt",
	 *               "answer", "web", "image", "video", "news" and "related".
	 * @throws Exception On empty search term or when the page fetch fails.
	 */
	public function web($get){
		
		if($get["npt"]){
			
			// continue from the stored next-page link, on the same proxy
			[$query, $proxy] = $this->backend->get($get["npt"], "web");
			
			$url = "https://curlie.org/" . $query;
			$params = [];
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				// same guard as the other scrapers
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			
			$url = "https://curlie.org/search";
			$params = [
				"q" => $search,
				"start" => 0,
				"stime" => 92452189 // meaning unknown, kept from the original
			];
			
			if($get["lang"] !== "any"){
				
				$params["lang"] = $get["lang"];
			}
		}
		
		try{
			$html = $this->get(
				$proxy,
				$url,
				$params
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		$this->fuckhtml->load($html);
		
		// look for a link to the next page and stash it for later
		$nextpage =
			$this->fuckhtml
			->getElementsByClassName(
				"next-page",
				"a"
			);
		
		if(
			count($nextpage) !== 0 &&
			isset($nextpage[0]["attributes"]["href"])
		){
			
			$nextpage =
				$this->backend->store(
					$nextpage[0]["attributes"]["href"],
					"web",
					$proxy
				);
		}else{
			
			$nextpage = null;
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => $nextpage,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$items =
			$this->fuckhtml
			->getElementsByClassName(
				"site-item",
				"div"
			);
		
		foreach($items as $item){
			
			// scope the following lookups to this result
			$this->fuckhtml->load($item);
			
			$links =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"target",
					"_blank",
					"a"
				);
			
			if(!isset($links[0]["attributes"]["href"])){
				
				// result without an outbound link: nothing to show
				continue;
			}
			
			$a = $links[0];
			
			$description =
				$this->fuckhtml
				->getElementsByClassName("site-descr");
			
			if(count($description) !== 0){
				
				$description =
					$this->fuckhtml
					->getTextContent(
						$description[0]
					);
			}else{
				
				$description = null;
			}
			
			$out["web"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$a
					),
				"description" => $description,
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$a["attributes"]["href"]
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		return $out;
	}
}
|
||||
@@ -12,6 +12,8 @@ class google{
|
||||
|
||||
include "lib/backend.php";
|
||||
$this->backend = new backend("google");
|
||||
|
||||
$this->message = "Still working on a Google scraper that uses a headful browser. It will require Firefox + a webExtension running on a dedicated server. Waiting for my EDID adapter and we can get the show going. In the meantime, use the Google CSE/API or Yahoo JP/Startpage scrapers. They're all crippled in their own special ways but they're serviceable I guess.";
|
||||
}
|
||||
|
||||
public function getfilters($page){
|
||||
@@ -505,7 +507,7 @@ class google{
|
||||
}
|
||||
}
|
||||
|
||||
private function get($proxy, $url, $get = [], $alt_ua = false){
|
||||
private function get($proxy, $url, $get = []){
|
||||
|
||||
$curlproc = curl_init();
|
||||
|
||||
@@ -518,18 +520,6 @@ class google{
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||
|
||||
if($alt_ua === true){
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER, [
|
||||
"User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 26_0_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
||||
"Accept: text/html, application/xml;q=0.9, */*;q=0.8",
|
||||
"Accept-Language: en-US,en;q=0.8",
|
||||
"Accept-Encoding: gzip, deflate",
|
||||
"Connection: Keep-Alive",
|
||||
"Cache-Control: no-cache"
|
||||
]);
|
||||
}else{
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER, [
|
||||
"User-Agent: " . config::USER_AGENT,
|
||||
@@ -546,7 +536,6 @@ class google{
|
||||
"Priority: u=1",
|
||||
"TE: trailers"
|
||||
]);
|
||||
}
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||
@@ -574,228 +563,22 @@ class google{
|
||||
|
||||
public function web($get){
|
||||
|
||||
throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
|
||||
}
|
||||
|
||||
|
||||
public function video($get){
|
||||
throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
|
||||
}
|
||||
|
||||
|
||||
public function news($get){
|
||||
throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
|
||||
throw new Exception($this->message);
|
||||
}
|
||||
|
||||
|
||||
public function image($get){
|
||||
|
||||
// generate parameters
|
||||
if($get["npt"]){
|
||||
|
||||
[$params, $proxy] =
|
||||
$this->backend->get(
|
||||
$get["npt"],
|
||||
"images"
|
||||
);
|
||||
|
||||
$params = json_decode($params, true);
|
||||
|
||||
$page = $params["page"] + 1;
|
||||
$params = $params["params"];
|
||||
$params["async"] = "_fmt:json,p:1,ijn:{$page}";
|
||||
|
||||
}else{
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
|
||||
throw new Exception("Search term is empty!");
|
||||
throw new Exception($this->message);
|
||||
}
|
||||
|
||||
$proxy = $this->backend->get_ip();
|
||||
$country = $get["country"];
|
||||
$nsfw = $get["nsfw"];
|
||||
$time = $get["time"];
|
||||
$size = $get["size"];
|
||||
$ratio = $get["ratio"];
|
||||
$color = $get["color"];
|
||||
$type = $get["type"];
|
||||
$format = $get["format"];
|
||||
$rights = $get["rights"];
|
||||
|
||||
$page = 0;
|
||||
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"tbm" => "isch",
|
||||
"asearch" => "isch",
|
||||
"async" => "_fmt:json,p:0,ijn:{$page}", // ijn:0 = page 1
|
||||
];
|
||||
|
||||
// country (image search uses cr instead of gl)
|
||||
if($country != "any"){
|
||||
|
||||
$params["cr"] = "country" . strtoupper($country);
|
||||
public function video($get){
|
||||
throw new Exception($this->message);
|
||||
}
|
||||
|
||||
// nsfw
|
||||
$params["safe"] = $nsfw == "yes" ? "off" : "active";
|
||||
|
||||
// generate tbs
|
||||
$tbs = [];
|
||||
|
||||
// time
|
||||
if($time != "any"){
|
||||
|
||||
$tbs["qdr"] = $time;
|
||||
}
|
||||
|
||||
// size
|
||||
if($size != "any"){
|
||||
|
||||
$params["imgsz"] = $size;
|
||||
}
|
||||
|
||||
// ratio
|
||||
if($ratio != "any"){
|
||||
|
||||
$params["imgar"] = $ratio;
|
||||
}
|
||||
|
||||
// color
|
||||
if($color != "any"){
|
||||
|
||||
if(
|
||||
$color == "color" ||
|
||||
$color == "trans"
|
||||
){
|
||||
|
||||
$params["imgc"] = $color;
|
||||
}elseif($color == "bnw"){
|
||||
|
||||
$params["imgc"] = "gray";
|
||||
}else{
|
||||
|
||||
$tbs["ic"] = "specific";
|
||||
$tbs["isc"] = $color;
|
||||
}
|
||||
}
|
||||
|
||||
// type
|
||||
if($type != "any"){
|
||||
|
||||
$tbs["itp"] = $type;
|
||||
}
|
||||
|
||||
// format
|
||||
if($format != "any"){
|
||||
|
||||
$params["as_filetype"] = $format;
|
||||
}
|
||||
|
||||
// rights (tbs)
|
||||
if($rights != "any"){
|
||||
|
||||
$tbs["sur"] = $rights;
|
||||
}
|
||||
|
||||
// append tbs
|
||||
if(count($tbs) !== 0){
|
||||
|
||||
$params["tbs"] = "";
|
||||
|
||||
foreach($tbs as $key => $value){
|
||||
|
||||
$params["tbs"] .= $key . ":" . $value . ",";
|
||||
}
|
||||
|
||||
$params["tbs"] = rtrim($params["tbs"], ",");
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
$json =
|
||||
$this->get(
|
||||
$proxy,
|
||||
"https://www.google.com/search",
|
||||
$params
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to get search page");
|
||||
}
|
||||
|
||||
unset($params["async"]);
|
||||
|
||||
//$json = file_get_contents("scraper/google.json");
|
||||
|
||||
// detect captcha
|
||||
$this->fuckhtml->load($json);
|
||||
$this->detect_sorry();
|
||||
|
||||
// remove xssi
|
||||
$json =
|
||||
preg_replace(
|
||||
'/^[^{]*/',
|
||||
"",
|
||||
$json
|
||||
);
|
||||
|
||||
$json = json_decode($json, true);
|
||||
|
||||
if($json === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"image" => []
|
||||
];
|
||||
|
||||
if(!isset($json["ischj"]["metadata"])){
|
||||
|
||||
throw new Exception("Google did not return an image array");
|
||||
}
|
||||
|
||||
foreach($json["ischj"]["metadata"] as $image){
|
||||
|
||||
$out["image"][] = [
|
||||
"title" => $this->titledots($image["result"]["page_title"]),
|
||||
"source" => [
|
||||
[
|
||||
"url" => $image["original_image"]["url"],
|
||||
"width" => (int)$image["original_image"]["width"],
|
||||
"height" => (int)$image["original_image"]["height"]
|
||||
],
|
||||
[
|
||||
"url" => $image["thumbnail"]["url"],
|
||||
"width" => (int)$image["thumbnail"]["width"],
|
||||
"height" => (int)$image["thumbnail"]["height"]
|
||||
]
|
||||
],
|
||||
"url" => $image["result"]["referrer_url"]
|
||||
];
|
||||
}
|
||||
|
||||
$page++;
|
||||
|
||||
if(count($out["image"]) === 10){
|
||||
|
||||
$out["npt"] =
|
||||
$this->backend->store(
|
||||
json_encode([
|
||||
"params" => $params,
|
||||
"page" => $page
|
||||
]),
|
||||
"images",
|
||||
$proxy
|
||||
);
|
||||
}
|
||||
|
||||
return $out;
|
||||
public function news($get){
|
||||
throw new Exception($this->message);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -264,6 +264,25 @@ class google_api{
|
||||
"yes" => "Yes", // safe=active
|
||||
"no" => "No" // safe=off
|
||||
]
|
||||
],
|
||||
"sort" => [ // sort
|
||||
"display" => "Sort by",
|
||||
"option" => [
|
||||
"any" => "Any order",
|
||||
"date:d" => "Oldest",
|
||||
"date:a" => "Newest"
|
||||
]
|
||||
],
|
||||
"newer" => [
|
||||
"display" => "Newer than",
|
||||
"option" => "_DATE"
|
||||
],
|
||||
"rm_dupes" => [ // filter
|
||||
"display" => "Remove duplicates",
|
||||
"option" => [
|
||||
"yes" => "Yes", // 1
|
||||
"no" => "No" // 0
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
@@ -313,109 +332,29 @@ class google_api{
|
||||
"zh-CN" => "Chinese (Simplified)",
|
||||
"zh-TW" => "Chinese (Traditional)"
|
||||
]
|
||||
],
|
||||
"sort" => [
|
||||
"display" => "Sort by",
|
||||
"option" => [
|
||||
"any" => "Any order",
|
||||
"date:d" => "Oldest",
|
||||
"date:a" => "Newest"
|
||||
]
|
||||
],
|
||||
"newer" => [
|
||||
"display" => "Newer than",
|
||||
"option" => "_DATE"
|
||||
],
|
||||
"rm_dupes" => [
|
||||
"display" => "Remove duplicates",
|
||||
"option" => [
|
||||
"yes" => "Yes",
|
||||
"no" => "No"
|
||||
]
|
||||
]
|
||||
]
|
||||
);
|
||||
break;
|
||||
/*
|
||||
|
||||
case "images":
|
||||
return array_merge(
|
||||
$base,
|
||||
[
|
||||
"time" => [ // tbs=qdr:<time>
|
||||
"display" => "Time posted",
|
||||
"option" => [
|
||||
"any" => "Any time",
|
||||
"d" => "Past 24 hours",
|
||||
"w" => "Past week",
|
||||
"m" => "Past month",
|
||||
"y" => "Past year"
|
||||
]
|
||||
],
|
||||
"size" => [ // imgsz
|
||||
"size" => [ // imgSize
|
||||
"display" => "Size",
|
||||
"option" => [
|
||||
"any" => "Any size",
|
||||
"l" => "Large",
|
||||
"m" => "Medium",
|
||||
"i" => "Icon",
|
||||
"qsvga" => "Larger than 400x300",
|
||||
"vga" => "Larger than 640x480",
|
||||
"svga" => "Larger than 800x600",
|
||||
"xga" => "Larger than 1024x768",
|
||||
"2mp" => "Larger than 2MP",
|
||||
"4mp" => "Larger than 4MP",
|
||||
"6mp" => "Larger than 6MP",
|
||||
"8mp" => "Larger than 8MP",
|
||||
"10mp" => "Larger than 10MP",
|
||||
"12mp" => "Larger than 12MP",
|
||||
"15mp" => "Larger than 15MP",
|
||||
"20mp" => "Larger than 20MP",
|
||||
"40mp" => "Larger than 40MP",
|
||||
"70mp" => "Larger than 70MP"
|
||||
"icon" => "Icon",
|
||||
"small" => "Small",
|
||||
"medium" => "Medium",
|
||||
"large" => "Large",
|
||||
"xlarge" => "X-Large",
|
||||
"xxlarge" => "XX-Large",
|
||||
"huge" => "Huge"
|
||||
]
|
||||
],
|
||||
"ratio" => [ // imgar
|
||||
"display" => "Aspect ratio",
|
||||
"option" => [
|
||||
"any" => "Any ratio",
|
||||
"t|xt" => "Tall",
|
||||
"s" => "Square",
|
||||
"w" => "Wide",
|
||||
"xw" => "Panoramic"
|
||||
]
|
||||
],
|
||||
"color" => [ // imgc
|
||||
"display" => "Color",
|
||||
"option" => [
|
||||
"any" => "Any color",
|
||||
"color" => "Full color",
|
||||
"bnw" => "Black & white",
|
||||
"trans" => "Transparent",
|
||||
// from here, imgcolor
|
||||
"red" => "Red",
|
||||
"orange" => "Orange",
|
||||
"yellow" => "Yellow",
|
||||
"green" => "Green",
|
||||
"teal" => "Teal",
|
||||
"blue" => "Blue",
|
||||
"purple" => "Purple",
|
||||
"pink" => "Pink",
|
||||
"white" => "White",
|
||||
"gray" => "Gray",
|
||||
"black" => "Black",
|
||||
"brown" => "Brown"
|
||||
]
|
||||
],
|
||||
"type" => [ // tbs=itp:<type>
|
||||
"display" => "Type",
|
||||
"option" => [
|
||||
"any" => "Any type",
|
||||
"clipart" => "Clip Art",
|
||||
"lineart" => "Line Drawing",
|
||||
"animated" => "Animated"
|
||||
]
|
||||
],
|
||||
"format" => [ // as_filetype
|
||||
"format" => [ // fileType
|
||||
"display" => "Format",
|
||||
"option" => [
|
||||
"any" => "Any format",
|
||||
@@ -429,17 +368,55 @@ class google_api{
|
||||
"craw" => "RAW"
|
||||
]
|
||||
],
|
||||
"rights" => [ // tbs=sur:<rights>
|
||||
"color" => [
|
||||
"display" => "Color",
|
||||
"option" => [
|
||||
"any" => "Any color",
|
||||
|
||||
"color" => "Full color", // imgColorType
|
||||
"mono" => "Black & White",
|
||||
"trans" => "Transparent background",
|
||||
|
||||
"red" => "Red", // imgDominantColor
|
||||
"orange" => "Orange",
|
||||
"yellow" => "Yellow",
|
||||
"green" => "Green",
|
||||
"teal" => "Teal",
|
||||
"blue" => "Blue",
|
||||
"purple" => "Purple",
|
||||
"pink" => "Pink",
|
||||
"white" => "White",
|
||||
"gray" => "Gray",
|
||||
"black" => "Black",
|
||||
"brown" => "Brown"
|
||||
]
|
||||
],
|
||||
"type" => [ // imgType
|
||||
"display" => "Type",
|
||||
"option" => [
|
||||
"any" => "Any type",
|
||||
"clipart" => "Clip Art",
|
||||
"face" => "Faces",
|
||||
"lineart" => "Line Drawing",
|
||||
"stock" => "Stock photos",
|
||||
"photo" => "Photos",
|
||||
"animated" => "Animated",
|
||||
]
|
||||
],
|
||||
"rights" => [ // rights
|
||||
"display" => "Usage rights",
|
||||
"option" => [
|
||||
"any" => "Any license",
|
||||
"cl" => "Creative Commons licenses",
|
||||
"ol" => "Commercial & other licenses"
|
||||
"cc_publicdomain" => "Public domain",
|
||||
"cc_attribute" => "Attribution required",
|
||||
"cc_sharealike" => "Sharealike",
|
||||
"cc_noncommercial" => "Non-commercial use only",
|
||||
"cc_nonderived" => "Original works"
|
||||
]
|
||||
]
|
||||
]
|
||||
);
|
||||
break;*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -485,6 +462,7 @@ class google_api{
|
||||
return $data;
|
||||
}
|
||||
|
||||
|
||||
public function web($get){
|
||||
|
||||
// rotate proxy + key on EVERY request
|
||||
@@ -731,6 +709,160 @@ class google_api{
|
||||
return $out;
|
||||
}
|
||||
|
||||
|
||||
public function image($get){

	/*
		Image search through the Google Custom Search JSON API.

		$get carries the request filters (s, npt, newer, rm_dupes,
		country, nsfw, sort, size, format, color, type, rights).

		Returns ["status" => "ok", "npt" => token|null, "image" => [...]].
		Throws Exception on fetch failure, undecodable JSON, an invalid
		next page token, or when the API reports an error.
	*/

	// rotate proxy + key on EVERY request
	$keydata = $this->backend->get_key();
	$proxy = $this->backend->get_ip($keydata["increment"]);

	if($get["npt"]){

		// the proxy stored with the token is ignored on purpose:
		// a fresh one was picked above. Tokens live under the "web" key,
		// matching the store() call at the bottom of this method.
		[$params] =
			$this->backend->get(
				$get["npt"],
				"web"
			);

		$params = json_decode($params, true);

		if(!is_array($params)){

			throw new Exception("Failed to decode next page token");
		}

		// the key is stripped before storing, so re-add the current one
		$params["key"] = $keydata["key"];

	}else{

		$params = [
			"q" => $get["s"],
			"cx" => config::GOOGLE_CX_ENDPOINT,
			"num" => 10,
			"start" => 1,
			"searchType" => "image",
			"key" => $keydata["key"]
		];

		//
		// parse filters
		//
		if($get["newer"] !== false){

			// dateRestrict takes a number of days ("d<N>"). A day is
			// 86400 seconds; round up so the window always reaches back
			// to the requested timestamp, and never send "d0".
			$days = (int)ceil((time() - $get["newer"]) / 86400);

			$params["dateRestrict"] = "d" . max(1, $days);
		}

		if($get["rm_dupes"] == "no"){ $params["filter"] = "0"; }
		if($get["country"] != "any"){ $params["gl"] = $get["country"]; }

		$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";

		if($get["sort"] != "any"){ $params["sort"] = $get["sort"]; }

		// image filters
		if($get["size"] != "any"){ $params["imgSize"] = $get["size"]; }
		if($get["format"] != "any"){ $params["fileType"] = $get["format"]; }

		switch($get["color"]){

			case "any":
				break;

			// color "types" and dominant colors share one filter
			// but map to two different API parameters
			case "color":
			case "mono":
			case "trans":
				$params["imgColorType"] = $get["color"];
				break;

			default:
				$params["imgDominantColor"] = $get["color"];
				break;
		}

		if($get["type"] != "any"){ $params["imgType"] = $get["type"]; }
		if($get["rights"] != "any"){ $params["rights"] = $get["rights"]; }
	}

	try{
		$json =
			$this->get(
				$proxy,
				"https://www.googleapis.com/customsearch/v1",
				$params
			);

	}catch(Exception $error){

		throw new Exception("Failed to fetch JSON");
	}

	$json = json_decode($json, true);

	if($json === null){

		throw new Exception("Failed to decode JSON");
	}

	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];

	if(isset($json["error"]["message"])){

		throw new Exception(
			"API returned an error: " .
			$json["error"]["message"] .
			" (key #" . $keydata["increment"] . ")"
		);
	}

	if(!isset($json["items"])){

		// google just doesnt return items when theres no results
		return $out;
	}

	foreach($json["items"] as $image){

		$out["image"][] = [
			"title" => $this->titledots($image["title"]),
			"source" => [
				// full-size image first, thumbnail second
				[
					"url" => $image["link"],
					"width" => (int)$image["image"]["width"],
					"height" => (int)$image["image"]["height"]
				],
				[
					"url" => $image["image"]["thumbnailLink"],
					"width" => (int)$image["image"]["thumbnailWidth"],
					"height" => (int)$image["image"]["thumbnailHeight"]
				]
			],
			"url" => $image["image"]["contextLink"]
		];
	}

	// get npt
	if(isset($json["queries"]["nextPage"][0]["startIndex"])){

		// never persist the API key inside the token
		unset($params["key"]);
		$params["start"] = (int)$json["queries"]["nextPage"][0]["startIndex"];

		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"web",
				$proxy
			);
	}

	return $out;
}
|
||||
|
||||
|
||||
private function titledots($title){
|
||||
|
||||
return trim($title, " .\t\n\r\0\x0B…");
|
||||
|
||||
@@ -1,452 +0,0 @@
|
||||
<?php
|
||||
// greppr dev probably monitors 4get code, lol
|
||||
// hello greppr dude, add an API you moron
|
||||
|
||||
class greppr{
|
||||
|
||||
public function __construct(){

	// pull in both helper libraries first...
	include "lib/backend.php";
	include "lib/fuckhtml.php";

	// ...then create the backend handle for the "greppr" scraper
	// and the HTML parser used by web()
	$this->backend = new backend("greppr");
	$this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
public function getfilters($page){

	// No filters are exposed, whatever $page is.
	$filters = [];

	return $filters;
}
|
||||
|
||||
private function get($proxy, $url, $get = [], $cookies = [], $post = false){

	/*
		Perform one HTTP request against $url through $proxy.

		$get     Array of parameters: appended as a query string for
		         GET requests, sent as the urlencoded body for POST.
		$cookies name => value pairs sent in the Cookie header.
		         null is treated as "no cookies".
		$post    true to POST, false to GET.

		Returns ["headers" => [lowercased name => [values...]],
		         "data" => response body].
		Throws Exception with the curl error message on failure.
	*/

	$curlproc = curl_init();

	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding

	// callers may hand over null when no cookie was ever received
	$cookie = [];
	foreach((array)$cookies as $k => $v){

		$cookie[] = "{$k}={$v}";
	}

	$cookie = implode("; ", $cookie);

	if($post === false){

		if($get !== []){

			$get = http_build_query($get);
			$url .= "?" . $get;
		}

		if($cookie == ""){

			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1"]
			);
		}else{

			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip, deflate, br, zstd",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Referer: https://greppr.org/search",
				"Cookie: {$cookie}",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i"]
			);
		}
	}else{

		$get = http_build_query($get);

		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);

		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br, zstd",
			"Content-Type: application/x-www-form-urlencoded",
			"Content-Length: " . strlen($get),
			"Origin: https://greppr.org",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Referer: https://greppr.org/",
			"Cookie: {$cookie}",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: same-origin",
			"Sec-Fetch-User: ?1",
			"Priority: u=0, i"]
		);
	}

	// set the URL only now: for GET requests the query string was
	// appended above, and setting it earlier would silently drop it
	curl_setopt($curlproc, CURLOPT_URL, $url);

	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

	$this->backend->assign_proxy($curlproc, $proxy);

	$headers = [];

	// collect response headers, grouped by lowercased name
	curl_setopt(
		$curlproc,
		CURLOPT_HEADERFUNCTION,
		function($curlproc, $header) use (&$headers){

			// curl expects the number of bytes consumed
			$len = strlen($header);
			$header = explode(':', $header, 2);

			if(count($header) < 2){

				// ignore invalid headers
				return $len;
			}

			$headers[strtolower(trim($header[0]))][] = trim($header[1]);

			return $len;
		}
	);

	$data = curl_exec($curlproc);

	if(curl_errno($curlproc)){

		// read the message before releasing the handle
		$message = curl_error($curlproc);
		curl_close($curlproc);

		throw new Exception($message);
	}

	curl_close($curlproc);

	return [
		"headers" => $headers,
		"data" => $data
	];
}
|
||||
|
||||
public function web($get, $first_attempt = true){

	/*
		Scrape a page of greppr.org web results.

		$get["npt"]     next page token, or falsy for a fresh search
		$get["s"]       search term (fresh searches only)
		$first_attempt  accepted for compatibility, not read here

		Fresh searches fetch the homepage, harvest the form inputs and
		cookies, then POST the query to /search. Paginated searches
		replay the stored link and cookies with a GET request.

		Throws Exception on an empty search term, fetch failure,
		a missing request field, or an unusable next page token.
	*/

	if($get["npt"]){

		[$q, $proxy] = $this->backend->get($get["npt"], "web");

		$tokens = json_decode($q, true);

		if(!isset($tokens["get"])){

			throw new Exception("Failed to decode next page token");
		}

		if(
			!isset($tokens["cookies"]) ||
			!is_array($tokens["cookies"])
		){

			$tokens["cookies"] = [];
		}

		//
		// Get paginated page
		//
		try{

			$html = $this->get(
				$proxy,
				"https://greppr.org" . $tokens["get"],
				[],
				$tokens["cookies"],
				false
			);
		}catch(Exception $error){

			throw new Exception("Failed to fetch search page");
		}

	}else{

		$search = $get["s"];
		if(strlen($search) === 0){

			throw new Exception("Search term is empty!");
		}

		$proxy = $this->backend->get_ip();

		//
		// get token
		//
		try{

			$html =
				$this->get(
					$proxy,
					"https://greppr.org",
					[],
					[],
					false
				);
		}catch(Exception $error){

			throw new Exception("Failed to fetch homepage");
		}

		//
		// Parse token
		//
		$this->fuckhtml->load($html["data"]);

		// data/cookies start as empty arrays so they stay iterable
		// even when the homepage yields no inputs or cookies
		$tokens = [
			"req" => null,
			"data" => [],
			"cookies" => []
		];

		$inputs =
			$this->fuckhtml
			->getElementsByTagName(
				"input"
			);

		foreach($inputs as $input){

			if(!isset($input["attributes"]["name"])){

				continue;
			}

			$name =
				$this->fuckhtml
				->getTextContent(
					$input["attributes"]["name"]
				);

			if(!empty($input["attributes"]["value"])){

				// pre-filled input: echoed back as-is
				$tokens["data"][$name] =
					$this->fuckhtml
					->getTextContent(
						$input["attributes"]["value"]
					);
			}else{

				// input without a value: used as the query field name
				$tokens["req"] = $name;
			}
		}

		if($tokens["req"] === null){

			throw new Exception("Failed to get request ID");
		}

		if(isset($html["headers"]["set-cookie"])){

			foreach($html["headers"]["set-cookie"] as $cookie){

				// keep only the leading name=value pair
				if(
					preg_match(
						'/([^=]+)=([^;]+)/',
						$cookie,
						$matches
					)
				){

					$tokens["cookies"][$matches[1]] = $matches[2];
				}
			}
		}

		//
		// Get initial search page
		//
		$tokens_req = $tokens["data"];
		$tokens_req[$tokens["req"]] = $search;

		try{

			$html = $this->get(
				$proxy,
				"https://greppr.org/search",
				$tokens_req,
				$tokens["cookies"],
				true
			);
		}catch(Exception $error){

			throw new Exception("Failed to fetch search page");
		}
	}

	//$html = file_get_contents("scraper/greppr.html");
	//$this->fuckhtml->load($html);
	$this->fuckhtml->load($html["data"]);

	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];

	// get results for later: the parser is re-pointed at the
	// pagination list right below
	$results =
		$this->fuckhtml
		->getElementsByClassName(
			"result",
			"div"
		);

	// check for next page
	$next_elem =
		$this->fuckhtml
		->getElementsByClassName(
			"pagination",
			"ul"
		);

	if(count($next_elem) !== 0){

		$this->fuckhtml->load($next_elem[0]);

		$as =
			$this->fuckhtml
			->getElementsByClassName(
				"page-link",
				"a"
			);

		// the link pointing at "#" is treated as the current page;
		// the link right after it is the next page
		$break = false;
		foreach($as as $a){

			if($break === true){

				if(isset($a["attributes"]["href"])){

					$out["npt"] =
						$this->backend->store(
							json_encode([
								"get" =>
									$this->fuckhtml
									->getTextContent(
										$a["attributes"]["href"]
									),
								"cookies" => $tokens["cookies"]
							]),
							"web",
							$proxy
						);
				}

				break;
			}

			if(
				isset($a["attributes"]["href"]) &&
				$a["attributes"]["href"] == "#"
			){

				$break = true;
			}
		}
	}

	// scrape results
	foreach($results as $result){

		$this->fuckhtml->load($result);

		$anchors =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			);

		if(
			count($anchors) === 0 ||
			!isset($anchors[0]["attributes"]["href"])
		){

			// no usable link, nothing to report for this result
			continue;
		}

		$a = $anchors[0];

		$description =
			$this->fuckhtml
			->getElementsByClassName(
				"highlightedDesc",
				"p"
			);

		if(count($description) === 0){

			$description = null;
		}else{

			$description =
				$this->limitstrlen(
					$this->fuckhtml
					->getTextContent(
						$description[0]
					)
				);
		}

		// the date is read from the last <p>, taking the text
		// between the first and second ":". Left null when absent
		// or unparseable.
		$date = null;

		$ps =
			$this->fuckhtml
			->getElementsByTagName(
				"p"
			);

		if(count($ps) !== 0){

			$parts =
				explode(
					":",
					$this->fuckhtml
					->getTextContent(
						$ps[count($ps) - 1]["innerHTML"]
					)
				);

			if(isset($parts[1])){

				$timestamp = strtotime($parts[1]);

				if($timestamp !== false){

					$date = $timestamp;
				}
			}
		}

		$out["web"][] = [
			"title" =>
				$this->fuckhtml
				->getTextContent(
					$a["innerHTML"]
				),
			"description" => $description,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$a["attributes"]["href"]
				),
			"date" => $date,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}

	return $out;
}
|
||||
|
||||
private function limitstrlen($text){

	// Wrap at word boundaries every 300 characters and keep only
	// what comes before the first line break.
	$wrapped = wordwrap($text, 300, "\n");
	$cut = strpos($wrapped, "\n");

	if($cut === false){

		// nothing to trim
		return $wrapped;
	}

	return substr($wrapped, 0, $cut);
}
|
||||
}
|
||||
@@ -297,6 +297,14 @@ class pinterest{
|
||||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
if(
|
||||
isset($json["client_context"]["is_bad_bot"]) &&
|
||||
(int)$json["client_context"]["is_bad_bot"] === 1
|
||||
){
|
||||
|
||||
throw new Exception("Pinterest blocked this instance or request proxy.");
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
@@ -426,7 +434,7 @@ class pinterest{
|
||||
]
|
||||
],
|
||||
"url" =>
|
||||
$item["link"] === null ?
|
||||
!isset($item["link"]) ?
|
||||
"https://ca.pinterest.com/pin/" . $item["id"] :
|
||||
$item["link"]
|
||||
];
|
||||
|
||||
@@ -564,12 +564,16 @@ class startpage{
|
||||
break;
|
||||
|
||||
case "spellsuggest-google":
|
||||
|
||||
if(isset($category["results"][0]["query"])){
|
||||
|
||||
$out["spelling"] =
|
||||
[
|
||||
"type" => "including",
|
||||
"using" => $json["render"]["query"],
|
||||
"correction" => $category["results"][0]["query"]
|
||||
"correction" => urldecode($category["results"][0]["query"])
|
||||
];
|
||||
}
|
||||
break;
|
||||
|
||||
case "dictionary-qi":
|
||||
@@ -645,318 +649,6 @@ class startpage{
|
||||
}
|
||||
}
|
||||
|
||||
// parse instant answers
|
||||
if(
|
||||
$get["extendedsearch"] == "yes" &&
|
||||
$get_instant_answer === true
|
||||
){
|
||||
|
||||
// https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=BqZ3inqrAgF701&sr=1
|
||||
try{
|
||||
$post = [
|
||||
"se" => "n0vze2y9dqwy",
|
||||
"q" => $json["render"]["query"],
|
||||
"results" => [], // populate
|
||||
"enableKnowledgePanel" => true,
|
||||
"enableMediaThumbBar" => false,
|
||||
"enableSearchSuggestions" => false,
|
||||
"enableTripadvisorProperties" => [],
|
||||
"enableTripadvisorPlaces" => [],
|
||||
"enableTripadvisorPlacesForLocations" => [],
|
||||
"enableWebProducts" => false,
|
||||
"tripadvisorPartnerId" => null,
|
||||
"tripadvisorMapColorMode" => "light",
|
||||
"tripadvisorDisablesKnowledgePanel" => false,
|
||||
"instantAnswers" => [
|
||||
"smartAnswers",
|
||||
"youtube",
|
||||
"tripadvisor"
|
||||
],
|
||||
"iaType" => null,
|
||||
"forceEnhancedKnowledgePanel" => false,
|
||||
"shoppingOnly" => false,
|
||||
"allowAdultProducts" => true,
|
||||
"lang" => "en",
|
||||
"browserLang" => "en-US",
|
||||
"browserTimezone" => "America/New_York",
|
||||
"market" => null,
|
||||
"userLocation" => null,
|
||||
"userDate" => date("Y-m-d"),
|
||||
"userAgentType" => "unknown"
|
||||
];
|
||||
|
||||
foreach($out["web"] as $result){
|
||||
|
||||
$post["results"][] = [
|
||||
"url" => $result["url"],
|
||||
"title" => $result["title"]
|
||||
];
|
||||
}
|
||||
|
||||
$post = json_encode($post, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE);
|
||||
|
||||
$additional_data =
|
||||
$this->get(
|
||||
$proxy,
|
||||
"https://www.startpage.com/sp/qi?qimsn=ex&sxap=%2Fv1%2Fquery&sc=" . $json["render"]["callback_sc"] . "&sr=1",
|
||||
$post,
|
||||
true,
|
||||
true
|
||||
);
|
||||
|
||||
$additional_data = json_decode($additional_data, true);
|
||||
|
||||
if($additional_data === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON"); // just break out, dont fail completely
|
||||
}
|
||||
|
||||
if(!isset($additional_data["knowledgePanel"])){
|
||||
|
||||
throw new Exception("Response has missing data (knowledgePanel)");
|
||||
}
|
||||
|
||||
$additional_data = $additional_data["knowledgePanel"];
|
||||
|
||||
$answer = [
|
||||
"title" => $additional_data["meta"]["title"],
|
||||
"description" => [
|
||||
[
|
||||
"type" => "quote",
|
||||
"value" => $additional_data["meta"]["description"]
|
||||
]
|
||||
],
|
||||
"url" => $additional_data["meta"]["origWikiUrl"],
|
||||
"thumb" => $additional_data["meta"]["image"],
|
||||
"table" => [],
|
||||
"sublink" => []
|
||||
];
|
||||
|
||||
// parse html for instant answer
|
||||
$this->fuckhtml->load($additional_data["html"]);
|
||||
|
||||
$div =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"div"
|
||||
);
|
||||
|
||||
// get description
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"sx-kp-short-extract sx-kp-short-extract-complete",
|
||||
$div
|
||||
);
|
||||
|
||||
if(count($description) !== 0){
|
||||
|
||||
$answer["description"][] = [
|
||||
"type" => "text",
|
||||
"value" =>
|
||||
html_entity_decode(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$description[0]
|
||||
)
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
// get socials
|
||||
$socials =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"sx-wiki-social-link",
|
||||
"a"
|
||||
);
|
||||
|
||||
foreach($socials as $social){
|
||||
|
||||
$title =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$social["attributes"]["title"]
|
||||
);
|
||||
|
||||
$url =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$social["attributes"]["href"]
|
||||
);
|
||||
|
||||
switch($title){
|
||||
|
||||
case "Official Website":
|
||||
$title = "Website";
|
||||
break;
|
||||
}
|
||||
|
||||
$answer["sublink"][$title] = $url;
|
||||
}
|
||||
|
||||
// get videos
|
||||
$videos =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"sx-kp-video-grid-item",
|
||||
$div
|
||||
);
|
||||
|
||||
foreach($videos as $video){
|
||||
|
||||
$this->fuckhtml->load($video);
|
||||
|
||||
$as =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"a"
|
||||
);
|
||||
|
||||
if(count($as) === 0){
|
||||
|
||||
// ?? invalid
|
||||
continue;
|
||||
}
|
||||
|
||||
$image =
|
||||
$this->fuckhtml
|
||||
->getElementsByAttributeName(
|
||||
"data-sx-src",
|
||||
"img"
|
||||
);
|
||||
|
||||
if(count($image) !== 0){
|
||||
|
||||
$thumb = [
|
||||
"ratio" => "16:9",
|
||||
"url" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$image[0]["attributes"]["data-sx-src"]
|
||||
)
|
||||
];
|
||||
}else{
|
||||
|
||||
$thumb = [
|
||||
"ratio" => null,
|
||||
"url" => null
|
||||
];
|
||||
}
|
||||
|
||||
$out["video"][] = [
|
||||
"title" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$as[0]["attributes"]["title"]
|
||||
),
|
||||
"description" => null,
|
||||
"date" => null,
|
||||
"duration" => null,
|
||||
"views" => null,
|
||||
"thumb" => $thumb,
|
||||
"url" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$as[0]["attributes"]["href"]
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
// reset
|
||||
$this->fuckhtml->load($additional_data["html"]);
|
||||
|
||||
// get table elements
|
||||
$table =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"sx-infobox",
|
||||
"table"
|
||||
);
|
||||
|
||||
if(count($table) !== 0){
|
||||
|
||||
$trs =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"tr"
|
||||
);
|
||||
|
||||
foreach($trs as $tr){
|
||||
|
||||
$this->fuckhtml->load($tr);
|
||||
|
||||
// ok so startpage devs cant fucking code a table
|
||||
// td = content
|
||||
// th (AAAHH) = title
|
||||
$tds =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"td"
|
||||
);
|
||||
|
||||
$ths =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"th"
|
||||
);
|
||||
|
||||
if(
|
||||
count($ths) === 1 &&
|
||||
count($tds) === 1
|
||||
){
|
||||
|
||||
$title =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$ths[0]
|
||||
);
|
||||
|
||||
$description = [];
|
||||
|
||||
$this->fuckhtml->load($tds[0]);
|
||||
|
||||
$lis =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName(
|
||||
"li"
|
||||
);
|
||||
|
||||
if(count($lis) !== 0){
|
||||
|
||||
foreach($lis as $li){
|
||||
|
||||
$description[] =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$li
|
||||
);
|
||||
}
|
||||
|
||||
$description = implode(", ", $description);
|
||||
}else{
|
||||
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$tds[0]
|
||||
);
|
||||
}
|
||||
|
||||
$answer["table"][$title] = $description;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$out["answer"][] = $answer;
|
||||
|
||||
}catch(Exception $error){
|
||||
|
||||
// do nothing
|
||||
//echo "error!";
|
||||
}
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
@@ -1428,12 +1120,16 @@ class startpage{
|
||||
[
|
||||
"lui" => "english",
|
||||
"language" => "english",
|
||||
"query" => $str["q"],
|
||||
"cat" => $pagetype,
|
||||
"sc" => $str["sc"],
|
||||
"t" => "device",
|
||||
"cat" => $pagetype,
|
||||
"segment" => "startpage.udog",
|
||||
"page" => $str["page"]
|
||||
"abd" => 0,
|
||||
"abe" => 0,
|
||||
"query" => $str["q"],
|
||||
"page" => $str["page"],
|
||||
"qsr" => "all",
|
||||
"qadf" => "none" // @ todo fix (??)
|
||||
]
|
||||
),
|
||||
$pagetype,
|
||||
|
||||
@@ -868,123 +868,71 @@ class yandex{
|
||||
|
||||
if($get["npt"]){
|
||||
|
||||
[$params, $proxy] =
|
||||
[$get, $proxy] =
|
||||
$this->backend->get(
|
||||
$get["npt"],
|
||||
"video"
|
||||
);
|
||||
|
||||
$params = json_decode($params, true);
|
||||
|
||||
$nsfw = $params["nsfw"];
|
||||
unset($params["nsfw"]);
|
||||
$get = json_decode($get, true);
|
||||
}else{
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
if(strlen($get["s"]) === 0){
|
||||
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$proxy = $this->backend->get_ip();
|
||||
$nsfw = $get["nsfw"];
|
||||
$time = $get["time"];
|
||||
$duration = $get["duration"];
|
||||
}
|
||||
|
||||
// https://yandex.com/video/search?text=skycamefalling&from=tabbar&format=json&ncrnd=7271&p=0&parent-reqid=&request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]}&serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL&yu=3091577281773194415&tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
|
||||
// https://yandex.com/video/search
|
||||
// ?tmpl_version=releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63
|
||||
// &format=json
|
||||
// &request=
|
||||
// {
|
||||
// "blocks":[
|
||||
// {"block":"extra-content","params":{},"version":2},
|
||||
// {"block":"i-global__params:ajax","params":{},"version":2},
|
||||
// {"block":"search2:ajax","params":{},"version":2},
|
||||
// {"block":"vital-incut","params":{},"version":2},
|
||||
// {"block":"content_type_search","params":{},"version":2},
|
||||
// {"block":"serp-controller","params":{},"version":2},
|
||||
// {"block":"cookies_ajax","params":{},"version":2}
|
||||
// ],
|
||||
// "metadata":{
|
||||
// "bundles":{"lb":"^G]!q<X120"},
|
||||
// "assets":{"las":"react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"},
|
||||
// "extraContent":{"names":["i-react-ajax-adapter"]}
|
||||
// }
|
||||
// }
|
||||
// &yu=4861394161661655015
|
||||
// ?text=skycamefalling
|
||||
// &from=tabbar
|
||||
// &reqid=1693106278500184-6825210746979814879-balancer-l7leveler-kubr-yp-sas-7-BAL-4237
|
||||
// &suggest_reqid=486139416166165501562797413447032
|
||||
// &text=minecraft
|
||||
// &format=json
|
||||
// &ncrnd=7271
|
||||
// &p=0
|
||||
// &parent-reqid=
|
||||
// &request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]} {"blocks":[{"block":"video-app","params":{}}]}
|
||||
// &serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL
|
||||
// &yu=3091577281773194415
|
||||
// &tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
|
||||
|
||||
$params = [
|
||||
"tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
|
||||
"text" => $get["s"],
|
||||
"from" => "tabbar",
|
||||
"format" => "json",
|
||||
"request" => json_encode([
|
||||
"ncrnd" => 7271,
|
||||
"p" => 0,
|
||||
"parent-reqid" => "",
|
||||
"request" => json_encode((object)[
|
||||
"blocks" => [
|
||||
(object)[
|
||||
"block" => "extra-content",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
],
|
||||
(object)[
|
||||
"block" => "i-global__params:ajax",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
],
|
||||
(object)[
|
||||
"block" => "search2:ajax",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
],
|
||||
(object)[
|
||||
"block" => "vital-incut",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
],
|
||||
(object)[
|
||||
"block" => "content_type_search",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
],
|
||||
(object)[
|
||||
"block" => "serp-controller",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
],
|
||||
(object)[
|
||||
"block" => "cookies_ajax",
|
||||
"params" => (object)[],
|
||||
"version" => 2
|
||||
]
|
||||
],
|
||||
"metadata" => (object)[
|
||||
"bundles" => (object)[
|
||||
"lb" => "^G]!q<X120"
|
||||
],
|
||||
"assets" => (object)[
|
||||
"las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
|
||||
],
|
||||
"extraContent" => (object)[
|
||||
"names" => [
|
||||
"i-react-ajax-adapter"
|
||||
]
|
||||
"block" => "video-app",
|
||||
"params" => (object)[]
|
||||
]
|
||||
]
|
||||
]),
|
||||
"text" => $search
|
||||
"serpid" => "1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL",
|
||||
"yu" => 3091577281773194415,
|
||||
"tmpl_version" => "releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a"
|
||||
];
|
||||
|
||||
if($duration != "any"){
|
||||
if(isset($get["p"])){
|
||||
|
||||
$params["duration"] = $duration;
|
||||
$params["p"] = $get["p"];
|
||||
}
|
||||
|
||||
if($time != "any"){
|
||||
if($get["duration"] != "any"){
|
||||
|
||||
$params["within"] = $time;
|
||||
$params["duration"] = $get["duration"];
|
||||
}
|
||||
|
||||
if($get["time"] != "any"){
|
||||
|
||||
$params["within"] = $get["time"];
|
||||
}
|
||||
|
||||
/*
|
||||
$handle = fopen("scraper/yandex-video.json", "r");
|
||||
$json = fread($handle, filesize("scraper/yandex-video.json"));
|
||||
@@ -996,7 +944,7 @@ class yandex{
|
||||
$proxy,
|
||||
"https://yandex.com/video/search",
|
||||
$params,
|
||||
$nsfw,
|
||||
$get["nsfw"],
|
||||
"yandex_v"
|
||||
);
|
||||
}catch(Exception $error){
|
||||
@@ -1011,7 +959,7 @@ class yandex{
|
||||
throw new Exception("Could not parse JSON");
|
||||
}
|
||||
|
||||
if(!isset($json["blocks"])){
|
||||
if(!isset($json["results"]["clips"]["items"])){
|
||||
|
||||
throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
|
||||
}
|
||||
@@ -1026,209 +974,120 @@ class yandex{
|
||||
"reel" => []
|
||||
];
|
||||
|
||||
$html = null;
|
||||
foreach($json["blocks"] as $block){
|
||||
foreach($json["results"]["clips"]["items"] as $k => $data){
|
||||
|
||||
if(isset($block["html"])){
|
||||
if(isset($data["preview"]["posterSrc"])){
|
||||
|
||||
$html .= $block["html"];
|
||||
}
|
||||
$poster = $data["preview"]["posterSrc"];
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/^\/\//',
|
||||
$data["preview"]["posterSrc"]
|
||||
)
|
||||
){
|
||||
|
||||
$poster = "https:" . $poster;
|
||||
}
|
||||
|
||||
$this->fuckhtml->load($html);
|
||||
$thumb = [
|
||||
"ratio" => "16:9",
|
||||
"url" => $poster
|
||||
];
|
||||
}else{
|
||||
|
||||
$div =
|
||||
$this->fuckhtml
|
||||
->getElementsByTagName("div");
|
||||
$thumb = [
|
||||
"ratio" => null,
|
||||
"url" => null
|
||||
];
|
||||
}
|
||||
|
||||
/*
|
||||
Get nextpage
|
||||
*/
|
||||
$npt =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"more more_direction_next i-bem",
|
||||
$div
|
||||
);
|
||||
$out["video"][] = [
|
||||
"title" => $data["relatedParams"]["text"],
|
||||
"description" => $this->titledots($data["description"]),
|
||||
"author" => [
|
||||
"name" =>
|
||||
isset($json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["name"]) ?
|
||||
$json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["name"] : null,
|
||||
"url" =>
|
||||
isset($json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["origUrl"]) ?
|
||||
$json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["origUrl"] : null,
|
||||
"avatar" => null
|
||||
],
|
||||
"date" =>
|
||||
isset($json["results"]["clips"]["dups"][$k]["date"]) ?
|
||||
strtotime($json["results"]["clips"]["dups"][$k]["date"]) : null,
|
||||
"duration" =>
|
||||
isset($json["results"]["clips"]["dups"][$k]["duration"]["value"]) ?
|
||||
(int)$json["results"]["clips"]["dups"][$k]["duration"]["value"] : null,
|
||||
"views" =>
|
||||
isset($json["results"]["clips"]["dups"][$k]["views"]["text"]) ?
|
||||
$this->parseviews($json["results"]["clips"]["dups"][$k]["views"]["text"]) : null,
|
||||
"thumb" => $thumb,
|
||||
"url" =>
|
||||
preg_replace(
|
||||
'/^http:\/\//',
|
||||
"https://",
|
||||
$data["relatedParams"]["related_url"]
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
if(count($npt) !== 0){
|
||||
// get npt
|
||||
if($json["results"]["search"]["hasNextPage"]){
|
||||
|
||||
$get["p"] = (int)$json["results"]["search"]["currentPage"] + 1;
|
||||
|
||||
$params["p"] = "1";
|
||||
$params["nsfw"] = $nsfw;
|
||||
$out["npt"] =
|
||||
$this->backend->store(
|
||||
json_encode($params),
|
||||
json_encode($get),
|
||||
"video",
|
||||
$proxy
|
||||
);
|
||||
}
|
||||
|
||||
$items =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"serp-item",
|
||||
$div
|
||||
);
|
||||
|
||||
foreach($items as $item){
|
||||
|
||||
$data =
|
||||
json_decode(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$item["attributes"]["data-video"]
|
||||
),
|
||||
true
|
||||
);
|
||||
|
||||
$this->fuckhtml->load($item);
|
||||
|
||||
$thumb =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"thumb-image__image",
|
||||
"img"
|
||||
);
|
||||
|
||||
$c = 1;
|
||||
if(count($thumb) === 0){
|
||||
|
||||
$thumb = [
|
||||
"url" => null,
|
||||
"ratio" => null
|
||||
];
|
||||
}else{
|
||||
|
||||
$thumb = [
|
||||
"url" =>
|
||||
str_replace(
|
||||
"//",
|
||||
"https://",
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$thumb
|
||||
[0]
|
||||
["attributes"]
|
||||
["src"]
|
||||
),
|
||||
$c
|
||||
),
|
||||
"ratio" => "16:9"
|
||||
];
|
||||
}
|
||||
|
||||
$smallinfos =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"serp-item__sitelinks-item",
|
||||
"div"
|
||||
);
|
||||
|
||||
$date = null;
|
||||
$views = null;
|
||||
$first = true;
|
||||
|
||||
foreach($smallinfos as $info){
|
||||
|
||||
if($first){
|
||||
|
||||
$first = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
$info =
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$info
|
||||
);
|
||||
|
||||
if($temp_date = strtotime($info)){
|
||||
|
||||
$date = $temp_date;
|
||||
}else{
|
||||
|
||||
$views = $this->parseviews($info);
|
||||
}
|
||||
}
|
||||
|
||||
$description =
|
||||
$this->fuckhtml
|
||||
->getElementsByClassName(
|
||||
"serp-item__text serp-item__text_visibleText_always",
|
||||
"div"
|
||||
);
|
||||
|
||||
if(count($description) === 0){
|
||||
|
||||
$description = null;
|
||||
}else{
|
||||
|
||||
$description =
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$description[0]
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
$out["video"][] = [
|
||||
"title" =>
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$this->titledots(
|
||||
$data["title"]
|
||||
)
|
||||
),
|
||||
"description" => $description,
|
||||
"author" => [
|
||||
"name" => null,
|
||||
"url" => null,
|
||||
"avatar" => null
|
||||
],
|
||||
"date" => $date,
|
||||
"duration" =>
|
||||
(int)$data
|
||||
["counters"]
|
||||
["toHostingLoaded"]
|
||||
["stredParams"]
|
||||
["duration"],
|
||||
"views" => $views,
|
||||
"thumb" => $thumb,
|
||||
"url" =>
|
||||
str_replace(
|
||||
"http://",
|
||||
"https://",
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$data["counters"]
|
||||
["toHostingLoaded"]
|
||||
["postfix"]
|
||||
["href"]
|
||||
),
|
||||
$c
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
private function parseviews($text){
|
||||
private function parseviews($number){
|
||||
|
||||
$text = explode(" ", $text);
|
||||
// decimal should always be 1 number long
|
||||
$number = explode(" ", $number, 2);
|
||||
$number = $number[0];
|
||||
|
||||
$num = (float)$text[0];
|
||||
$mod = $text[1];
|
||||
$unit = strtolower($number[strlen($number) - 1]);
|
||||
|
||||
switch($mod){
|
||||
$tmp = explode(".", $number, 2);
|
||||
$number = (int)$number;
|
||||
|
||||
case "bln.": $num = $num * 1000000000; break;
|
||||
case "mln.": $num = $num * 1000000; break;
|
||||
case "thsd.": $num = $num * 1000; break;
|
||||
if(count($tmp) === 2){
|
||||
|
||||
$decimal = (int)$tmp[1];
|
||||
}else{
|
||||
|
||||
$decimal = 0;
|
||||
}
|
||||
|
||||
return $num;
|
||||
switch($unit){
|
||||
|
||||
case "k":
|
||||
$exponant = 1000;
|
||||
break;
|
||||
|
||||
case "m":
|
||||
$exponant = 1000000;
|
||||
break;
|
||||
|
||||
case "b";
|
||||
$exponant = 1000000000;
|
||||
break;
|
||||
|
||||
default:
|
||||
$exponant = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
return ($number * $exponant) + ($decimal * ($exponant / 10));
|
||||
}
|
||||
|
||||
private function titledots($title){
|
||||
|
||||
749
scraper/yep.php
749
scraper/yep.php
@@ -14,234 +14,209 @@ class yep{
|
||||
public function getfilters($page){
|
||||
|
||||
return [
|
||||
"country" => [
|
||||
"display" => "Country",
|
||||
"lang" => [
|
||||
"display" => "Language",
|
||||
"option" => [
|
||||
"all" => "All regions",
|
||||
"af" => "Afghanistan",
|
||||
"al" => "Albania",
|
||||
"dz" => "Algeria",
|
||||
"as" => "American Samoa",
|
||||
"ad" => "Andorra",
|
||||
"ao" => "Angola",
|
||||
"ai" => "Anguilla",
|
||||
"ag" => "Antigua and Barbuda",
|
||||
"ar" => "Argentina",
|
||||
"am" => "Armenia",
|
||||
"aw" => "Aruba",
|
||||
"au" => "Australia",
|
||||
"at" => "Austria",
|
||||
"az" => "Azerbaijan",
|
||||
"bs" => "Bahamas",
|
||||
"bh" => "Bahrain",
|
||||
"bd" => "Bangladesh",
|
||||
"bb" => "Barbados",
|
||||
"by" => "Belarus",
|
||||
"be" => "Belgium",
|
||||
"bz" => "Belize",
|
||||
"bj" => "Benin",
|
||||
"bt" => "Bhutan",
|
||||
"bo" => "Bolivia",
|
||||
"ba" => "Bosnia and Herzegovina",
|
||||
"bw" => "Botswana",
|
||||
"br" => "Brazil",
|
||||
"bn" => "Brunei Darussalam",
|
||||
"bg" => "Bulgaria",
|
||||
"bf" => "Burkina Faso",
|
||||
"bi" => "Burundi",
|
||||
"cv" => "Cabo Verde",
|
||||
"kh" => "Cambodia",
|
||||
"cm" => "Cameroon",
|
||||
"ca" => "Canada",
|
||||
"ky" => "Cayman Islands",
|
||||
"cf" => "Central African Republic",
|
||||
"td" => "Chad",
|
||||
"cl" => "Chile",
|
||||
"cn" => "China",
|
||||
"co" => "Colombia",
|
||||
"cg" => "Congo",
|
||||
"cd" => "Congo, Democratic Republic",
|
||||
"ck" => "Cook Islands",
|
||||
"cr" => "Costa Rica",
|
||||
"hr" => "Croatia",
|
||||
"cu" => "Cuba",
|
||||
"cy" => "Cyprus",
|
||||
"cz" => "Czechia",
|
||||
"ci" => "Côte d'Ivoire",
|
||||
"dk" => "Denmark",
|
||||
"dj" => "Djibouti",
|
||||
"dm" => "Dominica",
|
||||
"do" => "Dominican Republic",
|
||||
"ec" => "Ecuador",
|
||||
"eg" => "Egypt",
|
||||
"sv" => "El Salvador",
|
||||
"gq" => "Equatorial Guinea",
|
||||
"ee" => "Estonia",
|
||||
"et" => "Ethiopia",
|
||||
"fo" => "Faroe Islands",
|
||||
"fj" => "Fiji",
|
||||
"fi" => "Finland",
|
||||
"fr" => "France",
|
||||
"gf" => "French Guiana",
|
||||
"pf" => "French Polynesia",
|
||||
"ga" => "Gabon",
|
||||
"gm" => "Gambia",
|
||||
"ge" => "Georgia",
|
||||
"de" => "Germany",
|
||||
"gh" => "Ghana",
|
||||
"gi" => "Gibraltar",
|
||||
"gr" => "Greece",
|
||||
"gl" => "Greenland",
|
||||
"gd" => "Grenada",
|
||||
"gp" => "Guadeloupe",
|
||||
"gu" => "Guam",
|
||||
"gt" => "Guatemala",
|
||||
"gg" => "Guernsey",
|
||||
"gn" => "Guinea",
|
||||
"gy" => "Guyana",
|
||||
"ht" => "Haiti",
|
||||
"hn" => "Honduras",
|
||||
"hk" => "Hong Kong",
|
||||
"hu" => "Hungary",
|
||||
"is" => "Iceland",
|
||||
"in" => "India",
|
||||
"id" => "Indonesia",
|
||||
"iq" => "Iraq",
|
||||
"ie" => "Ireland",
|
||||
"im" => "Isle of Man",
|
||||
"il" => "Israel",
|
||||
"it" => "Italy",
|
||||
"jm" => "Jamaica",
|
||||
"jp" => "Japan",
|
||||
"je" => "Jersey",
|
||||
"jo" => "Jordan",
|
||||
"kz" => "Kazakhstan",
|
||||
"ke" => "Kenya",
|
||||
"ki" => "Kiribati",
|
||||
"kw" => "Kuwait",
|
||||
"kg" => "Kyrgyzstan",
|
||||
"la" => "Lao People's Democratic Republic",
|
||||
"lv" => "Latvia",
|
||||
"lb" => "Lebanon",
|
||||
"ls" => "Lesotho",
|
||||
"ly" => "Libya",
|
||||
"li" => "Liechtenstein",
|
||||
"lt" => "Lithuania",
|
||||
"lu" => "Luxembourg",
|
||||
"mk" => "Macedonia",
|
||||
"mg" => "Madagascar",
|
||||
"mw" => "Malawi",
|
||||
"my" => "Malaysia",
|
||||
"mv" => "Maldives",
|
||||
"ml" => "Mali",
|
||||
"mt" => "Malta",
|
||||
"mq" => "Martinique",
|
||||
"mr" => "Mauritania",
|
||||
"mu" => "Mauritius",
|
||||
"yt" => "Mayotte",
|
||||
"mx" => "Mexico",
|
||||
"fm" => "Micronesia, Federated States of",
|
||||
"md" => "Moldova",
|
||||
"mc" => "Monaco",
|
||||
"mn" => "Mongolia",
|
||||
"me" => "Montenegro",
|
||||
"ms" => "Montserrat",
|
||||
"ma" => "Morocco",
|
||||
"mz" => "Mozambique",
|
||||
"mm" => "Myanmar",
|
||||
"na" => "Namibia",
|
||||
"nr" => "Nauru",
|
||||
"np" => "Nepal",
|
||||
"nl" => "Netherlands",
|
||||
"nc" => "New Caledonia",
|
||||
"nz" => "New Zealand",
|
||||
"ni" => "Nicaragua",
|
||||
"ne" => "Niger",
|
||||
"ng" => "Nigeria",
|
||||
"nu" => "Niue",
|
||||
"no" => "Norway",
|
||||
"om" => "Oman",
|
||||
"pk" => "Pakistan",
|
||||
"ps" => "Palestine, State of",
|
||||
"pa" => "Panama",
|
||||
"pg" => "Papua New Guinea",
|
||||
"py" => "Paraguay",
|
||||
"pe" => "Peru",
|
||||
"ph" => "Philippines",
|
||||
"pn" => "Pitcairn",
|
||||
"pl" => "Poland",
|
||||
"pt" => "Portugal",
|
||||
"pr" => "Puerto Rico",
|
||||
"qa" => "Qatar",
|
||||
"ro" => "Romania",
|
||||
"ru" => "Russian Federation",
|
||||
"rw" => "Rwanda",
|
||||
"re" => "Réunion",
|
||||
"sh" => "Saint Helena",
|
||||
"kn" => "Saint Kitts and Nevis",
|
||||
"lc" => "Saint Lucia",
|
||||
"vc" => "Saint Vincent and the Grenadines",
|
||||
"ws" => "Samoa",
|
||||
"sm" => "San Marino",
|
||||
"st" => "Sao Tome and Principe",
|
||||
"sa" => "Saudi Arabia",
|
||||
"sn" => "Senegal",
|
||||
"rs" => "Serbia",
|
||||
"sc" => "Seychelles",
|
||||
"sl" => "Sierra Leone",
|
||||
"sg" => "Singapore",
|
||||
"sk" => "Slovakia",
|
||||
"si" => "Slovenia",
|
||||
"sb" => "Solomon Islands",
|
||||
"so" => "Somalia",
|
||||
"kr" => "Sourth Korea",
|
||||
"za" => "South Africa",
|
||||
"es" => "Spain",
|
||||
"lk" => "Sri Lanka",
|
||||
"sr" => "Suriname",
|
||||
"se" => "Sweden",
|
||||
"ch" => "Switzerland",
|
||||
"tw" => "Taiwan",
|
||||
"tj" => "Tajikistan",
|
||||
"tz" => "Tanzania",
|
||||
"th" => "Thailand",
|
||||
"tl" => "Timor-Leste",
|
||||
"tg" => "Togo",
|
||||
"tk" => "Tokelau",
|
||||
"any" => "Any language",
|
||||
"aa" => "Afar",
|
||||
"ab" => "Abkhazian",
|
||||
"ae" => "Avestan",
|
||||
"af" => "Afrikaans",
|
||||
"ak" => "Akan",
|
||||
"am" => "Amharic",
|
||||
"an" => "Aragonese",
|
||||
"ar" => "Arabic",
|
||||
"as" => "Assamese",
|
||||
"av" => "Avaric",
|
||||
"ay" => "Aymara",
|
||||
"az" => "Azerbaijani",
|
||||
"ba" => "Bashkir",
|
||||
"be" => "Belarusian",
|
||||
"bg" => "Bulgarian",
|
||||
"bh" => "Bihari",
|
||||
"bi" => "Bislama",
|
||||
"bm" => "Bambara",
|
||||
"bn" => "Bengali",
|
||||
"bo" => "Tibetan",
|
||||
"br" => "Breton",
|
||||
"bs" => "Bosnian",
|
||||
"ca" => "Catalan",
|
||||
"ce" => "Chechen",
|
||||
"ch" => "Chamorro",
|
||||
"co" => "Corsican",
|
||||
"cr" => "Cree",
|
||||
"cs" => "Czech",
|
||||
"cu" => "Church Slavic",
|
||||
"cv" => "Chuvash",
|
||||
"cy" => "Welsh",
|
||||
"da" => "Danish",
|
||||
"de" => "German",
|
||||
"dv" => "Divehi",
|
||||
"dz" => "Dzongkha",
|
||||
"ee" => "Ewe",
|
||||
"el" => "Greek",
|
||||
"en" => "English",
|
||||
"eo" => "Esperanto",
|
||||
"es" => "Spanish",
|
||||
"et" => "Estonian",
|
||||
"eu" => "Basque",
|
||||
"fa" => "Persian",
|
||||
"ff" => "Fulah",
|
||||
"fi" => "Finnish",
|
||||
"fj" => "Fijian",
|
||||
"fo" => "Faroese",
|
||||
"fr" => "French",
|
||||
"fy" => "Western Frisian",
|
||||
"ga" => "Irish",
|
||||
"gd" => "Scottish Gaelic",
|
||||
"gl" => "Galician",
|
||||
"gn" => "Guarani",
|
||||
"gu" => "Gujarati",
|
||||
"gv" => "Manx",
|
||||
"ha" => "Hausa",
|
||||
"he" => "Hebrew",
|
||||
"hi" => "Hindi",
|
||||
"ho" => "Hiri Motu",
|
||||
"hr" => "Croatian",
|
||||
"ht" => "Haitian",
|
||||
"hu" => "Hungarian",
|
||||
"hy" => "Armenian",
|
||||
"hz" => "Herero",
|
||||
"ia" => "Interlingua",
|
||||
"id" => "Indonesian",
|
||||
"ie" => "Interlingue",
|
||||
"ig" => "Igbo",
|
||||
"ii" => "Sichuan Yi",
|
||||
"ik" => "Inupiaq",
|
||||
"io" => "Ido",
|
||||
"is" => "Icelandic",
|
||||
"it" => "Italian",
|
||||
"iu" => "Inuktitut",
|
||||
"ja" => "Japanese",
|
||||
"jv" => "Javanese",
|
||||
"ka" => "Georgian",
|
||||
"kg" => "Kongo",
|
||||
"ki" => "Kikuyu",
|
||||
"kj" => "Kuanyama",
|
||||
"kk" => "Kazakh",
|
||||
"kl" => "Kalaallisut",
|
||||
"km" => "Central Khmer",
|
||||
"kn" => "Kannada",
|
||||
"ko" => "Korean",
|
||||
"kr" => "Kanuri",
|
||||
"ks" => "Kashmiri",
|
||||
"ku" => "Kurdish",
|
||||
"kv" => "Komi",
|
||||
"kw" => "Cornish",
|
||||
"ky" => "Kyrgyz",
|
||||
"la" => "Latin",
|
||||
"lb" => "Luxembourgish",
|
||||
"lg" => "Ganda",
|
||||
"li" => "Limburgish",
|
||||
"ln" => "Lingala",
|
||||
"lo" => "Lao",
|
||||
"lt" => "Lithuanian",
|
||||
"lu" => "Luba-Katanga",
|
||||
"lv" => "Latvian",
|
||||
"mg" => "Malagasy",
|
||||
"mh" => "Marshallese",
|
||||
"mi" => "Maori",
|
||||
"mk" => "Macedonian",
|
||||
"ml" => "Malayalam",
|
||||
"mn" => "Mongolian",
|
||||
"mr" => "Marathi",
|
||||
"ms" => "Malay",
|
||||
"mt" => "Maltese",
|
||||
"my" => "Burmese",
|
||||
"na" => "Nauru",
|
||||
"nb" => "Norwegian Bokmål",
|
||||
"nd" => "North Ndebele",
|
||||
"ne" => "Nepali",
|
||||
"ng" => "Ndonga",
|
||||
"nl" => "Dutch",
|
||||
"nn" => "Norwegian Nynorsk",
|
||||
"no" => "Norwegian",
|
||||
"nr" => "South Ndebele",
|
||||
"nv" => "Navajo",
|
||||
"ny" => "Chichewa",
|
||||
"oc" => "Occitan",
|
||||
"oj" => "Ojibwa",
|
||||
"om" => "Oromo",
|
||||
"or" => "Oriya",
|
||||
"os" => "Ossetian",
|
||||
"pa" => "Punjabi",
|
||||
"pi" => "Pali",
|
||||
"pl" => "Polish",
|
||||
"ps" => "Pashto",
|
||||
"pt" => "Portuguese",
|
||||
"qu" => "Quechua",
|
||||
"rm" => "Romansh",
|
||||
"rn" => "Rundi",
|
||||
"ro" => "Romanian",
|
||||
"ru" => "Russian",
|
||||
"rw" => "Kinyarwanda",
|
||||
"sa" => "Sanskrit",
|
||||
"sc" => "Sardinian",
|
||||
"sd" => "Sindhi",
|
||||
"se" => "Northern Sami",
|
||||
"sg" => "Sango",
|
||||
"si" => "Sinhala",
|
||||
"sk" => "Slovak",
|
||||
"sl" => "Slovenian",
|
||||
"sm" => "Samoan",
|
||||
"sn" => "Shona",
|
||||
"so" => "Somali",
|
||||
"sq" => "Albanian",
|
||||
"sr" => "Serbian",
|
||||
"ss" => "Swati",
|
||||
"st" => "Southern Sotho",
|
||||
"su" => "Sundanese",
|
||||
"sv" => "Swedish",
|
||||
"sw" => "Swahili",
|
||||
"ta" => "Tamil",
|
||||
"te" => "Telugu",
|
||||
"tg" => "Tajik",
|
||||
"th" => "Thai",
|
||||
"ti" => "Tigrinya",
|
||||
"tk" => "Turkmen",
|
||||
"tl" => "Tagalog",
|
||||
"tn" => "Tswana",
|
||||
"to" => "Tonga",
|
||||
"tt" => "Trinidad and Tobago",
|
||||
"tn" => "Tunisia",
|
||||
"tr" => "Turkey",
|
||||
"tm" => "Turkmenistan",
|
||||
"ug" => "Uganda",
|
||||
"ua" => "Ukraine",
|
||||
"ae" => "United Arab Emirates",
|
||||
"gb" => "United Kingdom",
|
||||
"us" => "United States",
|
||||
"uy" => "Uruguay",
|
||||
"uz" => "Uzbekistan",
|
||||
"vu" => "Vanuatu",
|
||||
"ve" => "Venezuela",
|
||||
"vn" => "Vietnam",
|
||||
"vg" => "Virgin Islands, British",
|
||||
"vi" => "Virgin Islands, U.S.",
|
||||
"ye" => "Yemen",
|
||||
"zm" => "Zambia",
|
||||
"zw" => "Zimbabwe"
|
||||
"tr" => "Turkish",
|
||||
"ts" => "Tsonga",
|
||||
"tt" => "Tatar",
|
||||
"tw" => "Twi",
|
||||
"ty" => "Tahitian",
|
||||
"ug" => "Uyghur",
|
||||
"uk" => "Ukrainian",
|
||||
"ur" => "Urdu",
|
||||
"uz" => "Uzbek",
|
||||
"ve" => "Venda",
|
||||
"vi" => "Vietnamese",
|
||||
"vo" => "Volapük",
|
||||
"wa" => "Walloon",
|
||||
"wo" => "Wolof",
|
||||
"xh" => "Xhosa",
|
||||
"yi" => "Yiddish",
|
||||
"yo" => "Yoruba",
|
||||
"za" => "Zhuang",
|
||||
"zh" => "Chinese",
|
||||
"zh-cn" => "Chinese (Simplified)",
|
||||
"zh-tw" => "Chinese (Traditional)",
|
||||
"zu" => "Zulu"
|
||||
]
|
||||
],
|
||||
"nsfw" => [
|
||||
"display" => "NSFW",
|
||||
"option" => [
|
||||
"yes" => "Yes",
|
||||
"maybe" => "Maybe",
|
||||
"no" => "No"
|
||||
]
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
private function get($proxy, $url, $get = []){
|
||||
private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
|
||||
|
||||
$curlproc = curl_init();
|
||||
|
||||
@@ -256,6 +231,21 @@ class yep{
|
||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||
|
||||
if($use_api){
|
||||
|
||||
$post_data = json_encode($post_data);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||
["Content-Type: application/json",
|
||||
"Authorization: Bearer $bearer",
|
||||
"Content-Length: " . strlen($post_data)]
|
||||
);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_POST, true);
|
||||
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
|
||||
}else{
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||
["User-Agent: " . config::USER_AGENT,
|
||||
"Accept: */*",
|
||||
@@ -271,6 +261,7 @@ class yep{
|
||||
"Priority: u=4",
|
||||
"TE: trailers"]
|
||||
);
|
||||
}
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||
@@ -295,22 +286,17 @@ class yep{
|
||||
|
||||
public function web($get){
|
||||
|
||||
if(config::YEP_USE_API){
|
||||
|
||||
return $this->web_api($get);
|
||||
}
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$country = $get["country"];
|
||||
$nsfw = $get["nsfw"];
|
||||
|
||||
switch($nsfw){
|
||||
|
||||
case "yes": $nsfw = "off"; break;
|
||||
case "maybe": $nsfw = "moderate"; break;
|
||||
case "no": $nsfw = "strict"; break;
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"spelling" => [
|
||||
@@ -327,22 +313,23 @@ class yep{
|
||||
"related" => []
|
||||
];
|
||||
|
||||
// parse filters
|
||||
$filters = [
|
||||
"limit" => 100, // wwwwwwwwwwwwwww
|
||||
"query" => $search,
|
||||
];
|
||||
|
||||
if($get["nsfw"] == "no"){ $filters["safeSearch"] = "moderate"; }
|
||||
if($get["lang"] != "any"){ $filters["hl"] = $get["lang"]; }
|
||||
|
||||
try{
|
||||
|
||||
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
|
||||
// https://api.yep.com/search?limit=20&query=asmr
|
||||
$json =
|
||||
$this->get(
|
||||
$this->backend->get_ip(),
|
||||
"https://api.yep.com/fs/2/search",
|
||||
[
|
||||
"client" => "web",
|
||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
||||
"limit" => "99999",
|
||||
"no_correct" => "false",
|
||||
"q" => $search,
|
||||
"safeSearch" => $nsfw,
|
||||
"type" => "web"
|
||||
]
|
||||
"https://api.yep.com/search",
|
||||
$filters
|
||||
);
|
||||
|
||||
}catch(Exception $error){
|
||||
@@ -408,7 +395,7 @@ class yep{
|
||||
)
|
||||
),
|
||||
"url" => $item["url"],
|
||||
"date" => strtotime($item["first_seen"]),
|
||||
"date" => null,
|
||||
"type" => "web",
|
||||
"thumb" => [
|
||||
"url" => null,
|
||||
@@ -422,83 +409,11 @@ class yep{
|
||||
}
|
||||
}
|
||||
|
||||
if(isset($json[1]["featured_news"])){
|
||||
|
||||
foreach($json[1]["featured_news"] as $news){
|
||||
|
||||
$out["news"][] = [
|
||||
"title" => $news["title"],
|
||||
"description" =>
|
||||
$this->titledots(
|
||||
strip_tags(
|
||||
html_entity_decode(
|
||||
$news["snippet"]
|
||||
)
|
||||
)
|
||||
),
|
||||
"date" => strtotime($news["first_seen"]),
|
||||
"thumb" =>
|
||||
isset($news["img"]) ?
|
||||
[
|
||||
"url" => $this->unshiturl($news["img"]),
|
||||
"ratio" => "16:9"
|
||||
] :
|
||||
[
|
||||
"url" => null,
|
||||
"ratio" => null
|
||||
],
|
||||
"url" => $news["url"]
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
if(isset($json[1]["featured_images"])){
|
||||
|
||||
foreach($json[1]["featured_images"] as $image){
|
||||
|
||||
if(
|
||||
$image["width"] !== 0 &&
|
||||
$image["height"] !== 0
|
||||
){
|
||||
|
||||
$thumb_width = $image["width"] >= 260 ? 260 : $image["width"];
|
||||
$thumb_height = ceil($image["height"] * ($thumb_width / $image["width"]));
|
||||
|
||||
$width = $image["width"];
|
||||
$height = $image["height"];
|
||||
}else{
|
||||
|
||||
$thumb_width = null;
|
||||
$thumb_height = null;
|
||||
$width = null;
|
||||
$height = null;
|
||||
}
|
||||
|
||||
$out["image"][] = [
|
||||
"title" => $image["title"],
|
||||
"source" => [
|
||||
[
|
||||
"url" => $image["image_id"],
|
||||
"width" => $width,
|
||||
"height" => $height
|
||||
],
|
||||
[
|
||||
"url" => $image["src"],
|
||||
"width" => $thumb_width,
|
||||
"height" => $thumb_height
|
||||
]
|
||||
],
|
||||
"url" => $image["host_page"]
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public function image($get){
|
||||
private function web_api($get){
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
@@ -506,142 +421,53 @@ class yep{
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$country = $get["country"];
|
||||
$nsfw = $get["nsfw"];
|
||||
|
||||
switch($nsfw){
|
||||
|
||||
case "yes": $nsfw = "off"; break;
|
||||
case "maybe": $nsfw = "moderate"; break;
|
||||
case "no": $nsfw = "strict"; break;
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"spelling" => [
|
||||
"type" => "no_correction",
|
||||
"using" => null,
|
||||
"correction" => null
|
||||
],
|
||||
"npt" => null,
|
||||
"image" => []
|
||||
"answer" => [],
|
||||
"web" => [],
|
||||
"image" => [],
|
||||
"video" => [],
|
||||
"news" => [],
|
||||
"related" => []
|
||||
];
|
||||
|
||||
// parse filters
|
||||
$filters = [
|
||||
"query" => $search,
|
||||
"limit" => 100
|
||||
];
|
||||
|
||||
if($get["nsfw"] == "no"){ $filters["safe_search"] = true; }
|
||||
if($get["lang"] != "any"){ $filters["language"] = [ $get["lang"] ]; }
|
||||
|
||||
// add api key
|
||||
$key_data = $this->backend->get_key();
|
||||
|
||||
try{
|
||||
|
||||
$json =
|
||||
$this->get(
|
||||
$this->backend->get_ip(), // no nextpage!
|
||||
"https://api.yep.com/fs/2/search",
|
||||
[
|
||||
"client" => "web",
|
||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
||||
"no_correct" => "false",
|
||||
"q" => $search,
|
||||
"safeSearch" => $nsfw,
|
||||
"type" => "images"
|
||||
]
|
||||
$this->backend->get_ip($key_data["increment"]),
|
||||
"https://platform.yep.com/api/search",
|
||||
[],
|
||||
true,
|
||||
$filters,
|
||||
$key_data["key"]
|
||||
);
|
||||
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to fetch JSON");
|
||||
}
|
||||
|
||||
$this->detect_cf($json);
|
||||
|
||||
$json = json_decode($json, true);
|
||||
|
||||
if($json === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
if(isset($json[1]["results"])){
|
||||
foreach($json[1]["results"] as $item){
|
||||
|
||||
if(
|
||||
$item["width"] !== 0 &&
|
||||
$item["height"] !== 0
|
||||
){
|
||||
|
||||
$thumb_width = $item["width"] >= 260 ? 260 : $item["width"];
|
||||
$thumb_height = ceil($item["height"] * ($thumb_width / $item["width"]));
|
||||
|
||||
$width = $item["width"];
|
||||
$height = $item["height"];
|
||||
}else{
|
||||
|
||||
$thumb_width = null;
|
||||
$thumb_height = null;
|
||||
$width = null;
|
||||
$height = null;
|
||||
}
|
||||
|
||||
$out["image"][] = [
|
||||
"title" => $item["title"],
|
||||
"source" => [
|
||||
[
|
||||
"url" => $item["image_id"],
|
||||
"width" => $width,
|
||||
"height" => $height
|
||||
],
|
||||
[
|
||||
"url" => $item["src"],
|
||||
"width" => $thumb_width,
|
||||
"height" => $thumb_height
|
||||
]
|
||||
],
|
||||
"url" => $item["host_page"]
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
|
||||
public function news($get){
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$country = $get["country"];
|
||||
$nsfw = $get["nsfw"];
|
||||
|
||||
switch($nsfw){
|
||||
|
||||
case "yes": $nsfw = "off"; break;
|
||||
case "maybe": $nsfw = "moderate"; break;
|
||||
case "no": $nsfw = "strict"; break;
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"news" => []
|
||||
];
|
||||
|
||||
try{
|
||||
|
||||
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
|
||||
$json =
|
||||
$this->get(
|
||||
$this->backend->get_ip(),
|
||||
"https://api.yep.com/fs/2/search",
|
||||
[
|
||||
"client" => "web",
|
||||
"gl" => $country == "all" ? $country : strtoupper($country),
|
||||
"limit" => "99999",
|
||||
"no_correct" => "false",
|
||||
"q" => $search,
|
||||
"safeSearch" => $nsfw,
|
||||
"type" => "news"
|
||||
]
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to fetch JSON");
|
||||
}
|
||||
|
||||
$this->detect_cf($json);
|
||||
// should never happen
|
||||
//$this->detect_cf($json);
|
||||
|
||||
$json = json_decode($json, true);
|
||||
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
|
||||
@@ -651,35 +477,54 @@ class yep{
|
||||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
if(isset($json[1]["results"])){
|
||||
foreach($json[1]["results"] as $item){
|
||||
if(isset($json["error"])){
|
||||
|
||||
$out["news"][] = [
|
||||
throw new Exception("Yep API returned an error: " . $json["error"]);
|
||||
}
|
||||
|
||||
if(isset($json["errors"])){
|
||||
|
||||
throw new Exception("Yep API returned the following errors: {$json["message"]}");
|
||||
}
|
||||
|
||||
if(
|
||||
isset($json["success"]) &&
|
||||
$json["success"] !== true
|
||||
){
|
||||
|
||||
throw new Exception("Yep API returned a false-y success value");
|
||||
}
|
||||
|
||||
if(!isset($json["results"])){
|
||||
|
||||
throw new Exception("Yep API did not return a results object");
|
||||
}
|
||||
|
||||
foreach($json["results"] as $item){
|
||||
|
||||
if(
|
||||
$item["url"] === null ||
|
||||
$item["url"] == ""
|
||||
){
|
||||
|
||||
// sometimes API fucks up
|
||||
continue;
|
||||
}
|
||||
|
||||
$out["web"][] = [
|
||||
"title" => $item["title"],
|
||||
"author" => null,
|
||||
"description" =>
|
||||
$this->titledots(
|
||||
strip_tags(
|
||||
html_entity_decode(
|
||||
$item["snippet"]
|
||||
)
|
||||
)
|
||||
),
|
||||
"date" => strtotime($item["first_seen"]),
|
||||
"thumb" =>
|
||||
isset($item["img"]) ?
|
||||
[
|
||||
"url" => $this->unshiturl($item["img"]),
|
||||
"ratio" => "16:9"
|
||||
] :
|
||||
[
|
||||
"description" => $item["description"],
|
||||
"url" => $item["url"],
|
||||
"date" => null,
|
||||
"type" => "web",
|
||||
"thumb" => [
|
||||
"url" => null,
|
||||
"ratio" => null
|
||||
],
|
||||
"url" => $item["url"]
|
||||
"sublink" => [],
|
||||
"table" => []
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
24
settings.php
24
settings.php
@@ -161,14 +161,6 @@ $settings = [
|
||||
"value" => "yep",
|
||||
"text" => "Yep"
|
||||
],
|
||||
[
|
||||
"value" => "greppr",
|
||||
"text" => "Greppr"
|
||||
],
|
||||
[
|
||||
"value" => "crowdview",
|
||||
"text" => "Crowdview"
|
||||
],
|
||||
[
|
||||
"value" => "mwmbl",
|
||||
"text" => "Mwmbl"
|
||||
@@ -196,10 +188,6 @@ $settings = [
|
||||
[
|
||||
"value" => "wiby",
|
||||
"text" => "wiby"
|
||||
],
|
||||
[
|
||||
"value" => "curlie",
|
||||
"text" => "Curlie"
|
||||
]
|
||||
]
|
||||
],
|
||||
@@ -223,6 +211,10 @@ $settings = [
|
||||
"value" => "google",
|
||||
"text" => "Google"
|
||||
],
|
||||
[
|
||||
"value" => "google_api",
|
||||
"text" => "Google API"
|
||||
],
|
||||
[
|
||||
"value" => "google_cse",
|
||||
"text" => "Google CSE"
|
||||
@@ -239,10 +231,6 @@ $settings = [
|
||||
"value" => "qwant",
|
||||
"text" => "Qwant"
|
||||
],
|
||||
[
|
||||
"value" => "yep",
|
||||
"text" => "Yep"
|
||||
],
|
||||
[
|
||||
"value" => "baidu",
|
||||
"text" => "Baidu"
|
||||
@@ -379,10 +367,6 @@ $settings = [
|
||||
"value" => "qwant",
|
||||
"text" => "Qwant"
|
||||
],
|
||||
[
|
||||
"value" => "yep",
|
||||
"text" => "Yep"
|
||||
],
|
||||
[
|
||||
"value" => "mojeek",
|
||||
"text" => "Mojeek"
|
||||
|
||||
Reference in New Issue
Block a user