added cock cock, removed solofield
This commit is contained in:
parent
60f7150008
commit
f7499294de
|
@ -950,7 +950,8 @@ class frontend{
|
|||
"mwmbl" => "Mwmbl",
|
||||
"mojeek" => "Mojeek",
|
||||
"baidu" => "Baidu",
|
||||
"solofield" => "Solofield",
|
||||
"coccoc" => "Cốc Cốc",
|
||||
//"solofield" => "Solofield",
|
||||
"marginalia" => "Marginalia",
|
||||
"wiby" => "wiby",
|
||||
"curlie" => "Curlie"
|
||||
|
@ -971,7 +972,7 @@ class frontend{
|
|||
"qwant" => "Qwant",
|
||||
"yep" => "Yep",
|
||||
"baidu" => "Baidu",
|
||||
"solofield" => "Solofield",
|
||||
//"solofield" => "Solofield",
|
||||
"pinterest" => "Pinterest",
|
||||
"flickr" => "Flickr",
|
||||
"fivehpx" => "500px",
|
||||
|
@ -996,7 +997,8 @@ class frontend{
|
|||
"startpage" => "Startpage",
|
||||
"qwant" => "Qwant",
|
||||
"baidu" => "Baidu",
|
||||
"solofield" => "Solofield"
|
||||
"coccoc" => "Cốc Cốc"
|
||||
//"solofield" => "Solofield"
|
||||
]
|
||||
];
|
||||
break;
|
||||
|
@ -1337,7 +1339,7 @@ class frontend{
|
|||
return htmlspecialchars($image);
|
||||
}
|
||||
|
||||
return "/proxy?i=" . urlencode($image) . "&s=" . $format;
|
||||
return "https://4get.ca/proxy?i=" . urlencode($image) . "&s=" . $format;
|
||||
}
|
||||
|
||||
public function htmlnextpage($gets, $npt, $page){
|
||||
|
|
|
@ -0,0 +1,672 @@
|
|||
<?php
|
||||
|
||||
class coccoc{
|
||||
|
||||
public function __construct(){
|
||||
|
||||
include "lib/backend.php";
|
||||
$this->backend = new backend("coccoc");
|
||||
|
||||
include "lib/fuckhtml.php";
|
||||
$this->fuckhtml = new fuckhtml();
|
||||
}
|
||||
|
||||
|
||||
private function get($proxy, $url, $get = []){
|
||||
|
||||
$curlproc = curl_init();
|
||||
|
||||
if($get !== []){
|
||||
$get = http_build_query($get);
|
||||
$url .= "?" . $get;
|
||||
}
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
||||
|
||||
// http2 bypass
|
||||
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER, [
|
||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language: en-US,en;q=0.5",
|
||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||
"DNT: 1",
|
||||
"Sec-GPC: 1",
|
||||
"Connection: keep-alive",
|
||||
//"Cookie: _contentAB_15040_vi=V-06_01; split_test_search=new_search; uid=L_bauXyZBY1B; vid=uCVQJQSTgb9QGT3o; ls=1753742684; serp_version=29223843/7621a70; savedS=direct",
|
||||
"Upgrade-Insecure-Requests: 1",
|
||||
"Sec-Fetch-Dest: document",
|
||||
"Sec-Fetch-Mode: navigate",
|
||||
"Sec-Fetch-Site: cross-site",
|
||||
"Priority: u=0, i"
|
||||
]);
|
||||
|
||||
$this->backend->assign_proxy($curlproc, $proxy);
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||
|
||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
|
||||
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
|
||||
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
|
||||
|
||||
$data = curl_exec($curlproc);
|
||||
|
||||
if(curl_errno($curlproc)){
|
||||
throw new Exception(curl_error($curlproc));
|
||||
}
|
||||
|
||||
curl_close($curlproc);
|
||||
return $data;
|
||||
}
|
||||
|
||||
public function getfilters($pagetype){
|
||||
|
||||
return [
|
||||
"nsfw" => [
|
||||
"display" => "NSFW",
|
||||
"option" => [
|
||||
"yes" => "Yes", // nsfw by default????
|
||||
"no" => "No" // &safe=1
|
||||
]
|
||||
],
|
||||
"time" => [
|
||||
"display" => "Time posted",
|
||||
"option" => [
|
||||
"any" => "Any time",
|
||||
"1w" => "1 week ago",
|
||||
"2w" => "2 weeks ago",
|
||||
"1m" => "1 month ago",
|
||||
"3m" => "3 months ago",
|
||||
"6m" => "6 months ago",
|
||||
"1Y" => "1 year ago"
|
||||
]
|
||||
],
|
||||
"filter" => [
|
||||
"display" => "Remove duplicates",
|
||||
"option" => [
|
||||
"no" => "No",
|
||||
"yes" => "Yes" // &filter=0
|
||||
]
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
public function web($get){
|
||||
|
||||
if($get["npt"]){
|
||||
|
||||
[$query, $proxy] =
|
||||
$this->backend->get(
|
||||
$get["npt"],
|
||||
"web"
|
||||
);
|
||||
|
||||
$query = json_decode($query, true);
|
||||
}else{
|
||||
|
||||
$proxy = $this->backend->get_ip();
|
||||
|
||||
$query = [
|
||||
"query" => $get["s"]
|
||||
];
|
||||
|
||||
// add filters
|
||||
if($get["nsfw"] == "no"){
|
||||
|
||||
$query["safe"] = 1;
|
||||
}
|
||||
|
||||
if($get["time"] != "any"){
|
||||
|
||||
$query["tbs"] = $get["time"];
|
||||
}
|
||||
|
||||
if($get["filter"] == "yes"){
|
||||
|
||||
$query["filter"] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
|
||||
$html =
|
||||
$this->get(
|
||||
$proxy,
|
||||
"https://coccoc.com/search",
|
||||
$query
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to get search page");
|
||||
}
|
||||
//$html = file_get_contents("scraper/coccoc.html");
|
||||
|
||||
|
||||
$html = explode("window.composerResponse", $html, 2);
|
||||
|
||||
if(count($html) !== 2){
|
||||
|
||||
throw new Exception("Failed to grep window.composerResponse");
|
||||
}
|
||||
|
||||
$html =
|
||||
json_decode(
|
||||
$this->fuckhtml
|
||||
->extract_json(
|
||||
ltrim($html[1], " =")
|
||||
),
|
||||
true
|
||||
);
|
||||
|
||||
if($html === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
if(!isset($html["search"]["search_results"])){
|
||||
|
||||
throw new Exception("Coc Coc did not return a search_results object");
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"spelling" => [
|
||||
"type" => "no_correction",
|
||||
"using" => null,
|
||||
"correction" => null
|
||||
],
|
||||
"npt" => null,
|
||||
"answer" => [],
|
||||
"web" => [],
|
||||
"image" => [],
|
||||
"video" => [],
|
||||
"news" => [],
|
||||
"related" => []
|
||||
];
|
||||
|
||||
// word correction
|
||||
foreach($html["top"] as $element){
|
||||
|
||||
if(isset($element["spellChecker"][0]["query"])){
|
||||
|
||||
$out["spelling"] = [
|
||||
"type" => "not_many",
|
||||
"using" => $html["search"]["query"],
|
||||
"correction" => $element["spellChecker"][0]["query"]
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
foreach($html["search"]["search_results"] as $result){
|
||||
|
||||
if(isset($result["type"])){
|
||||
|
||||
switch($result["type"]){
|
||||
|
||||
//
|
||||
// Related searches
|
||||
//
|
||||
case "related_queries":
|
||||
$out["related"] = $result["queries"];
|
||||
continue 2;
|
||||
|
||||
//
|
||||
// Videos
|
||||
//
|
||||
case "video_hits":
|
||||
foreach($result["results"] as $video){
|
||||
|
||||
if(
|
||||
isset($video["image_url"]) &&
|
||||
!empty($video["image_url"])
|
||||
){
|
||||
|
||||
$thumb = [
|
||||
"ratio" => "16:9",
|
||||
"url" => $video["image_url"]
|
||||
];
|
||||
}else{
|
||||
|
||||
$thumb = [
|
||||
"ratio" => null,
|
||||
"url" => null
|
||||
];
|
||||
}
|
||||
|
||||
$out["video"][] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$video["title"]
|
||||
)
|
||||
),
|
||||
"description" => null,
|
||||
"author" => [
|
||||
"name" => $video["uploader"],
|
||||
"url" => null,
|
||||
"avatar" => null
|
||||
],
|
||||
"date" => (int)$video["date"],
|
||||
"duration" => (int)$video["duration"],
|
||||
"views" => null,
|
||||
"thumb" => $thumb,
|
||||
"url" => $video["url"]
|
||||
];
|
||||
}
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
|
||||
if(
|
||||
!isset($result["title"]) ||
|
||||
!isset($result["url"])
|
||||
){
|
||||
|
||||
// should not happen
|
||||
continue;
|
||||
}
|
||||
|
||||
if(isset($result["rich"]["data"]["image_url"])){
|
||||
|
||||
$thumb = [
|
||||
"url" => $result["rich"]["data"]["image_url"],
|
||||
"ratio" => "16:9"
|
||||
];
|
||||
}else{
|
||||
|
||||
$thumb = [
|
||||
"url" => null,
|
||||
"ratio" => null
|
||||
];
|
||||
}
|
||||
|
||||
$sublinks = [];
|
||||
|
||||
if(isset($result["rich"]["data"]["linked_docs"])){
|
||||
|
||||
foreach($result["rich"]["data"]["linked_docs"] as $sub){
|
||||
|
||||
$sublinks[] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$sub["title"]
|
||||
)
|
||||
),
|
||||
"description" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$sub["content"]
|
||||
)
|
||||
),
|
||||
"date" => null,
|
||||
"url" => $sub["url"]
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// get date
|
||||
if(isset($result["date"])){
|
||||
|
||||
$date = (int)$result["date"];
|
||||
}else{
|
||||
|
||||
$date = null;
|
||||
}
|
||||
|
||||
// probe for metadata
|
||||
$table = [];
|
||||
|
||||
if(isset($result["rich"]["data"]["rating"])){
|
||||
|
||||
$table["Rating"] = $result["rich"]["data"]["rating"];
|
||||
|
||||
if(isset($result["rich"]["data"]["num_rating"])){
|
||||
|
||||
$table["Rating"] .= " (" . number_format($result["rich"]["data"]["num_rating"]) . " ratings)";
|
||||
}
|
||||
}
|
||||
|
||||
if(isset($result["rich"]["data"]["views"])){
|
||||
|
||||
$table["Views"] = number_format($result["rich"]["data"]["views"]);
|
||||
}
|
||||
|
||||
if(isset($result["rich"]["data"]["duration"])){
|
||||
|
||||
$table["Duration"] = $this->int2hms($result["rich"]["data"]["duration"]);
|
||||
}
|
||||
|
||||
if(isset($result["rich"]["data"]["channel_name"])){
|
||||
|
||||
$table["Author"] = $result["rich"]["data"]["channel_name"];
|
||||
}
|
||||
|
||||
if(isset($result["rich"]["data"]["video_quality"])){
|
||||
|
||||
$table["Quality"] = $result["rich"]["data"]["video_quality"];
|
||||
}
|
||||
|
||||
if(isset($result["rich"]["data"]["category"])){
|
||||
|
||||
$table["Category"] = $result["rich"]["data"]["category"];
|
||||
}
|
||||
|
||||
$out["web"][] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$result["title"]
|
||||
)
|
||||
),
|
||||
"description" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$result["content"]
|
||||
)
|
||||
),
|
||||
"url" => $result["url"],
|
||||
"date" => $date,
|
||||
"type" => "web",
|
||||
"thumb" => $thumb,
|
||||
"sublink" => $sublinks,
|
||||
"table" => $table
|
||||
];
|
||||
}
|
||||
|
||||
//
|
||||
// Get wikipedia head
|
||||
//
|
||||
if(isset($html["right"])){
|
||||
|
||||
foreach($html["right"] as $wiki){
|
||||
|
||||
$description = [];
|
||||
|
||||
if(isset($wiki["short_intro"])){
|
||||
|
||||
$description[] =
|
||||
[
|
||||
"type" => "quote",
|
||||
"value" => $wiki["short_intro"],
|
||||
];
|
||||
}
|
||||
|
||||
if(isset($wiki["intro"])){
|
||||
|
||||
$description[] =
|
||||
[
|
||||
"type" => "text",
|
||||
"value" => $wiki["intro"],
|
||||
];
|
||||
}
|
||||
|
||||
// get table elements
|
||||
$table = [];
|
||||
|
||||
if(isset($wiki["fields"])){
|
||||
|
||||
foreach($wiki["fields"] as $element){
|
||||
|
||||
$table[$element["title"]] = implode(", ", $element["value"]);
|
||||
}
|
||||
}
|
||||
|
||||
// get sublinks
|
||||
$sublinks = [];
|
||||
|
||||
if(isset($wiki["website"])){
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/^http/',
|
||||
$wiki["website"]
|
||||
) === 0
|
||||
){
|
||||
|
||||
$sublinks["Website"] = "https://" . $wiki["website"];
|
||||
}else{
|
||||
|
||||
$sublinks["Website"] = $wiki["website"];
|
||||
}
|
||||
}
|
||||
|
||||
foreach($wiki["profiles"] as $sitename => $url){
|
||||
|
||||
$sitename = explode("_", $sitename);
|
||||
$sitename = ucfirst($sitename[count($sitename) - 1]);
|
||||
|
||||
$sublinks[$sitename] = $url;
|
||||
}
|
||||
|
||||
$out["answer"][] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$wiki["title"]
|
||||
),
|
||||
"description" => $description,
|
||||
"url" => null,
|
||||
"thumb" => isset($wiki["image"]["contentUrl"]) ? $wiki["image"]["contentUrl"] : null,
|
||||
"table" => $table,
|
||||
"sublink" => $sublinks
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// get next page
|
||||
if((int)$html["search"]["page"] < (int)$html["search"]["max_page"]){
|
||||
|
||||
// https://coccoc.com/composer?_=1754021153532&p=0&q=zbabduiqwhduwqhdnwq&reqid=bwcAs00q&s=direct&apiV=1
|
||||
// ^json endpoint, but we can just do &page=2 lol
|
||||
|
||||
if(!isset($query["page"])){
|
||||
|
||||
$query["page"] = 2;
|
||||
}else{
|
||||
|
||||
$query["page"]++;
|
||||
}
|
||||
|
||||
$out["npt"] =
|
||||
$this->backend
|
||||
->store(
|
||||
json_encode($query),
|
||||
"web",
|
||||
$proxy
|
||||
);
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
public function video($get){
|
||||
|
||||
//$html = file_get_contents("scraper/coccoc.html");
|
||||
if($get["npt"]){
|
||||
|
||||
[$query, $proxy] =
|
||||
$this->backend->get(
|
||||
$get["npt"],
|
||||
"videos"
|
||||
);
|
||||
|
||||
$query = json_decode($query, true);
|
||||
}else{
|
||||
|
||||
$proxy = $this->backend->get_ip();
|
||||
|
||||
$query = [
|
||||
"query" => $get["s"],
|
||||
"tbm" => "vid"
|
||||
];
|
||||
|
||||
// add filters
|
||||
if($get["nsfw"] == "no"){
|
||||
|
||||
$query["safe"] = 1;
|
||||
}
|
||||
|
||||
if($get["time"] != "any"){
|
||||
|
||||
$query["tbs"] = $get["time"];
|
||||
}
|
||||
|
||||
if($get["filter"] == "yes"){
|
||||
|
||||
$query["filter"] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
|
||||
$html =
|
||||
$this->get(
|
||||
$proxy,
|
||||
"https://coccoc.com/search",
|
||||
$query
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to get search page");
|
||||
}
|
||||
|
||||
$html = explode("window.composerResponse", $html, 2);
|
||||
|
||||
if(count($html) !== 2){
|
||||
|
||||
throw new Exception("Failed to grep window.composerResponse");
|
||||
}
|
||||
|
||||
$html =
|
||||
json_decode(
|
||||
$this->fuckhtml
|
||||
->extract_json(
|
||||
ltrim($html[1], " =")
|
||||
),
|
||||
true
|
||||
);
|
||||
|
||||
if($html === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON");
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"video" => [],
|
||||
"author" => [],
|
||||
"livestream" => [],
|
||||
"playlist" => [],
|
||||
"reel" => []
|
||||
];
|
||||
|
||||
if(!isset($html["search_video"]["search_results"])){
|
||||
|
||||
if(isset($html["search_video"]["error"]["title"])){
|
||||
|
||||
if($html["search_video"]["error"]["title"] == "Không tìm thấy kết quả nào"){
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
throw new Exception("Coc Coc returned an error: " . $html["search_video"]["error"]["title"]);
|
||||
}
|
||||
|
||||
throw new Exception("Coc Coc did not supply a search_results object");
|
||||
}
|
||||
|
||||
foreach($html["search_video"]["search_results"] as $video){
|
||||
|
||||
if(isset($video["rich"]["data"]["image_url"])){
|
||||
|
||||
$thumb = [
|
||||
"ratio" => "16:9",
|
||||
"url" => $video["rich"]["data"]["image_url"]
|
||||
];
|
||||
}else{
|
||||
|
||||
$thumb = [
|
||||
"ratio" => null,
|
||||
"url" => null
|
||||
];
|
||||
}
|
||||
|
||||
$out["video"][] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$video["title"]
|
||||
)
|
||||
),
|
||||
"description" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$video["content"]
|
||||
)
|
||||
),
|
||||
"author" => [
|
||||
"name" =>
|
||||
isset($video["rich"]["data"]["channel_name"]) ?
|
||||
$video["rich"]["data"]["channel_name"] : null,
|
||||
"url" => null,
|
||||
"avatar" => null
|
||||
],
|
||||
"date" =>
|
||||
isset($video["date"]) ?
|
||||
$video["date"] : null,
|
||||
"duration" =>
|
||||
isset($video["rich"]["data"]["duration"]) ?
|
||||
(int)$video["rich"]["data"]["duration"] : null,
|
||||
"views" => null,
|
||||
"thumb" => $thumb,
|
||||
"url" => $video["url"]
|
||||
];
|
||||
}
|
||||
|
||||
// get next page
|
||||
if((int)$html["search_video"]["page"] < (int)$html["search_video"]["max_page"]){
|
||||
|
||||
if(!isset($query["page"])){
|
||||
|
||||
$query["page"] = 2;
|
||||
}else{
|
||||
|
||||
$query["page"]++;
|
||||
}
|
||||
|
||||
$out["npt"] =
|
||||
$this->backend
|
||||
->store(
|
||||
json_encode($query),
|
||||
"videos",
|
||||
$proxy
|
||||
);
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
private function titledots($title){
|
||||
|
||||
return trim($title, " .\t\n\r\0\x0B…");
|
||||
}
|
||||
|
||||
private function int2hms($seconds){
|
||||
|
||||
$hours = floor($seconds / 3600);
|
||||
$minutes = floor(($seconds % 3600) / 60);
|
||||
$seconds = $seconds % 60;
|
||||
|
||||
return sprintf("%02d:%02d:%02d", $hours, $minutes, $seconds);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue