soundcloud fix, for good this time
This commit is contained in:
parent
77931f3ee9
commit
e252bf4fce
103
scraper/sc.php
103
scraper/sc.php
|
@ -6,6 +6,9 @@ class sc{
|
||||||
|
|
||||||
include "lib/backend.php";
|
include "lib/backend.php";
|
||||||
$this->backend = new backend("sc");
|
$this->backend = new backend("sc");
|
||||||
|
|
||||||
|
include "lib/fuckhtml.php";
|
||||||
|
$this->fuckhtml = new fuckhtml();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getfilters($page){
|
public function getfilters($page){
|
||||||
|
@ -25,7 +28,7 @@ class sc{
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = []){
|
private function get($proxy, $url, $get = [], $web_req = false){
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
|
@ -37,19 +40,42 @@ class sc{
|
||||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
curl_setopt($curlproc, CURLOPT_URL, $url);
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
|
||||||
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
|
||||||
["User-Agent: " . config::USER_AGENT,
|
// use http2
|
||||||
"Accept: application/json, text/javascript, */*; q=0.01",
|
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding: gzip",
|
if($web_req === false){
|
||||||
"Referer: https://soundcloud.com/",
|
|
||||||
"Origin: https://soundcloud.com",
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||||
"DNT: 1",
|
["User-Agent: " . config::USER_AGENT,
|
||||||
"Connection: keep-alive",
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
"Sec-Fetch-Dest: empty",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"Sec-Fetch-Mode: cors",
|
"Accept-Encoding: gzip",
|
||||||
"Sec-Fetch-Site: same-site"]
|
"Referer: https://soundcloud.com/",
|
||||||
);
|
"Origin: https://soundcloud.com",
|
||||||
|
"DNT: 1",
|
||||||
|
"Connection: keep-alive",
|
||||||
|
"Sec-Fetch-Dest: empty",
|
||||||
|
"Sec-Fetch-Mode: cors",
|
||||||
|
"Sec-Fetch-Site: same-site"]
|
||||||
|
);
|
||||||
|
}else{
|
||||||
|
|
||||||
|
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
|
||||||
|
["User-Agent: " . config::USER_AGENT,
|
||||||
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
|
"Accept-Encoding: gzip",
|
||||||
|
"DNT: 1",
|
||||||
|
"Connection: keep-alive",
|
||||||
|
"Upgrade-Insecure-Requests: 1",
|
||||||
|
"Sec-Fetch-Dest: document",
|
||||||
|
"Sec-Fetch-Mode: navigate",
|
||||||
|
"Sec-Fetch-Site: cross-site",
|
||||||
|
"Priority: u=1",
|
||||||
|
"TE: trailers"]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
|
||||||
|
@ -396,13 +422,47 @@ class sc{
|
||||||
|
|
||||||
$token = apcu_fetch("sc_token");
|
$token = apcu_fetch("sc_token");
|
||||||
|
|
||||||
if($token === false){
|
if($token !== false){
|
||||||
|
|
||||||
|
return $token;
|
||||||
|
}
|
||||||
|
|
||||||
|
// search through all javascript components on the main page
|
||||||
|
try{
|
||||||
|
$html =
|
||||||
|
$this->get(
|
||||||
|
$proxy,
|
||||||
|
"https://soundcloud.com",
|
||||||
|
[]
|
||||||
|
);
|
||||||
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
throw new Exception("Failed to fetch front page");
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->fuckhtml->load($html);
|
||||||
|
|
||||||
|
$scripts =
|
||||||
|
$this->fuckhtml
|
||||||
|
->getElementsByTagName(
|
||||||
|
"script"
|
||||||
|
);
|
||||||
|
|
||||||
|
foreach($scripts as $script){
|
||||||
|
|
||||||
|
if(
|
||||||
|
!isset($script["attributes"]["src"]) ||
|
||||||
|
strpos($script["attributes"]["src"], "sndcdn.com") === false
|
||||||
|
){
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
try{
|
try{
|
||||||
$js =
|
$js =
|
||||||
$this->get(
|
$this->get(
|
||||||
$proxy,
|
$proxy,
|
||||||
"https://a-v2.sndcdn.com/assets/0-a901c1e0.js",
|
$script["attributes"]["src"],
|
||||||
[]
|
[]
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
@ -416,16 +476,15 @@ class sc{
|
||||||
$token
|
$token
|
||||||
);
|
);
|
||||||
|
|
||||||
if(!isset($token[1])){
|
if(isset($token[1])){
|
||||||
|
|
||||||
throw new Exception("Failed to get search token");
|
apcu_store("sc_token", $token[1]);
|
||||||
|
return $token[1];
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
apcu_store("sc_token", $token[1]);
|
|
||||||
return $token[1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $token;
|
throw new Exception("Did not find a Soundcloud token in the Javascript blobs");
|
||||||
}
|
}
|
||||||
|
|
||||||
private function limitstrlen($text){
|
private function limitstrlen($text){
|
||||||
|
|
Loading…
Reference in New Issue