diff --git a/lib/frontend.php b/lib/frontend.php index c5edc0e..82fd4bd 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -969,7 +969,7 @@ class frontend{ "qwant" => "Qwant", "yep" => "Yep", "solofield" => "Solofield", - //"pinterest" => "Pinterest", + "pinterest" => "Pinterest", "imgur" => "Imgur", "ftm" => "FindThatMeme" ] diff --git a/scraper/pinterest.php b/scraper/pinterest.php index 3787f77..4188bce 100644 --- a/scraper/pinterest.php +++ b/scraper/pinterest.php @@ -13,31 +13,104 @@ class pinterest{ return []; } - private function get($proxy, $url, $get = []){ + private function get($proxy, $url, $get = [], &$cookies, $header_data_post = null){ $curlproc = curl_init(); - if($get !== []){ + if($header_data_post === null){ + + // handling GET + + // extract cookies + $cookies_tmp = []; + curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){ + + $length = strlen($header); + + $header = explode(":", $header, 2); + + if(trim(strtolower($header[0])) == "set-cookie"){ + + $cookie_tmp = explode("=", trim($header[1]), 2); + + $cookies_tmp[trim($cookie_tmp[0])] = + explode(";", $cookie_tmp[1], 2)[0]; + } + + return $length; + }); + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: application/json, text/javascript, */*, q=0.01", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "Referer: https://ca.pinterest.com/", + "X-Requested-With: XMLHttpRequest", + "X-APP-VERSION: 78f8764", + "X-Pinterest-AppState: active", + "X-Pinterest-Source-Url: /", + "X-Pinterest-PWS-Handler: www/index.js", + "screen-dpr: 1", + "is-preload-enabled: 1", + "DNT: 1", + "Sec-GPC: 1", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-origin", + "Connection: keep-alive", + "Alt-Used: ca.pinterest.com", + "Priority: u=0", + "TE: trailers"] + ); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + }else{ + + // handling POST (pagination) $get = http_build_query($get); - $url .= "?" . $get; + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: application/json, text/javascript, */*, q=0.01", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "Content-Type: application/x-www-form-urlencoded", + "Content-Length: " . strlen($get), + "Referer: https://ca.pinterest.com/", + "X-Requested-With: XMLHttpRequest", + "X-APP-VERSION: 78f8764", + "X-CSRFToken: " . $cookies["csrf"], + "X-Pinterest-AppState: active", + "X-Pinterest-Source-Url: /search/pins/?rs=ac&len=2&q=" . urlencode($header_data_post) . "&eq=" . urlencode($header_data_post), + "X-Pinterest-PWS-Handler: www/search/[scope].js", + "screen-dpr: 1", + "is-preload-enabled: 1", + "Origin: https://ca.pinterest.com", + "DNT: 1", + "Sec-GPC: 1", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-origin", + "Connection: keep-alive", + "Alt-Used: ca.pinterest.com", + "Cookie: " . $cookies["cookie"], + "TE: trailers"] + ); + + curl_setopt($curlproc, CURLOPT_POST, true); + curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get); } curl_setopt($curlproc, CURLOPT_URL, $url); curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding - curl_setopt($curlproc, CURLOPT_HTTPHEADER, - ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", - "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", - "Accept-Language: en-US,en;q=0.5", - "Accept-Encoding: gzip", - "DNT: 1", - "Connection: keep-alive", - "Upgrade-Insecure-Requests: 1", - "Sec-Fetch-Dest: document", - "Sec-Fetch-Mode: navigate", - "Sec-Fetch-Site: none", - "Sec-Fetch-User: ?1"] - ); + + // http2 bypass + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); @@ -54,6 +127,26 @@ class pinterest{ throw new Exception(curl_error($curlproc)); } + if($header_data_post === null){ + + if(!isset($cookies_tmp["csrftoken"])){ + + throw new Exception("Failed to grep CSRF token"); + } + + $cookies = ""; + + foreach($cookies_tmp as $cookie_name => $cookie_value){ + + $cookies .= $cookie_name . "=" . $cookie_value . "; "; + } + + $cookies = [ + "csrf" => $cookies_tmp["csrftoken"], + "cookie" => rtrim($cookies, " ;") + ]; + } + curl_close($curlproc); return $data; } @@ -62,17 +155,68 @@ class pinterest{ if($get["npt"]){ - // @TODO - // post data for next page - $data = [ - "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", - "data" => - json_encode( + [$data, $proxy] = + $this->backend->get( + $get["npt"], "images" + ); + + $data = json_decode($data, true); + + $search = $data["q"]; + $cookies = $data["cookies"]; + + try{ + $json = + $this->get( + $proxy, + "https://ca.pinterest.com/resource/BaseSearchResource/get/", [ - // {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMWRSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVTlFVm1sTlZGcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}} - ] + "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", + "data" => json_encode( + [ + "options" => [ + "applied_unified_filters" => null, + "appliedProductFilters" => "---", + "article" => null, + "auto_correction_disabled" => false, + "corpus" => null, + "customized_rerank_type" => null, + "domains" => null, + "dynamicPageSizeExpGroup" => null, + "filters" => null, + "journey_depth" => null, + "page_size" => null, + "price_max" => null, + "price_min" => null, + "query_pin_sigs" => null, + "query" => $data["q"], + "redux_normalize_feed" => true, + "request_params" => null, + "rs" => "typed", + "scope" => "pins", + "selected_one_bar_modules" => null, + "source_id" => null, + "source_module_id" => null, + "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", + "top_pin_id" => null, + "top_pin_ids" => null, + "bookmarks" => [ + $data["bookmark"] + ] + ], + "context" => [] + ], + JSON_UNESCAPED_SLASHES + ) + ], + $cookies, + $search ); - ]; + + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } }else{ @@ -81,27 +225,45 @@ class pinterest{ throw new Exception("Search term is empty!"); } + + // https://ca.pinterest.com/resource/BaseSearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac&data=%7B%22options%22%3A%7B%22applied_unified_filters%22%3Anull%2C%22appliedProductFilters%22%3A%22---%22%2C%22article%22%3Anull%2C%22auto_correction_disabled%22%3Afalse%2C%22corpus%22%3Anull%2C%22customized_rerank_type%22%3Anull%2C%22domains%22%3Anull%2C%22dynamicPageSizeExpGroup%22%3Anull%2C%22filters%22%3Anull%2C%22journey_depth%22%3Anull%2C%22page_size%22%3Anull%2C%22price_max%22%3Anull%2C%22price_min%22%3Anull%2C%22query_pin_sigs%22%3Anull%2C%22query%22%3A%22higurashi%20when%20they%20cry%22%2C%22redux_normalize_feed%22%3Atrue%2C%22request_params%22%3Anull%2C%22rs%22%3A%22ac%22%2C%22scope%22%3A%22pins%22%2C%22selected_one_bar_modules%22%3Anull%2C%22source_id%22%3Anull%2C%22source_module_id%22%3Anull%2C%22source_url%22%3A%22%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac%22%2C%22top_pin_id%22%3Anull%2C%22top_pin_ids%22%3Anull%7D%2C%22context%22%3A%7B%7D%7D&_=1736116313987 + // source_url=%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac + // &data=%7B%22options%22%3A%7B%22applied_unified_filters%22%3Anull%2C%22appliedProductFilters%22%3A%22---%22%2C%22article%22%3Anull%2C%22auto_correction_disabled%22%3Afalse%2C%22corpus%22%3Anull%2C%22customized_rerank_type%22%3Anull%2C%22domains%22%3Anull%2C%22dynamicPageSizeExpGroup%22%3Anull%2C%22filters%22%3Anull%2C%22journey_depth%22%3Anull%2C%22page_size%22%3Anull%2C%22price_max%22%3Anull%2C%22price_min%22%3Anull%2C%22query_pin_sigs%22%3Anull%2C%22query%22%3A%22higurashi%20when%20they%20cry%22%2C%22redux_normalize_feed%22%3Atrue%2C%22request_params%22%3Anull%2C%22rs%22%3A%22ac%22%2C%22scope%22%3A%22pins%22%2C%22selected_one_bar_modules%22%3Anull%2C%22source_id%22%3Anull%2C%22source_module_id%22%3Anull%2C%22source_url%22%3A%22%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac%22%2C%22top_pin_id%22%3Anull%2C%22top_pin_ids%22%3Anull%7D%2C%22context%22%3A%7B%7D%7D + // &_=1736116313987 + + $source_url = "/search/pins/?q=" . urlencode($search) . "&rs=" . urlencode($search); $filter = [ - "source_url" => "/search/pins/?q=" . urlencode($search), + "source_url" => $source_url, "rs" => "typed", "data" => json_encode( [ "options" => [ - "article" => null, - "applied_filters" => null, + "applied_unified_filters" => null, "appliedProductFilters" => "---", - "auto_correction_disabled" => false, + "article" => null, "corpus" => null, "customized_rerank_type" => null, + "domains" => null, + "dynamicPageSizeExpGroup" => null, "filters" => null, - "query" => $search, + "journey_depth" => null, + "page_size" => null, + "price_max" => null, + "price_min" => null, "query_pin_sigs" => null, + "query" => $search, "redux_normalize_feed" => true, - "rs" => "typed", + "request_params" => null, + "rs" => "ac", "scope" => "pins", // pins, boards, videos, - "source_id" => null + "selected_one_bar_modules" => null, + "source_id" => null, + "source_module_id" => null, + "source_url" => $source_url, + "top_pin_id" => null, + "top_pin_ids" => null ], "context" => [] ] @@ -110,24 +272,26 @@ class pinterest{ ]; $proxy = $this->backend->get_ip(); - } - - try{ - $json = - json_decode( + $cookies = []; + + try{ + $json = $this->get( $proxy, - "https://www.pinterest.ca/resource/BaseSearchResource/get/", - $filter - ), - true - ); - - }catch(Exception $error){ - - throw new Exception("Failed to fetch JSON"); + "https://ca.pinterest.com/resource/BaseSearchResource/get/", + $filter, + $cookies, + null + ); + + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } } + $json = json_decode($json, true); + if($json === null){ throw new Exception("Failed to decode JSON"); @@ -139,6 +303,60 @@ class pinterest{ "image" => [] ]; + if( + !isset( + $json["resource_response"] + ["status"] + ) + ){ + + throw new Exception("Unknown API failure"); + } + + if($json["resource_response"]["status"] != "success"){ + + $status = "Got non-OK response: " . $json["resource_response"]["status"]; + + if( + isset( + $json["resource_response"]["message"] + ) + ){ + + $status .= " - " . $json["resource_response"]["message"]; + } + + throw new Exception($status); + } + + if( + isset( + $json["resource_response"]["sensitivity"] + ["notices"][0]["description"]["text"] + ) + ){ + + throw new Exception( + "Pinterest returned a notice: " . + $json["resource_response"]["sensitivity"]["notices"][0]["description"]["text"] + ); + } + + // get NPT + if(isset($json["resource_response"]["bookmark"])){ + + $out["npt"] = + $this->backend->store( + json_encode([ + "q" => $search, + "bookmark" => $json["resource_response"]["bookmark"], + "cookies" => $cookies + ]), + "images", + $proxy + ); + } + foreach( $json ["resource_response"] @@ -150,6 +368,7 @@ class pinterest{ switch($item["type"]){ case "pin": + case "board": /* Handle image object @@ -206,42 +425,15 @@ class pinterest{ "height" => (int)$thumb["height"] ] ], - "url" => "https://www.pinterest.com/pin/" . $item["id"] + "url" => + $item["link"] === null ? + "https://ca.pinterest.com/pin/" . $item["id"] : + $item["link"] ]; break; - - case "board": - if(isset($item["cover_pin"]["image_url"])){ - - $image = [ - "url" => $item["cover_pin"]["image_url"], - "width" => (int)$item["cover_pin"]["size"][0], - "height" => (int)$item["cover_pin"]["size"][1] - ]; - }elseif(isset($item["image_cover_url_hd"])){ - /* - $image = [ - "url" => - "width" => null, - "height" => null - ];*/ - } - break; } } return $out; } - - private function getfullresimage($image, $has_og){ - - $has_og = $has_og ? "1200x" : "originals"; - - return - preg_replace( - '/https:\/\/i\.pinimg\.com\/[^\/]+\//', - "https://i.pinimg.com/" . $has_og . "/", - $image - ); - } } diff --git a/settings.php b/settings.php index 6bd6c0f..a3db7c4 100644 --- a/settings.php +++ b/settings.php @@ -227,10 +227,10 @@ $settings = [ "value" => "solofield", "text" => "Solofield" ], - /*[ + [ "value" => "pinterest", "text" => "Pinterest" - ],*/ + ], [ "value" => "imgur", "text" => "Imgur"