diff --git a/lib/frontend.php b/lib/frontend.php index 7bf650d..9476bf6 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -939,6 +939,7 @@ class frontend{ "brave" => "Brave", "yandex" => "Yandex", "google" => "Google", + "google_cse" => "Google CSE", "startpage" => "Startpage", "qwant" => "Qwant", "ghostery" => "Ghostery", @@ -963,6 +964,7 @@ class frontend{ "yandex" => "Yandex", "brave" => "Brave", "google" => "Google", + "google_cse" => "Google CSE", "startpage" => "Startpage", "qwant" => "Qwant", "yep" => "Yep", @@ -1326,7 +1328,7 @@ class frontend{ return htmlspecialchars($image); } - return "/proxy?i=" . urlencode($image) . "&s=" . $format; + return "/proxy.php?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ diff --git a/scraper/google_cse.php b/scraper/google_cse.php new file mode 100644 index 0000000..d3b0408 --- /dev/null +++ b/scraper/google_cse.php @@ -0,0 +1,1008 @@ +backend = new backend("google_cse"); + + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); + } + + public function getfilters($page){ + + $base = [ + "country" => [ // gl= (image: cr=countryAF) + "display" => "Country", + "option" => [ + "any" => "Any country", + "af" => "Afghanistan", + "al" => "Albania", + "dz" => "Algeria", + "as" => "American Samoa", + "ad" => "Andorra", + "ao" => "Angola", + "ai" => "Anguilla", + "aq" => "Antarctica", + "ag" => "Antigua and Barbuda", + "ar" => "Argentina", + "am" => "Armenia", + "aw" => "Aruba", + "au" => "Australia", + "at" => "Austria", + "az" => "Azerbaijan", + "bs" => "Bahamas", + "bh" => "Bahrain", + "bd" => "Bangladesh", + "bb" => "Barbados", + "by" => "Belarus", + "be" => "Belgium", + "bz" => "Belize", + "bj" => "Benin", + "bm" => "Bermuda", + "bt" => "Bhutan", + "bo" => "Bolivia", + "ba" => "Bosnia and Herzegovina", + "bw" => "Botswana", + "bv" => "Bouvet Island", + "br" => "Brazil", + "io" => "British Indian Ocean Territory", + "bn" => "Brunei Darussalam", + "bg" => "Bulgaria", + "bf" => "Burkina Faso", + "bi" => "Burundi", + "kh" => "Cambodia", + "cm" => "Cameroon", + "ca" => "Canada", + "cv" => "Cape Verde", + "ky" => "Cayman Islands", + "cf" => "Central African Republic", + "td" => "Chad", + "cl" => "Chile", + "cn" => "China", + "cx" => "Christmas Island", + "cc" => "Cocos (Keeling) Islands", + "co" => "Colombia", + "km" => "Comoros", + "cg" => "Congo", + "cd" => "Congo, the Democratic Republic", + "ck" => "Cook Islands", + "cr" => "Costa Rica", + "ci" => "Cote D'ivoire", + "hr" => "Croatia", + "cu" => "Cuba", + "cy" => "Cyprus", + "cz" => "Czech Republic", + "dk" => "Denmark", + "dj" => "Djibouti", + "dm" => "Dominica", + "do" => "Dominican Republic", + "ec" => "Ecuador", + "eg" => "Egypt", + "sv" => "El Salvador", + "gq" => "Equatorial Guinea", + "er" => "Eritrea", + "ee" => "Estonia", + "et" => "Ethiopia", + "fk" => "Falkland Islands (Malvinas)", + "fo" => "Faroe Islands", + "fj" => "Fiji", + "fi" => "Finland", + "fr" => "France", + "gf" => "French Guiana", + "pf" => "French Polynesia", + "tf" => "French Southern Territories", + "ga" => "Gabon", + "gm" => "Gambia", + "ge" => "Georgia", + "de" => "Germany", + "gh" => "Ghana", + "gi" => "Gibraltar", + "gr" => "Greece", + "gl" => "Greenland", + "gd" => "Grenada", + "gp" => "Guadeloupe", + "gu" => "Guam", + "gt" => "Guatemala", + "gn" => "Guinea", + "gw" => "Guinea-Bissau", + "gy" => "Guyana", + "ht" => "Haiti", + "hm" => "Heard Island and Mcdonald Islands", + "va" => "Holy See (Vatican City State)", + "hn" => "Honduras", + "hk" => "Hong Kong", + "hu" => "Hungary", + "is" => "Iceland", + "in" => "India", + "id" => "Indonesia", + "ir" => "Iran, Islamic Republic", + "iq" => "Iraq", + "ie" => "Ireland", + "il" => "Israel", + "it" => "Italy", + "jm" => "Jamaica", + "jp" => "Japan", + "jo" => "Jordan", + "kz" => "Kazakhstan", + "ke" => "Kenya", + "ki" => "Kiribati", + "kp" => "Korea, Democratic People's Republic", + "kr" => "Korea, Republic", + "kw" => "Kuwait", + "kg" => "Kyrgyzstan", + "la" => "Lao People's Democratic Republic", + "lv" => "Latvia", + "lb" => "Lebanon", + "ls" => "Lesotho", + "lr" => "Liberia", + "ly" => "Libyan Arab Jamahiriya", + "li" => "Liechtenstein", + "lt" => "Lithuania", + "lu" => "Luxembourg", + "mo" => "Macao", + "mk" => "Macedonia, the Former Yugosalv Republic", + "mg" => "Madagascar", + "mw" => "Malawi", + "my" => "Malaysia", + "mv" => "Maldives", + "ml" => "Mali", + "mt" => "Malta", + "mh" => "Marshall Islands", + "mq" => "Martinique", + "mr" => "Mauritania", + "mu" => "Mauritius", + "yt" => "Mayotte", + "mx" => "Mexico", + "fm" => "Micronesia, Federated States", + "md" => "Moldova, Republic", + "mc" => "Monaco", + "mn" => "Mongolia", + "ms" => "Montserrat", + "ma" => "Morocco", + "mz" => "Mozambique", + "mm" => "Myanmar", + "na" => "Namibia", + "nr" => "Nauru", + "np" => "Nepal", + "nl" => "Netherlands", + "an" => "Netherlands Antilles", + "nc" => "New Caledonia", + "nz" => "New Zealand", + "ni" => "Nicaragua", + "ne" => "Niger", + "ng" => "Nigeria", + "nu" => "Niue", + "nf" => "Norfolk Island", + "mp" => "Northern Mariana Islands", + "no" => "Norway", + "om" => "Oman", + "pk" => "Pakistan", + "pw" => "Palau", + "ps" => "Palestinian Territory, Occupied", + "pa" => "Panama", + "pg" => "Papua New Guinea", + "py" => "Paraguay", + "pe" => "Peru", + "ph" => "Philippines", + "pn" => "Pitcairn", + "pl" => "Poland", + "pt" => "Portugal", + "pr" => "Puerto Rico", + "qa" => "Qatar", + "re" => "Reunion", + "ro" => "Romania", + "ru" => "Russian Federation", + "rw" => "Rwanda", + "sh" => "Saint Helena", + "kn" => "Saint Kitts and Nevis", + "lc" => "Saint Lucia", + "pm" => "Saint Pierre and Miquelon", + "vc" => "Saint Vincent and the Grenadines", + "ws" => "Samoa", + "sm" => "San Marino", + "st" => "Sao Tome and Principe", + "sa" => "Saudi Arabia", + "sn" => "Senegal", + "cs" => "Serbia and Montenegro", + "sc" => "Seychelles", + "sl" => "Sierra Leone", + "sg" => "Singapore", + "sk" => "Slovakia", + "si" => "Slovenia", + "sb" => "Solomon Islands", + "so" => "Somalia", + "za" => "South Africa", + "gs" => "South Georgia and the South Sandwich Islands", + "es" => "Spain", + "lk" => "Sri Lanka", + "sd" => "Sudan", + "sr" => "Suriname", + "sj" => "Svalbard and Jan Mayen", + "sz" => "Swaziland", + "se" => "Sweden", + "ch" => "Switzerland", + "sy" => "Syrian Arab Republic", + "tw" => "Taiwan, Province of China", + "tj" => "Tajikistan", + "tz" => "Tanzania, United Republic", + "th" => "Thailand", + "tl" => "Timor-Leste", + "tg" => "Togo", + "tk" => "Tokelau", + "to" => "Tonga", + "tt" => "Trinidad and Tobago", + "tn" => "Tunisia", + "tr" => "Turkey", + "tm" => "Turkmenistan", + "tc" => "Turks and Caicos Islands", + "tv" => "Tuvalu", + "ug" => "Uganda", + "ua" => "Ukraine", + "ae" => "United Arab Emirates", + "uk" => "United Kingdom", + "us" => "United States", + "um" => "United States Minor Outlying Islands", + "uy" => "Uruguay", + "uz" => "Uzbekistan", + "vu" => "Vanuatu", + "ve" => "Venezuela", + "vn" => "Viet Nam", + "vg" => "Virgin Islands, British", + "vi" => "Virgin Islands, U.S.", + "wf" => "Wallis and Futuna", + "eh" => "Western Sahara", + "ye" => "Yemen", + "zm" => "Zambia", + "zw" => "Zimbabwe" + ] + ], + "nsfw" => [ + "display" => "NSFW", + "option" => [ + "yes" => "Yes", // safe=active + "no" => "No" // safe=off + ] + ] + ]; + + switch($page){ + + case "web": + return array_merge( + $base, + [ + "lang" => [ // lr= (prefix lang with "lang_") + "display" => "Language", + "option" => [ + "any" => "Any language", + "ar" => "Arabic", + "bg" => "Bulgarian", + "ca" => "Catalan", + "cs" => "Czech", + "da" => "Danish", + "de" => "German", + "el" => "Greek", + "en" => "English", + "es" => "Spanish", + "et" => "Estonian", + "fi" => "Finnish", + "fr" => "French", + "hr" => "Croatian", + "hu" => "Hungarian", + "id" => "Indonesian", + "is" => "Icelandic", + "it" => "Italian", + "iw" => "Hebrew", + "ja" => "Japanese", + "ko" => "Korean", + "lt" => "Lithuanian", + "lv" => "Latvian", + "nl" => "Dutch", + "no" => "Norwegian", + "pl" => "Polish", + "pt" => "Portuguese", + "ro" => "Romanian", + "ru" => "Russian", + "sk" => "Slovak", + "sl" => "Slovenian", + "sr" => "Serbian", + "sv" => "Swedish", + "tr" => "Turkish", + "zh-CN" => "Chinese (Simplified)", + "zh-TW" => "Chinese (Traditional)" + ] + ], + "sort" => [ + "display" => "Sort by", + "option" => [ + "relevance" => "Relevance", + "date" => "Date" + ] + ], + "redundant" => [ + "display" => "Remove redundant", + "option" => [ + "yes" => "Yes", + "no" => "No", + ] + ] + ] + ); + break; + + case "images": + return array_merge( + $base, + [ + "size" => [ // imgsz + "display" => "Size", + "option" => [ + "any" => "Any size", + "l" => "Large", + "m" => "Medium", + "i" => "Icon", + "qsvga" => "Larger than 400x300", + "vga" => "Larger than 640x480", + "svga" => "Larger than 800x600", + "xga" => "Larger than 1024x768", + "2mp" => "Larger than 2MP", + "4mp" => "Larger than 4MP", + "6mp" => "Larger than 6MP", + "8mp" => "Larger than 8MP", + "10mp" => "Larger than 10MP", + "12mp" => "Larger than 12MP", + "15mp" => "Larger than 15MP", + "20mp" => "Larger than 20MP", + "40mp" => "Larger than 40MP", + "70mp" => "Larger than 70MP" + ] + ], + "color" => [ // imgc + "display" => "Color", + "option" => [ + "any" => "Any color", + "color" => "Full color", + "bnw" => "Black & white", + "trans" => "Transparent", + // from here, imgcolor + "red" => "Red", + "orange" => "Orange", + "yellow" => "Yellow", + "green" => "Green", + "teal" => "Teal", + "blue" => "Blue", + "purple" => "Purple", + "pink" => "Pink", + "white" => "White", + "gray" => "Gray", + "black" => "Black", + "brown" => "Brown" + ] + ], + "format" => [ // as_filetype + "display" => "Format", + "option" => [ + "any" => "Any format", + "jpg" => "JPG", + "gif" => "GIF", + "png" => "PNG", + "bmp" => "BMP", + "svg" => "SVG", + "webp" => "WEBP", + "ico" => "ICO", + "craw" => "RAW" + ] + ] + ] + ); + break; + } + } + + private function get($proxy, $url, $get = [], $reqtype = self::req_js){ + + $curlproc = curl_init(); + + if($get !== []){ + + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + // http2 bypass + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + + if($reqtype === self::req_js){ + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: */*", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + "Sec-GPC: 1", + "Alt-Used: cse.google.com", + "Connection: keep-alive", + "Referer: https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT, + "Sec-Fetch-Dest: script", + "Sec-Fetch-Mode: no-cors", + "Sec-Fetch-Site: same-origin", + "TE: trailers"] + ); + }else{ + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + "Sec-GPC: 1", + "Connection: keep-alive", + "Upgrade-Insecure-Requests: 1", + "Sec-Fetch-Dest: document", + "Sec-Fetch-Mode: navigate", + "Sec-Fetch-Site: none", + "Sec-Fetch-User: ?1", + "Priority: u=0, i"] + ); + } + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function web($get){ + + // page 1 + // https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&cselibv=8fa85d58e016b414&cx=d4e68b99b876541f0&q=asmr&safe=active&cse_tok=AB-tC_6RPUTmB4XK0lE9e1AFFC5r%3A1729563832926&lr=&cr=&gl=&filter=0&sort=&as_oq=&as_sitesearch=&exp=cc%2Capo&oq=asmr&gs_l=partner-web.3..0i512i433j0i512i433i131l2j0i512i433j0i512i433i131j0i512i433j0i512i433i131l2j0i512l2.10902.266627.5.267157.11.10.0.0.0.0.188.1108.2j7.9.0.csems%2Cnrl%3D10...0....1.34.partner-web..42.14.1500.WJQvMvfXkx4&cseclient=hosted-page-client&callback=google.search.cse.api8223&rurl=https%3A%2F%2Fcse.google.com%2Fcse%3Fcx%3Dd4e68b99b876541f0%23gsc.tab%3D0%26gsc.q%3Dtest%26gsc.sort%3D + + // page 2 + // https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&start=10&cselibv=8fa85d58e016b414&cx=d4e68b99b876541f0&q=asmr&safe=active&cse_tok=AB-tC_6RPUTmB4XK0lE9e1AFFC5r%3A1729563832926&lr=&cr=&gl=&filter=0&sort=&as_oq=&as_sitesearch=&exp=cc%2Capo&callback=google.search.cse.api3595&rurl=https%3A%2F%2Fcse.google.com%2Fcse%3Fcx%3Dd4e68b99b876541f0%23gsc.tab%3D0%26gsc.q%3Dtest%26gsc.sort%3D + + if($get["npt"]){ + + [$req_params, $proxy] = + $this->backend->get( + $get["npt"], + "web" + ); + + $req_params = + json_decode( + $req_params, + true + ); + + $json = + $this->get( + $proxy, + "https://cse.google.com/cse/element/v1", + $req_params, + self::req_js + ); + + }else{ + + $proxy = $this->backend->get_ip(); + $params = $this->generate_token($proxy); + + //$json = file_get_contents("scraper/google_cse.txt"); + $req_params = [ + "rsz" => "filtered_cse", + "num" => 20, + "hl" => "en", + "source" => "gcsc", + "cselibv" => $params["lib"], + "cx" => config::GOOGLE_CX_ENDPOINT, + "q" => $get["s"], + "safe" => $get["nsfw"] == "yes" ? "off" : "active", + "cse_tok" => $params["token"], + "lr" => $get["lang"] == "any" ? "" : "lang_" . $get["lang"], + "cr" => $get["country"] == "any" ? "" : "country" . strtoupper($get["country"]), + "gl" => "", + "filter" => $get["redundant"] == "yes" ? "1" : "0", + "sort" => $get["sort"] == "relevance" ? "" : "date", + "as_oq" => "", + "as_sitesearch" => "", + "exp" => "cc,apo", + "oq" => $get["s"], + "gs_l" => "partner-web.3...33294.34225.3.34597.26.11.0.0.0.0.201.1132.6j4j1.11.0.csems,nrl=10...0....1.34.partner-web..34.19.1897.FKEeG5yh2iw", + "cseclient" => "hosted-page-client", + "callback" => "google.search.cse.api" . random_int(4000, 99999), + "rurl" => "https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT . "#gsc.tab=0&gsc.q=" . $get["s"] . "&gsc.sort=" + ]; + + $json = + $this->get( + $proxy, + "https://cse.google.com/cse/element/v1", + $req_params, + self::req_js + ); + + unset($req_params["gs_l"]); + $req_params["start"] = 0; + } + + $req_params["start"] += 20; + + if( + !preg_match( + '/google\.search\.cse\.[A-Za-z0-9]+\(([\S\s]*)\);/i', + $json, + $json + ) + ){ + + throw new Exception("Failed to grep JSON"); + } + + $json = json_decode($json[1], true); + + if(isset($json["error"])){ + + if(isset($json["error"]["errors"][0]["message"])){ + + throw new Exception("Google returned an error: " . $json["error"]["errors"][0]["message"]); + } + + if(isset($json["error"]["message"])){ + + throw new Exception("Google returned an error: " . $json["error"]["message"]); + } + + throw new Exception("Google returned an error object"); + } + + $out = [ + "status" => "ok", + "spelling" => [ + "type" => "no_correction", + "using" => null, + "correction" => null + ], + "npt" => null, + "answer" => [], + "web" => [], + "image" => [], + "video" => [], + "news" => [], + "related" => [] + ]; + + if(!isset($json["results"])){ + + return $out; + } + + foreach($json["results"] as $result){ + + // get date from description + $description = + explode( + "...", + trim($result["contentNoFormatting"], " ."), + 2 + ); + + if(count($description) === 2){ + + if($date = strtotime($description[0])){ + + $description = ltrim($description[1]); + }else{ + + $date = null; + $description = implode("...", $description); + } + }else{ + + $description = implode("...", $description); + $date = null; + } + + $description = trim($description, " ."); + + // get thumbnails + if(isset($result["richSnippet"]["cseThumbnail"]["src"])){ + + $thumb = [ + "url" => $this->unshit_thumb($result["richSnippet"]["cseThumbnail"]["src"]), + "ratio" => "1:1" + ]; + } + elseif(isset($result["richSnippet"]["cseImage"]["src"])){ + + $thumb = [ + "url" => $result["richSnippet"]["cseImage"]["src"], + "ratio" => "1:1" + ]; + }else{ + + $thumb = [ + "url" => null, + "ratio" => null + ]; + } + + if($thumb["url"] !== null){ + + $found_size = false; + + // find correct ratio + + if( + isset($result["richSnippet"]["cseThumbnail"]["width"]) && + isset($result["richSnippet"]["cseThumbnail"]["height"]) + ){ + $found_size = true; + $width = (int)$result["richSnippet"]["cseThumbnail"]["width"]; + $height = (int)$result["richSnippet"]["cseThumbnail"]["height"]; + } + elseif( + isset($result["richSnippet"]["metatags"]["ogImageWidth"]) && + isset($result["richSnippet"]["metatags"]["ogImageHeight"]) + ){ + $found_size = true; + $width = (int)$result["richSnippet"]["metatags"]["ogImageWidth"]; + $height = (int)$result["richSnippet"]["metatags"]["ogImageHeight"]; + } + + // calculate rounded ratio + if($found_size){ + + $aspect_ratio = $width / $height; + + if($aspect_ratio >= 1.5){ + + $thumb["ratio"] = "16:9"; + } + elseif($aspect_ratio >= 0.8){ + + $thumb["ratio"] = "1:1"; + }else{ + + $thumb["ratio"] = "9:16"; + } + } + } + + $out["web"][] = [ + "title" => rtrim($result["titleNoFormatting"], " ."), + "description" => $description, + "url" => $result["unescapedUrl"], + "date" => $date, + "type" => "web", + "thumb" => $thumb, + "sublink" => [], + "table" => [] + ]; + } + + // detect next page + if( + isset($json["cursor"]["isExactTotalResults"]) || // detects last page + !isset($json["cursor"]["pages"]) // detects no results on page + ){ + + return $out; + } + + // get next page + $out["npt"] = + $this->backend->store( + json_encode( + $req_params + ), + "web", + $proxy + ); + + return $out; + } + + public function image($get){ + + + + if($get["npt"]){ + + [$req_params, $proxy] = + $this->backend->get( + $get["npt"], + "images" + ); + + $req_params = + json_decode( + $req_params, + true + ); + + $json = + $this->get( + $proxy, + "https://cse.google.com/cse/element/v1", + $req_params, + self::req_js + ); + + }else{ + + $proxy = $this->backend->get_ip(); + $params = $this->generate_token($proxy); + + //$json = file_get_contents("scraper/google_cse.txt"); + $req_params = [ + "rsz" => "filtered_cse", + "num" => 20, + "hl" => "en", + "source" => "gcsc", + "cselibv" => $params["lib"], + "searchtype" => "image", + "cx" => config::GOOGLE_CX_ENDPOINT, + "q" => $get["s"], + "safe" => $get["nsfw"] == "yes" ? "off" : "active", + "cse_tok" => $params["token"], + "exp" => "cc,apo", + "cseclient" => "hosted-page-client", + "callback" => "google.search.cse.api" . random_int(4000, 99999), + "rurl" => "https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT . "#gsc.tab=1&gsc.q=" . $get["s"] . "&gsc.sort=" + ]; + + // add additional hidden filters + + // country (image search uses cr instead of gl) + if($get["country"] != "any"){ + + $req_params["cr"] = "country" . strtoupper($get["country"]); + } + + // nsfw + $req_params["safe"] = $get["nsfw"] == "yes" ? "off" : "active"; + + // size + if($get["size"] != "any"){ + + $req_params["imgsz"] = $get["size"]; + } + + // format + if($get["format"] != "any"){ + + $req_params["as_filetype"] = $get["format"]; + } + + // color + if($get["color"] != "any"){ + + if( + $get["color"] == "color" || + $get["color"] == "trans" + ){ + + $req_params["imgc"] = $get["color"]; + }elseif($get["color"] == "bnw"){ + + $req_params["imgc"] = "gray"; + }else{ + + $req_params["imgcolor"] = $get["color"]; + } + } + + $json = + $this->get( + $proxy, + "https://cse.google.com/cse/element/v1", + $req_params, + self::req_js + ); + + $req_params["start"] = 0; + } + + $req_params["start"] += 20; + + if( + !preg_match( + '/google\.search\.cse\.[A-Za-z0-9]+\(([\S\s]*)\);/i', + $json, + $json + ) + ){ + + throw new Exception("Failed to grep JSON"); + } + + $json = json_decode($json[1], true); + + if(isset($json["error"])){ + + if(isset($json["error"]["errors"][0]["message"])){ + + throw new Exception("Google returned an error: " . $json["error"]["errors"][0]["message"]); + } + + if(isset($json["error"]["message"])){ + + throw new Exception("Google returned an error: " . $json["error"]["message"]); + } + + throw new Exception("Google returned an error object"); + } + + // detect next page + if( + isset($json["cursor"]["isExactTotalResults"]) || // detects last page + !isset($json["cursor"]["pages"]) // detects no results on page + ){ + + return $out; + } + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; + + foreach($json["results"] as $result){ + + $out["image"][] = [ + "title" => rtrim($result["titleNoFormatting"], " ."), + "source" => [ + [ + "url" => $result["unescapedUrl"], + "width" => (int)$result["width"], + "height" => (int)$result["height"] + ], + [ + "url" => $result["tbLargeUrl"], + "width" => (int)$result["tbLargeWidth"], + "height" => (int)$result["tbLargeHeight"] + ] + ], + "url" => $result["originalContextUrl"] + ]; + } + + // get next page + $out["npt"] = + $this->backend->store( + json_encode( + $req_params + ), + "images", + $proxy + ); + + return $out; + } + + private function generate_token($proxy){ + + $html = + $this->get( + $proxy, + "https://cse.google.com/cse", + [ + "cx" => config::GOOGLE_CX_ENDPOINT + ], + self::req_html + ); + + // detect captcha + $this->fuckhtml->load($html); + + $title = + $this->fuckhtml + ->getElementsByTagName( + "title" + ); + + if( + count($title) !== 0 && + $title[0]["innerHTML"] == "302 Moved" + ){ + + throw new Exception("Google returned a captcha"); + } + + // get token + preg_match( + '/relativeUrl=\'([^\']+)\';/i', + $html, + $js_uri + ); + + if(!isset($js_uri[1])){ + + throw new Exception("Failed to grep search token"); + } + + $js_uri = + $this->fuckhtml + ->parseJsString( + $js_uri[1] + ); + + // get parameters + $js = + $this->get( + $proxy, + "https://cse.google.com" . $js_uri, + [], + self::req_js + ); + + preg_match( + '/}\)\(({[\S\s]+})\);/', + $js, + $json + ); + + if(!isset($json[1])){ + + throw new Exception("Failed to grep JSON parameters"); + } + + $json = json_decode($json[1], true); + + return [ + "token" => $json["cse_token"], + "lib" => $json["cselibVersion"] + ]; + } + + private function unshit_thumb($url){ + // https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj + // https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA + + $parts = parse_url($url); + + if( + isset($parts["host"]) && + preg_match( + '/tbn.*\.gstatic\.com/', + $parts["host"] + ) + ){ + + parse_str($parts["query"], $params); + + if(isset($params["q"])){ + + return "https://" . $parts["host"] . "/images?q=" . $params["q"]; + } + } + + return $url; + } +}