diff --git a/about.php b/about.php index b3fd6d9..1a7734a 100644 --- a/about.php +++ b/about.php @@ -81,7 +81,7 @@ $left =
Escape
to exit the image viewer.

Instances

- 4get is open source, anyone can create their own 4get instance! If you wish to add your website to this list, please contact me. + 4get is open source, anyone can create their own 4get instance! If you wish to add your website to this list, please contact me. @@ -102,7 +102,7 @@ $left = Message to all DMCA enforcers: I don\'t host any of the content. Everything you see here is proxied trough my shitbox with no moderation. Please reach out to the people hosting the infringing content instead.

- Click here to contact me!

+ Click here to contact me!

Valid W3C HTML 4.01 diff --git a/api/v1/images.php b/api/v1/images.php index e05ba26..694658e 100644 --- a/api/v1/images.php +++ b/api/v1/images.php @@ -16,7 +16,8 @@ $get = $frontend->parsegetfilters($_GET, $filters); try{ echo json_encode( - $scraper->image($get) + $scraper->image($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ); }catch(Exception $e){ diff --git a/api/v1/news.php b/api/v1/news.php index 7e24247..775ef94 100644 --- a/api/v1/news.php +++ b/api/v1/news.php @@ -16,7 +16,8 @@ $get = $frontend->parsegetfilters($_GET, $filters); try{ echo json_encode( - $scraper->news($get) + $scraper->news($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ); }catch(Exception $e){ diff --git a/api/v1/videos.php b/api/v1/videos.php index 60c105a..225611a 100644 --- a/api/v1/videos.php +++ b/api/v1/videos.php @@ -16,7 +16,8 @@ $get = $frontend->parsegetfilters($_GET, $filters); try{ echo json_encode( - $scraper->video($get) + $scraper->video($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ); }catch(Exception $e){ diff --git a/api/v1/web.php b/api/v1/web.php index 7895183..7f6d769 100644 --- a/api/v1/web.php +++ b/api/v1/web.php @@ -21,7 +21,8 @@ if(!isset($_GET["extendedsearch"])){ try{ echo json_encode( - $scraper->web($get) + $scraper->web($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ); }catch(Exception $e){ diff --git a/banner/aves.png b/banner/aves.png index ace604f..2b28e79 100644 Binary files a/banner/aves.png and b/banner/aves.png differ diff --git a/banner/aves_2.png b/banner/aves_2.png index c78839f..30f17a6 100644 Binary files a/banner/aves_2.png and b/banner/aves_2.png differ diff --git a/banner/bibblebop.png b/banner/bibblebop.png index 0c061e0..0fe95c4 100644 Binary files a/banner/bibblebop.png and b/banner/bibblebop.png differ diff --git a/banner/birds birds birdsw_4.jpg b/banner/birds birds birdsw_4.jpg index ba7d637..c4a52d8 100644 Binary files a/banner/birds birds birdsw_4.jpg and b/banner/birds birds birdsw_4.jpg differ diff --git a/banner/birds_birds_birdsw.jpg b/banner/birds_birds_birdsw.jpg index ff04b23..d820465 100644 Binary files a/banner/birds_birds_birdsw.jpg and b/banner/birds_birds_birdsw.jpg differ diff --git a/banner/birds_birds_birdsw_2.jpg b/banner/birds_birds_birdsw_2.jpg index dcd6125..a8d9164 100644 Binary files a/banner/birds_birds_birdsw_2.jpg and b/banner/birds_birds_birdsw_2.jpg differ diff --git a/banner/birds_birds_birdsw_3.jpg b/banner/birds_birds_birdsw_3.jpg index 1446207..239f6c8 100644 Binary files a/banner/birds_birds_birdsw_3.jpg and b/banner/birds_birds_birdsw_3.jpg differ diff --git a/banner/birds_birds_birdsw_4.jpg b/banner/birds_birds_birdsw_4.jpg new file mode 100644 index 0000000..ba7d637 Binary files /dev/null and b/banner/birds_birds_birdsw_4.jpg differ diff --git a/banner/deek.png b/banner/deek.png index ef80354..850416d 100644 Binary files a/banner/deek.png and b/banner/deek.png differ diff --git a/banner/deekchat.gif b/banner/deekchat.gif index bba01da..604e2fa 100644 Binary files a/banner/deekchat.gif and b/banner/deekchat.gif differ diff --git a/banner/eagle.png b/banner/eagle.png index f074341..705cf6d 100644 Binary files a/banner/eagle.png and b/banner/eagle.png differ diff --git a/banner/eagle2.png b/banner/eagle2.png index 175366b..d5bdda6 100644 Binary files a/banner/eagle2.png and b/banner/eagle2.png differ diff --git a/banner/eagle3.jpg b/banner/eagle3.jpg index 1e65b59..5c3d44d 100644 Binary files a/banner/eagle3.jpg and b/banner/eagle3.jpg differ diff --git a/banner/eddd_1.png b/banner/eddd_1.png index fab460b..4dd69b0 100644 Binary files a/banner/eddd_1.png and b/banner/eddd_1.png differ diff --git a/banner/eddd_2.png b/banner/eddd_2.png index 5ce4c2c..c2a59d1 100644 Binary files a/banner/eddd_2.png and b/banner/eddd_2.png differ diff --git a/banner/eddd_3.png b/banner/eddd_3.png index b4ca48d..8531a88 100644 Binary files a/banner/eddd_3.png and b/banner/eddd_3.png differ diff --git a/banner/gnuwu.png b/banner/gnuwu.png index 634b59d..6b95ca4 100644 Binary files a/banner/gnuwu.png and b/banner/gnuwu.png differ diff --git a/banner/gnuwu_2.png b/banner/gnuwu_2.png index 493a6d9..1612132 100644 Binary files a/banner/gnuwu_2.png and b/banner/gnuwu_2.png differ diff --git a/banner/horse.png b/banner/horse.png index 0075a9c..e4cd7f5 100644 Binary files a/banner/horse.png and b/banner/horse.png differ diff --git a/banner/linucks.jpg b/banner/linucks.jpg index 8874451..3148c6f 100644 Binary files a/banner/linucks.jpg and b/banner/linucks.jpg differ diff --git a/banner/real_nig_3.jpg b/banner/real_nig_3.jpg index 8091146..60bac39 100644 Binary files a/banner/real_nig_3.jpg and b/banner/real_nig_3.jpg differ diff --git a/banner/sec.png b/banner/sec.png index 3c1a49e..b02b978 100644 Binary files a/banner/sec.png and b/banner/sec.png differ diff --git a/banner/tagmachine.png b/banner/tagmachine.png index c8b82a0..2fca9a2 100644 Binary files a/banner/tagmachine.png and b/banner/tagmachine.png differ diff --git a/icons/lolcat.ca.png b/icons/lolcat.ca.png index c7e4785..bf47a44 100644 Binary files a/icons/lolcat.ca.png and b/icons/lolcat.ca.png differ diff --git a/images.php b/images.php index 2c0799e..ff420c9 100644 --- a/images.php +++ b/images.php @@ -62,8 +62,13 @@ foreach($results["image"] as $image){ '
' . '
' . '' . - 'thumbnail' . - '
' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '
' . + 'thumbnail'; + + if($image["source"][0]["width"] !== null){ + $payload["images"] .= '
' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '
'; + } + + $payload["images"] .= '
' . '' . '
' . htmlspecialchars(parse_url($image["url"], PHP_URL_HOST)) . '
' . diff --git a/lib/frontend.php b/lib/frontend.php index 16e5693..7f65354 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -892,6 +892,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "yandex" => "Yandex", + "brave" => "Brave"//, //"google" => "Google" ] ]; @@ -903,6 +904,7 @@ class frontend{ "option" => [ "yt" => "YouTube", "ddg" => "DuckDuckGo", + "brave" => "Brave"//, //"google" => "Google" ] ]; @@ -1285,7 +1287,7 @@ class frontend{ return htmlspecialchars($image); } - return "/proxy?i=" . urlencode($image) . "&s=" . $format; + return "/proxy.php?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php index 8802511..5c65417 100644 --- a/lib/fuckhtml.php +++ b/lib/fuckhtml.php @@ -356,6 +356,91 @@ class fuckhtml{ return $out; } + + public function parseJsObject(string $json){ + + $bracket = false; + $is_close_bracket = false; + $escape = false; + $json_out = null; + $last_char = null; + + for($i=0; $i diff --git a/proxy.php b/proxy.php index d8b3c1b..bcf552e 100644 --- a/proxy.php +++ b/proxy.php @@ -57,6 +57,7 @@ try{ } $image->readImageBlob($payload["body"]); + $image_width = $image->getImageWidth(); $image_height = $image->getImageHeight(); @@ -102,16 +103,16 @@ try{ $image_width = $image_height * $ratio; } - $image->setImageBackgroundColor(new ImagickPixel("#504945")); - $image->mergeImageLayers(Imagick::LAYERMETHOD_FLATTEN); - - $image->resizeImage($image_width, $image_height, Imagick::FILTER_LANCZOS, 1); + $image->setImageBackgroundColor("#504945"); + $image->setImageAlphaChannel(Imagick::ALPHACHANNEL_REMOVE); $image->stripImage(); $image->setFormat("jpeg"); $image->setImageCompressionQuality(90); $image->setImageCompression(Imagick::COMPRESSION_JPEG2000); + $image->resizeImage($image_width, $image_height, Imagick::FILTER_LANCZOS, 1); + $proxy->getfilenameheader($payload["headers"], $_GET["i"]); header("Content-Type: image/jpeg"); diff --git a/scraper/brave.php b/scraper/brave.php index 4d48c33..c598c80 100644 --- a/scraper/brave.php +++ b/scraper/brave.php @@ -86,6 +86,8 @@ class brave{ ]; break; + case "images": + case "videos": case "news": return [ "country" => [ @@ -143,7 +145,7 @@ class brave{ } } - private function get($url, $get = [], $nsfw, $country/*, $is_post = false, $additional_cookies = null*/){ + private function get($url, $get = [], $nsfw, $country){ switch($nsfw){ @@ -152,13 +154,6 @@ class brave{ case "no": $nsfw = "strict"; break; } - //$cookie = "safesearch={$nsfw}; country={$country}; useLocation=0"; - /* - if($additional_cookies !== null){ - - $cookie = $additional_cookies . "; " . $cookie; - }*/ - $headers = [ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", @@ -171,8 +166,7 @@ class brave{ "Sec-Fetch-Dest: document", "Sec-Fetch-Mode: navigate", "Sec-Fetch-Site: none", - "Sec-Fetch-User: ?1"//, - //"Content-Type: application/json" + "Sec-Fetch-User: ?1" ]; if($country == "any"){ @@ -182,22 +176,10 @@ class brave{ $curlproc = curl_init(); - /*if($is_post){ - - curl_setopt($curlproc, CURLOPT_POST, true); - curl_setopt( - $curlproc, - CURLOPT_POSTFIELDS, - json_encode($get) - ); - - }else{ - */ - if($get !== []){ - $get = http_build_query($get); - $url .= "?" . $get; - } - //} + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } curl_setopt($curlproc, CURLOPT_URL, $url); @@ -1950,18 +1932,24 @@ class brave{ return $out; } - /* - public function bypasscaptcha($html, $nsfw, $country){ + public function image($get){ - // @TODO figure out why I still cant go trough - // the captcha wall even after breaking it + $search = $get["s"]; + $country = $get["country"]; + $nsfw = $get["nsfw"]; + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; try{ $html = $this->get( - "https://search.brave.com/goggles", + "https://search.brave.com/images", [ - "q" => "site:dailymotion.com my bloody valentine" + "q" => $search ], $nsfw, $country @@ -1969,177 +1957,192 @@ class brave{ }catch(Exception $error){ - throw new Exception("Could not fetch html"); + throw new Exception("Could not fetch search page"); } + /* + $handle = fopen("scraper/brave-image.html", "r"); + $html = fread($handle, filesize("scraper/brave-image.html")); + fclose($handle);*/ - // Bypass brave search captcha - // this captcha only appears on the goggles page preg_match( - '/this\.img\.src = "(.*)"/', + '/const data = (\[{.*}\]);/', $html, - $image + $json ); - $image = - base64_decode( - explode( - "data:image/png;base64,", - $image[1] - )[1] + if(!isset($json[1])){ + + throw new Exception("Failed to get data object"); + } + + $json = + $this->fuckhtml + ->parseJsObject( + $json[1] ); - $im = new Imagick(); - $im->readImageBlob($image); - - $im->blurImage(20, 20); - $im->posterizeImage(2, imagick::IMGTYPE_COLORSEPARATION); - - // if we encounter a white line thats longer than 45px - // we found the circle position - $iterator = $im->getPixelRegionIterator(0, 77, 310, 1); - - $found = null; foreach( - $iterator as $row + $json[1] + ["data"] + ["body"] + ["response"] + ["results"] + as $result ){ - $whitecount = 0; - $count = 0; - - foreach($row as $pixel){ - - if($pixel->getColor()["r"] === 255){ - - $whitecount++; - $pixel->setColor("rgba(255,0,0,0)"); - - if($whitecount === 45){ - - $found = $count - 45; - break 2; - } - }else{ - - $whitecount = 0; - } - - $count++; - $iterator->syncIterator(); - } - } - - $found = $found + 10; - - //header("Content-Type: image/png"); - //echo $im; - //die(); - - if($found === null){ - - throw new Exception("Could not bypass captcha"); - } - - preg_match( - '/data="{"captcha_id":"([0-9A-z-]+)"}"/', - $html, - $key - ); - - $key = $key[1]; - // we bypassed captcha, send POST data - $order = - $this->get( - "https://search.brave.com/api/captcha?brave=0&captcha_id={$key}", - [ - "solution" => (string)$found - ], - $nsfw, - $country, - true - ); - - $order = json_decode($order, true)["orderId"]; - - $orderpayload = - $this->get( - "https://search.brave.com/api/rewards/v1/orders/{$order}", - [], - $nsfw, - $country - ); - - $orderpayload = json_decode($orderpayload, true); - - $creds = - $this->get( - "https://search.brave.com/api/rewards/v1/orders/{$order}/credentials", - [ - "itemId" => $orderpayload["items"][0]["id"], - "blindedCreds" => [ - "fuYAVcB/m7BU66vf3wkNGxJCSaRhshB9o+8km3F1h2c=", - "uswvcWJuPK/1qFlVdzBP3eQd0+V1EQgfAtnEoMIK+Uk=", - "fJWKGLBxl3Gyn4n9FjTLq1PjupfABT7Ni8MeB+iGzUs=", - "Aq9enJ/VZP9GxQIza3n65ZK7xQhY4VwDxv53BCb/Txg=", - "FMJA9eSLHq71K+Pcwgm4gIQOmdR/6KMy5cMgXhpd5Ro=", - "2NVhIAbvI317SP9/xXbVe/U57eWgvHyqVbHL/5+Gdmw=", - "6mpjsjSCmYEzK2xlbL8DI2P4LuhWUOxjTLvsTAL9l24=", - "kAn4wuHvIlKWhfuFfPTSfD4tZ5le9t7/61YbdEc/L3k=", - "BjjUyG16aTfd1c0h4oBzgQQOekrH1f+a5CmcXqMPTR4=", - "SBNgpCt4/V44yaQTfh+D027Yv1GJFHkjUEpPw6rAwRI=", - "XDENAtdQ7PyYx+Qx1wQGQtDWgg8WpIMgWGmd4RDOVWE=", - "tF7rB4sqamsiUk3K7fojdQSI0Q6iip72yKyhnvg/bC0=", - "VsAqflirAd/u4VsLdfRS2UvnH24ZNkFh6YN3DctLjzQ=", - "MntLbXkoI0LdcisCbNazmooiHXJyX91L1KERDAu1JRU=", - "TH6Zs8JBvFDbTDWgKbfGE4M5/cSwCtHD8ms5Y/U8zHQ=", - "jsZg0Z+qDPHymrbhdnesodhLNJ26QdunyMko1aVe4So=", - "rpKsyj6/vdnuMgLI2BApeijtGq9g5USRDL0w6X2bnlQ=", - "vCzliGT8A9vcLXj2sFf2kavOuYw69d70NpfgA22B4lI=", - "7OWoxSCtYXWcaBSifF7AXNBif/sjcuO0IelzXG/3PFk=", - "iiXtByNlT6nDMN9De5B58Jl8J0p6LCjnZ9aS3w2FEQU=", - "zDhd7gsJ4h4JkDeGK0Y0mfFd8IBdkLhMOANzwO+4Dig=", - "qANZ+AikwFReEA61JF009d/c3IHM/aSfIYwljckhJWE=", - "nNC30pDLxtXvUr+WDwfDSrAInNBpfSZkPsV2JlpheWI=", - "kGXE1pkt25P71kdJzmKIg4+yMR1VA5wNmbpBb/FhJQ8=", - "aLqPsY1Qiz2UCa2Jx3YNNt8r4JINMphks/43EiyZfXU=", - "bHGYZoQARZEM5LdFF6B74PkRqNd9EKxzuTvGYxjq+hk=", - "JOsYQjfE/9Y1u29hR+GvEkNyxUI8blgLhX1iJI/aGRQ=", - "yKjHjH5j600TJD/3WPsA1N3OmItDLifdjlysq4H6NV0=", - "9lTnUbsPp7BJ7XVN5/T4yGfzD9DJdqWB7xk72s19MAA=", - "5KHG8iY45em7zDhO/HlI0ydcZ0Ubn+XSyjifMmy7qXM=" + $out["image"][] = [ + "title" => $result["title"], + "source" => [ + [ + "url" => $result["properties"]["url"], + "width" => null, + "height" => null + ], + [ + "url" => $result["thumbnail"]["src"], + "width" => null, + "height" => null ] ], - $nsfw, - $country, - true + "url" => $result["url"] + ]; + } + + return $out; + } + + public function video($get){ + + $search = $get["s"]; + $country = $get["country"]; + $nsfw = $get["nsfw"]; + + $out = [ + "status" => "ok", + "npt" => null, + "video" => [], + "author" => [], + "livestream" => [], + "playlist" => [], + "reel" => [] + ]; + + try{ + $html = + $this->get( + "https://search.brave.com/videos", + [ + "q" => $search + ], + $nsfw, + $country + ); + + }catch(Exception $error){ + + throw new Exception("Could not fetch search page"); + } + /* + $handle = fopen("scraper/brave-video.html", "r"); + $html = fread($handle, filesize("scraper/brave-video.html")); + fclose($handle);*/ + + preg_match( + '/const data = (\[{.*}\]);/', + $html, + $json + ); + + if(!isset($json[1])){ + + throw new Exception("Failed to get data object"); + } + + $json = + $this->fuckhtml + ->parseJsObject( + $json[1] ); - var_dump($creds); + foreach( + $json + [1] + ["data"] + ["body"] + ["response"] + ["results"] + as $result + ){ + + if($result["video"]["author"] != "null"){ + + $author = [ + "name" => $result["video"]["author"]["name"] == "null" ? null : $result["video"]["author"]["name"], + "url" => $result["video"]["author"]["url"] == "null" ? null : $result["video"]["author"]["url"], + "avatar" => $result["video"]["author"]["img"] == "null" ? null : $result["video"]["author"]["img"] + ]; + }else{ + + $author = [ + "name" => null, + "url" => null, + "avatar" => null + ]; + } + + if($result["thumbnail"] != "null"){ + + $thumb = [ + "url" => $result["thumbnail"]["original"], + "ratio" => "16:9" + ]; + }else{ + + $thumb = [ + "url" => null, + "ratio" => null + ]; + } + + $out["video"][] = [ + "title" => $result["title"], + "description" => $result["description"] == "null" ? null : $this->titledots($result["description"]), + "author" => $author, + "date" => $result["age"] == "null" ? null : strtotime($result["age"]), + "duration" => $result["video"]["duration"] == "null" ? null : $this->hms2int($result["video"]["duration"]), + "views" => $result["video"]["views"] == "null" ? null : (int)$result["video"]["views"], + "thumb" => $thumb, + "url" => $result["url"] + ]; + } + + return $out; + } + + private function hms2int($time){ - sleep(2); - $test = - $this->get( - "https://search.brave.com/api/rewards/v1/orders/{$order}/credentials", - [], - $nsfw, - $country - ); + $parts = explode(":", $time, 3); + $time = 0; - var_dump($test); + if(count($parts) === 3){ + + // hours + $time = $time + ((int)$parts[0] * 3600); + array_shift($parts); + } - $html = - $this->get( - "https://search.brave.com/goggles", - [ - "q" => "site:dailymotion.com my bloody valentine" - ], - $nsfw, - $country, - false, - "__Secure-sku#brave-search-captcha=eyJ0eXBlIjoic2luZ2xlLXVzZSIsInZlcnNpb24iOjEsInNrdSI6ImJyYXZlLXNlYXJjaC1jYXB0Y2hhIiwicHJlc2VudGF0aW9uIjoiZXlKcGMzTjFaWElpT2lKaWNtRjJaUzVqYjIwL2MydDFQV0p5WVhabExYTmxZWEpqYUMxallYQjBZMmhoSWl3aWMybG5ibUYwZFhKbElqb2lNRzl0VDBneWQxZ3dTazkzU0VFMVJ6QTJaR1V5WjFOQ1dDdGhSM3B2Y2xsTVQwVTJZVVJtTUc5a1IweG1Wa3RhZEd0cU4xbHdia3BPT0VOVGNGbE5lVWR2YmpGRlNTOUhhMlZYU1RWNGQxTjJPWGxJTTNjOVBTSXNJblFpT2lKWlJWWldaVzR5TTJwQ01tSnZkakJ2U1hGNGJtSndUMGxEUW5Kd1drRjBRbWQxVnpoRlNURTNVREY2UVRaQlpUTXJSVGRFYm5NeVFqUmhka0pGYTFWM2FGY3JWRVZJVjNWcE9TdFllRU1yYlVSTVkyMTBRVDA5SW4wPSJ9" - ); + if(count($parts) === 2){ + + // minutes + $time = $time + ((int)$parts[0] * 60); + array_shift($parts); + } - var_dump($html); - }*/ + // seconds + $time = $time + (int)$parts[0]; + + return $time; + } private function appendtext($payload, &$text, &$index){ diff --git a/scraper/google.php b/scraper/google.php index 28ede6d..af243ba 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -808,6 +808,7 @@ class google{ ->getElementsByTagName("style"); $this->computedstyle = []; + $this->ask = []; foreach($styles as $style){ @@ -860,6 +861,22 @@ class google{ $image_grep[1][0] ); } + + // even more javascript crap + // "People also ask" node is loaded trough javascript + preg_match_all( + '/window\.jsl\.dh\(\'([^\']+)\',\'(.+)\'\);/', + $script["innerHTML"], + $ask_grep + ); + + for($i=0; $iask[trim($ask_grep[1][$i])] = + stripcslashes( + $ask_grep[2][$i] + ); + } } // get nodes @@ -926,22 +943,22 @@ class google{ "div" ); - $carousel_title = - $this->fuckhtml - ->getElementsByClassName( - $this->findstyles( - [ - "font-size" => "16px", - "line-height" => "20px", - "font-weight" => "400" - ], - self::is_class - ), - "div" - ); - if(count($carousel) !== 0){ + $carousel_title = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "font-size" => "16px", + "line-height" => "20px", + "font-weight" => "400" + ], + self::is_class + ), + "div" + ); + $sublink = []; // twitter carousel sublinks foreach($carousel as $item){ @@ -1212,6 +1229,136 @@ class google{ continue; } + $people_title = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "font-weight" => "bold", + "font-size" => "16px", + "color" => "#000", + "margin" => "0", + "padding" => "12px 16px 0 16px" + ], + self::is_class + ), + "div" + ); + + if( + count($people_title) !== 0 && + strtolower( + $this->fuckhtml + ->getTextContent( + $people_title[0] + ) + ) == "people also ask" + ){ + /* + Parse "people also ask" node + */ + + $div = + $this->fuckhtml + ->getElementsByTagName("div"); + + // add suggestions + $suggestions = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "display" => "inline-block", + "padding-right" => "26px" + ], + self::is_class + ), + $div + ); + + foreach($suggestions as $suggestion){ + + $out["related"][] = + $this->fuckhtml + ->getTextContent($suggestion); + } + + // parse websites + foreach($div as $d){ + + if( + isset($d["attributes"]["id"]) && + strpos( + $d["attributes"]["id"], + "accdef_" + ) !== false + ){ + + $this->fuckhtml->load( + $this->ask[ + $d["attributes"]["id"] + ] + ); + + $description = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + )[0] + ) + ); + + $a = + $this->fuckhtml + ->getElementsByTagName("a") + [0]; + + $this->fuckhtml->load($a); + + $out["web"][] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByTagName("span")[0] + ) + ), + "description" => $description, + "url" => + $this->decodeurl( + $this->fuckhtml + ->getTextContent( + $a + ["attributes"] + ["href"] + ) + ), + "date" => null, + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => [] + ]; + } + } + + continue; + } + if(count($title) !== 0){ /* @@ -1231,6 +1378,19 @@ class google{ "url" => $this->getimage($thumb[0]["attributes"]["id"]), "ratio" => "1:1" ]; + + if(parse_url($thumb["url"], PHP_URL_HOST) == "i.ytimg.com"){ + + $thumb = [ + "url" => + str_replace( + "default.jpg", + "maxresdefault.jpg", + $thumb["url"] + ), + "ratio" => "16:9" + ]; + } }else{ $thumb = [ @@ -1287,18 +1447,33 @@ class google{ $cat = explode(":", $cat, 2); - $table[ + $name = $this->fuckhtml ->getTextContent( $cat[0] - ) - ] = - $this->titledots( - $this->fuckhtml - ->getTextContent( - $cat[1] - ) ); + + if(strtolower($name) != "posted"){ + + $table[$name] = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $cat[1] + ) + ); + }else{ + + $date = + strtotime( + $this->titledots( + $this->fuckhtml + ->getTextContent( + $cat[1] + ) + ) + ); + } } continue; } @@ -1307,6 +1482,7 @@ class google{ $this->fuckhtml ->getElementsByTagName("span"); + $encounter_rating = false; foreach($spans as $span){ // replace element with nothing @@ -1319,10 +1495,53 @@ class google{ ); } + if($encounter_rating !== false){ + + switch($encounter_rating){ + + case 3: + $table["Votes"] = + number_format( + str_replace( + [ + "(", + ")", + "," + ], + "", + $this->fuckhtml + ->getTextContent( + $span["innerHTML"] + ) + ) + ); + break; + + case 6: + $table["Price"] = + $this->fuckhtml + ->getTextContent( + $span["innerHTML"] + ); + break; + + case 8: + $table["Support"] = + $this->fuckhtml + ->getTextContent( + $span["innerHTML"] + ); + break; + } + + $encounter_rating++; + } + // get rating if(isset($span["attributes"]["aria-hidden"])){ $table["Rating"] = $span["innerHTML"]; + $encounter_rating = 0; continue; } } @@ -1565,16 +1784,7 @@ class google{ } /* - Detect if its a wikipedia thing - */ - $h3 = - $this->fuckhtml - ->getElementsByTagName("h3"); - - - - /* - Fallback to parsing the word definitions + Parse instant answers with parts */ $parts = $this->fuckhtml @@ -1588,15 +1798,8 @@ class google{ "div" ); - if(count($parts) === 0){ - - continue; - } + if(count($parts) !== 0){ - $head = $parts[0]; - - if(count($h3) !== 0){ - $table = [ "title" => null, "description" => [], @@ -1606,30 +1809,130 @@ class google{ "sublink" => [] ]; - $h3 = $h3[0]; - - $table["title"] = + // get thumb + $thumb = $this->fuckhtml - ->getTextContent( - $h3 + ->getElementsByClassName( + $this->findstyles( + [ + "float" => "right", + "padding-left" => "16px" + ], + self::is_class + ), + "div" + ); + + if(count($thumb) !== 0){ + + $this->fuckhtml->load($thumb[0]); + + $img = + $this->fuckhtml + ->getElementsByTagName("img"); + + if(count($img) !== 0){ + + $table["thumb"] = + $this->getimage( + $img[0]["attributes"]["id"] + ); + } + + $this->fuckhtml->load($container); + } + + $h = + $this->fuckhtml + ->getElementsByTagName("h3"); + + if(count($h) === 0){ + + $h = + $this->fuckhtml + ->getElementsByTagName("h2"); + } + + if(count($h) !== 0){ + // set title + subtext for when a word definition + // appears + $h = $h[0]; + + $table["title"] = + $this->fuckhtml + ->getTextContent( + $h + ); + + $parts[0]["innerHTML"] = + str_replace( + $h["outerHTML"], + "", + $parts[0]["innerHTML"] + ); + + $table["description"][] = + [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $parts[0] + ) + ]; + }else{ + + // parse it as a wikipedia header + + } + + // get table elements + $tables = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "display" => "table", + "width" => "100%", + "padding-right" => "16px", + "-webkit-box-sizing" => "border-box" + ], + self::is_class + ), + "div" ); - $head["innerHTML"] = - str_replace( - $h3["outerHTML"], - "", - $head["innerHTML"] - ); - - $table["description"][] = - [ - "type" => "quote", - "value" => + foreach($tables as $tbl){ + + $this->fuckhtml->load($tbl); + + $images = + $this->fuckhtml + ->getElementsByTagName("img"); + + if(count($images) !== 0){ + + $image = $this->getimage($images[0]["attributes"]["id"]); + + $text = $this->fuckhtml ->getTextContent( - $head - ) - ]; + $tbl + ); + + $table["description"][] = [ + "type" => "link", + "value" => $text, + "url" => "?s=" . urlencode($text) . "&scraper=google" + ]; + + $table["description"][] = [ + "type" => "image", + "url" => $image + ]; + } + + } $audio = $this->fuckhtml @@ -1828,9 +2131,9 @@ class google{ } } } + + $out["answer"][] = $table; } - - $out["answer"][] = $table; } if($dmca_table){ @@ -2136,20 +2439,65 @@ class google{ $match ); - if(count($match) !== 0){ + if(count($match) === 0){ - if(!empty($match[1])){ - - return urldecode($match[1]); - } - - if(!empty($match[2])){ - - return urldecode($match[2]); - } + return null; } - return null; + $url = empty($match[1]) ? urldecode($match[2]) : urldecode($match[1]); + + $domain = parse_url($url, PHP_URL_HOST); + + if( + preg_match( + '/wikipedia.org$/', + $domain + ) + ){ + + // rewrite wikipedia mobile URLs to desktop + $url = + $this->replacedomain( + $url, + preg_replace( + '/([a-z0-9]+)(\.m\.)/', + '$1.', + $domain + ) + ); + } + + if( + preg_match( + '/imdb\.com$|youtube\.[^.]+$/', + $domain + ) + ){ + + // rewrite imdb and youtube mobile URLs too + $url = + $this->replacedomain( + $url, + preg_replace( + '/^m\./', + "", + $domain + ) + ); + + } + + return $url; + } + + private function replacedomain($url, $domain){ + + return + preg_replace( + '/(https?:\/\/)([^\/]+)/', + '$1' . $domain, + $url + ); } private function titledots($title){ diff --git a/scraper/mojeek.php b/scraper/mojeek.php index a0b5016..e7e8abc 100644 --- a/scraper/mojeek.php +++ b/scraper/mojeek.php @@ -909,6 +909,23 @@ class mojeek{ $a = $a[0]; + $date = + explode( + " - ", + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByTagName( + "span" + )[0] + ) + ); + + $date = + strtotime( + $date[count($date) - 1] + ); + $out["news"][] = [ "title" => html_entity_decode( @@ -918,20 +935,7 @@ class mojeek{ ) ), "description" => null, - "date" => - strtotime( - explode( - " - ", - $this->fuckhtml - ->getTextContent( - $this->fuckhtml - ->getElementsByTagName( - "span" - )[0] - ), - 2 - )[1] - ), + "date" => $date, "thumb" => [ "url" => null, "ratio" => null diff --git a/settings.php b/settings.php index 96c31c8..c2d06e7 100644 --- a/settings.php +++ b/settings.php @@ -70,10 +70,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - //[ - // "value" => "google", - // "text" => "Google" - //], + /*[ + "value" => "google", + "text" => "Google" + ],*/ [ "value" => "mojeek", "text" => "Mojeek" @@ -99,11 +99,15 @@ $settings = [ [ "value" => "yandex", "text" => "Yandex" - ]//, - //[ - // "value" => "google", - // "text" => "Google" - //] + ], + [ + "value" => "brave", + "text" => "Brave" + ]/*, + [ + "value" => "google", + "text" => "Google" + ]*/ ] ], [ @@ -117,11 +121,15 @@ $settings = [ [ "value" => "ddg", "text" => "DuckDuckGo" - ]//, - //[ - // "value" => "google", - // "text" => "Google" - //] + ], + [ + "value" => "brave", + "text" => "Brave" + ]/*, + [ + "value" => "google", + "text" => "Google" + ]*/ ] ], [ @@ -136,10 +144,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - //[ - // "value" => "google", - // "text" => "Google" - //], + /*[ + "value" => "google", + "text" => "Google" + ],*/ [ "value" => "mojeek", "text" => "Mojeek" @@ -219,7 +227,7 @@ echo '' . '' . 'Settings' . - '' . + '' . '' . '' . '' . diff --git a/static/client.js b/static/client.js index 545f0bc..89e9a5e 100644 --- a/static/client.js +++ b/static/client.js @@ -360,6 +360,23 @@ if(image_class !== null){ elem.getAttribute("data-json") ); + var imagesize = elem.getElementsByTagName("img")[0]; + + if(imagesize.complete){ + + var imagesize_w = imagesize.naturalWidth; + var imagesize_h = imagesize.naturalHeight; + } + + for(var i=0; i
- + diff --git a/template/images.html b/template/images.html index 61e319c..1c5b23a 100644 --- a/template/images.html +++ b/template/images.html @@ -2,6 +2,6 @@ {%images%}
{%nextpage%} - + diff --git a/template/search.html b/template/search.html index c187102..5210807 100644 --- a/template/search.html +++ b/template/search.html @@ -11,6 +11,6 @@ {%left%}
- +