forked from lolcat/4get
		
	Compare commits
	
		
			23 Commits
		
	
	
		
			fuck
			...
			86f8edda34
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 86f8edda34 | |||
| 36b0c570aa | |||
| 47a7a2a224 | |||
| 0180cf5224 | |||
| eed32a153c | |||
| f9f3c919d6 | |||
| 4b0d8f75dc | |||
| 033e4cb959 | |||
| 91f621e105 | |||
| 9f60900875 | |||
| 631aa58565 | |||
| b892f90b13 | |||
| 463ba0775f | |||
| cfad4fb035 | |||
| 4e968b4b1c | |||
| 81df52235c | |||
| 1ca2626ad9 | |||
| 9ca93f34c6 | |||
| 0a43b9c849 | |||
| b636fec319 | |||
| 774f7113df | |||
| 0b3bbe0f15 | |||
| 5f0b0a7b83 | 
							
								
								
									
										1
									
								
								.dockerignore
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.dockerignore
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | .git | ||||||
							
								
								
									
										48
									
								
								.gitea/workflows/ci.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								.gitea/workflows/ci.yml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | |||||||
|  | name: '4get CI' | ||||||
|  |  | ||||||
|  | on: | ||||||
|  |   workflow_dispatch: | ||||||
|  |   push: | ||||||
|  |     branches: | ||||||
|  |       - '*' | ||||||
|  |     paths-ignore: | ||||||
|  |       - 'README.md' | ||||||
|  |       - 'docker-compose.yaml' | ||||||
|  |       - '.gitignore' | ||||||
|  |       - 'docs/**' | ||||||
|  |  | ||||||
|  | jobs: | ||||||
|  |   build: | ||||||
|  |     runs-on: docker | ||||||
|  |  | ||||||
|  |     steps: | ||||||
|  |     - uses: actions/checkout@v4 | ||||||
|  |       name: Checkout 4get repository | ||||||
|  |  | ||||||
|  |     - uses: docker/setup-buildx-action@v3 | ||||||
|  |       name: Setup Docker BuildX system | ||||||
|  |  | ||||||
|  |     - name: Login to Docker Container Registry | ||||||
|  |       uses: docker/login-action@v3 | ||||||
|  |       with: | ||||||
|  |         registry: git.lolcat.ca | ||||||
|  |         username: ${{ secrets.USERNAME }} | ||||||
|  |         password: ${{ secrets.TOKEN }} | ||||||
|  |  | ||||||
|  |     - name: Docker meta | ||||||
|  |       id: meta | ||||||
|  |       uses: docker/metadata-action@v5 | ||||||
|  |       with: | ||||||
|  |         images: git.lolcat.ca/lolcat/4get | ||||||
|  |         # both tag rules are enabled on master only; other branches get no tags | ||||||
|  |         tags: | | ||||||
|  |           type=sha,format=short,prefix={{date 'YYYY.MM.DD'}}-,enable=${{ github.ref == format('refs/heads/{0}', 'master') }} | ||||||
|  |           type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'master') }} | ||||||
|  |  | ||||||
|  |     - uses: docker/build-push-action@v6 | ||||||
|  |       name: Build images | ||||||
|  |       with: | ||||||
|  |         context: . | ||||||
|  |         file: Dockerfile | ||||||
|  |         tags: ${{ steps.meta.outputs.tags }} | ||||||
|  |         platforms: linux/amd64 | ||||||
|  |         # only publish from master: other branches have no tags to push, so | ||||||
|  |         # they are built as a check but never pushed to the registry | ||||||
|  |         push: ${{ github.ref == format('refs/heads/{0}', 'master') }} | ||||||
| @@ -4,7 +4,6 @@ WORKDIR /var/www/html/4get | |||||||
| RUN apk update && apk upgrade | RUN apk update && apk upgrade | ||||||
| RUN apk add php apache2-ssl php83-fileinfo php83-openssl php83-iconv php83-common php83-dom php83-sodium php83-curl curl php83-pecl-apcu php83-apache2 imagemagick php83-pecl-imagick php-mbstring imagemagick-webp imagemagick-jpeg | RUN apk add php apache2-ssl php83-fileinfo php83-openssl php83-iconv php83-common php83-dom php83-sodium php83-curl curl php83-pecl-apcu php83-apache2 imagemagick php83-pecl-imagick php-mbstring imagemagick-webp imagemagick-jpeg | ||||||
|  |  | ||||||
| COPY ./docker/apache/ /etc/apache2/ |  | ||||||
| COPY . . | COPY . . | ||||||
|  |  | ||||||
| RUN chmod 777 /var/www/html/4get/icons | RUN chmod 777 /var/www/html/4get/icons | ||||||
| @@ -14,4 +13,5 @@ EXPOSE 443 | |||||||
|  |  | ||||||
| ENV FOURGET_PROTO=http | ENV FOURGET_PROTO=http | ||||||
|  |  | ||||||
| CMD  ["./docker/docker-entrypoint.sh"] | ENTRYPOINT  ["./docker/docker-entrypoint.sh"] | ||||||
|  | CMD ["start"] | ||||||
|   | |||||||
							
								
								
									
										20
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								README.md
									
									
									
									
									
								
							| @@ -9,9 +9,11 @@ https://4get.ca/about | |||||||
| ## Official instance | ## Official instance | ||||||
| https://4get.ca , or visit the official instance list: https://4get.ca/instances | https://4get.ca , or visit the official instance list: https://4get.ca/instances | ||||||
|  |  | ||||||
|  | _NOT to be confused with 4get.ch, 4get.lol and friends! I **don't** host these._ | ||||||
|  |  | ||||||
| ## Totally unbiased comparison between alternatives | ## Totally unbiased comparison between alternatives | ||||||
|  |  | ||||||
| |                            | 4get                    | searx(ng) | libreY      | araa      | hearch            | | |                            | 4get                    | searx(ng) | libreY      | araa      | hearch.co         | | ||||||
| |----------------------------|-------------------------|-----------|-------------|-----------|-------------------| | |----------------------------|-------------------------|-----------|-------------|-----------|-------------------| | ||||||
| | RAM usage                  | 200-400mb~              | 2GB~      | 200-400mb~  | 2GB~      | idk               | | | RAM usage                  | 200-400mb~              | 2GB~      | 200-400mb~  | 2GB~      | idk               | | ||||||
| | Does it suck               | no (debunked by snopes) | yes       | yes         | a little  | better than searx | | | Does it suck               | no (debunked by snopes) | yes       | yes         | a little  | better than searx | | ||||||
| @@ -23,9 +25,9 @@ https://4get.ca , or visit the official instance list: https://4get.ca/instances | |||||||
| 3. Bot protection that *actually* filters out the bots (when configured) | 3. Bot protection that *actually* filters out the bots (when configured) | ||||||
| 4. Interface doesn't require javascript | 4. Interface doesn't require javascript | ||||||
| 5. Favicon fetcher with caching support & image proxy | 5. Favicon fetcher with caching support & image proxy | ||||||
| 6. Bunch of other shit | 6. Bunch of other shits | ||||||
|  |  | ||||||
| tl;dr the best way to actually browse for shit. | tl;dr 4get is the best way to browse for shit. | ||||||
|  |  | ||||||
| # Supported websites | # Supported websites | ||||||
|  |  | ||||||
| @@ -39,11 +41,11 @@ tl;dr the best way to actually browse for shit. | |||||||
| | Qwant      | Qwant        | Startpage  | Mojeek     |            | Kagi          | | | Qwant      | Qwant        | Startpage  | Mojeek     |            | Kagi          | | ||||||
| | Ghostery   | Yep          | Qwant      |            |            | Qwant         | | | Ghostery   | Yep          | Qwant      |            |            | Qwant         | | ||||||
| | Yep        | Solofield    | Solofield  |            |            | Ghostery      | | | Yep        | Solofield    | Solofield  |            |            | Ghostery      | | ||||||
| | Greppr     | Imgur        |            |            |            | Yep           | | | Greppr     | Pinterest    |            |            |            | Yep           | | ||||||
| | Crowdview  | FindThatMeme |            |            |            | Marginalia    | | | Crowdview  | 500px        |            |            |            | Marginalia    | | ||||||
| | Mwmbl      |              |            |            |            | YouTube       | | | Mwmbl      | VSCO         |            |            |            | YouTube       | | ||||||
| | Mojeek     |              |            |            |            | Soundcloud    | | | Mojeek     | Imgur        |            |            |            | Soundcloud    | | ||||||
| | Solofield  |              |            |            |            |               | | | Solofield  | FindThatMeme |            |            |            |               | | ||||||
| | Marginalia |              |            |            |            |               | | | Marginalia |              |            |            |            |               | | ||||||
| | wiby       |              |            |            |            |               | | | wiby       |              |            |            |            |               | | ||||||
| | Curlie     |              |            |            |            |               | | | Curlie     |              |            |            |            |               | | ||||||
| @@ -52,7 +54,7 @@ tl;dr the best way to actually browse for shit. | |||||||
| Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>. | Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>. | ||||||
|  |  | ||||||
| ## Contact | ## Contact | ||||||
| Shit breaks all the time but I repair it all the time too... Email me here: <b>will (at) lolcat.ca</b> or create an issue. | Shit breaks all the time but I repair it all the time too. Email me here: <b>will (at) lolcat.ca</b> or create an issue. | ||||||
|  |  | ||||||
| ## License | ## License | ||||||
| AGPL | AGPL | ||||||
|   | |||||||
							
								
								
									
										19
									
								
								api.txt
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								api.txt
									
									
									
									
									
								
							| @@ -1,9 +1,16 @@ | |||||||
|                         __ __             __ |                    44 | ||||||
|                        / // / ____ ____  / /_ |                  4444444      44    | ||||||
|                       / // /_/ __ `/ _ \/ __/ |                  44444444   44444       444 | ||||||
|                      /__  __/ /_/ /  __/ /_  |                  44444444  444444    444444444 | ||||||
|                        /_/  \__, /\___/\__/ |                   44444   44444444  444444444 | ||||||
|                            /____/          |                          444444444   4444444 | ||||||
|  |                         4444444444    444444 | ||||||
|  |                       4444444444444 | ||||||
|  |                     444444444444444444 | ||||||
|  |                        444444444444444 | ||||||
|  |                           44444444 | ||||||
|  |                           4444 | ||||||
|  |                            44 | ||||||
|                   |                   | ||||||
|            + Welcome to the 4get API documentation + |            + Welcome to the 4get API documentation + | ||||||
|  |  | ||||||
|   | |||||||
| @@ -119,7 +119,7 @@ class config{ | |||||||
| 	 | 	 | ||||||
| 	// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages | 	// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages | ||||||
| 	// Changing this might break things. | 	// Changing this might break things. | ||||||
| 	const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0"; | 	const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0"; | ||||||
| 	 | 	 | ||||||
| 	// Proxy pool assignments for each scraper | 	// Proxy pool assignments for each scraper | ||||||
| 	// false = Use server's raw IP | 	// false = Use server's raw IP | ||||||
| @@ -143,6 +143,8 @@ class config{ | |||||||
| 	const PROXY_YT = false; // youtube | 	const PROXY_YT = false; // youtube | ||||||
| 	const PROXY_YEP = false; | 	const PROXY_YEP = false; | ||||||
| 	const PROXY_PINTEREST = false; | 	const PROXY_PINTEREST = false; | ||||||
|  | 	const PROXY_FIVEHPX = false; | ||||||
|  | 	const PROXY_VSCO = false; | ||||||
| 	const PROXY_SEZNAM = false; | 	const PROXY_SEZNAM = false; | ||||||
| 	const PROXY_NAVER = false; | 	const PROXY_NAVER = false; | ||||||
| 	const PROXY_GREPPR = false; | 	const PROXY_GREPPR = false; | ||||||
|   | |||||||
| @@ -6,14 +6,15 @@ services: | |||||||
|     image: luuul/4get:latest |     image: luuul/4get:latest | ||||||
|     restart: unless-stopped |     restart: unless-stopped | ||||||
|     environment: |     environment: | ||||||
|  |       - FOURGET_PROTO=http | ||||||
|       - FOURGET_SERVER_NAME=4get.ca |       - FOURGET_SERVER_NAME=4get.ca | ||||||
|  |       - FOURGET_INSTANCES=https://4get.ca | ||||||
|  |  | ||||||
|     ports: |     ports: | ||||||
|       - "80:80" |       - "80:80" | ||||||
|       - "443:443" |       - "443:443" | ||||||
|  |  | ||||||
|     volumes: |     # volumes: | ||||||
|       - /etc/letsencrypt/live/domain.tld:/etc/4get/certs |     # - /etc/letsencrypt/live/domain.tld:/etc/4get/certs # mount ssl | ||||||
|       # mount custom banners and captcha |     # - ./banners:/var/www/html/4get/banner # mount custom banners | ||||||
|       - ./banners:/var/www/html/4get/banner |     # - ./captcha:/var/www/html/4get/data/captcha # mount captcha images | ||||||
|       - ./captcha:/var/www/html/4get/data/captcha |  | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								docker/apache/http/conf.d/ssl.conf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								docker/apache/http/conf.d/ssl.conf
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | # intentionally blank | ||||||
| @@ -8,18 +8,27 @@ FOURGET_PROTO="${FOURGET_PROTO#\"}" | |||||||
| # make lowercase | # make lowercase | ||||||
| FOURGET_PROTO=`echo $FOURGET_PROTO | awk '{print tolower($0)}'` | FOURGET_PROTO=`echo $FOURGET_PROTO | awk '{print tolower($0)}'` | ||||||
|  |  | ||||||
|  | FOURGET_SRC='/var/www/html/4get' | ||||||
|  |  | ||||||
|  | mkdir -p /etc/apache2 | ||||||
|  |  | ||||||
| if [ "$FOURGET_PROTO" = "https" ]; then | if [ "$FOURGET_PROTO" = "https" ]; then | ||||||
|         echo "Using https configuration" |         echo "Using https configuration" | ||||||
|         cp /etc/apache2/https.conf /etc/apache2/httpd.conf |         cp -r ${FOURGET_SRC}/docker/apache/https/httpd.conf /etc/apache2 | ||||||
|  |         cp -r ${FOURGET_SRC}/docker/apache/https/conf.d/* /etc/apache2/conf.d | ||||||
|  |  | ||||||
| else | else | ||||||
|         echo "Using http configuration" |         echo "Using http configuration" | ||||||
|         cp /etc/apache2/http.conf /etc/apache2/httpd.conf |         cp -r ${FOURGET_SRC}/docker/apache/http/httpd.conf /etc/apache2 | ||||||
|  |         cp -r ${FOURGET_SRC}/docker/apache/http/conf.d/* /etc/apache2/conf.d | ||||||
| fi | fi | ||||||
|  |  | ||||||
| php ./docker/gen_config.php | php ./docker/gen_config.php | ||||||
|  |  | ||||||
|  | # Compare only the first argument: "$@" expands to one word per argument and | ||||||
|  | # breaks the test as soon as a multi-word command is passed to the container. | ||||||
|  | if [ "$1" = "start" ]; then | ||||||
|         echo "4get is running" |         echo "4get is running" | ||||||
|         exec httpd -DFOREGROUND |         exec httpd -DFOREGROUND | ||||||
|  | else | ||||||
|  |         # anything else is run as-is in place of the web server | ||||||
|  |         exec "$@" | ||||||
|  | fi | ||||||
|  |  | ||||||
|   | |||||||
| @@ -75,6 +75,7 @@ class backend{ | |||||||
| 				break; | 				break; | ||||||
| 			 | 			 | ||||||
| 			case "socks5_hostname": | 			case "socks5_hostname": | ||||||
|  | 			case "socks5h": | ||||||
| 			case "socks5a": | 			case "socks5a": | ||||||
| 				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME); | 				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME); | ||||||
| 				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); | 				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); | ||||||
|   | |||||||
| @@ -838,10 +838,10 @@ class frontend{ | |||||||
| 		} | 		} | ||||||
| 		 | 		 | ||||||
| 		$payload .= | 		$payload .= | ||||||
| 				'<a href="https://webcache.googleusercontent.com/search?q=cache:' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://google.com" alt="go">Google cache</a>' . |  | ||||||
| 				'<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' . | 				'<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' . | ||||||
| 				'<a href="https://archive.ph/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' . | 				'<a href="https://archive.ph/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' . | ||||||
| 				'<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' . | 				'<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' . | ||||||
|  | 				'<a href="https://arquivo.pt/wayback/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://arquivo.pt" alt="ar">Arquivo.pt</a>' . | ||||||
| 				'<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' . | 				'<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' . | ||||||
| 				'<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' . | 				'<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' . | ||||||
| 			'</div>'; | 			'</div>'; | ||||||
| @@ -969,7 +969,9 @@ class frontend{ | |||||||
| 						"qwant" => "Qwant", | 						"qwant" => "Qwant", | ||||||
| 						"yep" => "Yep", | 						"yep" => "Yep", | ||||||
| 						"solofield" => "Solofield", | 						"solofield" => "Solofield", | ||||||
| 						//"pinterest" => "Pinterest", | 						"pinterest" => "Pinterest", | ||||||
|  | 						"fivehpx" => "500px", | ||||||
|  | 						"vsco" => "VSCO", | ||||||
| 						"imgur" => "Imgur", | 						"imgur" => "Imgur", | ||||||
| 						"ftm" => "FindThatMeme" | 						"ftm" => "FindThatMeme" | ||||||
| 					] | 					] | ||||||
|   | |||||||
| @@ -526,4 +526,85 @@ class fuckhtml{ | |||||||
| 				$string | 				$string | ||||||
| 			); | 			); | ||||||
| 	} | 	} | ||||||
|  | 	 | ||||||
|  | 	// Extract the first balanced array or object literal from $json. | ||||||
|  | 	// | ||||||
|  | 	// Scans for the first unquoted "[" or "{" and returns the substring that | ||||||
|  | 	// ends where every unquoted bracket and brace opened since then has been | ||||||
|  | 	// closed again. Brackets inside single- or double-quoted strings are | ||||||
|  | 	// ignored. Returns null when no balanced literal is found. | ||||||
|  | 	public function extract_json($json){ | ||||||
|  | 		 | ||||||
|  | 		$len = strlen($json); | ||||||
|  | 		$array_level = 0; | ||||||
|  | 		$object_level = 0; | ||||||
|  | 		$in_quote = null; | ||||||
|  | 		$start = null; | ||||||
|  | 		 | ||||||
|  | 		for($i=0; $i<$len; $i++){ | ||||||
|  | 			 | ||||||
|  | 			switch($json[$i]){ | ||||||
|  | 				 | ||||||
|  | 				case "[": | ||||||
|  | 					if($in_quote === null){ | ||||||
|  | 						 | ||||||
|  | 						$array_level++; | ||||||
|  | 						if($start === null){ | ||||||
|  | 							 | ||||||
|  | 							$start = $i; | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 					break; | ||||||
|  | 				 | ||||||
|  | 				case "]": | ||||||
|  | 					// closers seen before the literal starts are stray text: | ||||||
|  | 					// counting them would push the depth below zero | ||||||
|  | 					if( | ||||||
|  | 						$in_quote === null && | ||||||
|  | 						$start !== null | ||||||
|  | 					){ | ||||||
|  | 						 | ||||||
|  | 						$array_level--; | ||||||
|  | 					} | ||||||
|  | 					break; | ||||||
|  | 				 | ||||||
|  | 				case "{": | ||||||
|  | 					if($in_quote === null){ | ||||||
|  | 						 | ||||||
|  | 						$object_level++; | ||||||
|  | 						if($start === null){ | ||||||
|  | 							 | ||||||
|  | 							$start = $i; | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 					break; | ||||||
|  | 				 | ||||||
|  | 				case "}": | ||||||
|  | 					if( | ||||||
|  | 						$in_quote === null && | ||||||
|  | 						$start !== null | ||||||
|  | 					){ | ||||||
|  | 						 | ||||||
|  | 						$object_level--; | ||||||
|  | 					} | ||||||
|  | 					break; | ||||||
|  | 				 | ||||||
|  | 				case "\"": | ||||||
|  | 				case "'": | ||||||
|  | 					// count the backslashes directly before the quote: an odd | ||||||
|  | 					// count escapes the quote, an even count (including zero) | ||||||
|  | 					// means the backslashes escape each other | ||||||
|  | 					$backslashes = 0; | ||||||
|  | 					for($k = $i - 1; $k >= 0 && $json[$k] === "\\"; $k--){ | ||||||
|  | 						 | ||||||
|  | 						$backslashes++; | ||||||
|  | 					} | ||||||
|  | 					 | ||||||
|  | 					if($backslashes % 2 === 0){ | ||||||
|  | 						// found a non-escaped quote | ||||||
|  | 						 | ||||||
|  | 						if($in_quote === null){ | ||||||
|  | 							 | ||||||
|  | 							// open quote | ||||||
|  | 							$in_quote = $json[$i]; | ||||||
|  | 						}elseif($in_quote === $json[$i]){ | ||||||
|  | 							 | ||||||
|  | 							// close quote | ||||||
|  | 							$in_quote = null; | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 					break; | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			// the literal is complete once everything opened has been closed | ||||||
|  | 			if( | ||||||
|  | 				$start !== null && | ||||||
|  | 				$array_level === 0 && | ||||||
|  | 				$object_level === 0 | ||||||
|  | 			){ | ||||||
|  | 				 | ||||||
|  | 				return substr($json, $start, $i - $start + 1); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		// no balanced array or object was found | ||||||
|  | 		return null; | ||||||
|  | 	} | ||||||
| } | } | ||||||
|   | |||||||
| @@ -210,6 +210,63 @@ class brave{ | |||||||
| 		return $data; | 		return $data; | ||||||
| 	} | 	} | ||||||
| 	 | 	 | ||||||
|  | 	// Find the inline script that calls kit.start() in the document currently | ||||||
|  | 	// loaded into $this->fuckhtml, and decode the object literal that follows | ||||||
|  | 	// its first "data:" marker. | ||||||
|  | 	// | ||||||
|  | 	// Returns whatever parseJsObject() produces for that literal. | ||||||
|  | 	// Throws Exception when the script, the data field or the literal cannot | ||||||
|  | 	// be found or decoded. | ||||||
|  | 	private function get_js(){ | ||||||
|  | 		 | ||||||
|  | 		$scripts = | ||||||
|  | 			$this->fuckhtml | ||||||
|  | 			->getElementsByTagName( | ||||||
|  | 				"script" | ||||||
|  | 			); | ||||||
|  | 		 | ||||||
|  | 		$data = null; | ||||||
|  | 		 | ||||||
|  | 		// iterate by value: nothing is modified, so no reference is needed | ||||||
|  | 		foreach($scripts as $script){ | ||||||
|  | 			 | ||||||
|  | 			if(strpos($script["innerHTML"], "kit.start(") === false){ | ||||||
|  | 				 | ||||||
|  | 				continue; | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			// keep everything after the first "data:" marker | ||||||
|  | 			$parts = | ||||||
|  | 				explode( | ||||||
|  | 					"data:", | ||||||
|  | 					$script["innerHTML"], | ||||||
|  | 					2 | ||||||
|  | 				); | ||||||
|  | 			 | ||||||
|  | 			if(count($parts) !== 2){ | ||||||
|  | 				 | ||||||
|  | 				throw new Exception("Failed to split up data field"); | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			$data = $parts[1]; | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		if($data === null){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Could not grep JavaScript object"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		// cut the balanced literal out of the remaining script text | ||||||
|  | 		$literal = | ||||||
|  | 			$this->fuckhtml | ||||||
|  | 			->extract_json( | ||||||
|  | 				$data | ||||||
|  | 			); | ||||||
|  | 		 | ||||||
|  | 		// extract_json() yields null when nothing balanced was found; do not | ||||||
|  | 		// hand that to the parser | ||||||
|  | 		if($literal === null){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Failed to decode JavaScript object"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		$data = | ||||||
|  | 			$this->fuckhtml | ||||||
|  | 			->parseJsObject( | ||||||
|  | 				$literal | ||||||
|  | 			); | ||||||
|  | 		 | ||||||
|  | 		if($data === null){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Failed to decode JavaScript object"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		return $data; | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
| 	public function web($get){ | 	public function web($get){ | ||||||
| 		 | 		 | ||||||
| 		if($get["npt"]){ | 		if($get["npt"]){ | ||||||
| @@ -346,7 +403,7 @@ class brave{ | |||||||
| 			 | 			 | ||||||
| 			$nextpage = | 			$nextpage = | ||||||
| 				$this->fuckhtml | 				$this->fuckhtml | ||||||
| 				->getElementsByClassName("btn", "a"); | 				->getElementsByClassName("button", "a"); | ||||||
| 			 | 			 | ||||||
| 			if(count($nextpage) !== 0){ | 			if(count($nextpage) !== 0){ | ||||||
| 				 | 				 | ||||||
| @@ -382,55 +439,9 @@ class brave{ | |||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		 | 		 | ||||||
|  | 		// do some magic | ||||||
| 		$this->fuckhtml->load($html); | 		$this->fuckhtml->load($html); | ||||||
| 		 | 		$data = $this->get_js(); | ||||||
| 		$script_disc = |  | ||||||
| 			$this->fuckhtml |  | ||||||
| 			->getElementsByTagName( |  | ||||||
| 				"script" |  | ||||||
| 			); |  | ||||||
| 		 |  | ||||||
| 		$grep = []; |  | ||||||
| 		foreach($script_disc as $discs){ |  | ||||||
| 			 |  | ||||||
| 			preg_match( |  | ||||||
| 				'/const data ?= ?(\[{.*}]);/', |  | ||||||
| 				$discs["innerHTML"], |  | ||||||
| 				$grep |  | ||||||
| 			); |  | ||||||
| 			 |  | ||||||
| 			if(isset($grep[1])){ |  | ||||||
| 				 |  | ||||||
| 				break; |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 		 |  | ||||||
| 		if(!isset($grep[1])){ |  | ||||||
| 			 |  | ||||||
| 			throw new Exception("Could not grep JavaScript object"); |  | ||||||
| 		} |  | ||||||
| 		 |  | ||||||
| 		$data = |  | ||||||
| 			rtrim( |  | ||||||
| 				preg_replace( |  | ||||||
| 					'/\(Array\(0\)\)\).*$/', |  | ||||||
| 					"", |  | ||||||
| 					$grep[1] |  | ||||||
| 				), |  | ||||||
| 				" ]" |  | ||||||
| 			) . "]"; |  | ||||||
| 		 |  | ||||||
| 		$data = |  | ||||||
| 			$this->fuckhtml |  | ||||||
| 			->parseJsObject( |  | ||||||
| 				$data |  | ||||||
| 			); |  | ||||||
| 		unset($grep); |  | ||||||
| 		 |  | ||||||
| 		if($data === null){ |  | ||||||
| 			 |  | ||||||
| 			throw new Exception("Failed to decode JavaScript object"); |  | ||||||
| 		} |  | ||||||
| 		 | 		 | ||||||
| 		if( | 		if( | ||||||
| 			isset($data[2]["data"]["title"]) && | 			isset($data[2]["data"]["title"]) && | ||||||
| @@ -1179,23 +1190,8 @@ class brave{ | |||||||
| 				$proxy | 				$proxy | ||||||
| 			); | 			); | ||||||
| 		 | 		 | ||||||
| 		preg_match( | 		$this->fuckhtml->load($html); | ||||||
| 			'/const data ?= ?(\[{.*}]);/', | 		$json = $this->get_js(); | ||||||
| 			$html, |  | ||||||
| 			$json |  | ||||||
| 		); |  | ||||||
| 		 |  | ||||||
| 		if(!isset($json[1])){ |  | ||||||
| 			 |  | ||||||
| 			throw new Exception("Failed to grep javascript object"); |  | ||||||
| 		} |  | ||||||
| 		 |  | ||||||
| 		$json = $this->fuckhtml->parseJsObject($json[1], true); |  | ||||||
| 		 |  | ||||||
| 		if($json === null){ |  | ||||||
| 			 |  | ||||||
| 			throw new Exception("Failed to parse javascript object"); |  | ||||||
| 		} |  | ||||||
| 		 | 		 | ||||||
| 		foreach( | 		foreach( | ||||||
| 			$json[1]["data"]["body"]["response"]["news"]["results"] | 			$json[1]["data"]["body"]["response"]["news"]["results"] | ||||||
| @@ -1277,22 +1273,8 @@ class brave{ | |||||||
| 		$html = fread($handle, filesize("scraper/brave-image.html")); | 		$html = fread($handle, filesize("scraper/brave-image.html")); | ||||||
| 		fclose($handle);*/ | 		fclose($handle);*/ | ||||||
| 		 | 		 | ||||||
| 		preg_match( | 		$this->fuckhtml->load($html); | ||||||
| 			'/const data = (\[{.*}\]);/', | 		$json = $this->get_js(); | ||||||
| 			$html, |  | ||||||
| 			$json |  | ||||||
| 		); |  | ||||||
| 		 |  | ||||||
| 		if(!isset($json[1])){ |  | ||||||
| 			 |  | ||||||
| 			throw new Exception("Failed to get data object"); |  | ||||||
| 		} |  | ||||||
| 		 |  | ||||||
| 		$json = |  | ||||||
| 			$this->fuckhtml |  | ||||||
| 			->parseJsObject( |  | ||||||
| 				$json[1] |  | ||||||
| 			); |  | ||||||
| 		 | 		 | ||||||
| 		foreach( | 		foreach( | ||||||
| 			$json[1] | 			$json[1] | ||||||
| @@ -1422,22 +1404,8 @@ class brave{ | |||||||
| 		$html = fread($handle, filesize("scraper/brave-video.html")); | 		$html = fread($handle, filesize("scraper/brave-video.html")); | ||||||
| 		fclose($handle);*/ | 		fclose($handle);*/ | ||||||
| 		 | 		 | ||||||
| 		preg_match( | 		$this->fuckhtml->load($html); | ||||||
| 			'/const data = (\[{.*}\]);/', | 		$json = $this->get_js(); | ||||||
| 			$html, |  | ||||||
| 			$json |  | ||||||
| 		); |  | ||||||
| 		 |  | ||||||
| 		if(!isset($json[1])){ |  | ||||||
| 			 |  | ||||||
| 			throw new Exception("Failed to get data object"); |  | ||||||
| 		} |  | ||||||
| 		 |  | ||||||
| 		$json = |  | ||||||
| 			$this->fuckhtml |  | ||||||
| 			->parseJsObject( |  | ||||||
| 				$json[1] |  | ||||||
| 			); |  | ||||||
| 		 | 		 | ||||||
| 		foreach( | 		foreach( | ||||||
| 			$json | 			$json | ||||||
| @@ -1809,7 +1777,21 @@ class brave{ | |||||||
| 		 | 		 | ||||||
| 		$nextpage = | 		$nextpage = | ||||||
| 			$this->fuckhtml | 			$this->fuckhtml | ||||||
| 			->getElementsByClassName("btn", "a"); | 			->getElementById( | ||||||
|  | 				"pagination", | ||||||
|  | 				"div" | ||||||
|  | 			); | ||||||
|  | 		 | ||||||
|  | 		if($nextpage){ | ||||||
|  | 			 | ||||||
|  | 			$this->fuckhtml->load($nextpage); | ||||||
|  | 			 | ||||||
|  | 			$nextpage = | ||||||
|  | 				$this->fuckhtml | ||||||
|  | 				->getElementsByClassName( | ||||||
|  | 					"button", | ||||||
|  | 					"a" | ||||||
|  | 				); | ||||||
| 			 | 			 | ||||||
| 			if(count($nextpage) !== 0){ | 			if(count($nextpage) !== 0){ | ||||||
| 				 | 				 | ||||||
| @@ -1847,6 +1829,7 @@ class brave{ | |||||||
| 						); | 						); | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  | 		} | ||||||
| 		 | 		 | ||||||
| 		return null; | 		return null; | ||||||
| 	} | 	} | ||||||
|   | |||||||
							
								
								
									
										2996
									
								
								scraper/ddg.php
									
									
									
									
									
								
							
							
						
						
									
										2996
									
								
								scraper/ddg.php
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										262
									
								
								scraper/fivehpx.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								scraper/fivehpx.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,262 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | class fivehpx{ | ||||||
|  | 	 | ||||||
|  | 	// Set up the helpers this scraper uses: a backend instance registered | ||||||
|  | 	// under the name "fivehpx" and a fuckhtml instance. | ||||||
|  | 	public function __construct(){ | ||||||
|  | 		 | ||||||
|  | 		// NOTE(review): plain include (not include_once); presumably only one | ||||||
|  | 		// scraper class is constructed per request, confirm against callers | ||||||
|  | 		include "lib/backend.php"; | ||||||
|  | 		$this->backend = new backend("fivehpx"); | ||||||
|  | 		 | ||||||
|  | 		include "lib/fuckhtml.php"; | ||||||
|  | 		$this->fuckhtml = new fuckhtml(); | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	// Describe the filters offered by this scraper: a single "sort" filter | ||||||
|  | 	// with three choices. $page is accepted but not used. | ||||||
|  | 	public function getfilters($page){ | ||||||
|  | 		 | ||||||
|  | 		// value => label pairs for the sort filter | ||||||
|  | 		$sort_options = [ | ||||||
|  | 			"relevance" => "Relevance", | ||||||
|  | 			"pulse" => "Pulse", | ||||||
|  | 			"newest" => "Newest" | ||||||
|  | 		]; | ||||||
|  | 		 | ||||||
|  | 		$filters = [ | ||||||
|  | 			"sort" => [ | ||||||
|  | 				"display" => "Sort", | ||||||
|  | 				"option" => $sort_options | ||||||
|  | 			] | ||||||
|  | 		]; | ||||||
|  | 		 | ||||||
|  | 		return $filters; | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	private function get($proxy, $url, $get = [], $post_data = null){ | ||||||
|  | 		 | ||||||
|  | 		$curlproc = curl_init(); | ||||||
|  | 		 | ||||||
|  | 		if($get !== []){ | ||||||
|  | 			$get = http_build_query($get); | ||||||
|  | 			$url .= "?" . $get; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_URL, $url); | ||||||
|  | 		 | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding | ||||||
|  | 		 | ||||||
|  | 		if($post_data === null){ | ||||||
|  | 			 | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_HTTPHEADER, | ||||||
|  | 				["User-Agent: " . config::USER_AGENT, | ||||||
|  | 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", | ||||||
|  | 				"Accept-Language: en-US,en;q=0.5", | ||||||
|  | 				"Accept-Encoding: gzip", | ||||||
|  | 				"DNT: 1", | ||||||
|  | 				"Sec-GPC: 1", | ||||||
|  | 				"Connection: keep-alive", | ||||||
|  | 				"Upgrade-Insecure-Requests: 1", | ||||||
|  | 				"Sec-Fetch-Dest: document", | ||||||
|  | 				"Sec-Fetch-Mode: navigate", | ||||||
|  | 				"Sec-Fetch-Site: same-origin", | ||||||
|  | 				"Sec-Fetch-User: ?1", | ||||||
|  | 				"Priority: u=0, i", | ||||||
|  | 				"TE: trailers"] | ||||||
|  | 			); | ||||||
|  | 		}else{ | ||||||
|  | 			 | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_HTTPHEADER, | ||||||
|  | 				["User-Agent: " . config::USER_AGENT, | ||||||
|  | 				"Accept: */*", | ||||||
|  | 				"Accept-Language: en-US,en;q=0.5", | ||||||
|  | 				"Accept-Encoding: gzip", | ||||||
|  | 				"Referer: https://500px.com/", | ||||||
|  | 				"content-type: application/json", | ||||||
|  | 				//"x-csrf-token: undefined", | ||||||
|  | 				"x-500px-source: Search", | ||||||
|  | 				"Content-Length: " . strlen($post_data), | ||||||
|  | 				"Origin: https://500px.com", | ||||||
|  | 				"DNT: 1", | ||||||
|  | 				"Sec-GPC: 1", | ||||||
|  | 				"Connection: keep-alive", | ||||||
|  | 				// "Cookie: _pin_unauth, _fbp, _sharedID, _sharedID_cst", | ||||||
|  | 				"Sec-Fetch-Dest: empty", | ||||||
|  | 				"Sec-Fetch-Mode: cors", | ||||||
|  | 				"Sec-Fetch-Site: same-site", | ||||||
|  | 				"Priority: u=4", | ||||||
|  | 				"TE: trailers"] | ||||||
|  | 			); | ||||||
|  | 						 | ||||||
|  | 			// set post data | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_POST, true); | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); | ||||||
|  | 		 | ||||||
|  | 		// http2 bypass | ||||||
|  | 		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); | ||||||
|  | 		 | ||||||
|  | 		$this->backend->assign_proxy($curlproc, $proxy); | ||||||
|  | 		 | ||||||
|  | 		$data = curl_exec($curlproc); | ||||||
|  | 		 | ||||||
|  | 		if(curl_errno($curlproc)){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception(curl_error($curlproc)); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		curl_close($curlproc); | ||||||
|  | 		return $data; | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	public function image($get){ | ||||||
|  | 		 | ||||||
|  | 		if($get["npt"]){ | ||||||
|  | 			 | ||||||
|  | 			[$pagination, $proxy] = | ||||||
|  | 				$this->backend->get( | ||||||
|  | 					$get["npt"], "images" | ||||||
|  | 				); | ||||||
|  | 			 | ||||||
|  | 			$pagination = json_decode($pagination, true); | ||||||
|  | 			$search = $pagination["search"]; | ||||||
|  | 			 | ||||||
|  | 		}else{ | ||||||
|  | 			 | ||||||
|  | 			$search = $get["s"]; | ||||||
|  | 			if(strlen($search) === 0){ | ||||||
|  | 				 | ||||||
|  | 				throw new Exception("Search term is empty!"); | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			$proxy = $this->backend->get_ip(); | ||||||
|  | 			$pagination = [ | ||||||
|  | 				"sort" => strtoupper($get["sort"]), | ||||||
|  | 				"search" => $search, | ||||||
|  | 				"filters" => [], | ||||||
|  | 				"nlp" => false, | ||||||
|  | 			]; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		try{ | ||||||
|  | 			 | ||||||
|  | 			$json = | ||||||
|  | 				$this->get( | ||||||
|  | 					$proxy, | ||||||
|  | 					"https://api.500px.com/graphql", | ||||||
|  | 					[], | ||||||
|  | 					json_encode([ | ||||||
|  | 						"operationName" => "PhotoSearchPaginationContainerQuery", | ||||||
|  | 						"variables" => $pagination, | ||||||
|  | 						"query" => | ||||||
|  | 							'query PhotoSearchPaginationContainerQuery(' . | ||||||
|  | 							(isset($pagination["cursor"]) ? '$cursor: String, ' : "") . | ||||||
|  | 							'$sort: PhotoSort, $search: String!, $filters: [PhotoSearchFilter!], $nlp: Boolean) {  ...PhotoSearchPaginationContainer_query_1vzAZD} fragment PhotoSearchPaginationContainer_query_1vzAZD on Query { photoSearch(sort: $sort, first: 100, ' . | ||||||
|  | 							(isset($pagination["cursor"]) ? 'after: $cursor, ' : "") . | ||||||
|  | 							'search: $search, filters: $filters, nlp: $nlp) { edges { node { id legacyId canonicalPath name description width height images(sizes: [33, 36]) { size url id } } } totalCount pageInfo { endCursor hasNextPage } }}' | ||||||
|  | 					]) | ||||||
|  | 				); | ||||||
|  | 		}catch(Exception $error){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Failed to fetch graphQL object"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		$json = json_decode($json, true); | ||||||
|  | 		 | ||||||
|  | 		if($json === null){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Failed to decode graphQL object"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		if(isset($json["errors"][0]["message"])){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("500px returned an API error: " . $json["errors"][0]["message"]); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		if(!isset($json["data"]["photoSearch"]["edges"])){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("No edges returned by API"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		$out = [ | ||||||
|  | 			"status" => "ok", | ||||||
|  | 			"npt" => null, | ||||||
|  | 			"image" => [] | ||||||
|  | 		]; | ||||||
|  | 		 | ||||||
|  | 		foreach($json["data"]["photoSearch"]["edges"] as $image){ | ||||||
|  | 			 | ||||||
|  | 			$image = $image["node"]; | ||||||
|  | 			$title = | ||||||
|  | 				trim( | ||||||
|  | 					$this->fuckhtml | ||||||
|  | 					->getTextContent( | ||||||
|  | 						$image["name"] | ||||||
|  | 					) . ": " . | ||||||
|  | 					$this->fuckhtml | ||||||
|  | 					->getTextContent( | ||||||
|  | 						$image["description"] | ||||||
|  | 					) | ||||||
|  | 					, " :" | ||||||
|  | 				); | ||||||
|  | 			 | ||||||
|  | 			$small = $this->image_ratio(600, $image["width"], $image["height"]); | ||||||
|  | 			$large = $this->image_ratio(2048, $image["width"], $image["height"]); | ||||||
|  | 			 | ||||||
|  | 			$out["image"][] = [ | ||||||
|  | 				"title" => $title, | ||||||
|  | 				"source" => [ | ||||||
|  | 					[ | ||||||
|  | 						"url" => $image["images"][1]["url"], | ||||||
|  | 						"width" => $large[0], | ||||||
|  | 						"height" => $large[1] | ||||||
|  | 					], | ||||||
|  | 					[ | ||||||
|  | 						"url" => $image["images"][0]["url"], | ||||||
|  | 						"width" => $small[0], | ||||||
|  | 						"height" => $small[1] | ||||||
|  | 					] | ||||||
|  | 				], | ||||||
|  | 				"url" => "https://500px.com" . $image["canonicalPath"] | ||||||
|  | 			]; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		// get NPT token | ||||||
|  | 		if($json["data"]["photoSearch"]["pageInfo"]["hasNextPage"] === true){ | ||||||
|  | 			 | ||||||
|  | 			$out["npt"] = | ||||||
|  | 				$this->backend->store( | ||||||
|  | 					json_encode([ | ||||||
|  | 						"cursor" => $json["data"]["photoSearch"]["pageInfo"]["endCursor"], | ||||||
|  | 						"search" => $search, | ||||||
|  | 						"sort" => $pagination["sort"], | ||||||
|  | 						"filters" => [], | ||||||
|  | 						"nlp" => false | ||||||
|  | 					]), | ||||||
|  | 					"images", | ||||||
|  | 					$proxy | ||||||
|  | 				); | ||||||
|  | 		} | ||||||
|  | 			 | ||||||
|  | 		return $out; | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	private function image_ratio($longest_edge, $width, $height){ | ||||||
|  | 		 | ||||||
|  | 		$ratio = [ | ||||||
|  | 			$longest_edge / $width, | ||||||
|  | 			$longest_edge / $height | ||||||
|  | 		]; | ||||||
|  | 		 | ||||||
|  | 		if($ratio[0] < $ratio[1]){ | ||||||
|  | 			 | ||||||
|  | 			$ratio = $ratio[0]; | ||||||
|  | 		}else{ | ||||||
|  | 			 | ||||||
|  | 			$ratio = $ratio[1]; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		return [ | ||||||
|  | 			floor($width * $ratio), | ||||||
|  | 			floor($height * $ratio) | ||||||
|  | 		]; | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @@ -136,7 +136,7 @@ class ftm{ | |||||||
| 				"source" => [ | 				"source" => [ | ||||||
| 					[ | 					[ | ||||||
| 						"url" => | 						"url" => | ||||||
| 							"https://findthatmeme.us-southeast-1.linodeobjects.com/" . | 							"https://s3.thehackerblog.com/findthatmeme/" . | ||||||
| 							$thumb, | 							$thumb, | ||||||
| 						"width" => null, | 						"width" => null, | ||||||
| 						"height" => null | 						"height" => null | ||||||
|   | |||||||
							
								
								
									
										3585
									
								
								scraper/google.php
									
									
									
									
									
								
							
							
						
						
									
										3585
									
								
								scraper/google.php
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -227,7 +227,7 @@ class marginalia{ | |||||||
| 				$json = | 				$json = | ||||||
| 					$this->get( | 					$this->get( | ||||||
| 						$this->backend->get_ip(), // no nextpage | 						$this->backend->get_ip(), // no nextpage | ||||||
| 						"https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search), | 						"https://api.marginalia-search.com/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search), | ||||||
| 						[ | 						[ | ||||||
| 							"count" => 20 | 							"count" => 20 | ||||||
| 						] | 						] | ||||||
| @@ -279,7 +279,7 @@ class marginalia{ | |||||||
| 				$html = | 				$html = | ||||||
| 					$this->get( | 					$this->get( | ||||||
| 						$proxy, | 						$proxy, | ||||||
| 						"https://search.marginalia.nu/search?" . $params | 						"https://old-search.marginalia.nu/search?" . $params | ||||||
| 					); | 					); | ||||||
| 			}catch(Exception $error){ | 			}catch(Exception $error){ | ||||||
| 				 | 				 | ||||||
| @@ -308,7 +308,7 @@ class marginalia{ | |||||||
| 				$html = | 				$html = | ||||||
| 					$this->get( | 					$this->get( | ||||||
| 						$proxy, | 						$proxy, | ||||||
| 						"https://search.marginalia.nu/search", | 						"https://old-search.marginalia.nu/search", | ||||||
| 						$params | 						$params | ||||||
| 					); | 					); | ||||||
| 			}catch(Exception $error){ | 			}catch(Exception $error){ | ||||||
|   | |||||||
| @@ -13,31 +13,104 @@ class pinterest{ | |||||||
| 		return []; | 		return []; | ||||||
| 	} | 	} | ||||||
| 	 | 	 | ||||||
| 	private function get($proxy, $url, $get = []){ | 	private function get($proxy, $url, $get = [], &$cookies, $header_data_post = null){ | ||||||
| 		 | 		 | ||||||
| 		$curlproc = curl_init(); | 		$curlproc = curl_init(); | ||||||
| 		 | 		 | ||||||
|  | 		if($header_data_post === null){ | ||||||
|  | 			 | ||||||
|  | 			// handling GET | ||||||
|  | 						 | ||||||
|  | 			// extract cookies | ||||||
|  | 			$cookies_tmp = []; | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){ | ||||||
|  | 				 | ||||||
|  | 				$length = strlen($header); | ||||||
|  | 				 | ||||||
|  | 				$header = explode(":", $header, 2); | ||||||
|  | 				 | ||||||
|  | 				if(trim(strtolower($header[0])) == "set-cookie"){ | ||||||
|  | 					 | ||||||
|  | 					$cookie_tmp = explode("=", trim($header[1]), 2); | ||||||
|  | 					 | ||||||
|  | 					$cookies_tmp[trim($cookie_tmp[0])] = | ||||||
|  | 						explode(";", $cookie_tmp[1], 2)[0]; | ||||||
|  | 				} | ||||||
|  | 				 | ||||||
|  | 				return $length; | ||||||
|  | 			}); | ||||||
|  | 			 | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_HTTPHEADER, | ||||||
|  | 				["User-Agent: " . config::USER_AGENT, | ||||||
|  | 				"Accept: application/json, text/javascript, */*, q=0.01", | ||||||
|  | 				"Accept-Language: en-US,en;q=0.5", | ||||||
|  | 				"Accept-Encoding: gzip", | ||||||
|  | 				"Referer: https://ca.pinterest.com/", | ||||||
|  | 				"X-Requested-With: XMLHttpRequest", | ||||||
|  | 				"X-APP-VERSION: 78f8764", | ||||||
|  | 				"X-Pinterest-AppState: active", | ||||||
|  | 				"X-Pinterest-Source-Url: /", | ||||||
|  | 				"X-Pinterest-PWS-Handler: www/index.js", | ||||||
|  | 				"screen-dpr: 1", | ||||||
|  | 				"is-preload-enabled: 1", | ||||||
|  | 				"DNT: 1", | ||||||
|  | 				"Sec-GPC: 1", | ||||||
|  | 				"Sec-Fetch-Dest: empty", | ||||||
|  | 				"Sec-Fetch-Mode: cors", | ||||||
|  | 				"Sec-Fetch-Site: same-origin", | ||||||
|  | 				"Connection: keep-alive", | ||||||
|  | 				"Alt-Used: ca.pinterest.com", | ||||||
|  | 				"Priority: u=0", | ||||||
|  | 				"TE: trailers"] | ||||||
|  | 			); | ||||||
|  | 			 | ||||||
| 			if($get !== []){ | 			if($get !== []){ | ||||||
| 				$get = http_build_query($get); | 				$get = http_build_query($get); | ||||||
| 				$url .= "?" . $get; | 				$url .= "?" . $get; | ||||||
| 			} | 			} | ||||||
|  | 		}else{ | ||||||
|  | 			 | ||||||
|  | 			// handling POST (pagination) | ||||||
|  | 			$get = http_build_query($get); | ||||||
|  | 			 | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_HTTPHEADER, | ||||||
|  | 				["User-Agent: " . config::USER_AGENT, | ||||||
|  | 				"Accept: application/json, text/javascript, */*, q=0.01", | ||||||
|  | 				"Accept-Language: en-US,en;q=0.5", | ||||||
|  | 				"Accept-Encoding: gzip", | ||||||
|  | 				"Content-Type: application/x-www-form-urlencoded", | ||||||
|  | 				"Content-Length: " . strlen($get), | ||||||
|  | 				"Referer: https://ca.pinterest.com/", | ||||||
|  | 				"X-Requested-With: XMLHttpRequest", | ||||||
|  | 				"X-APP-VERSION: 78f8764", | ||||||
|  | 				"X-CSRFToken: " . $cookies["csrf"], | ||||||
|  | 				"X-Pinterest-AppState: active", | ||||||
|  | 				"X-Pinterest-Source-Url: /search/pins/?rs=ac&len=2&q=" . urlencode($header_data_post) . "&eq=" . urlencode($header_data_post), | ||||||
|  | 				"X-Pinterest-PWS-Handler: www/search/[scope].js", | ||||||
|  | 				"screen-dpr: 1", | ||||||
|  | 				"is-preload-enabled: 1", | ||||||
|  | 				"Origin: https://ca.pinterest.com", | ||||||
|  | 				"DNT: 1", | ||||||
|  | 				"Sec-GPC: 1", | ||||||
|  | 				"Sec-Fetch-Dest: empty", | ||||||
|  | 				"Sec-Fetch-Mode: cors", | ||||||
|  | 				"Sec-Fetch-Site: same-origin", | ||||||
|  | 				"Connection: keep-alive", | ||||||
|  | 				"Alt-Used: ca.pinterest.com", | ||||||
|  | 				"Cookie: " . $cookies["cookie"], | ||||||
|  | 				"TE: trailers"] | ||||||
|  | 			); | ||||||
|  | 			 | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_POST, true); | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get); | ||||||
|  | 		} | ||||||
| 		 | 		 | ||||||
| 		curl_setopt($curlproc, CURLOPT_URL, $url); | 		curl_setopt($curlproc, CURLOPT_URL, $url); | ||||||
| 		 | 		 | ||||||
| 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding | 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding | ||||||
| 		curl_setopt($curlproc, CURLOPT_HTTPHEADER, | 		 | ||||||
| 			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", | 		// http2 bypass | ||||||
| 			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", | 		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); | ||||||
| 			"Accept-Language: en-US,en;q=0.5", |  | ||||||
| 			"Accept-Encoding: gzip", |  | ||||||
| 			"DNT: 1", |  | ||||||
| 			"Connection: keep-alive", |  | ||||||
| 			"Upgrade-Insecure-Requests: 1", |  | ||||||
| 			"Sec-Fetch-Dest: document", |  | ||||||
| 			"Sec-Fetch-Mode: navigate", |  | ||||||
| 			"Sec-Fetch-Site: none", |  | ||||||
| 			"Sec-Fetch-User: ?1"] |  | ||||||
| 		); |  | ||||||
| 		 | 		 | ||||||
| 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); | 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); | ||||||
| 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); | 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); | ||||||
| @@ -54,6 +127,26 @@ class pinterest{ | |||||||
| 			throw new Exception(curl_error($curlproc)); | 			throw new Exception(curl_error($curlproc)); | ||||||
| 		} | 		} | ||||||
| 		 | 		 | ||||||
|  | 		if($header_data_post === null){ | ||||||
|  | 			 | ||||||
|  | 			if(!isset($cookies_tmp["csrftoken"])){ | ||||||
|  | 				 | ||||||
|  | 				throw new Exception("Failed to grep CSRF token"); | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			$cookies = ""; | ||||||
|  | 			 | ||||||
|  | 			foreach($cookies_tmp as $cookie_name => $cookie_value){ | ||||||
|  | 				 | ||||||
|  | 				$cookies .= $cookie_name . "=" . $cookie_value . "; "; | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			$cookies = [ | ||||||
|  | 				"csrf" => $cookies_tmp["csrftoken"], | ||||||
|  | 				"cookie" => rtrim($cookies, " ;") | ||||||
|  | 			]; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
| 		curl_close($curlproc); | 		curl_close($curlproc); | ||||||
| 		return $data; | 		return $data; | ||||||
| 	} | 	} | ||||||
| @@ -62,17 +155,68 @@ class pinterest{ | |||||||
| 		 | 		 | ||||||
| 		if($get["npt"]){ | 		if($get["npt"]){ | ||||||
| 			 | 			 | ||||||
| 			// @TODO | 			[$data, $proxy] = | ||||||
| 			// post data for next page | 				$this->backend->get( | ||||||
| 			$data = [ | 					$get["npt"], "images" | ||||||
| 				"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", |  | ||||||
| 				"data" => |  | ||||||
| 					json_encode( |  | ||||||
| 						[ |  | ||||||
| 							// {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMWRSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVT
lFVm1sTlZGcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}} |  | ||||||
| 						] |  | ||||||
| 				); | 				); | ||||||
| 			]; | 			 | ||||||
|  | 			$data = json_decode($data, true); | ||||||
|  | 			 | ||||||
|  | 			$search = $data["q"]; | ||||||
|  | 			$cookies = $data["cookies"]; | ||||||
|  | 			 | ||||||
|  | 			try{ | ||||||
|  | 				$json = | ||||||
|  | 					$this->get( | ||||||
|  | 						$proxy, | ||||||
|  | 						"https://ca.pinterest.com/resource/BaseSearchResource/get/", | ||||||
|  | 						[ | ||||||
|  | 							"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", | ||||||
|  | 							"data" => json_encode( | ||||||
|  | 								[ | ||||||
|  | 									"options" => [ | ||||||
|  | 										"applied_unified_filters" => null, | ||||||
|  | 										"appliedProductFilters" => "---", | ||||||
|  | 										"article" => null, | ||||||
|  | 										"auto_correction_disabled" => false, | ||||||
|  | 										"corpus" => null, | ||||||
|  | 										"customized_rerank_type" => null, | ||||||
|  | 										"domains" => null, | ||||||
|  | 										"dynamicPageSizeExpGroup" => null, | ||||||
|  | 										"filters" => null, | ||||||
|  | 										"journey_depth" => null, | ||||||
|  | 										"page_size" => null, | ||||||
|  | 										"price_max" => null, | ||||||
|  | 										"price_min" => null, | ||||||
|  | 										"query_pin_sigs" => null, | ||||||
|  | 										"query" => $data["q"], | ||||||
|  | 										"redux_normalize_feed" => true, | ||||||
|  | 										"request_params" => null, | ||||||
|  | 										"rs" => "typed", | ||||||
|  | 										"scope" => "pins", | ||||||
|  | 										"selected_one_bar_modules" => null, | ||||||
|  | 										"source_id" => null, | ||||||
|  | 										"source_module_id" => null, | ||||||
|  | 										"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", | ||||||
|  | 										"top_pin_id" => null, | ||||||
|  | 										"top_pin_ids" => null, | ||||||
|  | 										"bookmarks" => [ | ||||||
|  | 											$data["bookmark"] | ||||||
|  | 										] | ||||||
|  | 									], | ||||||
|  | 									"context" => [] | ||||||
|  | 								], | ||||||
|  | 								JSON_UNESCAPED_SLASHES | ||||||
|  | 							) | ||||||
|  | 						], | ||||||
|  | 						$cookies, | ||||||
|  | 						$search | ||||||
|  | 					); | ||||||
|  | 				 | ||||||
|  | 			}catch(Exception $error){ | ||||||
|  | 				 | ||||||
|  | 				throw new Exception("Failed to fetch JSON"); | ||||||
|  | 			} | ||||||
| 			 | 			 | ||||||
| 		}else{ | 		}else{ | ||||||
| 			 | 			 | ||||||
| @@ -82,26 +226,44 @@ class pinterest{ | |||||||
| 				throw new Exception("Search term is empty!"); | 				throw new Exception("Search term is empty!"); | ||||||
| 			} | 			} | ||||||
| 						 | 						 | ||||||
|  | 			// https://ca.pinterest.com/resource/BaseSearchResource/get/?source_url=%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac&data=%7B%22options%22%3A%7B%22applied_unified_filters%22%3Anull%2C%22appliedProductFilters%22%3A%22---%22%2C%22article%22%3Anull%2C%22auto_correction_disabled%22%3Afalse%2C%22corpus%22%3Anull%2C%22customized_rerank_type%22%3Anull%2C%22domains%22%3Anull%2C%22dynamicPageSizeExpGroup%22%3Anull%2C%22filters%22%3Anull%2C%22journey_depth%22%3Anull%2C%22page_size%22%3Anull%2C%22price_max%22%3Anull%2C%22price_min%22%3Anull%2C%22query_pin_sigs%22%3Anull%2C%22query%22%3A%22higurashi%20when%20they%20cry%22%2C%22redux_normalize_feed%22%3Atrue%2C%22request_params%22%3Anull%2C%22rs%22%3A%22ac%22%2C%22scope%22%3A%22pins%22%2C%22selected_one_bar_modules%22%3Anull%2C%22source_id%22%3Anull%2C%22source_module_id%22%3Anull%2C%22source_url%22%3A%22%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac%22%2C%22top_pin_id%22%3Anull%2C%22top_pin_ids%22%3Anull%7D%2C%22context%22%3A%7B%7D%7D&_=1736116313987 | ||||||
|  | 			// source_url=%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac | ||||||
|  | 			// &data=%7B%22options%22%3A%7B%22applied_unified_filters%22%3Anull%2C%22appliedProductFilters%22%3A%22---%22%2C%22article%22%3Anull%2C%22auto_correction_disabled%22%3Afalse%2C%22corpus%22%3Anull%2C%22customized_rerank_type%22%3Anull%2C%22domains%22%3Anull%2C%22dynamicPageSizeExpGroup%22%3Anull%2C%22filters%22%3Anull%2C%22journey_depth%22%3Anull%2C%22page_size%22%3Anull%2C%22price_max%22%3Anull%2C%22price_min%22%3Anull%2C%22query_pin_sigs%22%3Anull%2C%22query%22%3A%22higurashi%20when%20they%20cry%22%2C%22redux_normalize_feed%22%3Atrue%2C%22request_params%22%3Anull%2C%22rs%22%3A%22ac%22%2C%22scope%22%3A%22pins%22%2C%22selected_one_bar_modules%22%3Anull%2C%22source_id%22%3Anull%2C%22source_module_id%22%3Anull%2C%22source_url%22%3A%22%2Fsearch%2Fpins%2F%3Feq%3Dhigurashi%26etslf%3D5966%26len%3D2%26q%3Dhigurashi%2520when%2520they%2520cry%26rs%3Dac%22%2C%22top_pin_id%22%3Anull%2C%22top_pin_ids%22%3Anull%7D%2C%22context%22%3A%7B%7D%7D | ||||||
|  | 			// &_=1736116313987 | ||||||
|  | 			 | ||||||
|  | 			$source_url = "/search/pins/?q=" . urlencode($search) . "&rs=" . urlencode($search); | ||||||
|  | 			 | ||||||
| 			$filter = [ | 			$filter = [ | ||||||
| 				"source_url" => "/search/pins/?q=" . urlencode($search), | 				"source_url" => $source_url, | ||||||
| 				"rs" => "typed", | 				"rs" => "typed", | ||||||
| 				"data" => | 				"data" => | ||||||
| 					json_encode( | 					json_encode( | ||||||
| 						[ | 						[ | ||||||
| 							"options" => [ | 							"options" => [ | ||||||
| 								"article" => null, | 								"applied_unified_filters" => null, | ||||||
| 								"applied_filters" => null, |  | ||||||
| 								"appliedProductFilters" => "---", | 								"appliedProductFilters" => "---", | ||||||
| 								"auto_correction_disabled" => false, | 								"article" => null, | ||||||
| 								"corpus" => null, | 								"corpus" => null, | ||||||
| 								"customized_rerank_type" => null, | 								"customized_rerank_type" => null, | ||||||
|  | 								"domains" => null, | ||||||
|  | 								"dynamicPageSizeExpGroup" => null, | ||||||
| 								"filters" => null, | 								"filters" => null, | ||||||
| 								"query" => $search, | 								"journey_depth" => null, | ||||||
|  | 								"page_size" => null, | ||||||
|  | 								"price_max" => null, | ||||||
|  | 								"price_min" => null, | ||||||
| 								"query_pin_sigs" => null, | 								"query_pin_sigs" => null, | ||||||
|  | 								"query" => $search, | ||||||
| 								"redux_normalize_feed" => true, | 								"redux_normalize_feed" => true, | ||||||
| 								"rs" => "typed", | 								"request_params" => null, | ||||||
|  | 								"rs" => "ac", | ||||||
| 								"scope" => "pins", // pins, boards, videos,  | 								"scope" => "pins", // pins, boards, videos,  | ||||||
| 								"source_id" => null | 								"selected_one_bar_modules" => null, | ||||||
|  | 								"source_id" => null, | ||||||
|  | 								"source_module_id" => null, | ||||||
|  | 								"source_url" => $source_url, | ||||||
|  | 								"top_pin_id" => null, | ||||||
|  | 								"top_pin_ids" => null | ||||||
| 							], | 							], | ||||||
| 							"context" => [] | 							"context" => [] | ||||||
| 						] | 						] | ||||||
| @@ -110,23 +272,25 @@ class pinterest{ | |||||||
| 			]; | 			]; | ||||||
| 			 | 			 | ||||||
| 			$proxy = $this->backend->get_ip(); | 			$proxy = $this->backend->get_ip(); | ||||||
| 		} | 			$cookies = []; | ||||||
| 			 | 			 | ||||||
| 			try{ | 			try{ | ||||||
| 				$json = | 				$json = | ||||||
| 				json_decode( |  | ||||||
| 					$this->get( | 					$this->get( | ||||||
| 						$proxy, | 						$proxy, | ||||||
| 						"https://www.pinterest.ca/resource/BaseSearchResource/get/", | 						"https://ca.pinterest.com/resource/BaseSearchResource/get/", | ||||||
| 						$filter | 						$filter, | ||||||
| 					), | 						$cookies, | ||||||
| 					true | 						null | ||||||
| 					); | 					); | ||||||
| 				 | 				 | ||||||
| 			}catch(Exception $error){ | 			}catch(Exception $error){ | ||||||
| 				 | 				 | ||||||
| 				throw new Exception("Failed to fetch JSON"); | 				throw new Exception("Failed to fetch JSON"); | ||||||
| 			} | 			} | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		$json = json_decode($json, true); | ||||||
| 		 | 		 | ||||||
| 		if($json === null){ | 		if($json === null){ | ||||||
| 			 | 			 | ||||||
| @@ -139,6 +303,60 @@ class pinterest{ | |||||||
| 			"image" => [] | 			"image" => [] | ||||||
| 		]; | 		]; | ||||||
| 		 | 		 | ||||||
|  | 		if( | ||||||
|  | 			!isset( | ||||||
|  | 				$json["resource_response"] | ||||||
|  | 				["status"] | ||||||
|  | 			) | ||||||
|  | 		){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Unknown API failure"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		if($json["resource_response"]["status"] != "success"){ | ||||||
|  | 			 | ||||||
|  | 			$status = "Got non-OK response: " . $json["resource_response"]["status"]; | ||||||
|  | 			 | ||||||
|  | 			if( | ||||||
|  | 				isset( | ||||||
|  | 					$json["resource_response"]["message"] | ||||||
|  | 				) | ||||||
|  | 			){ | ||||||
|  | 				 | ||||||
|  | 				$status .= " - " . $json["resource_response"]["message"]; | ||||||
|  | 			} | ||||||
|  | 			 | ||||||
|  | 			throw new Exception($status); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		if( | ||||||
|  | 			isset( | ||||||
|  | 				$json["resource_response"]["sensitivity"] | ||||||
|  | 				["notices"][0]["description"]["text"] | ||||||
|  | 			) | ||||||
|  | 		){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception( | ||||||
|  | 				"Pinterest returned a notice: " . | ||||||
|  | 				$json["resource_response"]["sensitivity"]["notices"][0]["description"]["text"] | ||||||
|  | 			); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		// get NPT | ||||||
|  | 		if(isset($json["resource_response"]["bookmark"])){ | ||||||
|  | 			 | ||||||
|  | 			$out["npt"] = | ||||||
|  | 				$this->backend->store( | ||||||
|  | 					json_encode([ | ||||||
|  | 						"q" => $search, | ||||||
|  | 						"bookmark" => $json["resource_response"]["bookmark"], | ||||||
|  | 						"cookies" => $cookies | ||||||
|  | 					]), | ||||||
|  | 					"images", | ||||||
|  | 					$proxy | ||||||
|  | 				); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
| 		foreach( | 		foreach( | ||||||
| 			$json | 			$json | ||||||
| 			["resource_response"] | 			["resource_response"] | ||||||
| @@ -150,6 +368,7 @@ class pinterest{ | |||||||
| 			switch($item["type"]){ | 			switch($item["type"]){ | ||||||
| 				 | 				 | ||||||
| 				case "pin": | 				case "pin": | ||||||
|  | 				case "board": | ||||||
| 					 | 					 | ||||||
| 					/* | 					/* | ||||||
| 						Handle image object | 						Handle image object | ||||||
| @@ -206,42 +425,15 @@ class pinterest{ | |||||||
| 								"height" => (int)$thumb["height"] | 								"height" => (int)$thumb["height"] | ||||||
| 							] | 							] | ||||||
| 						], | 						], | ||||||
| 						"url" => "https://www.pinterest.com/pin/" . $item["id"] |  | ||||||
| 					]; |  | ||||||
| 					break; |  | ||||||
| 				 |  | ||||||
| 				case "board": |  | ||||||
| 					if(isset($item["cover_pin"]["image_url"])){ |  | ||||||
| 						 |  | ||||||
| 						$image = [ |  | ||||||
| 							"url" => $item["cover_pin"]["image_url"], |  | ||||||
| 							"width" => (int)$item["cover_pin"]["size"][0], |  | ||||||
| 							"height" => (int)$item["cover_pin"]["size"][1] |  | ||||||
| 						]; |  | ||||||
| 					}elseif(isset($item["image_cover_url_hd"])){ |  | ||||||
| 						/* |  | ||||||
| 						$image = [ |  | ||||||
| 						"url" => | 						"url" => | ||||||
| 							"width" => null, | 							$item["link"] === null ? | ||||||
| 							"height" => null | 							"https://ca.pinterest.com/pin/" . $item["id"] : | ||||||
| 						];*/ | 							$item["link"] | ||||||
| 					} | 					]; | ||||||
| 					break; | 					break; | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		 | 		 | ||||||
| 		return $out; | 		return $out; | ||||||
| 	} | 	} | ||||||
| 	 |  | ||||||
| 	private function getfullresimage($image, $has_og){ |  | ||||||
| 		 |  | ||||||
| 		$has_og = $has_og ? "1200x" : "originals"; |  | ||||||
| 		 |  | ||||||
| 		return |  | ||||||
| 			preg_replace( |  | ||||||
| 				'/https:\/\/i\.pinimg\.com\/[^\/]+\//', |  | ||||||
| 				"https://i.pinimg.com/" . $has_og . "/", |  | ||||||
| 				$image |  | ||||||
| 			); |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										257
									
								
								scraper/vsco.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										257
									
								
								scraper/vsco.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,257 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
/**
 * VSCO image scraper.
 *
 * Scrapes a bearer token out of https://vsco.co/feed, then queries
 * https://vsco.co/api/2.0/search/images with it. Pagination state (query,
 * page, size and the bearer token) is handed to the backend helper between
 * requests as a JSON-encoded "next page token".
 */
class vsco{
	
	/**
	 * Backend helper created in the constructor. It is used here for proxy
	 * selection (get_ip / assign_proxy) and next-page-token storage
	 * (store / get).
	 *
	 * Declared explicitly because creating properties dynamically is
	 * deprecated as of PHP 8.2; kept public so visibility matches what the
	 * dynamic property had.
	 */
	public $backend;
	
	public function __construct(){
		
		// include_once: avoids a fatal "cannot redeclare class" if another
		// scraper already pulled the backend in during this request
		include_once "lib/backend.php";
		$this->backend = new backend("vsco");
	}
	
	/**
	 * Returns the filters supported for a page type.
	 *
	 * @param string $page Page type (unused: no filters are exposed)
	 * @return array Always an empty array
	 */
	public function getfilters($page){
		
		return [];
	}
	
	/**
	 * Performs an HTTP GET request.
	 *
	 * Without a bearer token the request is sent with browser-navigation
	 * headers (used for the feed page). With a bearer token it is sent with
	 * XHR-style headers plus the authorization header (used for the API).
	 *
	 * @param mixed $proxy Proxy descriptor, passed as-is to backend->assign_proxy()
	 * @param string $url Target URL, without query string
	 * @param array $get Query parameters appended to $url
	 * @param string|null $bearer Bearer token, or null for a plain page fetch
	 * @return string|bool Whatever curl_exec() returned
	 * @throws Exception With the cURL error message when the transfer fails
	 */
	private function get($proxy, $url, $get = [], $bearer = null){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		if($bearer === null){
			
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"]
			);
		}else{
			
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: */*",
				"Accept-Language: en-US",
				"Accept-Encoding: gzip",
				// the referer mimics the search page; tolerate a missing
				// "query" parameter instead of raising a warning
				"Referer: https://vsco.co/search/images/" . urlencode($get["query"] ?? ""),
				"authorization: Bearer " . $bearer,
				"content-type: application/json",
				"x-client-build: 1",
				"x-client-platform: web",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"Priority: u=0",
				"TE: trailers"]
			);
		}
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle: throwing
			// straight away would skip curl_close() and leak the handle
			$error = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($error);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Runs an image search, or continues one from a next page token.
	 *
	 * @param array $get Request parameters: "s" is the search term, "npt" a
	 *                   next page token previously returned by this method
	 * @return array ["status" => "ok", "npt" => token|null, "image" => [...]]
	 * @throws Exception On empty search term, network failure, missing
	 *                   bearer token, or an undecodable/unexpected response
	 */
	public function image($get){
		
		if(!empty($get["npt"])){
			
			[$data, $proxy] =
				$this->backend->get(
					$get["npt"], "images"
				);
			
			$data = json_decode($data, true);
			
			// a corrupted token would otherwise be indexed as null below
			if(
				!is_array($data) ||
				!isset($data["pagination"], $data["bearer"]) ||
				!is_array($data["pagination"])
			){
				
				throw new Exception("Failed to decode next page token");
			}
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			
			// get bearer token
			try{
				
				$html =
					$this->get(
						$proxy,
						"https://vsco.co/feed"
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch feed page");
			}
			
			// [A-z] also spans the punctuation between "Z" and "a"
			// ([ \ ] ^ _ `); spell the class out and keep only the
			// underscore from that range
			preg_match(
				'/"tkn":"([A-Za-z0-9_]+)"/',
				$html,
				$bearer
			);
			
			if(!isset($bearer[1])){
				
				throw new Exception("Failed to grep bearer token");
			}
			
			$data = [
				"pagination" => [
					"query" => $search,
					"page" => 0,
					"size" => 100
				],
				"bearer" => $bearer[1]
			];
		}
		
		try{
			
			$json =
				$this->get(
					$proxy,
					"https://vsco.co/api/2.0/search/images",
					$data["pagination"],
					$data["bearer"]
				);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON");
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if(
			!isset($json["results"]) ||
			!is_array($json["results"])
		){
			
			throw new Exception("Failed to access results object");
		}
		
		foreach($json["results"] as $image){
			
			// skip entries we cannot build a source + page URL for
			if(
				!isset(
					$image["responsive_url"],
					$image["grid"]["domain"],
					$image["imageId"]
				)
			){
				
				continue;
			}
			
			$thumbnail = $this->thumbnail_url($image["responsive_url"]);
			
			if($thumbnail === null){
				
				continue;
			}
			
			$width = (int)($image["dimensions"]["width"] ?? 0);
			$height = (int)($image["dimensions"]["height"] ?? 0);
			$size = $this->image_ratio($width, $height);
			
			$out["image"][] = [
				"title" => $image["description"] ?? null,
				"source" => [
					[
						"url" => "https://" . $image["responsive_url"],
						"width" => $width,
						"height" => $height
					],
					[
						"url" => $thumbnail,
						"width" => $size[0],
						"height" => $size[1]
					]
				],
				"url" => "https://" . $image["grid"]["domain"] . "/media/" . $image["imageId"]
			];
		}
		
		// get NPT
		// derive the page count from the page size actually requested
		// rather than repeating the literal 100
		$page_size = max(1, (int)($data["pagination"]["size"] ?? 100));
		$max_page = ceil((int)($json["total"] ?? 0) / $page_size);
		$data["pagination"]["page"] = (int)($data["pagination"]["page"] ?? 0) + 1;
		
		if($max_page > $data["pagination"]["page"]){
			
			$out["npt"] =
				$this->backend->store(
					json_encode($data),
					"images",
					$proxy
				);
		}
		
		return $out;
	}
	
	/**
	 * Builds the 480x360 resizer URL for a scheme-less image URL.
	 *
	 * Everything after the host is kept. Unless that remainder starts with
	 * "/1/", its first path segment is dropped before it is appended to the
	 * img.vsco.co resizer prefix.
	 *
	 * @param string $responsive_url Image URL without scheme ("host/path")
	 * @return string|null Thumbnail URL, or null when no host can be parsed
	 */
	private function thumbnail_url($responsive_url){
		
		$image_domain = parse_url("https://" . $responsive_url, PHP_URL_HOST);
		
		// explode() throws on an empty separator, so bail out early
		if(
			!is_string($image_domain) ||
			$image_domain === ""
		){
			
			return null;
		}
		
		$path = explode($image_domain, $responsive_url, 2)[1] ?? "";
		
		if(substr($path, 0, 3) != "/1/"){
			
			$path =
				preg_replace(
					'/^\/[^\/]+/',
					"",
					$path
				);
		}
		
		return "https://img.vsco.co/cdn-cgi/image/width=480,height=360" . $path;
	}
	
	/**
	 * Scales dimensions to fit a 480x360 box, preserving aspect ratio.
	 *
	 * The smaller of the two scale factors is applied to both sides. No
	 * clamping is done, so images smaller than the box are scaled up.
	 *
	 * @param int $width Original width in pixels
	 * @param int $height Original height in pixels
	 * @return int[] [width, height]; the 480x360 box itself when either
	 *               input is not positive (unknown size)
	 */
	private function image_ratio($width, $height){
		
		// unknown/invalid dimensions would divide by zero below
		if(
			$width <= 0 ||
			$height <= 0
		){
			
			return [480, 360];
		}
		
		$ratio =
			min(
				480 / $width,
				360 / $height
			);
		
		// cast so the values are ints like the full-size source entry
		return [
			(int)floor($width * $ratio),
			(int)floor($height * $ratio)
		];
	}
}
| @@ -1209,15 +1209,16 @@ class yt{ | |||||||
| 				 | 				 | ||||||
| 				$reel = | 				$reel = | ||||||
| 					$reel | 					$reel | ||||||
| 					->reelItemRenderer; | 					->shortsLockupViewModel; | ||||||
| 				 | 				 | ||||||
| 				array_push( | 				array_push( | ||||||
| 					$this->out["reel"], | 					$this->out["reel"], | ||||||
| 					[ | 					[ | ||||||
| 						"title" => | 						"title" => | ||||||
| 							$reel | 							$reel | ||||||
| 							->headline | 							->overlayMetadata | ||||||
| 							->simpleText, | 							->primaryText | ||||||
|  | 							->content, | ||||||
| 						"description" => null, | 						"description" => null, | ||||||
| 						"author" => [ | 						"author" => [ | ||||||
| 							"name" => null, | 							"name" => null, | ||||||
| @@ -1225,30 +1226,22 @@ class yt{ | |||||||
| 							"avatar" => null | 							"avatar" => null | ||||||
| 						], | 						], | ||||||
| 						"date" => null, | 						"date" => null, | ||||||
| 						"duration" => | 						"duration" => null, | ||||||
| 							$this->textualtime2int( | 						"views" => null, | ||||||
| 								$reel |  | ||||||
| 								->accessibility |  | ||||||
| 								->accessibilityData |  | ||||||
| 								->label |  | ||||||
| 							), |  | ||||||
| 						"views" => |  | ||||||
| 							$this->truncatedcount2int( |  | ||||||
| 								$reel |  | ||||||
| 								->viewCountText |  | ||||||
| 								->simpleText |  | ||||||
| 							), |  | ||||||
| 						"thumb" => [ | 						"thumb" => [ | ||||||
| 							"url" => | 							"url" => | ||||||
| 								$reel | 								$reel | ||||||
| 								->thumbnail | 								->thumbnail | ||||||
| 								->thumbnails[0] | 								->sources[0] | ||||||
| 								->url, | 								->url, | ||||||
| 							"ratio" => "9:16" | 							"ratio" => "9:16" | ||||||
| 						], | 						], | ||||||
| 						"url" => | 						"url" => | ||||||
| 							"https://www.youtube.com/watch?v=" . | 							"https://www.youtube.com/watch?v=" . | ||||||
| 							$reel | 							$reel | ||||||
|  | 							->onTap | ||||||
|  | 							->innertubeCommand | ||||||
|  | 							->reelWatchEndpoint | ||||||
| 							->videoId | 							->videoId | ||||||
| 					] | 					] | ||||||
| 				); | 				); | ||||||
|   | |||||||
							
								
								
									
										12
									
								
								settings.php
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								settings.php
									
									
									
									
									
								
							| @@ -227,10 +227,18 @@ $settings = [ | |||||||
| 						"value" => "solofield", | 						"value" => "solofield", | ||||||
| 						"text" => "Solofield" | 						"text" => "Solofield" | ||||||
| 					], | 					], | ||||||
| 					/*[ | 					[ | ||||||
| 						"value" => "pinterest", | 						"value" => "pinterest", | ||||||
| 						"text" => "Pinterest" | 						"text" => "Pinterest" | ||||||
| 					],*/ | 					], | ||||||
|  | 					[ | ||||||
|  | 						"value" => "fivehpx", | ||||||
|  | 						"text" => "500px" | ||||||
|  | 					], | ||||||
|  | 					[ | ||||||
|  | 						"value" => "vsco", | ||||||
|  | 						"text" => "VSCO" | ||||||
|  | 					], | ||||||
| 					[ | 					[ | ||||||
| 						"value" => "imgur", | 						"value" => "imgur", | ||||||
| 						"text" => "Imgur" | 						"text" => "Imgur" | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ | |||||||
|  |  | ||||||
| body{ | body{ | ||||||
| 	padding:15px 4% 40px; | 	padding:15px 4% 40px; | ||||||
|  | 	margin:unset; | ||||||
| } | } | ||||||
|  |  | ||||||
| h1,h2,h3,h4,h5,h6{ | h1,h2,h3,h4,h5,h6{ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user