forked from lolcat/4get
		
	added faceberg videos
This commit is contained in:
		
							
								
								
									
										28
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								README.md
									
									
									
									
									
								
							| @@ -7,7 +7,35 @@ https://4get.ca/about | ||||
| ## Try it out | ||||
| https://4get.ca | ||||
|  | ||||
| ## Supported websites | ||||
| 1. Web | ||||
| 	- DuckDuckGo | ||||
| 	- Brave | ||||
| 	- Mojeek | ||||
| 	- Marginalia | ||||
| 	- wiby | ||||
|  | ||||
| 2. Images | ||||
| 	- DuckDuckGo | ||||
| 	- Yandex | ||||
| 	- Brave | ||||
|  | ||||
| 3. Videos | ||||
| 	- YouTube | ||||
| 	- Facebook videos | ||||
| 	- DuckDuckGo | ||||
| 	- Brave | ||||
|  | ||||
| 4. News | ||||
| 	- DuckDuckGo | ||||
| 	- Brave | ||||
| 	- Mojeek | ||||
|  | ||||
| More scrapers are coming soon. I currently want to add Google, Hackernews and Qwant, and find a way to scrape Yandex web without those fucking captchas. Shopping, music and files tabs are also on my to-do list. | ||||
|  | ||||
| # Setup | ||||
| This section is still to-do. You will need to figure shit out for some of the apache2 stuff. Everything else should be OK. | ||||
|  | ||||
| Login as root. | ||||
|  | ||||
| ```sh | ||||
|   | ||||
							
								
								
									
										11
									
								
								about.php
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								about.php
									
									
									
									
									
								
							| @@ -26,7 +26,9 @@ $left = | ||||
| 	To set this as your default search engine on Firefox, right click the URL bar and select <div class="code-inline">Add "4get"</div>. Then, visit <a href="about:preferences#search" target="_BLANK" class="link">about:preferences#search</a> and select <div class="code-inline">4get</div> in the dropdown menu. | ||||
| 	 | ||||
| 	<a href="#chrome"><h2 id="chrome">On Chromium and Blink based browsers</h2></a> | ||||
| 	Right click the URL bar and click <div class="code-inline">Manage search engines and site search</div>, or visit <a href="chrome://settings/searchEngines" target="_BLANK" class="link">chrome://settings/searchEngines</a>. Then, create a new entry under <div class="code-inline">Search engines</div> and fill in the following details: | ||||
| 	Click the 3 superpositioned dots at the top right of the screen and click on <div class="code-inline">Settings</div>, then search for <div class="code-inline">default search engine</div>, or visit <a href="chrome://settings/searchEngines">chrome://settings/searchEngines</a>.<br><br> | ||||
| 	 | ||||
| 	Once you\'re there, click the pencil on the last entry under "Search engines" (it\'s probably DuckDuckGo). Once you do that, a popup will appear. Populate it with the following information:  | ||||
| 	 | ||||
| 	<table> | ||||
| 		<tr> | ||||
| @@ -39,19 +41,16 @@ $left = | ||||
| 		</tr> | ||||
| 		<tr> | ||||
| 			<td>Shortcut</td> | ||||
| 			<td>4get.ca</td> | ||||
| 			<td>4get</td> | ||||
| 		</tr> | ||||
| 		<tr> | ||||
| 			<td>URL with %s in place of query</td> | ||||
| 			<td>https://4get.ca/web?q=%s</td> | ||||
| 			<td>https://4get.ca/web?s=%s</td> | ||||
| 		</tr> | ||||
| 	</table> | ||||
| 	 | ||||
| 	Once that\'s done, click <div class="code-inline">Save</div>. Then, on the right handside of the newly created entry, open the dropdown menu and select <div class="code-inline">Make default</div>. | ||||
| 	 | ||||
| 	<a href="#other-browsers"><h2 id="other-browsers">Other browsers</h2></a> | ||||
| 	Get a real browser. | ||||
| 	 | ||||
| 	<h1>Frequently asked questions</h1> | ||||
| 	<a href="#what-is-this"><h2 id="what-is-this">What is this?</h2></a> | ||||
| 	This is a metasearch engine that gets results from other engines, and strips away all of the tracking parameters and Microsoft/globohomo bullshit they add. Most of the other alternatives to Google jack themselves off about being ""privacy respecting"" or whatever the fuck but it always turns out to be a total lie, and I just got fed up with their shit honestly. Alternatives like Searx or YaCy all fucking sucks so I made my own thing. | ||||
|   | ||||
| @@ -169,7 +169,7 @@ class frontend{ | ||||
| 		} | ||||
| 		 | ||||
| 		$payload .= | ||||
| 			htmlspecialchars($site["title"]) . | ||||
| 			$this->highlighttext($keywords, $site["title"]) . | ||||
| 		'</div>'; | ||||
| 		 | ||||
| 		if($greentext !== null){ | ||||
| @@ -903,6 +903,7 @@ class frontend{ | ||||
| 					"display" => "Scraper", | ||||
| 					"option" => [ | ||||
| 						"yt" => "YouTube", | ||||
| 						"fb" => "Facebook videos", | ||||
| 						"ddg" => "DuckDuckGo", | ||||
| 						"brave" => "Brave"//, | ||||
| 						//"google" => "Google" | ||||
| @@ -972,6 +973,11 @@ class frontend{ | ||||
| 				$lib = new google(); | ||||
| 				break; | ||||
| 			 | ||||
| 			case "fb": | ||||
| 				include "scraper/facebook.php"; | ||||
| 				$lib = new facebook(); | ||||
| 				break; | ||||
| 			 | ||||
| 			case "mojeek": | ||||
| 				include "scraper/mojeek.php"; | ||||
| 				$lib = new mojeek(); | ||||
| @@ -1269,6 +1275,14 @@ class frontend{ | ||||
| 				continue; | ||||
| 			} | ||||
| 			 | ||||
| 			if( | ||||
| 				$key == "older" || | ||||
| 				$key == "newer" | ||||
| 			){ | ||||
| 				 | ||||
| 				$value = date("Y-m-d", (int)$value); | ||||
| 			} | ||||
| 			 | ||||
| 			$out[$key] = $value; | ||||
| 		} | ||||
| 		 | ||||
|   | ||||
| @@ -1,12 +1,4 @@ | ||||
| <?php | ||||
| /* | ||||
| $brave = new brave(); | ||||
|  | ||||
| $handle = fopen("captcha.html", "r"); | ||||
| $html = fread($handle, filesize("captcha.html")); | ||||
| fclose($handle); | ||||
|  | ||||
| $brave->bypasscaptcha($html, "yes", "ca");*/ | ||||
|  | ||||
| class brave{ | ||||
| 	 | ||||
| @@ -154,6 +146,11 @@ class brave{ | ||||
| 			case "no": $nsfw = "strict"; break; | ||||
| 		} | ||||
| 		 | ||||
| 		if($country == "any"){ | ||||
| 			 | ||||
| 			$country = "all"; | ||||
| 		} | ||||
| 		 | ||||
| 		$headers = [ | ||||
| 			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", | ||||
| 			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", | ||||
| @@ -169,11 +166,6 @@ class brave{ | ||||
| 			"Sec-Fetch-User: ?1" | ||||
| 		]; | ||||
| 		 | ||||
| 		if($country == "any"){ | ||||
| 			 | ||||
| 			$country = "all"; | ||||
| 		} | ||||
| 		 | ||||
| 		$curlproc = curl_init(); | ||||
| 		 | ||||
| 		if($get !== []){ | ||||
| @@ -1990,6 +1982,8 @@ class brave{ | ||||
| 			as $result | ||||
| 		){ | ||||
| 			 | ||||
| 			print_r($result); | ||||
| 			 | ||||
| 			$out["image"][] = [ | ||||
| 				"title" => $result["title"], | ||||
| 				"source" => [ | ||||
|   | ||||
							
								
								
									
										809
									
								
								scraper/facebook.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										809
									
								
								scraper/facebook.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,809 @@ | ||||
| <?php | ||||
|  | ||||
/**
 * Scraper for Facebook Watch video search.
 *
 * The first page is scraped from the HTML of /watch/search/ (the results are
 * embedded as JSON inside the page). Following pages are fetched by POSTing a
 * form to /api/graphql/; the form payload is built from tokens found in the
 * first page and persisted through the nextpage helper so it can be replayed
 * when the caller comes back with an "npt" token.
 */
class facebook{
	
	const get = 0;
	const post = 1;
	
	/**
	 * @var object Instance of the nextpage class from lib/nextpage.php.
	 *             Only its get($token, $page) and store($data, $page)
	 *             methods are used here.
	 */
	public $nextpage;
	
	/** @var array|null Result set being assembled by video() */
	public $out;
	
	public function __construct(){
		
		// include_once: a second instantiation (or another scraper that
		// already loaded the helper) must not redeclare the class
		include_once "lib/nextpage.php";
		$this->nextpage = new nextpage("fb");
	}
	
	/**
	 * Describe the filters this scraper supports.
	 *
	 * @param string $page Requested page type (not used by this scraper)
	 * @return array Filter definitions keyed by GET parameter name
	 */
	public function getfilters($page){
		
		return [
			"sort" => [
				"display" => "Sort by",
				"option" => [
					"relevance" => "Relevance",
					"most_recent" => "Most recent"
				]
			],
			"newer" => [
				"display" => "Newer than",
				"option" => "_DATE"
			],
			"older" => [
				"display" => "Older than",
				"option" => "_DATE"
			],
			"live" => [
				"display" => "Livestream",
				"option" => [
					"no" => "No",
					"yes" => "Yes"
				]
			]
		];
	}
	
	/**
	 * Perform an HTTP request against Facebook.
	 *
	 * @param string $url     Endpoint to request
	 * @param array  $get     Parameters; sent as query string for self::get,
	 *                        as urlencoded form body otherwise
	 * @param int    $reqtype self::get or self::post
	 * @return string Response body
	 * @throws Exception When curl reports an error
	 */
	private function get($url, $get = [], $reqtype = self::get){
		
		$curlproc = curl_init();
		$query = http_build_query($get);
		
		if($reqtype === self::get){
			
			// headers are always defined, even when there are no parameters
			$headers = [
				"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: none",
				"Sec-Fetch-User: ?1"
			];
			
			if($get !== []){
				
				$url .= "?" . $query;
			}
		}else{
			
			curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
			
			$headers = [
				"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
				"Accept: */*",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip, deflate, br",
				"Content-Type: application/x-www-form-urlencoded",
				"X-FB-Friendly-Name: SearchCometResultsPaginatedResultsQuery",
				//"X-FB-LSD: AVptQC4a16c",
				//"X-ASBD-ID: 129477",
				"Content-Length: " . strlen($query),
				"Origin: https://www.facebook.com",
				"DNT: 1",
				"Connection: keep-alive",
				"Referer: https://www.facebook.com/watch/",
				"Cookie: datr=__GMZCgwVF5BbyvAtfJojQwg; oo=v1%7C3%3A1691641171; wd=955x995",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"TE: trailers"
			];
			
			curl_setopt($curlproc, CURLOPT_POST, true);
			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $query);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle
			$error = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($error);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Search Facebook Watch.
	 *
	 * @param array $get Request parameters. Reads "s" (search terms), "npt"
	 *                   (next page token, falsy for the first page), "sort",
	 *                   "live", "older" and "newer" (timestamps, or false
	 *                   when unset).
	 * @return array Result set with keys status, npt, video, author,
	 *               livestream, playlist and reel
	 * @throws Exception When the page or its embedded JSON cannot be parsed
	 */
	public function video($get){
		
		$search = $get["s"];
		$npt = $get["npt"];
		
		$this->out = [
			"status" => "ok",
			"npt" => null,
			"video" => [],
			"author" => [],
			"livestream" => [],
			"playlist" => [],
			"reel" => []
		];
		
		if($npt){
			
			// replay the graphql payload stored by a previous request
			$nextpage =
				json_decode(
					$this->nextpage->get(
						$npt,
						"videos"
					),
					true
				);
			
			if(!is_array($nextpage)){
				
				throw new Exception("Failed to decode next page token data");
			}
			
			$this->video_nextpage($nextpage);
			
			return $this->out;
		}
		
		$filter = $this->build_filters($get);
		
		$req = [
			"q" => $search
		];
		
		if(count($filter) !== 0){
			
			$req["filters"] = base64_encode(json_encode($filter));
		}
		
		$html =
			$this->get(
				"https://www.facebook.com/watch/search/",
				$req
			);
		
		preg_match_all(
			'/({"__bbox":.*,"sequence_number":0}})\]\]/',
			$html,
			$matches
		);
		
		// the second occurrence is the one that carries the search results
		if(!isset($matches[1][1])){
			
			throw new Exception("Could not grep JSON body");
		}
		
		$json = json_decode($matches[1][1], true);
		
		if(!is_array($json)){
			
			throw new Exception("Could not decode JSON body");
		}
		
		if(
			!isset(
				$json
				["__bbox"]
				["result"]
				["data"]
				["serpResponse"]
				["results"]
			)
		){
			
			// nothing to parse: hand back the empty result set
			return $this->out;
		}
		
		$results =
			$json
			["__bbox"]
			["result"]
			["data"]
			["serpResponse"]
			["results"];
		
		if(
			isset($results["edges"]) &&
			is_array($results["edges"])
		){
			
			foreach($results["edges"] as $result){
				
				$this->parse_edge($result);
			}
		}
		
		// get nextpage data
		if(
			isset($results["page_info"]["has_next_page"]) &&
			$results["page_info"]["has_next_page"] == 1
		){
			
			$cursor =
				isset($results["page_info"]["end_cursor"]) ?
				$results["page_info"]["end_cursor"] :
				null;
			
			// this immediately fetches the following page and stores the
			// token for the one after that
			$this->video_nextpage(
				$this->build_nextpage_request($search, $filter, $cursor, $html)
			);
		}
		
		return $this->out;
	}
	
	/**
	 * Build the filter map sent to Facebook. Each value is itself a JSON
	 * string, for example:
	 *   "videos_sort_by:0" => {"name":"videos_sort_by","args":"Most Recent"}
	 *   "videos_live:0"    => {"name":"videos_live","args":""}
	 *   "rp_creation_time:0" => {"name":"creation_time","args":"{...dates...}"}
	 *
	 * @param array $get Request parameters (sort, live, older, newer)
	 * @return array Filter map, empty when no filter is active
	 */
	private function build_filters($get){
		
		$filter = [];
		$older = $get["older"];
		$newer = $get["newer"];
		
		if(
			$older !== false ||
			$newer !== false
		){
			
			// an open-ended range defaults to "epoch .. now"
			$older = $older === false ? time() : (int)$older;
			$newer = $newer === false ? 0 : (int)$newer;
			
			$filter["rp_creation_time:0"] =
				json_encode(
					[
						"name" => "creation_time",
						"args" =>
							json_encode(
								[
									"start_year" => date("Y", $newer),
									"start_month" => date("Y-m", $newer),
									"end_year" => date("Y", $older),
									"end_month" => date("Y-m", $older),
									"start_day" => date("Y-m-d", $newer),
									"end_day" => date("Y-m-d", $older)
								]
							)
					]
				);
		}
		
		if($get["sort"] != "relevance"){
			
			$filter["videos_sort_by:0"] =
				json_encode(
					[
						"name" => "videos_sort_by",
						"args" => "Most Recent"
					]
				);
		}
		
		if($get["live"] != "no"){
			
			$filter["videos_live:0"] =
				json_encode(
					[
						"name" => "videos_live",
						"args" => ""
					]
				);
		}
		
		return $filter;
	}
	
	/**
	 * Build the form payload for [POST] https://www.facebook.com/api/graphql/
	 * (FORM data, not JSON!) from tokens embedded in the first page.
	 *
	 * The key order of the returned array is the order of the form fields;
	 * keys left at null are dropped by http_build_query().
	 *
	 * @param string      $search Search terms
	 * @param array       $filter Filter map from build_filters()
	 * @param string|null $cursor end_cursor of the page just parsed
	 * @param string      $html   Raw HTML of the first page
	 * @return array Payload; "variables" is already JSON encoded
	 * @throws Exception When the required tokens cannot be found in $html
	 */
	private function build_nextpage_request($search, $filter, $cursor, $html){
		
		if(
			preg_match(
				'/handleWithCustomApplyEach\(ScheduledApplyEach,({.*})\);}\);}\);<\/script>/',
				$html,
				$nextpagedata
			) !== 1
		){
			
			throw new Exception("Could not grep the next page data");
		}
		
		$nextpagedata = json_decode($nextpagedata[1], true);
		
		if(!is_array($nextpagedata)){
			
			throw new Exception("Could not decode the next page data");
		}
		
		$nextpage = [
			"av" => "0",
			"__user" => null,
			"__a" => null,
			"__req" => "2",
			"__hs" => null,
			"dpr" => "1",
			"__ccg" => null,
			"__rev" => null,
			// another client side token
			"__s" => $this->randomstring(6) . ":" . $this->randomstring(6) . ":" . $this->randomstring(6),
			"__hsi" => null,
			// tracking fingerprint (probably generated using webgl)
			"__dyn" => "7xeUmwlE7ibwKBWo2vwAxu13w8CewSwMwNw9G2S0im3y4o0B-q1ew65xO2O1Vw8G1Qw5Mx61vw9m1YwBgao6C0Mo5W3S7Udo5q4U2zxe2Gew9O222SUbEaU2eU5O0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w",
			"__csr" => $this->randomstring(null),
			"__comet_req" => null,
			"lsd" => null,
			"jazoest" => null,
			"__spin_r" => null,
			"__spin_b" => null,
			"__spin_t" => null,
			"fb_api_caller_class" => "RelayModern",
			"fb_api_req_friendly_name" => "SearchCometResultsPaginatedResultsQuery",
			"variables" => [ // this is json
				"UFI2CommentsProvider_commentsKey" => "SearchCometResultsInitialResultsQuery",
				"allow_streaming" => false,
				"args" => [
					"callsite" => "comet:watch_search",
					"config" => [
						"exact_match" => false,
						"high_confidence_config" => null,
						"intercept_config" => null,
						"sts_disambiguation" => null,
						"watch_config" => null
					],
					"context" => [
						"bsid" =>
							$this->find_bsid(
								isset($nextpagedata["require"]) ?
								$nextpagedata["require"] :
								null
							),
						"tsid" => null
					],
					"experience" => [
						"encoded_server_defined_params" => null,
						"fbid" => null,
						"type" => "WATCH_TAB_GLOBAL"
					],
					// same filters as the first page, as a plain list
					"filters" => array_values($filter),
					"text" => $search
				],
				"count" => 5,
				"cursor" => $cursor,
				"displayCommentsContextEnableComment" => false,
				"displayCommentsContextIsAdPreview" => false,
				"displayCommentsContextIsAggregatedShare" => false,
				"displayCommentsContextIsStorySet" => false,
				"displayCommentsFeedbackContext" => null,
				"feedLocation" => "SEARCH",
				"feedbackSource" => 23,
				"fetch_filters" => true,
				"focusCommentID" => null,
				"locale" => null,
				"privacySelectorRenderLocation" => "COMET_STREAM",
				"renderLocation" => "search_results_page",
				"scale" => 1,
				"stream_initial_count" => 0,
				"useDefaultActor" => false,
				"__relay_internal__pv__IsWorkUserrelayprovider" => false,
				"__relay_internal__pv__IsMergQAPollsrelayprovider" => false,
				"__relay_internal__pv__StoriesArmadilloReplyEnabledrelayprovider" => false,
				"__relay_internal__pv__StoriesRingrelayprovider" => false
			],
			"server_timestamps" => "true",
			"doc_id" => "6761275837251607" // is actually dynamic
		];
		
		// copy the session tokens out of the "define" modules
		$define =
			(
				isset($nextpagedata["define"]) &&
				is_array($nextpagedata["define"])
			) ?
			$nextpagedata["define"] :
			[];
		
		foreach($define as $module){
			
			if(
				!isset($module[2]) ||
				!is_array($module[2])
			){
				
				continue;
			}
			
			$config = $module[2];
			
			if(isset($config["haste_session"])){
				
				$nextpage["__hs"] = $config["haste_session"];
			}
			
			if(isset($config["connectionClass"])){
				
				$nextpage["__ccg"] = $config["connectionClass"];
			}
			
			if(isset($config["hsi"])){
				
				$nextpage["__hsi"] = (string)$config["hsi"];
			}
			
			if(!empty($config["token"])){
				
				$nextpage["lsd"] = $config["token"];
			}
			
			if(isset($config["__spin_r"])){
				
				// __rev mirrors __spin_r
				$nextpage["__spin_r"] = (string)$config["__spin_r"];
				$nextpage["__rev"] = $nextpage["__spin_r"];
			}
			
			if(isset($config["__spin_b"])){
				
				$nextpage["__spin_b"] = $config["__spin_b"];
			}
			
			if(isset($config["__spin_t"])){
				
				$nextpage["__spin_t"] = (string)$config["__spin_t"];
			}
		}
		
		preg_match(
			'/{"u":"\\\\\/ajax\\\\\/qm\\\\\/\?__a=([0-9]+)&__user=([0-9]+)&__comet_req=([0-9]+)&jazoest=([0-9]+)"/',
			$html,
			$ajaxparams
		);
		
		if(count($ajaxparams) !== 5){
			
			throw new Exception("Could not grep the AJAX parameters");
		}
		
		$nextpage["__a"] = $ajaxparams[1];
		$nextpage["__user"] = $ajaxparams[2];
		$nextpage["__comet_req"] = $ajaxparams[3];
		$nextpage["jazoest"] = $ajaxparams[4];
		
		$nextpage["variables"] = json_encode($nextpage["variables"]);
		
		return $nextpage;
	}
	
	/**
	 * Look for variables.args.context.bsid four levels deep inside the
	 * "require" list of the page bootstrap data.
	 *
	 * @param mixed $require Decoded "require" value
	 * @return mixed The last bsid found, or null when there is none
	 */
	private function find_bsid($require){
		
		$bsid = null;
		
		if(!is_array($require)){
			
			return $bsid;
		}
		
		foreach($require as $level1){
			
			if(!is_array($level1)){ continue; }
			
			foreach($level1 as $level2){
				
				if(!is_array($level2)){ continue; }
				
				foreach($level2 as $level3){
					
					if(!is_array($level3)){ continue; }
					
					foreach($level3 as $level4){
						
						if(
							isset(
								$level4
								["variables"]
								["args"]
								["context"]
								["bsid"]
							)
						){
							
							$bsid =
								$level4
								["variables"]
								["args"]
								["context"]
								["bsid"];
						}
					}
				}
			}
		}
		
		return $bsid;
	}
	
	/**
	 * Fetch one page through the graphql endpoint, append its results to
	 * $this->out and, when another page exists, store the updated payload
	 * and expose its token as $this->out["npt"].
	 *
	 * @param array $nextpage  Form payload; "variables" must be a JSON string
	 * @param bool  $getcursor Unused; kept so existing calls stay valid
	 * @throws Exception When the response is not valid JSON
	 */
	private function video_nextpage($nextpage, $getcursor = false){
		
		$json =
			json_decode(
				$this->get(
					"https://www.facebook.com/api/graphql/",
					$nextpage,
					self::post
				),
				true
			);
		
		if($json === null){
			
			throw new Exception("Failed to decode next page JSON");
		}
		
		if(
			!isset(
				$json
				["data"]
				["serpResponse"]
				["results"]
			)
		){
			
			// no result container: treat as "no more results"
			return;
		}
		
		$results =
			$json
			["data"]
			["serpResponse"]
			["results"];
		
		if(
			isset($results["edges"]) &&
			is_array($results["edges"])
		){
			
			foreach($results["edges"] as $result){
				
				$this->parse_edge($result);
			}
		}
		
		if(
			isset($results["page_info"]["has_next_page"]) &&
			$results["page_info"]["has_next_page"] == 1
		){
			
			// move the cursor forward inside the JSON encoded variables
			$variables = json_decode($nextpage["variables"], true);
			
			$variables["cursor"] =
				isset($results["page_info"]["end_cursor"]) ?
				$results["page_info"]["end_cursor"] :
				null;
			
			$nextpage["variables"] = json_encode($variables);
			
			//change this for second call. after, it's static.
			// TODO: csr also updates to longer string
			$nextpage["__dyn"] = "7xeUmwlEnwn8K2WnFw9-2i5U4e0yoW3q322aew9G2S0zU20xi3y4o0B-q1ew65xOfxO1Vw8G11xmfz81s8hwGwQw9m1YwBgao6C2O0B85W3S7Udo5qfK0EUjwGzE2swwwJK2W2K0zK5o4q0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w";
			
			// TODO: change this on third and 6th call
			//$nextpage["__s"] = $this->randomstring(6) . ":" . explode(":", $nextpage["__s"], 2)[1];
			
			$this->out["npt"] = $this->nextpage->store(json_encode($nextpage), "videos");
		}
	}
	
	/**
	 * Convert one search result edge into an entry of $this->out["video"]
	 * or $this->out["livestream"].
	 *
	 * @param array $edge Element of the "edges" list
	 */
	private function parse_edge($edge){
		
		$model =
			$edge
			["relay_rendering_strategy"]
			["view_model"];
		
		$meta = $model["video_metadata_model"];
		$thumb = $model["video_thumbnail_model"];
		
		$status =
			isset($meta["video_broadcast_status"]) ?
			(string)$meta["video_broadcast_status"] :
			"";
		
		$append = "video";
		
		if(strtolower($status) == "live"){
			
			// handle livestream
			$duration = "_LIVE";
			$append = "livestream";
			$timetext = null;
			$views = (int)$meta["relative_time_string"];
			
		}elseif(stripos($status, "vod") !== false){
			
			// handle VOD format
			$timetext = null;
			$views = (int)$meta["relative_time_string"];
			$duration = $this->hms2int($thumb["video_duration_text"]);
			
		}else{
			
			// handle normal format: "<relative date> · <view count>"
			$parts =
				explode(
					" · ",
					$meta["relative_time_string"],
					2
				);
			
			$views =
				count($parts) === 2 ?
				$this->truncatedcount2int($parts[1]) :
				null;
			
			$timetext = strtotime($parts[0]);
			
			if($timetext === false){
				
				// unparsable date: report "unknown" instead of false
				$timetext = null;
			}
			
			$duration = $this->hms2int($thumb["video_duration_text"]);
		}
		
		if(isset($meta["video_owner_profile"]["uri_token"])){
			
			$profileurl =
				"https://www.facebook.com/watch/" .
				$meta["video_owner_profile"]["uri_token"];
		}else{
			
			$profileurl = $meta["video_owner_profile"]["url"];
		}
		
		$title = isset($meta["title"]) ? (string)$meta["title"] : "";
		
		$this->out[$append][] = [
			"title" =>
				$this->limitstrlen(
					str_replace("\n", " ", $title),
					100
				),
			// flatten newlines before truncating, same as the title
			"description" =>
				empty($meta["save_description"]) ?
				null :
				$this->limitstrlen(
					str_replace("\n", " ", (string)$meta["save_description"])
				),
			"author" => [
				"name" => $meta["video_owner_profile"]["name"],
				"url" => $profileurl,
				"avatar" => null
			],
			"date" => $timetext,
			"duration" => $duration,
			"views" => $views,
			"thumb" =>
				[
					"url" => $thumb["thumbnail_image"]["uri"],
					"ratio" => "16:9"
				],
			"url" =>
				"https://www.facebook.com/watch/?v=" .
				$model
				["video_click_model"]
				["click_metadata_model"]
				["video_id"]
		];
	}
	
	/**
	 * Generate a random token.
	 *
	 * @param int|null $len Length of the token. null produces a string of
	 *                      141 to 145 characters from the mixed-case
	 *                      alphabet (used for __csr).
	 * @return string Random string ("" when $len is 0)
	 */
	private function randomstring($len){
		
		if($len === null){
			
			$str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-";
			$len = rand(141, 145);
		}else{
			
			$str = "abcdefghijklmnopqrstuvwxyz123456789";
		}
		
		// highest valid index, derived from the alphabet itself
		$max = strlen($str) - 1;
		
		$out = "";
		for($i=0; $i<$len; $i++){
			
			$out .= $str[rand(0, $max)];
		}
		
		return $out;
	}
	
	/**
	 * Truncate text on a word boundary.
	 *
	 * @param string $text Text to shorten
	 * @param int    $len  Maximum line width passed to wordwrap()
	 * @return string Everything before the first line break after wrapping
	 */
	private function limitstrlen($text, $len = 300){
		
		return explode("\n", wordwrap((string)$text, $len, "\n"))[0];
	}
	
	/**
	 * Convert "h:mm:ss", "m:ss" or "s" into a number of seconds.
	 *
	 * @param string $time Duration text
	 * @return int Duration in seconds
	 */
	private function hms2int($time){
		
		$seconds = 0;
		
		// each further component is worth 60 times less than the previous
		foreach(explode(":", (string)$time, 3) as $part){
			
			$seconds = ($seconds * 60) + (int)$part;
		}
		
		return $seconds;
	}
	
	/**
	 * Convert a truncated count such as "1.5K views" into an integer.
	 *
	 * Only the first whitespace separated token is read. Recognised unit
	 * suffixes are K, M and B (case-insensitive).
	 *
	 * @param string $number Count text
	 * @return int Parsed count, 0 when the text does not start with a number
	 */
	private function truncatedcount2int($number){
		
		$token = explode(" ", trim((string)$number), 2)[0];
		
		if(
			preg_match(
				'/^([0-9]+(?:\.[0-9]+)?)([kmb]?)/i',
				$token,
				$match
			) !== 1
		){
			
			return 0;
		}
		
		switch(strtolower($match[2])){
			
			case "k":
				$multiplier = 1000;
				break;
			
			case "m":
				$multiplier = 1000000;
				break;
			
			case "b":
				$multiplier = 1000000000;
				break;
			
			default:
				$multiplier = 1;
				break;
		}
		
		// round() absorbs float noise (1.1 * 1000 = 1100.0000000000002)
		return (int)round((float)$match[1] * $multiplier);
	}
}
		Reference in New Issue
	
	Block a user