i overengineered the fuck out of that baidu scraper jesus

This commit is contained in:
2026-07-05 01:57:56 -04:00
parent 8328d93b17
commit 46411a6c83

View File

@@ -666,14 +666,24 @@ class baidu{
//
// Don't parse as a search result if it's a card
//
$card =
$tpl_probe =
$this->fuckhtml
->getElementsByClassName(
"cosc-card",
$div
->getElementsByAttributeValue(
"tpl",
"www_index",
[$datafield]
);
if(count($card) !== 0){
if(
count($tpl_probe) === 0 &&
count(
$this->fuckhtml
->getElementsByClassName(
"cosc-card",
$div
)
) !== 0
){
//
// Parse chinese youtube shorts
@@ -870,10 +880,13 @@ class baidu{
// class:FYB_RD -> News garbage, IGNORE
$result =
$this->fuckhtml
->getElementsByClassName(
"result",
[$datafield]
array_merge(
$tpl_probe,
$this->fuckhtml
->getElementsByClassName(
"result",
[$datafield]
)
);
if(count($result) !== 0){
@@ -885,7 +898,7 @@ class baidu{
$title =
$this->fuckhtml
->getElementsByClassName(
"sc-link",
"cosc-title-a",
"a"
);
@@ -905,8 +918,9 @@ class baidu{
$description =
$this->fuckhtml
->getElementsByClassName(
"c-color",
->getElementsByAttributeValue(
"data-sanssr-cmpt",
"card/www-summary-1",
$div
);