From ac04f0c8684ee462561e2fc57d21bf7dc0c22691 Mon Sep 17 00:00:00 2001 From: Ne-Lexa Date: Mon, 3 Oct 2022 17:39:21 +0300 Subject: [PATCH] fixed extract apps from cluster pages --- src/HttpClient/HttpClient.php | 2 +- src/Scraper/ClusterAppsScraper.php | 6 +--- src/Scraper/Extractor/AppsExtractor.php | 43 +++++++++++++++++++++++++ src/Scraper/PlayStoreUiAppsScraper.php | 6 +--- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/HttpClient/HttpClient.php b/src/HttpClient/HttpClient.php index 710c927..a23923b 100644 --- a/src/HttpClient/HttpClient.php +++ b/src/HttpClient/HttpClient.php @@ -57,7 +57,7 @@ public function __construct(?GuzzleClient $client = null, ?CacheInterface $cache } $stack = HandlerStack::create(); - if (PHP_SAPI === 'cli') { + if (\PHP_SAPI === 'cli') { $logTemplate = $config['logTemplate'] ?? '🌎 [{ts}] "{method} {url} HTTP/{version}" {code} "{phrase}" - {res_header_Content-Length}'; $stack->push(Middleware::log(new ConsoleLog(), new MessageFormatter($logTemplate)), 'logger'); diff --git a/src/Scraper/ClusterAppsScraper.php b/src/Scraper/ClusterAppsScraper.php index 0e13008..2e7b0df 100644 --- a/src/Scraper/ClusterAppsScraper.php +++ b/src/Scraper/ClusterAppsScraper.php @@ -57,11 +57,7 @@ public function __invoke(RequestInterface $request, ResponseInterface $response, $locale = $query[GPlayApps::REQ_PARAM_LOCALE] ?? GPlayApps::DEFAULT_LOCALE; $country = $query[GPlayApps::REQ_PARAM_COUNTRY] ?? GPlayApps::DEFAULT_COUNTRY; - $apps = []; - - foreach ($scriptDataInfo[0] as $data) { - $apps[] = AppsExtractor::extractApp(isset($data[1]) ? $data : $data[0], $locale, $country); - } + $apps = AppsExtractor::extractApps($scriptDataInfo[0], $locale, $country); $nextToken = $scriptDataInfo[1][3][1] ?? null; diff --git a/src/Scraper/Extractor/AppsExtractor.php b/src/Scraper/Extractor/AppsExtractor.php index ddbcb5e..c4a54d9 100644 --- a/src/Scraper/Extractor/AppsExtractor.php +++ b/src/Scraper/Extractor/AppsExtractor.php @@ -71,4 +71,47 @@ public static function extractApp(array $data, string $locale, string $country): ->build() ; } + + public static function isAppStructure(array $data): bool + { + return + isset( + $data[0][0], // package id + $data[1][3][2], // icon + $data[3], // app name + $data[14] // developer name + ) && \is_string($data[0][0]) + && \is_string($data[1][3][2]) + && \is_string($data[3]) + && \is_string($data[14]) + ; + } + + /** + * @param mixed $items + * @param string $locale + * @param string $country + * + * @return App[] + */ + public static function extractApps($items, string $locale, string $country): array + { + $apps = []; + + if (\is_array($items)) { + foreach ($items as $item) { + if (self::isAppStructure($item)) { + $apps[] = self::extractApp($item, $locale, $country); + } elseif (isset($item[0]) && self::isAppStructure($item[0])) { + $apps[] = self::extractApp($item[0], $locale, $country); + } elseif (isset($item[0][0]) && self::isAppStructure($item[0][0])) { + $apps[] = self::extractApp($item[0][0], $locale, $country); + } elseif (isset($item[0][0][0]) && self::isAppStructure($item[0][0][0])) { + $apps[] = self::extractApp($item[0][0][0], $locale, $country); + } + } + } + + return $apps; + } } diff --git a/src/Scraper/PlayStoreUiAppsScraper.php b/src/Scraper/PlayStoreUiAppsScraper.php index 9d6a12a..d4d958f 100644 --- a/src/Scraper/PlayStoreUiAppsScraper.php +++ b/src/Scraper/PlayStoreUiAppsScraper.php @@ -51,11 +51,7 @@ public function __invoke(RequestInterface $request, ResponseInterface $response, $locale = $query[GPlayApps::REQ_PARAM_LOCALE] ?? GPlayApps::DEFAULT_LOCALE; $country = $query[GPlayApps::REQ_PARAM_COUNTRY] ?? GPlayApps::DEFAULT_COUNTRY; - $apps = []; - - foreach ($json[0] as $data) { - $apps[] = AppsExtractor::extractApp(isset($data[1]) ? $data : $data[0], $locale, $country); - } + $apps = AppsExtractor::extractApps($json[0], $locale, $country); $nextToken = $json[1][3][1] ?? null;