diff --git a/src/entity/CrawledResult.php b/src/entity/CrawledResult.php
index 1780c75..4cc1ac9 100644
--- a/src/entity/CrawledResult.php
+++ b/src/entity/CrawledResult.php
@@ -11,9 +11,9 @@ final class CrawledResult
      * @param string[] $allUrls
      * @param string[] $followedUrls
      * @param string[] $openedUrls
-     * @param array> $urlReferences
+     * @param array> $urlReferences
      * @param Url[] $urls
-     * @param array}> $errors
+     * @param array}> $errors
      */
     public function __construct(
         private array $allUrls,
@@ -55,7 +55,7 @@ public function getOpenedUrls(): array
 
 
     /**
-     * @return array>
+     * @return array>
      */
     public function getUrlReferences(): array
     {
@@ -85,4 +85,23 @@ public function getRobots(): ?string
     {
         return $this->robots;
     }
+
+    /**
+     * Export this result's properties as a plain array keyed by property name; each Url in $urls is converted with Url::toArray().
+     *
+     * @return array<string, mixed>
+     */
+    public function toArray(): array {
+        return [
+            'allUrls' => $this->allUrls,
+            'followedUrls' => $this->followedUrls,
+            'openedUrls' => $this->openedUrls,
+            'urlReferences' => $this->urlReferences,
+            'urls' => array_map(function(Url $url) {
+                return $url->toArray();
+            }, $this->urls),
+            'errors' => $this->errors,
+            'robots' => $this->robots,
+        ];
+    }
 }
diff --git a/src/entity/Url.php b/src/entity/Url.php
index e51172f..99fd6bf 100644
--- a/src/entity/Url.php
+++ b/src/entity/Url.php
@@ -102,4 +102,24 @@ public function getHttpCode(): int
     {
         return $this->httpCode;
     }
+
+    /** Returns the host of the wrapped URL, exactly as reported by $this->url->getHost(). */
+    public function getDomain(){
+        return $this->url->getHost();
+    }
+    /** @return array<string, mixed> Page data keyed by property name; 'url' is the value of $this->url->getAbsoluteUrl(). */
+    public function toArray(): array{
+        return [
+            'url' => $this->url->getAbsoluteUrl(),
+            'html' => $this->html,
+            'size' => $this->size,
+            'title' => $this->title,
+            'texts' => $this->texts,
+            'uniqueTexts' => $this->uniqueTexts,
+            'headers' => $this->headers,
+            'links' => $this->links,
+            'loadingTime' => $this->loadingTime,
+            'httpCode' => $this->httpCode,
+        ];
+    }
 }