diff --git a/.github/workflows/phpunit.yml b/.github/workflows/phpunit.yml new file mode 100644 index 0000000..ab4528d --- /dev/null +++ b/.github/workflows/phpunit.yml @@ -0,0 +1,30 @@ +name: PHPUnit + +on: + pull_request: + push: + branches: + - main + +jobs: + phpunit: + strategy: + fail-fast: true + matrix: + os: [ ubuntu-latest ] + php: [ 8.3, 8.2 ] + stability: [ prefer-stable ] + name: PHPUnit - PHP ${{ matrix.php }} + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + coverage: none + - name: Composer install + run: composer install --no-interaction --no-ansi --no-progress + - name: Run PHPUnit + run: php ./vendor/bin/phpunit diff --git a/.gitignore b/.gitignore index ca25eed..e1d17f4 100755 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ vendor .idea .phpunit.result.cache +.phpunit.cache composer.lock diff --git a/composer.json b/composer.json index 9efe953..57e0d62 100755 --- a/composer.json +++ b/composer.json @@ -4,17 +4,21 @@ "license": "MIT", "type": "library", "scripts": { - "phpstan": "php ./vendor/bin/phpstan analyse --memory-limit=4G" + "phpstan": "php ./vendor/bin/phpstan analyse --memory-limit=4G", + "test": "php ./vendor/bin/phpunit" }, "require": { "php": ">=8.2", "guzzlehttp/guzzle": "~7.4", "ext-json": "*", - "ext-mbstring": "*" + "ext-mbstring": "*", + "psr/log": "^3.0" }, "require-dev": { + "phpunit/phpunit": "^11.5", "phpstan/phpstan": "^2.1.36", - "phpstan/extension-installer": "^1.3" + "phpstan/extension-installer": "^1.3", + "phpstan/phpstan-phpunit": "^2.0.12" }, "autoload": { "psr-4": { diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..d780bdc --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,20 @@ + + + + + tests + + + + + src + + + diff --git a/src/ConfluencePageContentDownloader.php b/src/ConfluencePageContentDownloader.php index 7c6fe63..6abe720 100755 --- a/src/ConfluencePageContentDownloader.php +++ b/src/ConfluencePageContentDownloader.php @@ -10,6 +10,8 @@ use Artemeon\Confluence\MacroReplacer\MacroReplacerInterface; use DOMDocument; use Exception; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; class ConfluencePageContentDownloader { @@ -19,15 +21,17 @@ class ConfluencePageContentDownloader private array $macroReplacers; private Content $contentEndpoint; private Download $downloadEndpoint; + private LoggerInterface $logger; /** * @param MacroReplacerInterface[] $macroReplacers */ - public function __construct(Content $contentEndpoint, Download $downloadEndpoint, array $macroReplacers = []) + public function __construct(Content $contentEndpoint, Download $downloadEndpoint, array $macroReplacers = [], ?LoggerInterface $logger = null) { $this->macroReplacers = $macroReplacers; $this->contentEndpoint = $contentEndpoint; $this->downloadEndpoint = $downloadEndpoint; + $this->logger = $logger ?? new NullLogger(); } public function downloadPageContent(ConfluencePage $page, bool $withAttachments = true): void @@ -54,10 +58,13 @@ public function downloadPageContent(ConfluencePage $page, bool $withAttachments $attachments = $this->contentEndpoint->findChildAttachments($pageId); foreach ($attachments as $attachment) { - $this->downloadEndpoint->downloadAttachment($attachment); + $this->downloadEndpoint->downloadAttachment($attachment, $pageId); } } catch (Exception $e) { - echo 'An error has occurred: ' . $e->getMessage(); + $this->logger->error( + sprintf('Failed to download Confluence page content for page "%s": %s', $page->getId() ?? 'unknown', $e->getMessage()), + ['exception' => $e], + ); } } diff --git a/src/Endpoint/Content.php b/src/Endpoint/Content.php index a9b4bd0..4306e7d 100755 --- a/src/Endpoint/Content.php +++ b/src/Endpoint/Content.php @@ -28,6 +28,7 @@ public function __construct(Client $client, Auth $auth) * @param int|null $offset * @return ConfluencePage[] * @throws GuzzleException + * @throws Exception */ public function findPagesInSpace(string $spaceKey, int $limit = 2000, ?int $offset = null): array { @@ -65,7 +66,13 @@ public function findPagesInSpace(string $spaceKey, int $limit = 2000, ?int $offs } /** - * Use the Confluence Content API to retrieve page content + * Fetches a single page by its ID from the Confluence Content API, expanding its + * stored body, version, space and labels into a {@see ConfluencePage}. + * + * @param string $pageId the Confluence content ID of the page to load + * + * @throws GuzzleException if the HTTP request fails (network error, timeout, …) + * @throws Exception if Confluence responds with a non-200 status code */ public function findPageContent(string $pageId): ConfluencePage { @@ -88,9 +95,15 @@ public function findPageContent(string $pageId): ConfluencePage } /** - * Use descendants.attachment in the Content API to get attachments + * Lists the attachments of a page via the Content API's child/attachment endpoint, + * expanding each attachment's history so its last-updated timestamp is available. + * + * @param string $pageId the Confluence content ID of the parent page + * + * @return list the page's attachments, empty if it has none * - * @return list + * @throws GuzzleException if the HTTP request fails (network error, timeout, …) + * @throws Exception if Confluence responds with a non-200 status code */ public function findChildAttachments(string $pageId): array { @@ -104,7 +117,7 @@ public function findChildAttachments(string $pageId): array ); if ($response->getStatusCode() !== 200) { - throw new Exception('Fehler beim Abrufen der Attachments. HTTP-Statuscode: ' . $response->getStatusCode()); + throw new Exception('Error retrieving attachments. HTTP status code: ' . $response->getStatusCode()); } $attachmentsData = json_decode($response->getBody()->getContents(), true); diff --git a/src/Endpoint/Download.php b/src/Endpoint/Download.php index cb7183d..260fed7 100755 --- a/src/Endpoint/Download.php +++ b/src/Endpoint/Download.php @@ -8,6 +8,8 @@ use Artemeon\Confluence\Endpoint\Dto\ConfluencePage; use DateTime; use GuzzleHttp\Client; +use GuzzleHttp\Exception\GuzzleException; +use RuntimeException; class Download { @@ -22,39 +24,55 @@ public function __construct(Client $client, Auth $auth, string $downloadFolder) $this->downloadFolder = $downloadFolder; } - private function checkDownloadFolder(): bool + /** + * Makes sure the configured download folder exists, creating it recursively if needed. + * + * @throws RuntimeException if the folder is missing and cannot be created + */ + private function ensureDownloadFolder(): void { - if (!is_dir($this->downloadFolder)) { - return mkdir($this->downloadFolder, 0755, true); + if (!is_dir($this->downloadFolder) && !mkdir($this->downloadFolder, 0755, true) && !is_dir($this->downloadFolder)) { + throw new RuntimeException(sprintf('The download folder "%s" does not exist and could not be created.', $this->downloadFolder)); } - - return true; } + /** + * Writes a page's (already prepared) HTML content to a file in the download folder. + * + * @param ConfluencePage $confluencePage the page whose content is written + * @param string $fileName the target file name, relative to the download folder (e.g. "content.html") + * + * @throws RuntimeException if the download folder cannot be ensured + */ public function downloadPageContent(ConfluencePage $confluencePage, string $fileName): void { - if (!$this->checkDownloadFolder()) { - echo 'Error: The download folder does not exist or could not be created.'; - - return; - } + $this->ensureDownloadFolder(); $htmlFile = $this->downloadFolder . '/' . $fileName; file_put_contents($htmlFile, $confluencePage->getContent()); } - public function downloadAttachment(ConfluenceAttachment $attachment): void + /** + * Downloads a single page attachment into the download folder, but only if it is + * new or has been updated since the locally stored copy (see {@see shouldAttachmentBeUpdated()}). + * + * @param ConfluenceAttachment $attachment the attachment to download; its title is used as the file name + * @param string $pageId the Confluence content ID of the page the attachment belongs to + * + * @throws RuntimeException if the download folder cannot be ensured + * @throws GuzzleException if the HTTP request to the REST endpoint fails + */ + public function downloadAttachment(ConfluenceAttachment $attachment, string $pageId): void { - if (!$this->checkDownloadFolder()) { - echo 'Error: The download folder does not exist or could not be created.'; - - return; - } + $this->ensureDownloadFolder(); if ($this->shouldAttachmentBeUpdated($attachment)) { - // Verwende den relativen Pfad aus der API, um das Attachment herunterzuladen + // Download via the supported REST endpoint. Same Basic-auth credentials + // (email + API token) as every other request; only the endpoint changed: + // the legacy /wiki/download servlet rejects API-token auth (HTTP 401), + // while the REST API accepts it. $attachmentContent = $this->client->get( - '/wiki/' . $attachment->findDownloadPath(), + 'wiki/rest/api/content/' . $pageId . '/child/attachment/' . $attachment->getId() . '/download', array_merge([], $this->auth->getAuthenticationArray()) )->getBody()->getContents(); @@ -67,6 +85,17 @@ private function getAttachmentFilePath(ConfluenceAttachment $attachment): string return $this->downloadFolder . '/' . $attachment->getTitle(); } + /** + * Decides whether an attachment needs to be (re-)downloaded. + * + * Returns true when no local copy exists yet, when the attachment has no known + * last-updated date, or when the local file is older than the attachment's + * last-updated date; otherwise the local copy is considered up to date. + * + * @param ConfluenceAttachment $attachment the attachment to check against its local copy + * + * @return bool true if the attachment should be downloaded, false if the local copy is current + */ private function shouldAttachmentBeUpdated(ConfluenceAttachment $attachment): bool { $filepath = $this->getAttachmentFilePath($attachment); @@ -77,9 +106,9 @@ private function shouldAttachmentBeUpdated(ConfluenceAttachment $attachment): bo } if (file_exists($filepath)) { - $filemtime = filemtime($filepath); - if (is_int($filemtime)) { - return $filemtime < $lastUpdated->getTimestamp(); + $fileModificationTime = filemtime($filepath); + if (is_int($fileModificationTime)) { + return $fileModificationTime < $lastUpdated->getTimestamp(); } } diff --git a/src/Endpoint/Dto/ConfluenceAttachment.php b/src/Endpoint/Dto/ConfluenceAttachment.php index ae5b0f1..ff2408f 100755 --- a/src/Endpoint/Dto/ConfluenceAttachment.php +++ b/src/Endpoint/Dto/ConfluenceAttachment.php @@ -14,6 +14,7 @@ class ConfluenceAttachment /** * @param array{ + * id: string, * title: string, * history?: array{ * lastUpdated?: array{ @@ -28,9 +29,9 @@ public function __construct(private array $rawData) $this->lastUpdated = isset($rawData['history']['lastUpdated']['when']) ? new DateTime($rawData['history']['lastUpdated']['when']) : null; } - public function findDownloadPath(): ?string + public function getId(): string { - return $this->rawData['_links']['download'] ?? null; + return $this->rawData['id']; } public function getTitle(): string diff --git a/tests/.gitkeep b/tests/.gitkeep deleted file mode 100755 index e69de29..0000000 diff --git a/tests/ConfluencePageContentDownloaderTest.php b/tests/ConfluencePageContentDownloaderTest.php new file mode 100644 index 0000000..95f669b --- /dev/null +++ b/tests/ConfluencePageContentDownloaderTest.php @@ -0,0 +1,98 @@ + 'att1', 'title' => 'image.png']); + + $replacer = $this->createMock(MacroReplacerInterface::class); + $replacer->expects($this->once())->method('replace')->willReturn('

replaced

'); + + $content = $this->createMock(Content::class); + $content->expects($this->once())->method('findChildAttachments')->with('123')->willReturn([$attachment]); + + $download = $this->createMock(Download::class); + $download->expects($this->once())->method('downloadPageContent') + ->with($this->isInstanceOf(ConfluencePage::class), 'content.html'); + $download->expects($this->once())->method('downloadAttachment')->with($attachment, '123'); + + $logger = $this->createMock(LoggerInterface::class); + $logger->expects($this->never())->method('error'); + + $downloader = new ConfluencePageContentDownloader($content, $download, [$replacer], $logger); + $downloader->downloadPageContent($this->page('123')); + } + + public function testDoesNotDownloadAttachmentsWhenDisabled(): void + { + $content = $this->createMock(Content::class); + $content->expects($this->never())->method('findChildAttachments'); + + $download = $this->createMock(Download::class); + $download->expects($this->once())->method('downloadPageContent'); + $download->expects($this->never())->method('downloadAttachment'); + + $downloader = new ConfluencePageContentDownloader($content, $download); + $downloader->downloadPageContent($this->page('123'), false); + } + + public function testDoesNotDownloadAttachmentsWhenPageHasNoId(): void + { + $content = $this->createMock(Content::class); + $content->expects($this->never())->method('findChildAttachments'); + + $download = $this->createMock(Download::class); + $download->expects($this->once())->method('downloadPageContent'); + $download->expects($this->never())->method('downloadAttachment'); + + $downloader = new ConfluencePageContentDownloader($content, $download); + $downloader->downloadPageContent($this->page(null)); + } + + public function testLogsAnErrorWhenDownloadingFails(): void + { + // Stubs (no expectations): they only provide behaviour. The assertion lives on + // the logger mock below. + $download = self::createStub(Download::class); + $download->method('downloadPageContent')->willThrowException(new RuntimeException('boom')); + + $logger = $this->createMock(LoggerInterface::class); + $logger->expects($this->once())->method('error') + ->with($this->stringContains('boom'), $this->arrayHasKey('exception')); + + $downloader = new ConfluencePageContentDownloader(self::createStub(Content::class), $download, [], $logger); + + // Must not bubble up — the failure is caught and logged. + $downloader->downloadPageContent($this->page('123')); + } + + private function page(?string $id): ConfluencePage + { + $rawData = ['body' => ['storage' => ['value' => '

original

']]]; + if ($id !== null) { + $rawData['id'] = $id; + } + + return new ConfluencePage($rawData); + } +} diff --git a/tests/Endpoint/ContentTest.php b/tests/Endpoint/ContentTest.php new file mode 100644 index 0000000..7d2a930 --- /dev/null +++ b/tests/Endpoint/ContentTest.php @@ -0,0 +1,118 @@ + [ + ['id' => 'a1', 'title' => 'first.png'], + ['id' => 'a2', 'title' => 'second.png', 'history' => ['lastUpdated' => ['when' => '2021-01-01T00:00:00.000Z']]], + ], + ], JSON_THROW_ON_ERROR); + $content = $this->content(new Response(200, [], $json)); + + $attachments = $content->findChildAttachments('123'); + + self::assertCount(2, $attachments); + self::assertContainsOnlyInstancesOf(ConfluenceAttachment::class, $attachments); + self::assertSame('a1', $attachments[0]->getId()); + self::assertSame('second.png', $attachments[1]->getTitle()); + } + + public function testFindChildAttachmentsReturnsEmptyListWhenThereAreNoAttachments(): void + { + $content = $this->content(new Response(200, [], json_encode(['results' => []], JSON_THROW_ON_ERROR))); + + self::assertSame([], $content->findChildAttachments('123')); + } + + public function testFindChildAttachmentsThrowsOnUnexpectedSuccessStatus(): void + { + // 204 is a 2xx (so Guzzle's http_errors does not trigger) but not the 200 the + // endpoint requires, exercising the explicit non-200 guard. + $content = $this->content(new Response(204)); + + $this->expectException(Exception::class); + $content->findChildAttachments('123'); + } + + public function testFindChildAttachmentsSurfacesServerErrorsAsGuzzleException(): void + { + $content = $this->content(new Response(500, [], 'boom')); + + $this->expectException(GuzzleException::class); + $content->findChildAttachments('123'); + } + + public function testFindPagesInSpaceParsesPagesAndTerminatesOnAShortPage(): void + { + $json = json_encode([ + 'results' => [ + ['id' => 'p1', 'title' => 'Page one'], + ['id' => 'p2', 'title' => 'Page two'], + ], + 'limit' => 200, + 'size' => 2, + ], JSON_THROW_ON_ERROR); + $content = $this->content(new Response(200, [], $json)); + + $pages = $content->findPagesInSpace('SPACE'); + + self::assertCount(2, $pages); + self::assertContainsOnlyInstancesOf(ConfluencePage::class, $pages); + self::assertArrayHasKey('p1', $pages); + self::assertSame('Page two', $pages['p2']->getTitle()); + } + + public function testFindPageContentParsesASinglePage(): void + { + $json = json_encode([ + 'id' => 'p9', + 'title' => 'Single page', + 'body' => ['storage' => ['value' => '

body

']], + ], JSON_THROW_ON_ERROR); + $content = $this->content(new Response(200, [], $json)); + + $page = $content->findPageContent('p9'); + + self::assertSame('p9', $page->getId()); + self::assertSame('

body

', $page->getContent()); + } + + public function testFindPageContentThrowsOnUnexpectedSuccessStatus(): void + { + $content = $this->content(new Response(204)); + + $this->expectException(Exception::class); + $content->findPageContent('p9'); + } + + private function content(Response $response): Content + { + $stack = HandlerStack::create(new MockHandler([$response])); + $client = new Client(['handler' => $stack, 'base_uri' => 'https://example.atlassian.net/']); + + return new Content($client, new Auth('user@example.com', 'secret-token')); + } +} diff --git a/tests/Endpoint/DownloadTest.php b/tests/Endpoint/DownloadTest.php new file mode 100644 index 0000000..787fa63 --- /dev/null +++ b/tests/Endpoint/DownloadTest.php @@ -0,0 +1,190 @@ +tempDir = sys_get_temp_dir() . '/confluence-download-test-' . uniqid('', true); + mkdir($this->tempDir, 0777, true); + } + + protected function tearDown(): void + { + $this->removeRecursively($this->tempDir); + } + + public function testDownloadAttachmentHitsTheRestEndpointAndWritesTheFile(): void + { + $mock = new MockHandler([new Response(200, [], 'PNG-BYTES')]); + $download = new Download($this->client($mock), $this->auth(), $this->tempDir); + + $download->downloadAttachment($this->attachment('att42', 'image.png'), '12345'); + + self::assertSame('PNG-BYTES', file_get_contents($this->tempDir . '/image.png')); + $request = $mock->getLastRequest(); + self::assertNotNull($request); + self::assertSame( + '/wiki/rest/api/content/12345/child/attachment/att42/download', + $request->getUri()->getPath(), + ); + } + + public function testDownloadAttachmentSendsBasicAuthCredentials(): void + { + $mock = new MockHandler([new Response(200, [], 'data')]); + $download = new Download($this->client($mock), $this->auth(), $this->tempDir); + + $download->downloadAttachment($this->attachment('att1', 'file.bin'), '1'); + + $request = $mock->getLastRequest(); + self::assertNotNull($request); + $authorization = $request->getHeaderLine('Authorization'); + self::assertStringStartsWith('Basic ', $authorization); + self::assertSame( + 'user@example.com:secret-token', + base64_decode(substr($authorization, strlen('Basic ')), true), + ); + } + + public function testDownloadAttachmentSkipsDownloadWhenLocalCopyIsUpToDate(): void + { + file_put_contents($this->tempDir . '/image.png', 'existing'); + + // No responses queued: any HTTP call would make the MockHandler throw. + $mock = new MockHandler([]); + $download = new Download($this->client($mock), $this->auth(), $this->tempDir); + + // Attachment last changed in the past, local file's mtime is "now" -> up to date. + $download->downloadAttachment($this->attachment('att1', 'image.png', '2000-01-01T00:00:00.000Z'), '1'); + + self::assertNull($mock->getLastRequest()); + self::assertSame('existing', file_get_contents($this->tempDir . '/image.png')); + } + + public function testDownloadAttachmentReDownloadsWhenRemoteIsNewerThanLocalCopy(): void + { + file_put_contents($this->tempDir . '/image.png', 'stale'); + + $mock = new MockHandler([new Response(200, [], 'fresh')]); + $download = new Download($this->client($mock), $this->auth(), $this->tempDir); + + // Attachment last changed far in the future, local file's mtime is "now" -> stale. + $download->downloadAttachment($this->attachment('att1', 'image.png', '2999-01-01T00:00:00.000Z'), '1'); + + self::assertNotNull($mock->getLastRequest()); + self::assertSame('fresh', file_get_contents($this->tempDir . '/image.png')); + } + + public function testDownloadPageContentWritesContentToTheGivenFile(): void + { + $mock = new MockHandler([]); + $download = new Download($this->client($mock), $this->auth(), $this->tempDir); + + $download->downloadPageContent($this->page('

Hello

'), 'content.html'); + + self::assertSame('

Hello

', file_get_contents($this->tempDir . '/content.html')); + self::assertNull($mock->getLastRequest()); + } + + public function testEnsureDownloadFolderCreatesAMissingNestedFolder(): void + { + $nested = $this->tempDir . '/a/b/c'; + $download = new Download($this->client(new MockHandler([])), $this->auth(), $nested); + + $download->downloadPageContent($this->page('x'), 'content.html'); + + self::assertDirectoryExists($nested); + self::assertFileExists($nested . '/content.html'); + } + + public function testThrowsWhenDownloadFolderCannotBeCreated(): void + { + // A file blocks the folder path, so mkdir() cannot create it. + $blocker = $this->tempDir . '/iam-a-file'; + file_put_contents($blocker, 'x'); + + $download = new Download($this->client(new MockHandler([])), $this->auth(), $blocker . '/sub'); + + // Swallow the expected mkdir() warning so PHPUnit's strict handler does not + // fail the test before the RuntimeException we actually want to assert. + set_error_handler(static fn (): bool => true); + + try { + $this->expectException(RuntimeException::class); + $download->downloadPageContent($this->page('x'), 'content.html'); + } finally { + restore_error_handler(); + } + } + + private function client(MockHandler $mock): Client + { + return new Client([ + 'handler' => HandlerStack::create($mock), + 'base_uri' => 'https://example.atlassian.net/', + ]); + } + + private function auth(): Auth + { + return new Auth('user@example.com', 'secret-token'); + } + + private function attachment(string $id, string $title, ?string $lastUpdated = null): ConfluenceAttachment + { + $rawData = ['id' => $id, 'title' => $title]; + if ($lastUpdated !== null) { + $rawData['history'] = ['lastUpdated' => ['when' => $lastUpdated]]; + } + + return new ConfluenceAttachment($rawData); + } + + private function page(string $content): ConfluencePage + { + return new ConfluencePage(['id' => '1', 'body' => ['storage' => ['value' => $content]]]); + } + + private function removeRecursively(string $path): void + { + if (!file_exists($path)) { + return; + } + + if (is_dir($path)) { + foreach (scandir($path) ?: [] as $entry) { + if ($entry !== '.' && $entry !== '..') { + $this->removeRecursively($path . '/' . $entry); + } + } + rmdir($path); + + return; + } + + unlink($path); + } +} diff --git a/tests/Endpoint/Dto/ConfluenceAttachmentTest.php b/tests/Endpoint/Dto/ConfluenceAttachmentTest.php new file mode 100644 index 0000000..85bdf0e --- /dev/null +++ b/tests/Endpoint/Dto/ConfluenceAttachmentTest.php @@ -0,0 +1,41 @@ + 'att123', 'title' => 'diagram.png']); + + self::assertSame('att123', $attachment->getId()); + self::assertSame('diagram.png', $attachment->getTitle()); + } + + public function testLastUpdatedIsNullWhenHistoryIsMissing(): void + { + $attachment = new ConfluenceAttachment(['id' => 'att123', 'title' => 'diagram.png']); + + self::assertNull($attachment->getLastUpdated()); + } + + public function testLastUpdatedIsParsedFromHistoryWhenPresent(): void + { + $attachment = new ConfluenceAttachment([ + 'id' => 'att123', + 'title' => 'diagram.png', + 'history' => ['lastUpdated' => ['when' => '2021-06-15T10:30:00.000Z']], + ]); + + $lastUpdated = $attachment->getLastUpdated(); + + self::assertInstanceOf(DateTime::class, $lastUpdated); + self::assertSame('2021-06-15', $lastUpdated->format('Y-m-d')); + } +}