From 39c45c13424dd5d29b51d80c80ac353219cda3ce Mon Sep 17 00:00:00 2001 From: Gustavo Freze Date: Thu, 16 Apr 2026 10:32:03 -0300 Subject: [PATCH 1/2] refactor: Enhance documentation and improve performance of collection methods. --- Makefile | 6 +- README.md | 351 ++++++------------ composer.json | 2 +- src/Collectible.php | 147 +++++--- src/Collection.php | 14 +- src/Internal/EagerPipeline.php | 62 +++- .../Operations/Transforming/Filter.php | 9 +- src/Internal/Pipeline.php | 39 +- tests/EagerCollectionTest.php | 15 + tests/LazyCollectionTest.php | 15 + 10 files changed, 327 insertions(+), 333 deletions(-) diff --git a/Makefile b/Makefile index 07b3559..f6c305b 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,10 @@ review: ## Run static code analysis show-reports: ## Open static analysis reports (e.g., coverage, lints) in the browser @sensible-browser report/coverage/coverage-html/index.html report/coverage/mutation-report.html +.PHONY: show-outdated +show-outdated: ## Show outdated direct dependencies + @${DOCKER_RUN} composer outdated --direct + .PHONY: clean clean: ## Remove dependencies and generated artifacts @sudo chown -R ${USER}:${USER} ${PWD} @@ -60,7 +64,7 @@ help: ## Display this help message | awk 'BEGIN {FS = ":.*?## "}; {printf "$(YELLOW)%-25s$(RESET) %s\n", $$1, $$2}' @echo "" @echo "$$(printf '$(GREEN)')Reports$$(printf '$(RESET)')" - @grep -E '^(show-reports):.*?## .*$$' $(MAKEFILE_LIST) \ + @grep -E '^(show-reports|show-outdated):.*?## .*$$' $(MAKEFILE_LIST) \ | awk 'BEGIN {FS = ":.*?## "}; {printf "$(YELLOW)%-25s$(RESET) %s\n", $$1, $$2}' @echo "" @echo "$$(printf '$(GREEN)')Cleanup$$(printf '$(RESET)')" diff --git a/README.md b/README.md index e62e0ef..821ef0f 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,21 @@ # Collection -[![License](https://img.shields.io/badge/license-MIT-green)](LICENSE) +[![License](https://img.shields.io/badge/license-MIT-green)](https://github.com/tiny-blocks/collection/blob/main/LICENSE) * [Overview](#overview) 
* [Installation](#installation) * [How to use](#how-to-use) - * [Writing](#writing) - * [Filtering](#filtering) - * [Ordering](#ordering) - * [Retrieving](#retrieving) - * [Comparing](#comparing) - * [Aggregation](#aggregation) - * [Transforming](#transforming) -* [Evaluation strategies](#evaluation-strategies) + + [Writing](#writing) + + [Filtering](#filtering) + + [Ordering](#ordering) + + [Retrieving](#retrieving) + + [Comparing](#comparing) + + [Aggregation](#aggregation) + + [Transforming](#transforming) * [FAQ](#faq) * [License](#license) * [Contributing](#contributing) -
- ## Overview The `Collection` library provides a flexible and efficient API to manipulate, iterate, and manage collections in a @@ -30,16 +27,12 @@ elements into memory at once. The library supports adding, removing, filtering, sorting, and transforming elements. -
- ## Installation -```bash +``` composer require tiny-blocks/collection ``` -
- ## How to use The library exposes the available behaviors through the `Collectible` interface and provides utilities to manipulate @@ -68,11 +61,11 @@ use TinyBlocks\Collection\Order; use TinyBlocks\Mapper\KeyPreservation; $collection = Collection::createFrom(elements: [1, 2, 3, 4, 5]) - ->add(6, 7) - ->filter(predicates: static fn(int $value): bool => $value > 3) - ->sort(order: Order::ASCENDING_VALUE) - ->map(transformations: static fn(int $value): int => $value * 2) - ->toArray(keyPreservation: KeyPreservation::DISCARD); + ->add(6, 7) + ->filter(predicates: static fn(int $value): bool => $value > 3) + ->sort(order: Order::ASCENDING_VALUE) + ->map(transformations: static fn(int $value): int => $value * 2) + ->toArray(keyPreservation: KeyPreservation::DISCARD); # Output: [8, 10, 12, 14] ``` @@ -102,35 +95,13 @@ final class Invoices extends Collection } ``` -### Creating collections - -```php -use TinyBlocks\Collection\Collection; - -$eager = Collection::createFrom(elements: [1, 2, 3]); - -$eagerFromClosure = Collection::createFromClosure(factory: static function (): array { - return [1, 2, 3]; -}); - -$lazy = Collection::createLazyFrom(elements: [1, 2, 3]); - -$lazyFromClosure = Collection::createLazyFromClosure(factory: static function (): iterable { - yield 1; - yield 2; - yield 3; -}); -``` - -
- -## Writing +### Writing These methods enable adding, removing, and modifying elements in the Collection. #### Adding elements -- `add`: Returns a new collection with the specified elements appended. +* `add`: Returns a new collection with the specified elements appended. ```php $collection->add(1, 2, 3); @@ -142,7 +113,7 @@ These methods enable adding, removing, and modifying elements in the Collection. #### Merging collections -- `merge`: Merges the elements of another Collectible into the current Collection. +* `merge`: Merges the elements of another Collectible into the current Collection. ```php $collectionA->merge(other: $collectionB); @@ -150,58 +121,50 @@ These methods enable adding, removing, and modifying elements in the Collection. #### Removing elements -- `remove`: Returns a new collection with all occurrences of the specified element removed. +* `remove`: Returns a new collection with all occurrences of the specified element removed. ```php $collection->remove(element: 1); ``` +* `removeAll`: Returns a new collection with elements removed. -- `removeAll`: Returns a new collection with elements removed. -

- - **With a predicate**: Removes only the elements that satisfy the given predicate. + + **With a predicate**: Removes only the elements that satisfy the given predicate. ```php $collection->removeAll(predicate: static fn(Amount $amount): bool => $amount->value > 10.0); ``` - - - **Without a predicate**: Removes all elements from the Collection. + + **Without a predicate**: Removes all elements from the Collection. ```php $collection->removeAll(); ``` -
- ### Filtering These methods enable filtering elements in the Collection based on specific conditions. #### Filter by predicate -- `filter`: Retains only elements satisfying all given predicates. -

+* `filter`: Retains only elements satisfying all given predicates. - - **With predicates**: Retains elements that satisfy the provided predicates. + + **With predicates**: Retains elements that satisfy the provided predicates. ```php $collection->filter(predicates: static fn(Amount $amount): bool => $amount->value > 100); ``` - - - **Without predicates**: Removes all falsy values (e.g., `null`, `false`, `0`, `''`, empty arrays). + + **Without predicates**: Removes all falsy values (e.g., `null`, `false`, `0`, `''`, empty arrays). ```php $collection->filter(); ``` -
- ### Ordering These methods enable sorting elements in the Collection based on the specified order and optional comparator. #### Sort by order and custom comparator -- `sort`: Returns a new sorted collection. +* `sort`: Returns a new sorted collection. ``` Order::ASCENDING_KEY: Sorts the collection in ascending order by key. @@ -214,7 +177,7 @@ These methods enable sorting elements in the Collection based on the specified o ```php use TinyBlocks\Collection\Order; - + $collection->sort(order: Order::DESCENDING_VALUE); ``` @@ -222,14 +185,12 @@ These methods enable sorting elements in the Collection based on the specified o ```php use TinyBlocks\Collection\Order; - + $collection->sort( order: Order::ASCENDING_VALUE, comparator: static fn(Amount $first, Amount $second): int => $first->value <=> $second->value ); - ``` - -
+ ``` ### Retrieving @@ -238,7 +199,7 @@ elements, or finding elements that match a specific condition. #### Retrieve count -- `count`: Returns the total number of elements in the Collection. +* `count`: Returns the total number of elements in the Collection. ```php $collection->count(); @@ -246,7 +207,7 @@ elements, or finding elements that match a specific condition. #### Check if empty -- `isEmpty`: Determines whether the collection has no elements. +* `isEmpty`: Determines whether the collection has no elements. ```php $collection->isEmpty(); @@ -254,7 +215,7 @@ elements, or finding elements that match a specific condition. #### Retrieve by condition -- `findBy`: Finds the first element that satisfies any given predicate, or returns `null` if no predicate matches. +* `findBy`: Finds the first element that satisfies any given predicate, or returns `null` if no predicate matches. When called without predicates, it returns `null`. ```php @@ -263,19 +224,17 @@ elements, or finding elements that match a specific condition. #### Retrieve single elements -- `first`: Retrieves the first element from the Collection or returns a default value if the Collection is empty. +* `first`: Retrieves the first element from the Collection or returns a default value if the Collection is empty. ```php $collection->first(defaultValueIfNotFound: 'fallback'); ``` - -- `getBy`: Retrieves an element by its zero-based index or returns a default value if the index is out of bounds. +* `getBy`: Retrieves an element by its zero-based index or returns a default value if the index is out of bounds. ```php $collection->getBy(index: 0, defaultValueIfNotFound: 'fallback'); ``` - -- `last`: Retrieves the last element from the Collection or returns a default value if the Collection is empty. +* `last`: Retrieves the last element from the Collection or returns a default value if the Collection is empty. 
```php $collection->last(defaultValueIfNotFound: 'fallback'); @@ -283,7 +242,7 @@ elements, or finding elements that match a specific condition. #### Retrieve collection segments -- `slice`: Extracts a contiguous segment of the collection, starting at the specified offset. +* `slice`: Extracts a contiguous segment of the collection, starting at the specified offset. If length is negative, it excludes that many elements from the end. If length is not provided or set to -1, it returns all elements from the specified offset to the end. @@ -291,15 +250,13 @@ elements, or finding elements that match a specific condition. $collection->slice(offset: 1, length: 2); ``` -
- ### Comparing These methods enable comparing collections to check for equality or to verify element membership. #### Check if collection contains element -- `contains`: Checks if the Collection contains a specific element. Uses strict equality for scalars and loose equality +* `contains`: Checks if the Collection contains a specific element. Uses strict equality for scalars and loose equality for objects. ```php @@ -308,20 +265,18 @@ These methods enable comparing collections to check for equality or to verify el #### Compare collections for equality -- `equals`: Compares the current Collection with another collection for element-wise equality. +* `equals`: Compares the current Collection with another collection for element-wise equality. ```php $collectionA->equals(other: $collectionB); ``` -
- ### Aggregation These methods perform operations that return a single value based on the Collection's content, such as summing or combining elements. -- `reduce`: Combines all elements in the Collection into a single value using the provided accumulator function and an +* `reduce`: Combines all elements in the Collection into a single value using the provided accumulator function and an initial value. This method is helpful for accumulating results, like summing or concatenating values. ```php @@ -330,24 +285,20 @@ combining elements. initial: 0.0 ); ``` - -- `joinToString`: Joins all elements into a string with the given separator. +* `joinToString`: Joins all elements into a string with the given separator. ```php $collection->joinToString(separator: ', '); ``` -
- ### Transforming These methods allow the Collection's elements to be transformed or converted into different formats. #### Applying actions without modifying elements -- `each`: Executes actions on each element in the Collection without modification. - This is a terminal operation that does not return the collection. It is useful for performing side effects, such as - logging or accumulating values. +* `each`: Executes actions on each element in the Collection without modification. + The method is helpful for performing side effects, such as logging or accumulating values. ```php $collection->each(actions: static fn(Amount $amount): void => $total += $amount->value); @@ -355,7 +306,7 @@ These methods allow the Collection's elements to be transformed or converted int #### Grouping elements -- `groupBy`: Groups the elements in the Collection based on the provided classifier. +* `groupBy`: Groups the elements in the Collection based on the provided classifier. ```php $collection->groupBy(classifier: static fn(Amount $amount): string => $amount->currency->name); @@ -363,7 +314,7 @@ These methods allow the Collection's elements to be transformed or converted int #### Mapping elements -- `map`: Applies transformations to each element in the Collection and returns a new collection with the transformed +* `map`: Applies transformations to each element in the Collection and returns a new collection with the transformed elements. ```php @@ -372,7 +323,7 @@ These methods allow the Collection's elements to be transformed or converted int #### Flattening elements -- `flatten`: Flattens nested iterables by exactly one level. Non-iterable elements are yielded as-is. +* `flatten`: Flattens nested iterables by exactly one level. Non-iterable elements are yielded as-is. ```php $collection->flatten(); @@ -380,7 +331,7 @@ These methods allow the Collection's elements to be transformed or converted int #### Convert to array -- `toArray`: Converts the Collection into an array. 
+* `toArray`: Converts the Collection into an array. ``` KeyPreservation::DISCARD: Converts while discarding the keys. @@ -391,13 +342,13 @@ These methods allow the Collection's elements to be transformed or converted int ```php use TinyBlocks\Mapper\KeyPreservation; - + $collection->toArray(keyPreservation: KeyPreservation::DISCARD); ``` #### Convert to JSON -- `toJson`: Converts the Collection into a JSON string. +* `toJson`: Converts the Collection into a JSON string. ``` KeyPreservation::DISCARD: Converts while discarding the keys. @@ -408,139 +359,10 @@ These methods allow the Collection's elements to be transformed or converted int ```php use TinyBlocks\Mapper\KeyPreservation; - + $collection->toJson(keyPreservation: KeyPreservation::DISCARD); ``` -
- -## Evaluation strategies - -The complexity of every operation in this library is determined by the evaluation strategy chosen at creation time. -Calling `createFrom`, `createFromEmpty`, or `createFromClosure` produces a collection backed by an `EagerPipeline`. -Calling `createLazyFrom`, `createLazyFromEmpty`, or `createLazyFromClosure` produces a collection backed by a -`LazyPipeline`. All subsequent operations on that collection inherit the behavior of the chosen pipeline. - -This is analogous to how `java.util.ArrayList` and `java.util.LinkedList` both implement `java.util.List`, but each -operation has different costs depending on which concrete class backs the list. - -### Eager pipeline - -When the collection is created eagerly, elements are stored in a plain PHP array. This array is the source of truth -for all operations. - -**Creation.** Factory methods like `createFrom` call `iterator_to_array` on the input, consuming all elements -immediately. Time: O(n). Space: O(n). - -**Transforming operations.** Every call to a transforming method (`add`, `filter`, `map`, `sort`, etc.) calls -`pipe()` internally, which executes `iterator_to_array($operation->apply($this->elements))`. This means the -operation is applied to all elements immediately and the result is stored in a new array. The time cost depends -on the operation (O(n) for filter, O(n log n) for sort), and the space cost is always O(n) because a new array -is allocated. - -**Access operations.** Methods like `count`, `first`, `last`, and `getBy` read the internal array directly. -`count` calls PHP's native `count()` on the array. `first` and `last` use `array_key_first` and `array_key_last`. -`getBy` uses `array_key_exists`. All are O(1) time and O(1) space. - -**Terminal operations.** Methods like `contains`, `reduce`, `each`, `equals`, and `findBy` iterate over the -collection. Since the elements are already materialized, the iteration itself is O(n). No additional -materialization cost is incurred. 
- -### Lazy pipeline - -When the collection is created lazily, nothing is computed at creation time. The source (iterable or closure) is -stored by reference, and operations are accumulated as stages in an array. - -**Creation.** Factory methods like `createLazyFrom` store a reference to the iterable. `createLazyFromClosure` -stores the closure without invoking it. Time: O(1). Space: O(1). - -**Transforming operations.** Every call to a transforming method calls `pipe()`, which appends the operation to -the internal `$stages` array. No elements are processed. Time: O(1). Space: O(1). The actual cost is deferred -to the moment the collection is consumed. - -**Consumption.** When the collection is iterated (explicitly or through `count`, `toArray`, `reduce`, etc.), -`process()` is called. It invokes the source closure (if applicable), then chains all stages into a generator -pipeline. Elements flow one at a time through every stage: each element passes through stage 0, then stage 1, -then stage 2, and so on, before the next element enters the pipeline. For k streaming stages, total time is -O(n * k). - -**Access operations.** `count` calls `iterator_count`, which consumes the entire generator: O(n). `first` and -`isEmpty` yield one element from the generator: O(1). `last` and `getBy` iterate the generator: O(n) worst case. - -**Barrier operations.** Most operations are streaming: they process one element at a time without accumulating -state. Two operations are exceptions. `sort` must consume all input (via `iterator_to_array`), sort it, then -yield the sorted result: O(n log n) time, O(n) space. `groupBy` must accumulate all elements into a groups -array, then yield: O(n) time, O(n) space. When a barrier exists in a lazy pipeline, it forces full evaluation -of all preceding stages before any subsequent stage can process an element. 
This means that calling `first()` -on a lazy collection that has a `sort()` in its pipeline still costs O(n log n), because the sort barrier must -consume everything first. - -### Complexity reference - -The table below summarizes the time and space complexity of each method under both strategies. Each value was -derived by tracing the execution path from `Collection` through the `Pipeline` into the underlying `Operation`. -The column "Why" references the pipeline behavior described above. - -#### Factory methods - -| Method | Time | Space | Why | -|-------------------------|------|-------|------------------------------------------------------| -| `createFrom` | O(n) | O(n) | Calls `iterator_to_array` on the input. | -| `createFromEmpty` | O(1) | O(1) | Creates an empty array. | -| `createFromClosure` | O(n) | O(n) | Invokes the closure, then calls `iterator_to_array`. | -| `createLazyFrom` | O(1) | O(1) | Stores the iterable reference without iterating. | -| `createLazyFromEmpty` | O(1) | O(1) | Stores an empty array reference. | -| `createLazyFromClosure` | O(1) | O(1) | Stores the closure without invoking it. | - -#### Transforming methods - -For lazy collections, all transforming methods are O(1) time and O(1) space at call time because `pipe()` only -appends a stage. The cost shown below is for eager collections, where `pipe()` materializes immediately. - -| Method | Time | Space | Why | -|-------------|------------|----------|------------------------------------------------------------------------------------------| -| `add` | O(n + m) | O(n + m) | Yields all existing elements, then the m new ones. | -| `merge` | O(n + m) | O(n + m) | Yields all elements from both collections. | -| `filter` | O(n) | O(n) | Tests each element against the predicate. | -| `map` | O(n * t) | O(n) | Applies t transformations to each element. | -| `flatten` | O(n + s) | O(n + s) | Iterates each element; expands nested iterables by one level. s = total nested elements. 
| -| `remove` | O(n) | O(n) | Tests each element for equality. | -| `removeAll` | O(n) | O(n) | Tests each element against the predicate. | -| `sort` | O(n log n) | O(n) | Materializes all elements, sorts via `uasort` or `ksort`, then yields. Barrier. | -| `slice` | O(n) | O(n) | Iterates up to offset + length elements. | -| `groupBy` | O(n) | O(n) | Accumulates all elements into a groups array, then yields. Barrier. | - -#### Access methods - -These delegate directly to the pipeline. The cost differs between eager and lazy because eager reads the -internal array, while lazy must evaluate the generator. - -| Method | Eager | Lazy | Why | -|-----------|-------|------|------------------------------------------------------------------------| -| `count` | O(1) | O(n) | Eager: `count($array)`. Lazy: `iterator_count($generator)`. | -| `first` | O(1) | O(1) | Eager: `array_key_first`. Lazy: first yield from the generator. | -| `last` | O(1) | O(n) | Eager: `array_key_last`. Lazy: iterates all to reach the last element. | -| `getBy` | O(1) | O(n) | Eager: `array_key_exists`. Lazy: iterates until the index. | -| `isEmpty` | O(1) | O(1) | Checks if the first element exists. | - -#### Terminal methods - -These iterate the collection to produce a result. Since eager collections already hold a materialized array, the -iteration cost is the same for both strategies. - -| Method | Time | Space | Why | -|----------------|----------|-------|-----------------------------------------------------------------| -| `contains` | O(n) | O(1) | Iterates until the element is found or the end is reached. | -| `findBy` | O(n * p) | O(1) | Tests p predicates per element until a match. | -| `each` | O(n * a) | O(1) | Applies a actions to every element. | -| `equals` | O(n) | O(1) | Walks two generators in parallel, comparing element by element. | -| `reduce` | O(n) | O(1) | Folds all elements into a single carry value. 
| -| `joinToString` | O(n) | O(n) | Accumulates into an intermediate array, then calls `implode`. | -| `toArray` | O(n) | O(n) | Iterates all elements into a new array. | -| `toJson` | O(n) | O(n) | Calls `toArray`, then `json_encode`. | - -
- ## FAQ ### 01. Why is my iterator consumed after certain operations? @@ -566,20 +388,75 @@ recreate the `Collection`. ### 03. What is the difference between eager and lazy evaluation? -- **Eager evaluation** (`createFrom` / `createFromEmpty` / `createFromClosure`): Elements are materialized immediately - into an array, enabling constant-time access by index, count, first, last, and repeated iteration. - -- **Lazy evaluation** (`createLazyFrom` / `createLazyFromEmpty` / `createLazyFromClosure`): Elements are processed - on-demand through generators, consuming memory only as each element is yielded. Ideal for large datasets or pipelines - where not all elements need to be materialized. - -
+Both modes share the same execution model. Transforming operations append a stage to the pipeline at the call site +without iterating. Terminal operations run the fused pass over all chained stages. + +The difference is what each mode does at creation and after the fused pass completes: + +* **Eager** (`createFrom*`): the source is materialized into an array at creation. The first terminal call runs the + fused pass and caches the result. Subsequent terminal calls reuse the cache. +* **Lazy** (`createLazyFrom*`): the source is stored by reference. Every terminal call re-runs the entire pipeline. + +**Notation.** `n` = source size at the terminal call. `P` = total cost of the fused pass, equal to the sum of the +per-element contributions of every chained stage. For a pipeline of pure per-element stages, `P` is O(n · s), where +`s` is the number of stages. Non-linear stages (`sort`, `groupBy`) dominate `P`. + +#### Creation + +| Method | Eager | Lazy | +|-------------------------|-------------------------------------------------------------------|----------------------------------------------------------------| +| `createFrom` | O(n) time, O(n) space. Iterates the input once and stores it. | — | +| `createFromEmpty` | O(1) time, O(1) space. | — | +| `createFromClosure` | O(n) time, O(n) space. Invokes the factory and stores the result. | — | +| `createLazyFrom` | — | O(1) time, O(1) space. Stores the iterable by reference. | +| `createLazyFromEmpty` | — | O(1) time, O(1) space. | +| `createLazyFromClosure` | — | O(1) time, O(1) space. Stores the factory without invoking it. | + +#### Transforming + +Transforming methods append a pipeline stage at the call site and execute only during the fused pass. + +| Method | Call site (both modes) | Contribution to the fused pass | +|-------------|------------------------|------------------------------------------------------------------------------------------| +| `add` | O(1) time, O(1) space. 
| O(m) time, O(m) space, where `m` is the number of appended elements. | +| `merge` | O(1) time, O(1) space. | O(m) time, O(m) space, where `m` is the number of elements in the other collection. | +| `remove` | O(1) time, O(1) space. | O(n) time, O(1) space. | +| `removeAll` | O(1) time, O(1) space. | O(n) time, O(1) space. | +| `filter` | O(1) time, O(1) space. | O(n · p) time, O(1) space, where `p` is the number of predicates. | +| `flatten` | O(1) time, O(1) space. | O(n + s) time, O(1) space, where `s` is the total number of nested elements. | +| `map` | O(1) time, O(1) space. | O(n · t) time, O(1) space, where `t` is the number of transformations. | +| `slice` | O(1) time, O(1) space. | O(min(offset + length, n)) time, O(1) space. Short-circuits once the segment is emitted. | +| `groupBy` | O(1) time, O(1) space. | O(n) time, O(n) space. Buffers all groups before emitting. Breaks streaming. | +| `sort` | O(1) time, O(1) space. | O(n log n) time, O(n) space. Buffers all elements before emitting. Breaks streaming. | + +#### Terminal + +Terminal methods trigger the fused pass. Eager cells show **first call / subsequent calls** when they differ. +Subsequent calls read the cache without re-running the pipeline. + +| Method | Eager | Lazy | +|----------------|----------------------------------------------------------------------------------------------------|------------------------------------------------------------------| +| `count` | Amortized O(P) / O(1). | O(P) per call. Must reach the end. | +| `first` | Amortized O(P) / O(1). | O(P_first) per call. Short-circuits at the first element. | +| `last` | Amortized O(P) / O(1). | O(P) per call. Must reach the end. | +| `getBy` | Amortized O(P) / O(1). | O(P_index) per call. Short-circuits at the requested index. | +| `isEmpty` | Amortized O(P) / O(1). | O(P_first) per call. Short-circuits at the first element. | +| `contains` | O(P + n) / O(n). Short-circuits at the first match. | O(P) per call. 
Short-circuits at the first match. | +| `findBy` | O(P + n · p) / O(n · p), where `p` is the number of predicates. Short-circuits at the first match. | O(P + n · p) per call in the worst case, where `p` is the number of predicates. Short-circuits at the first match. | +| `each` | O(P + n · a) / O(n · a), where `a` is the number of actions. | O(P + n · a) per call. | +| `equals` | O(P + n) / O(n). Short-circuits at the first mismatch. | O(P + n) per call. Short-circuits at the first mismatch. | +| `joinToString` | O(P + n) / O(n) time, O(n) space. | O(P + n) per call. | +| `reduce` | O(P + n) / O(n) time, O(1) intermediate space. | O(P + n) per call. | +| `toArray` | O(P + n) / O(n) time, O(n) space. | O(P + n) per call. | +| `toJson` | O(P + n) / O(n) time, O(n) space. | O(P + n) per call. | + +Eager aggregation terminals iterate the cached array without re-running the pipeline. Lazy terminals re-run the +pipeline on every call. Eager indexing terminals (`count`, `first`, `last`, `getBy`, `isEmpty`) return in O(1) from +the cache after the first access. ## License -Collection is licensed under [MIT](LICENSE). -
+Collection is licensed under [MIT](https://github.com/tiny-blocks/collection/blob/main/LICENSE). ## Contributing diff --git a/composer.json b/composer.json index eb03678..d9caeb7 100644 --- a/composer.json +++ b/composer.json @@ -48,7 +48,7 @@ "tiny-blocks/mapper": "^2.0" }, "require-dev": { - "phpunit/phpunit": "^11.5", + "phpunit/phpunit": "^13.1", "phpstan/phpstan": "^2.1", "infection/infection": "^0.32", "tiny-blocks/currency": "^2.3", diff --git a/src/Collectible.php b/src/Collectible.php index 6fab550..cb71d3d 100644 --- a/src/Collectible.php +++ b/src/Collectible.php @@ -16,16 +16,32 @@ * * Two evaluation strategies are available: * - * - createFrom / createFromEmpty / createFromClosure: eager evaluation, materialized immediately. - * - createLazyFrom / createLazyFromEmpty / createLazyFromClosure: lazy evaluation via generators, on-demand. + * - createFrom / createFromEmpty / createFromClosure: eager evaluation. The source is materialized + * into an array immediately at creation time. The first terminal call runs all chained stages + * in a single fused pass and caches the result. Subsequent terminal calls reuse the cache. + * - createLazyFrom / createLazyFromEmpty / createLazyFromClosure: lazy evaluation via generators. + * The source is stored by reference. Every terminal call re-runs the entire pipeline from the source. + * + * Complexity notation used throughout this interface: + * + * - n = number of source elements at the time of the terminal call. + * - P = total time cost of running all chained transforming stages over n elements (the "fused pass"). + * For a pipeline of pure per-element stages, P is O(n * s) where s is the number of stages. + * Stages with non-linear contributions (e.g., `sort` is O(n log n)) dominate P. + * - "Call site" = cost paid when the method is invoked. + * - "Pass contribution" = cost this stage adds to P when a terminal operation later triggers the pass. 
+ * + * Streaming-breaking stages: `sort` and `groupBy` must buffer all elements before emitting any output. + * Any stage placed after them in the same pipeline cannot stream and will see the full buffered set. + * Place these stages last whenever possible. */ interface Collectible extends Countable, IteratorAggregate { /** * Creates a collection populated with the given elements using eager evaluation. * - * Elements are materialized immediately into an array, enabling - * constant-time access by index, count, and repeated iteration. + * Elements are materialized immediately into an array, enabling the fused-pass cache on the + * first terminal access. * * O(n) time, O(n) space. Iterates the input once and stores all elements. * @@ -46,9 +62,8 @@ public static function createFromEmpty(): static; /** * Creates a collection using eager evaluation from a closure that produces an iterable. * - * The closure is invoked once at creation time and its result is materialized - * immediately into an array, enabling constant-time access by index, count, - * and repeated iteration. + * The closure is invoked once at creation time and its result is materialized immediately + * into an array, enabling the fused-pass cache on the first terminal access. * * O(n) time, O(n) space. Invokes the closure and stores all yielded elements. * @@ -60,8 +75,7 @@ public static function createFromClosure(Closure $factory): static; /** * Creates a collection populated with the given elements using lazy evaluation. * - * Elements are processed on-demand through generators, consuming - * memory only as each element is yielded. + * Elements are processed on-demand through generators, consuming memory only as each element is yielded. * * O(1) time, O(1) space. Stores a reference to the iterable without iterating. * @@ -82,8 +96,8 @@ public static function createLazyFromEmpty(): static; /** * Creates a collection using lazy evaluation from a closure that produces an iterable. 
* - * The closure is invoked each time the collection is iterated, enabling - * safe re-iteration over generators or other single-use iterables. + * The closure is invoked each time the collection is iterated, enabling safe re-iteration over + * generators or other single-use iterables. * * O(1) time, O(1) space. Stores the closure without invoking it. * @@ -95,8 +109,8 @@ public static function createLazyFromClosure(Closure $factory): static; /** * Returns a new collection with the specified elements appended. * - * Eager: O(n + m) time, O(n + m) space. Materializes all existing and new elements. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(m) time, O(m) space (m = number of appended elements). * * @param mixed ...$elements The elements to append. * @return static A new collection with the additional elements. @@ -106,8 +120,8 @@ public function add(mixed ...$elements): static; /** * Merges the elements of another Collectible into the current Collection. * - * Eager: O(n + m) time, O(n + m) space. Materializes elements from both collections. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(m) time, O(m) space (m = number of elements in `other`). * * @param Collectible $other The collection to merge with. * @return static A new collection containing elements from both collections. @@ -119,7 +133,11 @@ public function merge(Collectible $other): static; * * Uses strict equality for scalars and loose equality for objects. * - * O(n) time, O(1) space. Iterates until the element is found or the end is reached. + * Eager: O(P + n) on first terminal call (triggers fused pass and scans the result). + * O(n) on subsequent calls (scans the cached result). Short-circuits when found. 
+ * O(n) cached space. + * Lazy: O(P) per call. The search is interleaved with the pass and short-circuits when found. + * O(1) intermediate space. * * @param mixed $element The element to search for. * @return bool True if the element exists, false otherwise. @@ -129,8 +147,9 @@ public function contains(mixed $element): bool; /** * Returns the total number of elements. * - * Eager: O(1) time, O(1) space. Reads the array length directly. - * Lazy: O(n) time, O(1) space. Must iterate all elements to count. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P) per call (must reach the end of the pipeline). O(1) intermediate space. * * @return int The element count. */ @@ -140,7 +159,10 @@ public function count(): int; * Finds the first element that satisfies any given predicate. * Without predicates, returns null. * - * O(n * p) time, O(1) space. Iterates until a match is found. p = number of predicates. + * Eager: O(P + n * p) on first terminal call (triggers fused pass and scans the result). + * O(n * p) on subsequent calls. Short-circuits when found. p = number of predicates. + * O(n) cached space. + * Lazy: O(P + n * p) per call in the worst case. Short-circuits when found. O(1) intermediate space. * * @param Closure ...$predicates Conditions to test each element against. * @return mixed The first matching element or null if no match is found. @@ -152,7 +174,9 @@ public function findBy(Closure ...$predicates): mixed; * * This is a terminal operation. The collection is not returned. * - * O(n * a) time, O(1) space. Iterates all elements. a = number of actions. + * Eager: O(P + n * a) on first terminal call. O(n * a) on subsequent calls (over cached result). + * O(n) cached space. a = number of actions. + * Lazy: O(P + n * a) per call. O(1) intermediate space. * * @param Closure ...$actions Actions to perform on each element.
*/ @@ -161,10 +185,12 @@ public function each(Closure ...$actions): void; /** * Compares this collection with another for element-wise equality. * - * Two collections are equal when they have the same size and every - * pair at the same position satisfies the equality comparison. + * Two collections are equal when they have the same size and every pair at the same position + * satisfies the equality comparison. * - * O(n) time, O(1) space. Walks both collections in parallel, comparing element by element. + * Eager: O(P + n) on first terminal call. O(n) on subsequent calls (over cached result). + * Short-circuits at the first mismatch. O(n) cached space. + * Lazy: O(P + n) per call. Short-circuits at the first mismatch. O(1) intermediate space. * * @param Collectible $other The collection to compare against. * @return bool True if both collections are element-wise equal. @@ -176,8 +202,8 @@ public function equals(Collectible $other): bool; * * All occurrences of the element are removed. * - * Eager: O(n) time, O(n) space. Materializes a new array excluding matches. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n) time, O(1) space. * * @param mixed $element The element to remove. * @return static A new collection without the specified element. @@ -188,8 +214,8 @@ public function remove(mixed $element): static; * Returns a new collection with all elements removed that satisfy the given predicate. * When no predicate is provided (i.e., $predicate is null), all elements are removed. * - * Eager: O(n) time, O(n) space. Materializes a new array excluding matches. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n) time, O(1) space. 
* * @param Closure|null $predicate Condition to determine which elements to remove. * @return static A new collection with the matching elements removed. @@ -201,8 +227,8 @@ public function removeAll(?Closure $predicate = null): static; * * Without predicates, falsy values are removed. * - * Eager: O(n) time, O(n) space. Materializes a new array with matching elements. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n * p) time, O(1) space (p = number of predicates). * * @param Closure|null ...$predicates Conditions each element must meet. * @return static A new collection with only the matching elements. @@ -212,8 +238,9 @@ public function filter(?Closure ...$predicates): static; /** * Returns the first element, or a default if the collection is empty. * - * Eager: O(1) time, O(1) space. Direct array access via array_key_first. - * Lazy: O(1) time, O(1) space. Yields once from the pipeline. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P_first) per call. Short-circuits at the first emitted element. O(1) intermediate space. * * @param mixed $defaultValueIfNotFound Value returned when the collection is empty. * @return mixed The first element or the default. @@ -223,8 +250,8 @@ public function first(mixed $defaultValueIfNotFound = null): mixed; /** * Flattens nested iterables by exactly one level. Non-iterable elements are yielded as-is. * - * Eager: O(n + s) time, O(n + s) space. s = total nested elements across all iterables. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n + s) time, O(1) space (s = total nested elements across all iterables). 
* * @return static A new collection with elements flattened by one level. */ @@ -233,8 +260,9 @@ public function flatten(): static; /** * Returns the element at the given zero-based index. * - * Eager: O(1) time, O(1) space. Direct array access via array_key_exists. - * Lazy: O(n) time, O(1) space. Iterates until the index is reached. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P_index) per call. Short-circuits at the requested position. O(1) intermediate space. * * @param int $index The zero-based position. * @param mixed $defaultValueIfNotFound Value returned when the index is out of bounds. @@ -245,11 +273,11 @@ public function getBy(int $index, mixed $defaultValueIfNotFound = null): mixed; /** * Groups elements by a key derived from each element. * - * The classifier receives each element and must return the group key. - * The resulting collection contains key to element-list pairs. + * The classifier receives each element and must return the group key. The resulting collection + * contains key-to-element-list pairs. * - * Eager: O(n) time, O(n) space. Materializes all groups into an associative array. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n) time, O(n) space. Buffers all groups before emitting. Breaks streaming. * * @param Closure $classifier Maps each element to its group key. * @return static A new collection of grouped elements. @@ -259,8 +287,9 @@ public function groupBy(Closure $classifier): static; /** * Determines whether the collection has no elements. * - * Eager: O(1) time, O(1) space. Checks the first yield from the materialized array. - * Lazy: O(1) time, O(1) space. Yields once from the pipeline. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. 
+ * Lazy: O(P_first) per call. Short-circuits at the first emitted element. O(1) intermediate space. * * @return bool True if the collection is empty. */ @@ -269,7 +298,9 @@ public function isEmpty(): bool; /** * Joins all elements into a string with the given separator. * - * O(n) time, O(n) space. Accumulates all elements into an intermediate array, then implodes. + * Eager: O(P + n) on first terminal call. O(n) on subsequent calls (over cached result). + * O(n) cached space plus O(n) for the resulting string. + * Lazy: O(P + n) per call. O(n) for the resulting string. * * @param string $separator The delimiter placed between each element. * @return string The concatenated result. @@ -279,8 +310,9 @@ public function joinToString(string $separator): string; /** * Returns the last element, or a default if the collection is empty. * - * Eager: O(1) time, O(1) space. Direct array access via array_key_last. - * Lazy: O(n) time, O(1) space. Must iterate all elements to find the last. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P) per call. Must reach the end of the pipeline. O(1) intermediate space. * * @param mixed $defaultValueIfNotFound Value returned when the collection is empty. * @return mixed The last element or the default. @@ -292,8 +324,8 @@ public function last(mixed $defaultValueIfNotFound = null): mixed; * * Transformations are applied in order. Each receives the current value and key. * - * Eager: O(n * t) time, O(n) space. Materializes all transformed elements. t = number of transformations. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n * t) time, O(1) space (t = number of transformations). * * @param Closure ...$transformations Functions applied to each element. * @return static A new collection with the transformed elements. 
@@ -305,7 +337,9 @@ public function map(Closure ...$transformations): static; * * The accumulator receives the carry and the current element. * - * O(n) time, O(1) space. Iterates all elements, maintaining a single carry value. + * Eager: O(P + n) on first terminal call. O(n) on subsequent calls (over cached result). + * O(n) cached space. + * Lazy: O(P + n) per call. O(1) intermediate space (single carry value). * * @param Closure $accumulator Combines the carry with each element. * @param mixed $initial The starting value for the accumulation. @@ -318,8 +352,9 @@ public function reduce(Closure $accumulator, mixed $initial): mixed; * * Without a comparator, the spaceship operator is used. * - * Eager: O(n log n) time, O(n) space. Materializes and sorts all elements. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(n log n) time, O(n) space. Buffers all elements before emitting any output. + * Breaks streaming for any stage placed after `sort` in the same pipeline. * * @param Order $order The sorting direction. * @param Closure|null $comparator Custom comparison function. @@ -330,8 +365,10 @@ public function sort(Order $order = Order::ASCENDING_KEY, ?Closure $comparator = /** * Extracts a contiguous segment of the collection. * - * Eager: O(n) time, O(n) space. Materializes the segment into a new array. - * Lazy: O(1) time, O(1) space. Appends a pipeline stage without iterating. + * Call site: O(1) time, O(1) space. Appends a pipeline stage in both eager and lazy modes. + * Pass contribution: O(min(offset + length, n)) time, O(1) space. Iteration short-circuits once + * the segment is fully emitted. An early `slice(0, k)` against a generator source can avoid + * touching the rest. * * @param int $offset Zero-based starting position. * @param int $length Number of elements to include. Use -1 for "until the end".
@@ -342,7 +379,9 @@ public function slice(int $offset, int $length = -1): static; /** * Converts the Collection to an array. * - * O(n) time, O(n) space. Iterates all elements and stores them in an array. + * Eager: O(P + n) on first terminal call. O(n) on subsequent calls (over cached result). + * O(n) cached space. + * Lazy: O(P + n) per call. O(n) for the resulting array. * * The key preservation behavior should be provided from the `KeyPreservation` enum: * - {@see KeyPreservation::PRESERVE}: Preserves the array keys. @@ -358,7 +397,9 @@ public function toArray(KeyPreservation $keyPreservation = KeyPreservation::PRES /** * Converts the Collection to a JSON string. * - * O(n) time, O(n) space. Converts to array, then encodes to JSON. + * Eager: O(P + n) on first terminal call. O(n) on subsequent calls (over cached result). + * O(n) cached space plus O(n) for the JSON string. + * Lazy: O(P + n) per call. O(n) for the JSON string. * * The key preservation behavior should be provided from the `KeyPreservation` enum: * - {@see KeyPreservation::PRESERVE}: Preserves the array keys. 
diff --git a/src/Collection.php b/src/Collection.php index bdad92d..1661e0c 100644 --- a/src/Collection.php +++ b/src/Collection.php @@ -67,7 +67,7 @@ public static function createLazyFromClosure(Closure $factory): static public function getIterator(): Traversable { - yield from $this->pipeline->process(); + return $this->pipeline->process(); } public function add(mixed ...$elements): static @@ -82,7 +82,7 @@ public function merge(Collectible $other): static public function contains(mixed $element): bool { - return Equality::exists(elements: $this, element: $element); + return Equality::exists(elements: $this->pipeline->process(), element: $element); } public function count(): int @@ -92,17 +92,17 @@ public function count(): int public function findBy(Closure ...$predicates): mixed { - return Find::firstMatch(elements: $this, predicates: $predicates); + return Find::firstMatch(elements: $this->pipeline->process(), predicates: $predicates); } public function each(Closure ...$actions): void { - Each::execute(elements: $this, actions: $actions); + Each::execute(elements: $this->pipeline->process(), actions: $actions); } public function equals(Collectible $other): bool { - return Equality::compareAll(elements: $this, other: $other); + return Equality::compareAll(elements: $this->pipeline->process(), other: $other); } public function remove(mixed $element): static @@ -147,7 +147,7 @@ public function isEmpty(): bool public function joinToString(string $separator): string { - return Join::elements(elements: $this, separator: $separator); + return Join::elements(elements: $this->pipeline->process(), separator: $separator); } public function last(mixed $defaultValueIfNotFound = null): mixed @@ -162,7 +162,7 @@ public function map(Closure ...$transformations): static public function reduce(Closure $accumulator, mixed $initial): mixed { - return Reduce::from(elements: $this, accumulator: $accumulator, initial: $initial); + return Reduce::from(elements: 
$this->pipeline->process(), accumulator: $accumulator, initial: $initial); } public function sort(Order $order = Order::ASCENDING_KEY, ?Closure $comparator = null): static diff --git a/src/Internal/EagerPipeline.php b/src/Internal/EagerPipeline.php index 5360361..7ec4c75 100644 --- a/src/Internal/EagerPipeline.php +++ b/src/Internal/EagerPipeline.php @@ -8,66 +8,90 @@ use Generator; use TinyBlocks\Collection\Internal\Operations\Operation; -final readonly class EagerPipeline implements Pipeline +final class EagerPipeline implements Pipeline { - private function __construct(private array $elements) - { + private ?array $cache = null; + + private function __construct( + private readonly array $source, + private readonly array $stages = [] + ) { } public static function from(iterable $source): EagerPipeline { $elements = is_array($source) ? $source : iterator_to_array($source); - return new EagerPipeline(elements: $elements); + return new EagerPipeline(source: $elements); } public static function fromClosure(Closure $factory): EagerPipeline { $elements = iterator_to_array($factory()); - return new EagerPipeline(elements: $elements); + return new EagerPipeline(source: $elements); } public function pipe(Operation $operation): Pipeline { - $elements = iterator_to_array($operation->apply(elements: $this->elements)); + $stages = $this->stages; + $stages[] = $operation; - return new EagerPipeline(elements: $elements); + return new EagerPipeline(source: $this->source, stages: $stages); } public function count(): int { - return count($this->elements); + return count($this->materialize()); } - public function first(mixed $defaultValueIfNotFound = null): mixed + public function isEmpty(): bool { - return empty($this->elements) - ? 
$defaultValueIfNotFound - : $this->elements[array_key_first($this->elements)]; + return $this->materialize() === []; } - public function isEmpty(): bool + public function first(mixed $defaultValueIfNotFound = null): mixed { - return empty($this->elements); + $elements = $this->materialize(); + + return $elements === [] + ? $defaultValueIfNotFound + : $elements[array_key_first($elements)]; } public function last(mixed $defaultValueIfNotFound = null): mixed { - return empty($this->elements) + $elements = $this->materialize(); + + return $elements === [] ? $defaultValueIfNotFound - : $this->elements[array_key_last($this->elements)]; + : $elements[array_key_last($elements)]; } public function getBy(int $index, mixed $defaultValueIfNotFound = null): mixed { - return array_key_exists($index, $this->elements) - ? $this->elements[$index] + $elements = $this->materialize(); + + return array_key_exists($index, $elements) + ? $elements[$index] : $defaultValueIfNotFound; } public function process(): Generator { - yield from $this->elements; + yield from $this->materialize(); + } + + private function materialize(): array + { + if (is_null($this->cache)) { + $elements = $this->source; + foreach ($this->stages as $stage) { + $elements = $stage->apply(elements: $elements); + } + $this->cache = is_array($elements) ? 
$elements : iterator_to_array($elements); + } + + return $this->cache; } } diff --git a/src/Internal/Operations/Transforming/Filter.php b/src/Internal/Operations/Transforming/Filter.php index cb3c025..7e07862 100644 --- a/src/Internal/Operations/Transforming/Filter.php +++ b/src/Internal/Operations/Transforming/Filter.php @@ -16,13 +16,12 @@ private function __construct(?Closure ...$predicates) { $filtered = array_filter($predicates); - $this->compiledPredicate = match (count($filtered)) { - 0 => static fn(mixed $value, mixed $key): bool => (bool)$value, - default => static fn(mixed $value, mixed $key): bool => array_all( + $this->compiledPredicate = $filtered === [] + ? static fn(mixed $value, mixed $key): bool => (bool)$value + : static fn(mixed $value, mixed $key): bool => array_all( $filtered, static fn(Closure $predicate): bool => $predicate($value, $key) - ), - }; + ); } public static function matching(?Closure ...$predicates): Filter diff --git a/src/Internal/Pipeline.php b/src/Internal/Pipeline.php index c607625..cfc1384 100644 --- a/src/Internal/Pipeline.php +++ b/src/Internal/Pipeline.php @@ -13,6 +13,13 @@ * * The evaluation strategy (lazy or eager) is determined by the * concrete implementation, encapsulating the Strategy pattern. + * + * Complexity notation used throughout this interface: + * + * - n = number of source elements at the time of the terminal call. + * - P = total cost of running all chained stages over n elements (the "fused pass"). For a pipeline + * of pure per-element stages, P is O(n * s) where s is the number of stages. Stages with + * non-linear contributions (e.g., `sort` is O(n log n)) dominate P. */ interface Pipeline { @@ -22,6 +29,9 @@ interface Pipeline * Returns a new pipeline instance containing all previous stages * plus the given operation, preserving immutability. * + * Eager: O(1) time, O(1) space. Appends the stage. Materialization deferred to first terminal access. + * Lazy: O(1) time, O(1) space. 
Appends the stage without iterating. + * * @param Operation $operation The operation to append as the next stage. * @return Pipeline A new pipeline with the added stage. */ @@ -30,8 +40,9 @@ public function pipe(Operation $operation): Pipeline; /** * Returns the total number of elements in the pipeline. * - * Eager: O(1) time, O(1) space. Direct array count. - * Lazy: O(n) time, O(1) space. Must iterate all elements. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P) per call (must reach the end of the pipeline). O(1) intermediate space. * * @return int The element count. */ @@ -40,8 +51,9 @@ public function count(): int; /** * Returns the first element, or a default if empty. * - * Eager: O(1) time, O(1) space. Direct array access via array_key_first. - * Lazy: O(1) time, O(1) space. Yields once from the pipeline. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P_first) per call. Short-circuits at the first emitted element. O(1) intermediate space. * * @param mixed $defaultValueIfNotFound Value returned when empty. * @return mixed The first element or the default. @@ -51,8 +63,9 @@ public function first(mixed $defaultValueIfNotFound = null): mixed; /** * Determines whether the pipeline has no elements. * - * Eager: O(1) time, O(1) space. Checks if the array is empty. - * Lazy: O(1) time, O(1) space. Checks if the generator produces a value. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P_first) per call. Short-circuits at the first emitted element. O(1) intermediate space. * * @return bool True if the pipeline is empty. */ @@ -61,8 +74,9 @@ public function isEmpty(): bool; /** * Returns the last element, or a default if empty. * - * Eager: O(1) time, O(1) space. Direct array access via array_key_last. - * Lazy: O(n) time, O(1) space. 
Must iterate all elements. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P) per call. Must reach the end of the pipeline. O(1) intermediate space. * * @param mixed $defaultValueIfNotFound Value returned when empty. * @return mixed The last element or the default. @@ -72,8 +86,9 @@ public function last(mixed $defaultValueIfNotFound = null): mixed; /** * Returns the element at the given zero-based index. * - * Eager: O(1) time, O(1) space. Direct array access via array_key_exists. - * Lazy: O(n) time, O(1) space. Must iterate up to the index. + * Eager: amortized O(P) on first terminal call. O(1) on subsequent calls (cached). + * O(n) cached space. + * Lazy: O(P_index) per call. Short-circuits at the requested position. O(1) intermediate space. * * @param int $index The zero-based position. * @param mixed $defaultValueIfNotFound Value returned when the index is out of bounds. @@ -84,6 +99,10 @@ public function getBy(int $index, mixed $defaultValueIfNotFound = null): mixed; /** * Executes all accumulated stages and yields the resulting elements. * + * Eager: amortized O(P) on first terminal call. O(n) on subsequent calls (over cached result). + * O(n) cached space. + * Lazy: O(P) per iteration. O(1) intermediate space. + * * @return Generator A generator producing the processed elements. 
*/ public function process(): Generator; diff --git a/tests/EagerCollectionTest.php b/tests/EagerCollectionTest.php index d0f32f5..fbf494f 100644 --- a/tests/EagerCollectionTest.php +++ b/tests/EagerCollectionTest.php @@ -639,6 +639,21 @@ public function testFilterPreservesKeys(): void self::assertSame(['b' => 2, 'c' => 3], $actual->toArray()); } + public function testFilterWithMultiplePredicatesRetainsOnlyMatchingAll(): void + { + /** @Given an eager collection of integers */ + $collection = Collection::createFrom(elements: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + + /** @When filtering with two predicates: greater than 3 and even */ + $actual = $collection->filter( + static fn(int $value): bool => $value > 3, + static fn(int $value): bool => $value % 2 === 0 + ); + + /** @Then only elements satisfying both predicates should remain */ + self::assertSame([4, 6, 8, 10], $actual->toArray(keyPreservation: KeyPreservation::DISCARD)); + } + public function testLastReturnsElement(): void { /** @Given an eager collection with three elements */ diff --git a/tests/LazyCollectionTest.php b/tests/LazyCollectionTest.php index 3d1c409..84b3114 100644 --- a/tests/LazyCollectionTest.php +++ b/tests/LazyCollectionTest.php @@ -613,6 +613,21 @@ public function testFilterPreservesKeys(): void self::assertSame(['b' => 2, 'c' => 3], $actual->toArray()); } + public function testFilterWithMultiplePredicatesRetainsOnlyMatchingAll(): void + { + /** @Given a lazy collection of integers */ + $collection = Collection::createLazyFrom(elements: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + + /** @When filtering with two predicates: greater than 3 and even */ + $actual = $collection->filter( + static fn(int $value): bool => $value > 3, + static fn(int $value): bool => $value % 2 === 0 + ); + + /** @Then only elements satisfying both predicates should remain */ + self::assertSame([4, 6, 8, 10], $actual->toArray(keyPreservation: KeyPreservation::DISCARD)); + } + public function testLastReturnsElement(): void { 
/** @Given a lazy collection with three elements */ From 7cdc2cf24ea5595e47c849d542a2f07c33dbaf4a Mon Sep 17 00:00:00 2001 From: Gustavo Freze Date: Thu, 16 Apr 2026 10:59:15 -0300 Subject: [PATCH 2/2] refactor: Introduce Materialization class and optimize pipeline operations. --- README.md | 86 ++++++++++++++++++- src/Internal/EagerPipeline.php | 44 +++------- src/Internal/Materialization.php | 37 ++++++++ .../Operations/Transforming/Segment.php | 38 ++++---- tests/EagerCollectionTest.php | 14 ++- tests/LazyCollectionTest.php | 14 ++- 6 files changed, 177 insertions(+), 56 deletions(-) create mode 100644 src/Internal/Materialization.php diff --git a/README.md b/README.md index 821ef0f..5ddd501 100644 --- a/README.md +++ b/README.md @@ -104,10 +104,16 @@ These methods enable adding, removing, and modifying elements in the Collection. * `add`: Returns a new collection with the specified elements appended. ```php - $collection->add(1, 2, 3); + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); + $collection->add(4, 5, 6); ``` ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFromEmpty(); $collection->add('X', 'Y', 'Z'); ``` @@ -116,6 +122,10 @@ These methods enable adding, removing, and modifying elements in the Collection. * `merge`: Merges the elements of another Collectible into the current Collection. ```php + use TinyBlocks\Collection\Collection; + + $collectionA = Collection::createFrom(elements: [1, 2]); + $collectionB = Collection::createFrom(elements: [3, 4]); $collectionA->merge(other: $collectionB); ``` @@ -124,6 +134,9 @@ These methods enable adding, removing, and modifying elements in the Collection. * `remove`: Returns a new collection with all occurrences of the specified element removed. 
```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->remove(element: 1); ``` * `removeAll`: Returns a new collection with elements removed. @@ -131,11 +144,17 @@ These methods enable adding, removing, and modifying elements in the Collection. + **With a predicate**: Removes only the elements that satisfy the given predicate. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: $amounts); $collection->removeAll(predicate: static fn(Amount $amount): bool => $amount->value > 10.0); ``` + **Without a predicate**: Removes all elements from the Collection. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->removeAll(); ``` @@ -150,11 +169,17 @@ These methods enable filtering elements in the Collection based on specific cond + **With predicates**: Retains elements that satisfy the provided predicates. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: $amounts); $collection->filter(predicates: static fn(Amount $amount): bool => $amount->value > 100); ``` + **Without predicates**: Removes all falsy values (e.g., `null`, `false`, `0`, `''`, empty arrays). ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [0, 1, null, 2, '', 3]); $collection->filter(); ``` @@ -176,16 +201,20 @@ These methods enable sorting elements in the Collection based on the specified o By default, `Order::ASCENDING_KEY` is used. ```php + use TinyBlocks\Collection\Collection; use TinyBlocks\Collection\Order; + $collection = Collection::createFrom(elements: [3, 1, 2]); $collection->sort(order: Order::DESCENDING_VALUE); ``` Sort the Collection using a custom comparator to determine how elements should be compared. 
```php + use TinyBlocks\Collection\Collection; use TinyBlocks\Collection\Order; + $collection = Collection::createFrom(elements: $amounts); $collection->sort( order: Order::ASCENDING_VALUE, comparator: static fn(Amount $first, Amount $second): int => $first->value <=> $second->value @@ -202,6 +231,9 @@ elements, or finding elements that match a specific condition. * `count`: Returns the total number of elements in the Collection. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->count(); ``` @@ -210,6 +242,9 @@ elements, or finding elements that match a specific condition. * `isEmpty`: Determines whether the collection has no elements. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFromEmpty(); $collection->isEmpty(); ``` @@ -219,6 +254,9 @@ elements, or finding elements that match a specific condition. When called without predicates, it returns `null`. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: $cryptos); $collection->findBy(predicates: static fn(CryptoCurrency $crypto): bool => $crypto->symbol === 'ETH'); ``` @@ -227,16 +265,25 @@ elements, or finding elements that match a specific condition. * `first`: Retrieves the first element from the Collection or returns a default value if the Collection is empty. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->first(defaultValueIfNotFound: 'fallback'); ``` * `getBy`: Retrieves an element by its zero-based index or returns a default value if the index is out of bounds. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->getBy(index: 0, defaultValueIfNotFound: 'fallback'); ``` * `last`: Retrieves the last element from the Collection or returns a default value if the Collection is empty. 
```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->last(defaultValueIfNotFound: 'fallback'); ``` @@ -247,6 +294,9 @@ elements, or finding elements that match a specific condition. If length is not provided or set to -1, it returns all elements from the specified offset to the end. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3, 4, 5]); $collection->slice(offset: 1, length: 2); ``` @@ -260,6 +310,9 @@ These methods enable comparing collections to check for equality or to verify el for objects. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->contains(element: 5); ``` @@ -268,6 +321,10 @@ These methods enable comparing collections to check for equality or to verify el * `equals`: Compares the current Collection with another collection for element-wise equality. ```php + use TinyBlocks\Collection\Collection; + + $collectionA = Collection::createFrom(elements: [1, 2, 3]); + $collectionB = Collection::createFrom(elements: [1, 2, 3]); $collectionA->equals(other: $collectionB); ``` @@ -280,6 +337,9 @@ combining elements. initial value. This method is helpful for accumulating results, like summing or concatenating values. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [10.0, 20.0, 30.0]); $collection->reduce( accumulator: static fn(float $carry, float $amount): float => $carry + $amount, initial: 0.0 @@ -288,6 +348,9 @@ combining elements. * `joinToString`: Joins all elements into a string with the given separator. 
```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: ['a', 'b', 'c']); $collection->joinToString(separator: ', '); ``` @@ -301,7 +364,13 @@ These methods allow the Collection's elements to be transformed or converted int The method is helpful for performing side effects, such as logging or accumulating values. ```php - $collection->each(actions: static fn(Amount $amount): void => $total += $amount->value); + use TinyBlocks\Collection\Collection; + + $total = 0.0; + $collection = Collection::createFrom(elements: $amounts); + $collection->each(actions: static function (Amount $amount) use (&$total): void { + $total += $amount->value; + }); ``` #### Grouping elements @@ -309,6 +378,9 @@ These methods allow the Collection's elements to be transformed or converted int * `groupBy`: Groups the elements in the Collection based on the provided classifier. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: $amounts); $collection->groupBy(classifier: static fn(Amount $amount): string => $amount->currency->name); ``` @@ -318,6 +390,9 @@ These methods allow the Collection's elements to be transformed or converted int elements. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->map(transformations: static fn(int $value): int => $value * 2); ``` @@ -326,6 +401,9 @@ These methods allow the Collection's elements to be transformed or converted int * `flatten`: Flattens nested iterables by exactly one level. Non-iterable elements are yielded as-is. ```php + use TinyBlocks\Collection\Collection; + + $collection = Collection::createFrom(elements: [[1, 2], [3, 4], 5]); $collection->flatten(); ``` @@ -341,8 +419,10 @@ These methods allow the Collection's elements to be transformed or converted int By default, `KeyPreservation::PRESERVE` is used. 
```php + use TinyBlocks\Collection\Collection; use TinyBlocks\Mapper\KeyPreservation; + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->toArray(keyPreservation: KeyPreservation::DISCARD); ``` @@ -358,8 +438,10 @@ These methods allow the Collection's elements to be transformed or converted int By default, `KeyPreservation::PRESERVE` is used. ```php + use TinyBlocks\Collection\Collection; use TinyBlocks\Mapper\KeyPreservation; + $collection = Collection::createFrom(elements: [1, 2, 3]); $collection->toJson(keyPreservation: KeyPreservation::DISCARD); ``` diff --git a/src/Internal/EagerPipeline.php b/src/Internal/EagerPipeline.php index 7ec4c75..da9a945 100644 --- a/src/Internal/EagerPipeline.php +++ b/src/Internal/EagerPipeline.php @@ -8,51 +8,46 @@ use Generator; use TinyBlocks\Collection\Internal\Operations\Operation; -final class EagerPipeline implements Pipeline +final readonly class EagerPipeline implements Pipeline { - private ?array $cache = null; - - private function __construct( - private readonly array $source, - private readonly array $stages = [] - ) { + private function __construct(private Materialization $materialization) + { } public static function from(iterable $source): EagerPipeline { $elements = is_array($source) ? 
$source : iterator_to_array($source); - return new EagerPipeline(source: $elements); + return new EagerPipeline(materialization: Materialization::from(source: $elements, stages: [])); } public static function fromClosure(Closure $factory): EagerPipeline { $elements = iterator_to_array($factory()); - return new EagerPipeline(source: $elements); + return new EagerPipeline(materialization: Materialization::from(source: $elements, stages: [])); } public function pipe(Operation $operation): Pipeline { - $stages = $this->stages; - $stages[] = $operation; + $elements = $this->materialization->elements(); - return new EagerPipeline(source: $this->source, stages: $stages); + return new EagerPipeline(materialization: Materialization::from(source: $elements, stages: [$operation])); } public function count(): int { - return count($this->materialize()); + return count($this->materialization->elements()); } public function isEmpty(): bool { - return $this->materialize() === []; + return $this->materialization->elements() === []; } public function first(mixed $defaultValueIfNotFound = null): mixed { - $elements = $this->materialize(); + $elements = $this->materialization->elements(); return $elements === [] ? $defaultValueIfNotFound @@ -61,7 +56,7 @@ public function first(mixed $defaultValueIfNotFound = null): mixed public function last(mixed $defaultValueIfNotFound = null): mixed { - $elements = $this->materialize(); + $elements = $this->materialization->elements(); return $elements === [] ? $defaultValueIfNotFound @@ -70,7 +65,7 @@ public function last(mixed $defaultValueIfNotFound = null): mixed public function getBy(int $index, mixed $defaultValueIfNotFound = null): mixed { - $elements = $this->materialize(); + $elements = $this->materialization->elements(); return array_key_exists($index, $elements) ? 
$elements[$index] @@ -79,19 +74,6 @@ public function getBy(int $index, mixed $defaultValueIfNotFound = null): mixed public function process(): Generator { - yield from $this->materialize(); - } - - private function materialize(): array - { - if (is_null($this->cache)) { - $elements = $this->source; - foreach ($this->stages as $stage) { - $elements = $stage->apply(elements: $elements); - } - $this->cache = is_array($elements) ? $elements : iterator_to_array($elements); - } - - return $this->cache; + yield from $this->materialization->elements(); } } diff --git a/src/Internal/Materialization.php b/src/Internal/Materialization.php new file mode 100644 index 0000000..301206a --- /dev/null +++ b/src/Internal/Materialization.php @@ -0,0 +1,37 @@ +<?php + +declare(strict_types=1); + +namespace TinyBlocks\Collection\Internal; + +use TinyBlocks\Collection\Internal\Operations\Operation; + +final class Materialization +{ + private ?array $cache = null; + + private function __construct(private readonly array $source, private readonly array $stages) + { + } + + public static function from(array $source, array $stages): Materialization + { + return new Materialization(source: $source, stages: $stages); + } + + public function elements(): array + { + if (is_null($this->cache)) { + $elements = $this->source; + + foreach ($this->stages as $stage) { + /** @var Operation $stage */ + $elements = $stage->apply(elements: $elements); + } + + $this->cache = is_array($elements) ? $elements : iterator_to_array($elements); + } + + return $this->cache; + } +} diff --git a/src/Internal/Operations/Transforming/Segment.php b/src/Internal/Operations/Transforming/Segment.php index b8c8f62..48fb5a6 100644 --- a/src/Internal/Operations/Transforming/Segment.php +++ b/src/Internal/Operations/Transforming/Segment.php @@ -26,7 +26,23 @@ public function apply(iterable $elements): Generator } if ($this->length < -1) { - yield from $this->withTrailingBuffer($elements); + $buffer = new SplQueue(); + $skipFromEnd = abs($this->length); + $currentIndex = 0; + + foreach ($elements as $key => $value) { + if ($currentIndex++ < $this->offset) { + continue; + } + + $buffer->enqueue([$key, $value]); + + if ($buffer->count() > $skipFromEnd) { + [$yieldKey, $yieldValue] = $buffer->dequeue(); + yield $yieldKey => $yieldValue; + } + } + return; } @@ -46,24 +62,4 @@ public function apply(iterable $elements): Generator } } } - - private function withTrailingBuffer(iterable $elements): Generator - { - $buffer = new SplQueue(); -
$skipFromEnd = abs($this->length); - $currentIndex = 0; - - foreach ($elements as $key => $value) { - if ($currentIndex++ < $this->offset) { - continue; - } - - $buffer->enqueue([$key, $value]); - - if ($buffer->count() > $skipFromEnd) { - [$yieldKey, $yieldValue] = $buffer->dequeue(); - yield $yieldKey => $yieldValue; - } - } - } } diff --git a/tests/EagerCollectionTest.php b/tests/EagerCollectionTest.php index fbf494f..05a3b64 100644 --- a/tests/EagerCollectionTest.php +++ b/tests/EagerCollectionTest.php @@ -1070,9 +1070,21 @@ public function testChainedOperationsWithIntegers(): void /** @And the last element should be 4 (square of 2) */ self::assertSame(4, $actual->last()); + } + + public function testReduceOverChainedOperationsWithIntegers(): void + { + /** @Given an eager collection of integers from 1 to 100 */ + $collection = Collection::createFrom(elements: range(1, 100)); + + /** @And the collection is filtered to even numbers, squared, and sorted descending */ + $pipeline = $collection + ->filter(predicates: static fn(int $value): bool => $value % 2 === 0) + ->map(transformations: static fn(int $value): int => $value ** 2) + ->sort(order: Order::DESCENDING_VALUE); /** @When reducing to calculate the sum of all squared even numbers */ - $sum = $actual->reduce( + $sum = $pipeline->reduce( accumulator: static fn(int $carry, int $value): int => $carry + $value, initial: 0 ); diff --git a/tests/LazyCollectionTest.php b/tests/LazyCollectionTest.php index 84b3114..2f6fff1 100644 --- a/tests/LazyCollectionTest.php +++ b/tests/LazyCollectionTest.php @@ -1044,9 +1044,21 @@ public function testChainedOperationsWithIntegers(): void /** @And the last element should be 4 (square of 2) */ self::assertSame(4, $actual->last()); + } + + public function testReduceOverChainedOperationsWithIntegers(): void + { + /** @Given a lazy collection of integers from 1 to 100 */ + $collection = Collection::createLazyFrom(elements: range(1, 100)); + + /** @And the collection is filtered 
to even numbers, squared, and sorted descending */ + $pipeline = $collection + ->filter(predicates: static fn(int $value): bool => $value % 2 === 0) + ->map(transformations: static fn(int $value): int => $value ** 2) + ->sort(order: Order::DESCENDING_VALUE); /** @When reducing to calculate the sum of all squared even numbers */ - $sum = $actual->reduce( + $sum = $pipeline->reduce( accumulator: static fn(int $carry, int $value): int => $carry + $value, initial: 0 );