From d8c618536728003b8d53597f91a5e40b47c79d70 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 03:57:12 +0000 Subject: [PATCH 1/4] feat: add Query::fingerprint() for shape-only query hashing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute a deterministic hash of query structure (method + attribute) with values excluded. Useful for grouping queries by pattern — e.g. slow-query analytics where two queries with the same shape but different parameter values should count as the same pattern. Accepts raw query strings or parsed Query objects. Order-independent. --- src/Database/Query.php | 35 +++++++++++++++++++++++++++++++++++ tests/unit/QueryTest.php | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/Database/Query.php b/src/Database/Query.php index 686a6ab37..267368399 100644 --- a/src/Database/Query.php +++ b/src/Database/Query.php @@ -419,6 +419,41 @@ public static function parseQueries(array $queries): array return $parsed; } + /** + * Compute a shape-only fingerprint of an array of queries. + * + * The fingerprint captures the structure of the queries — method and + * attribute — without values. Two query sets with the same shape but + * different parameter values produce the same fingerprint, which is + * useful for pattern-based counting and slow-query grouping. + * + * Accepts either raw query strings or parsed Query objects. + * + * @param array $queries + * @return string md5 hash of the canonical shape + * @throws QueryException + */ + public static function fingerprint(array $queries): string + { + $shapes = []; + + foreach ($queries as $query) { + if (\is_string($query)) { + $query = self::parse($query); + } + + if (!$query instanceof self) { + continue; + } + + $shapes[] = $query->getMethod() . ':' . 
$query->getAttribute(); + } + + \sort($shapes); + + return \md5(\implode('|', $shapes)); + } + /** * @return array */ diff --git a/tests/unit/QueryTest.php b/tests/unit/QueryTest.php index e23193ecb..be0f7511a 100644 --- a/tests/unit/QueryTest.php +++ b/tests/unit/QueryTest.php @@ -468,4 +468,39 @@ public function testNewQueryTypesInTypesArray(): void $this->assertContains(Query::TYPE_NOT_BETWEEN, Query::TYPES); $this->assertContains(Query::TYPE_ORDER_RANDOM, Query::TYPES); } + + public function testFingerprint(): void + { + $equalAlice = '{"method":"equal","attribute":"name","values":["Alice"]}'; + $equalBob = '{"method":"equal","attribute":"name","values":["Bob"]}'; + $equalEmail = '{"method":"equal","attribute":"email","values":["a@b.c"]}'; + $notEqualAlice = '{"method":"notEqual","attribute":"name","values":["Alice"]}'; + $gtAge18 = '{"method":"greaterThan","attribute":"age","values":[18]}'; + $gtAge42 = '{"method":"greaterThan","attribute":"age","values":[42]}'; + + // Same shape, different values produce the same fingerprint + $a = Query::fingerprint([$equalAlice, $gtAge18]); + $b = Query::fingerprint([$equalBob, $gtAge42]); + $this->assertSame($a, $b); + + // Different attribute produces different fingerprint + $c = Query::fingerprint([$equalEmail, $gtAge18]); + $this->assertNotSame($a, $c); + + // Different method produces different fingerprint + $d = Query::fingerprint([$notEqualAlice, $gtAge18]); + $this->assertNotSame($a, $d); + + // Order-independent + $e = Query::fingerprint([$gtAge18, $equalAlice]); + $this->assertSame($a, $e); + + // Accepts parsed Query objects + $parsed = [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]; + $f = Query::fingerprint($parsed); + $this->assertSame($a, $f); + + // Empty array returns deterministic hash + $this->assertSame(\md5(''), Query::fingerprint([])); + } } From 7661f0dbc2c87c45b2053a7a3d94424e8319aa79 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 04:42:40 +0000 Subject: [PATCH 
2/4] fix: recurse into logical queries in fingerprint Previously all `and(...)`, `or(...)`, and `elemMatch(...)` queries hashed as `and:` / `or:` / `elemMatch:` regardless of their child shapes, defeating the purpose of fingerprinting for slow-query pattern grouping. The helper now recurses into logical query children, producing canonical shapes like `and(equal:name|greaterThan:age)`. Invalid array elements (non-string, non-Query) now throw a QueryException instead of being silently skipped. Added tests for nested AND/OR differentiation, child-order independence, and rejection of invalid elements. --- src/Database/Query.php | 36 ++++++++++++++++++++++++++++++++---- tests/unit/QueryTest.php | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/Database/Query.php b/src/Database/Query.php index 267368399..10dfe91c4 100644 --- a/src/Database/Query.php +++ b/src/Database/Query.php @@ -427,11 +427,15 @@ public static function parseQueries(array $queries): array * different parameter values produce the same fingerprint, which is * useful for pattern-based counting and slow-query grouping. * + * Logical queries (`and`, `or`, `elemMatch`) are recursively fingerprinted + * so their inner structure contributes to the hash — two `and(...)` + * queries with different child shapes produce different fingerprints. + * * Accepts either raw query strings or parsed Query objects. * - * @param array $queries + * @param array $queries raw query strings or Query instances * @return string md5 hash of the canonical shape - * @throws QueryException + * @throws QueryException if an element is neither a string nor a Query */ public static function fingerprint(array $queries): string { @@ -443,10 +447,10 @@ public static function fingerprint(array $queries): string } if (!$query instanceof self) { - continue; + throw new QueryException('Invalid query element for fingerprint: expected string or Query instance'); } - $shapes[] = $query->getMethod() . 
':' . $query->getAttribute(); + $shapes[] = self::queryShape($query); } \sort($shapes); @@ -454,6 +458,30 @@ public static function fingerprint(array $queries): string return \md5(\implode('|', $shapes)); } + /** + * Canonical shape string for a single Query — recursive for logical types. + * + * @param Query $query + * @return string + */ + private static function queryShape(self $query): string + { + $method = $query->getMethod(); + + if (\in_array($method, self::LOGICAL_TYPES, true)) { + $childShapes = []; + foreach ($query->getValues() as $child) { + if ($child instanceof self) { + $childShapes[] = self::queryShape($child); + } + } + \sort($childShapes); + return $method . '(' . \implode('|', $childShapes) . ')'; + } + + return $method . ':' . $query->getAttribute(); + } + /** * @return array */ diff --git a/tests/unit/QueryTest.php b/tests/unit/QueryTest.php index be0f7511a..698099b84 100644 --- a/tests/unit/QueryTest.php +++ b/tests/unit/QueryTest.php @@ -503,4 +503,36 @@ public function testFingerprint(): void // Empty array returns deterministic hash $this->assertSame(\md5(''), Query::fingerprint([])); } + + public function testFingerprintNestedLogicalQueries(): void + { + // AND queries with different inner shapes produce different fingerprints + $andEqName = Query::and([Query::equal('name', ['Alice'])]); + $andEqEmail = Query::and([Query::equal('email', ['a@b.c'])]); + $this->assertNotSame(Query::fingerprint([$andEqName]), Query::fingerprint([$andEqEmail])); + + // AND queries with same inner shape produce the same fingerprint (values differ) + $andEqNameBob = Query::and([Query::equal('name', ['Bob'])]); + $this->assertSame(Query::fingerprint([$andEqName]), Query::fingerprint([$andEqNameBob])); + + // Order of children inside a logical query does not matter + $andA = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $andB = Query::and([Query::greaterThan('age', 42), Query::equal('name', ['Bob'])]); + 
$this->assertSame(Query::fingerprint([$andA]), Query::fingerprint([$andB])); + + // AND of two filters differs from OR of the same two filters + $orA = Query::or([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $this->assertNotSame(Query::fingerprint([$andA]), Query::fingerprint([$orA])); + + // AND with one child differs from AND with two children + $andOne = Query::and([Query::equal('name', ['Alice'])]); + $andTwo = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $this->assertNotSame(Query::fingerprint([$andOne]), Query::fingerprint([$andTwo])); + } + + public function testFingerprintRejectsInvalidElements(): void + { + $this->expectException(QueryException::class); + Query::fingerprint([42]); + } } From eb4feeeb8f1d9566f60f75016c06464b086e131d Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 05:20:27 +0000 Subject: [PATCH 3/4] fix: include elemMatch attribute in fingerprint shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Logical queries and/or have an empty attribute, but elemMatch carries the field name being matched. Without including the attribute, elemMatch('tags', [...]) and elemMatch('categories', [...]) with the same inner shape would hash identically. Canonical shape is now `method:attribute(child1|child2)` for logical types — and/or are unaffected (attribute empty), elemMatch preserves the field. --- src/Database/Query.php | 3 ++- tests/unit/QueryTest.php | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Database/Query.php b/src/Database/Query.php index 10dfe91c4..c5b7b9de5 100644 --- a/src/Database/Query.php +++ b/src/Database/Query.php @@ -476,7 +476,8 @@ private static function queryShape(self $query): string } } \sort($childShapes); - return $method . '(' . \implode('|', $childShapes) . ')'; + // Attribute is empty for and/or; meaningful for elemMatch (the field being matched). + return $method . ':' . 
$query->getAttribute() . '(' . \implode('|', $childShapes) . ')'; } return $method . ':' . $query->getAttribute(); diff --git a/tests/unit/QueryTest.php b/tests/unit/QueryTest.php index 698099b84..358c1a659 100644 --- a/tests/unit/QueryTest.php +++ b/tests/unit/QueryTest.php @@ -528,6 +528,15 @@ public function testFingerprintNestedLogicalQueries(): void $andOne = Query::and([Query::equal('name', ['Alice'])]); $andTwo = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); $this->assertNotSame(Query::fingerprint([$andOne]), Query::fingerprint([$andTwo])); + + // elemMatch attribute matters: same inner shape on different fields must NOT collide + $elemTags = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]); + $elemCategories = new Query(Query::TYPE_ELEM_MATCH, 'categories', [Query::equal('name', ['php'])]); + $this->assertNotSame(Query::fingerprint([$elemTags]), Query::fingerprint([$elemCategories])); + + // elemMatch values-only change (same field, same child shape) still collides — as expected + $elemTagsOther = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['js'])]); + $this->assertSame(Query::fingerprint([$elemTags]), Query::fingerprint([$elemTagsOther])); } public function testFingerprintRejectsInvalidElements(): void From f16c7ee2e2f318fb7ca9e692bfd5487f977b6b9a Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 05:57:24 +0000 Subject: [PATCH 4/4] refactor: expose shape() as an iterative instance method Per review: make the canonical-shape helper a public instance method on Query (`shape()`) rather than the private static `queryShape()` helper called by `fingerprint()`, and replace the recursive walk with an iterative, stack-based traversal that shapes children before their parents. Added test covering leaf, logical, elemMatch, and 4-level-deep nested shapes to verify iterative equivalence with the previous recursive version. 
--- src/Database/Query.php | 58 +++++++++++++++++++++++++++++++--------- tests/unit/QueryTest.php | 31 +++++++++++++++++++++ 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/src/Database/Query.php b/src/Database/Query.php index c5b7b9de5..d19b84259 100644 --- a/src/Database/Query.php +++ b/src/Database/Query.php @@ -427,9 +427,9 @@ public static function parseQueries(array $queries): array * different parameter values produce the same fingerprint, which is * useful for pattern-based counting and slow-query grouping. * - * Logical queries (`and`, `or`, `elemMatch`) are recursively fingerprinted - * so their inner structure contributes to the hash — two `and(...)` - * queries with different child shapes produce different fingerprints. + * Logical queries (`and`, `or`, `elemMatch`) contribute their inner + * structure to the hash via `Query::shape()` — two `and(...)` queries + * with different child shapes produce different fingerprints. * * Accepts either raw query strings or parsed Query objects. * @@ -450,7 +450,7 @@ public static function fingerprint(array $queries): string throw new QueryException('Invalid query element for fingerprint: expected string or Query instance'); } - $shapes[] = self::queryShape($query); + $shapes[] = $query->shape(); } \sort($shapes); @@ -459,28 +459,60 @@ public static function fingerprint(array $queries): string } /** - * Canonical shape string for a single Query — recursive for logical types. + * Canonical shape string for this Query — values excluded. + * + * Non-logical queries produce `method:attribute`. Logical queries + * (`and`, `or`, `elemMatch`) produce `method:attribute(child1|child2|…)` + * with children sorted so child order does not affect the shape. + * + * Implemented iteratively: walks the tree into a preorder list via a + * stack, then processes the reversed list so each node's children are + * always resolved before the node itself. 
* - * @param Query $query * @return string */ - private static function queryShape(self $query): string + public function shape(): string { - $method = $query->getMethod(); + // 1. Preorder flatten the tree. + $nodes = []; + $stack = [$this]; + while ($stack) { + /** @var self $node */ + $node = \array_pop($stack); + $nodes[] = $node; + + if (!\in_array($node->method, self::LOGICAL_TYPES, true)) { + continue; + } + foreach ($node->values as $child) { + if ($child instanceof self) { + $stack[] = $child; + } + } + } + + // 2. Process reversed so children are always shaped before parents. + $shapes = []; + foreach (\array_reverse($nodes) as $node) { + $id = \spl_object_id($node); + + if (!\in_array($node->method, self::LOGICAL_TYPES, true)) { + $shapes[$id] = $node->method . ':' . $node->attribute; + continue; + } - if (\in_array($method, self::LOGICAL_TYPES, true)) { $childShapes = []; - foreach ($query->getValues() as $child) { + foreach ($node->values as $child) { if ($child instanceof self) { - $childShapes[] = self::queryShape($child); + $childShapes[] = $shapes[\spl_object_id($child)]; } } \sort($childShapes); // Attribute is empty for and/or; meaningful for elemMatch (the field being matched). - return $method . ':' . $query->getAttribute() . '(' . \implode('|', $childShapes) . ')'; + $shapes[$id] = $node->method . ':' . $node->attribute . '(' . \implode('|', $childShapes) . ')'; } - return $method . ':' . 
$query->getAttribute(); + return $shapes[\spl_object_id($this)]; } /** diff --git a/tests/unit/QueryTest.php b/tests/unit/QueryTest.php index 358c1a659..0f1f69726 100644 --- a/tests/unit/QueryTest.php +++ b/tests/unit/QueryTest.php @@ -544,4 +544,35 @@ public function testFingerprintRejectsInvalidElements(): void $this->expectException(QueryException::class); Query::fingerprint([42]); } + + public function testShape(): void + { + // Leaf queries + $this->assertSame('equal:name', Query::equal('name', ['Alice'])->shape()); + $this->assertSame('greaterThan:age', Query::greaterThan('age', 18)->shape()); + + // Logical with empty attribute + $and = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $this->assertSame('and:(equal:name|greaterThan:age)', $and->shape()); + + // elemMatch preserves the attribute (the field being matched) + $elem = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]); + $this->assertSame('elemMatch:tags(equal:name)', $elem->shape()); + + // Deeply nested — iterative traversal must match recursive result + $deep = Query::and([ + Query::or([ + Query::equal('a', ['x']), + Query::and([ + Query::equal('b', ['y']), + Query::lessThan('c', 5), + ]), + ]), + Query::greaterThan('d', 10), + ]); + $this->assertSame( + 'and:(greaterThan:d|or:(and:(equal:b|lessThan:c)|equal:a))', + $deep->shape(), + ); + } }