From 44fe4eaaa514c81a1236761e8c0966e016b272a2 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 04:33:06 +0000 Subject: [PATCH 1/3] feat: add Query::fingerprint() for shape-only query hashing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute a deterministic hash of query structure (method + attribute) with values excluded. Useful for grouping queries by pattern — e.g. slow-query analytics where two queries with the same shape but different parameter values should count as the same pattern. Accepts raw query strings or parsed Query objects. Order-independent. --- src/Query/Query.php | 32 ++++++++++++++++++++++++++++++++ tests/Query/QueryTest.php | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/src/Query/Query.php b/src/Query/Query.php index 3af237f..b38e51e 100644 --- a/src/Query/Query.php +++ b/src/Query/Query.php @@ -449,6 +449,38 @@ public static function parseQueries(array $queries): array return $parsed; } + /** + * Compute a shape-only fingerprint of an array of queries. + * + * The fingerprint captures the structure of the queries — method and + * attribute — without values. Two query sets with the same shape but + * different parameter values produce the same fingerprint, which is + * useful for pattern-based counting and slow-query grouping. + * + * Accepts either raw query strings or parsed Query objects. 
+ * + * @param array $queries + * @return string md5 hash of the canonical shape + * + * @throws QueryException + */ + public static function fingerprint(array $queries): string + { + $shapes = []; + + foreach ($queries as $query) { + if (\is_string($query)) { + $query = static::parse($query); + } + + $shapes[] = $query->getMethod().':'.$query->getAttribute(); + } + + \sort($shapes); + + return \md5(\implode('|', $shapes)); + } + /** * @return array */ diff --git a/tests/Query/QueryTest.php b/tests/Query/QueryTest.php index 1fb05bd..31389ed 100644 --- a/tests/Query/QueryTest.php +++ b/tests/Query/QueryTest.php @@ -138,4 +138,39 @@ public function testEmptyValues(): void $query = Query::equal('name', []); $this->assertEquals([], $query->getValues()); } + + public function testFingerprint(): void + { + $equalAlice = '{"method":"equal","attribute":"name","values":["Alice"]}'; + $equalBob = '{"method":"equal","attribute":"name","values":["Bob"]}'; + $equalEmail = '{"method":"equal","attribute":"email","values":["a@b.c"]}'; + $notEqualAlice = '{"method":"notEqual","attribute":"name","values":["Alice"]}'; + $gtAge18 = '{"method":"greaterThan","attribute":"age","values":[18]}'; + $gtAge42 = '{"method":"greaterThan","attribute":"age","values":[42]}'; + + // Same shape, different values produce the same fingerprint + $a = Query::fingerprint([$equalAlice, $gtAge18]); + $b = Query::fingerprint([$equalBob, $gtAge42]); + $this->assertSame($a, $b); + + // Different attribute produces different fingerprint + $c = Query::fingerprint([$equalEmail, $gtAge18]); + $this->assertNotSame($a, $c); + + // Different method produces different fingerprint + $d = Query::fingerprint([$notEqualAlice, $gtAge18]); + $this->assertNotSame($a, $d); + + // Order-independent + $e = Query::fingerprint([$gtAge18, $equalAlice]); + $this->assertSame($a, $e); + + // Accepts parsed Query objects + $parsed = [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]; + $f = Query::fingerprint($parsed); 
+ $this->assertSame($a, $f); + + // Empty array returns deterministic hash + $this->assertSame(\md5(''), Query::fingerprint([])); + } } From bd6af2e98850e15d73d3bcd28bf5751eafbcf433 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 04:44:42 +0000 Subject: [PATCH 2/3] fix: recurse into logical queries in fingerprint Previously all `and(...)`, `or(...)`, and `elemMatch(...)` queries hashed as `and:` / `or:` / `elemMatch:` regardless of their child shapes, defeating the purpose of fingerprinting for slow-query pattern grouping. The helper now recurses into logical query children, producing canonical shapes like `and(equal:name|greaterThan:age)`. Invalid array elements (non-string, non-Query) throw a QueryException instead of a fatal PHP error. Added tests for nested AND/OR differentiation, child-order independence, and rejection of invalid elements. --- src/Query/Query.php | 44 ++++++++++++++++++++++++++++++++------- tests/Query/QueryTest.php | 32 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/src/Query/Query.php b/src/Query/Query.php index b38e51e..a0b3e56 100644 --- a/src/Query/Query.php +++ b/src/Query/Query.php @@ -457,12 +457,16 @@ public static function parseQueries(array $queries): array * different parameter values produce the same fingerprint, which is * useful for pattern-based counting and slow-query grouping. * + * Logical queries (`and`, `or`, `elemMatch`) are recursively fingerprinted + * so their inner structure contributes to the hash — two `and(...)` + * queries with different child shapes produce different fingerprints. + * * Accepts either raw query strings or parsed Query objects. 
* - * @param array $queries + * @param array $queries raw query strings or Query instances * @return string md5 hash of the canonical shape * - * @throws QueryException + * @throws QueryException if an element is neither a string nor a Query */ public static function fingerprint(array $queries): string { @@ -473,7 +477,11 @@ public static function fingerprint(array $queries): string $query = static::parse($query); } - $shapes[] = $query->getMethod().':'.$query->getAttribute(); + if (! $query instanceof self) { + throw new QueryException('Invalid query element for fingerprint: expected string or Query instance'); + } + + $shapes[] = self::queryShape($query); } \sort($shapes); @@ -481,6 +489,28 @@ public static function fingerprint(array $queries): string return \md5(\implode('|', $shapes)); } + /** + * Canonical shape string for a single Query — recursive for logical types. + */ + private static function queryShape(self $query): string + { + $method = $query->getMethod(); + + if (\in_array($method, self::LOGICAL_TYPES, true)) { + $childShapes = []; + foreach ($query->getValues() as $child) { + if ($child instanceof self) { + $childShapes[] = self::queryShape($child); + } + } + \sort($childShapes); + + return $method.'('.\implode('|', $childShapes).')'; + } + + return $method.':'.$query->getAttribute(); + } + /** * @return array */ @@ -856,12 +886,12 @@ public static function getCursorQueries(array $queries, bool $clone = true): arr * * @param array $queries * @return array{ - * filters: array, - * selections: array, + * filters: list, + * selections: list, * limit: int|null, * offset: int|null, - * orderAttributes: array, - * orderTypes: array, + * orderAttributes: list, + * orderTypes: list, * cursor: mixed, * cursorDirection: string|null * } diff --git a/tests/Query/QueryTest.php b/tests/Query/QueryTest.php index 31389ed..d943aeb 100644 --- a/tests/Query/QueryTest.php +++ b/tests/Query/QueryTest.php @@ -173,4 +173,36 @@ public function testFingerprint(): void // 
Empty array returns deterministic hash $this->assertSame(\md5(''), Query::fingerprint([])); } + + public function testFingerprintNestedLogicalQueries(): void + { + // AND queries with different inner shapes produce different fingerprints + $andEqName = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Alice'])]); + $andEqEmail = new Query(Query::TYPE_AND, '', [Query::equal('email', ['a@b.c'])]); + $this->assertNotSame(Query::fingerprint([$andEqName]), Query::fingerprint([$andEqEmail])); + + // AND queries with same inner shape produce the same fingerprint (values differ) + $andEqNameBob = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Bob'])]); + $this->assertSame(Query::fingerprint([$andEqName]), Query::fingerprint([$andEqNameBob])); + + // Order of children inside a logical query does not matter + $andA = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $andB = new Query(Query::TYPE_AND, '', [Query::greaterThan('age', 42), Query::equal('name', ['Bob'])]); + $this->assertSame(Query::fingerprint([$andA]), Query::fingerprint([$andB])); + + // AND of two filters differs from OR of the same two filters + $orA = new Query(Query::TYPE_OR, '', [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $this->assertNotSame(Query::fingerprint([$andA]), Query::fingerprint([$orA])); + + // AND with one child differs from AND with two children + $andOne = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Alice'])]); + $andTwo = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); + $this->assertNotSame(Query::fingerprint([$andOne]), Query::fingerprint([$andTwo])); + } + + public function testFingerprintRejectsInvalidElements(): void + { + $this->expectException(\Utopia\Query\Exception::class); + Query::fingerprint([42]); + } } From 4594162a0031b1b86b7ba270a5b1463889c873d7 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 20 Apr 2026 05:20:32 +0000 
Subject: [PATCH 3/3] fix: include elemMatch attribute in fingerprint shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Logical queries and/or have an empty attribute, but elemMatch carries the field name being matched. Without including the attribute, elemMatch('tags', [...]) and elemMatch('categories', [...]) with the same inner shape would hash identically. Canonical shape is now `method:attribute(child1|child2)` for logical types — and/or keep an empty attribute, so their shape becomes `and:(...)` / `or:(...)` (grouping is unchanged, but fingerprint values computed before this change for sets containing logical queries will differ), elemMatch preserves the field. --- src/Query/Query.php | 3 ++- tests/Query/QueryTest.php | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Query/Query.php b/src/Query/Query.php index a0b3e56..624717c 100644 --- a/src/Query/Query.php +++ b/src/Query/Query.php @@ -505,7 +505,8 @@ private static function queryShape(self $query): string } \sort($childShapes); - return $method.'('.\implode('|', $childShapes).')'; + // Attribute is empty for and/or; meaningful for elemMatch (the field being matched). 
+ return $method.':'.$query->getAttribute().'('.\implode('|', $childShapes).')'; } return $method.':'.$query->getAttribute(); diff --git a/tests/Query/QueryTest.php b/tests/Query/QueryTest.php index d943aeb..a3820df 100644 --- a/tests/Query/QueryTest.php +++ b/tests/Query/QueryTest.php @@ -198,6 +198,15 @@ public function testFingerprintNestedLogicalQueries(): void $andOne = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Alice'])]); $andTwo = new Query(Query::TYPE_AND, '', [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]); $this->assertNotSame(Query::fingerprint([$andOne]), Query::fingerprint([$andTwo])); + + // elemMatch attribute matters: same inner shape on different fields must NOT collide + $elemTags = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]); + $elemCategories = new Query(Query::TYPE_ELEM_MATCH, 'categories', [Query::equal('name', ['php'])]); + $this->assertNotSame(Query::fingerprint([$elemTags]), Query::fingerprint([$elemCategories])); + + // elemMatch values-only change (same field, same child shape) still collides — as expected + $elemTagsOther = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['js'])]); + $this->assertSame(Query::fingerprint([$elemTags]), Query::fingerprint([$elemTagsOther])); } public function testFingerprintRejectsInvalidElements(): void