diff --git a/src/Database/Query.php b/src/Database/Query.php
index 686a6ab37..d19b84259 100644
--- a/src/Database/Query.php
+++ b/src/Database/Query.php
@@ -419,6 +419,112 @@ public static function parseQueries(array $queries): array
         return $parsed;
     }
 
+    /**
+     * Compute a shape-only fingerprint of an array of queries.
+     *
+     * The fingerprint captures the structure of the queries — method and
+     * attribute — without values. Two query sets with the same shape but
+     * different parameter values produce the same fingerprint, which is
+     * useful for pattern-based counting and slow-query grouping.
+     *
+     * Logical queries (`and`, `or`, `elemMatch`) contribute their inner
+     * structure to the hash via `Query::shape()` — two `and(...)` queries
+     * with different child shapes produce different fingerprints.
+     *
+     * Accepts either raw query strings or parsed Query objects. Raw strings
+     * are handed to `Query::parse()`; anything it throws propagates unchanged.
+     *
+     * @param array<string|Query> $queries raw query strings or Query instances
+     * @return string md5 hash of the canonical shape
+     * @throws QueryException if an element is neither a string nor a Query
+     */
+    public static function fingerprint(array $queries): string
+    {
+        $shapes = [];
+
+        foreach ($queries as $query) {
+            if (\is_string($query)) {
+                $query = self::parse($query);
+            }
+
+            if (!$query instanceof self) {
+                throw new QueryException('Invalid query element for fingerprint: expected string or Query instance');
+            }
+
+            $shapes[] = $query->shape();
+        }
+
+        // Byte-wise ordering: query order must not affect the hash, and the
+        // result must never depend on PHP's numeric-string comparison rules.
+        \sort($shapes, \SORT_STRING);
+
+        return \md5(\implode('|', $shapes));
+    }
+
+    /**
+     * Canonical shape string for this Query — values excluded.
+     *
+     * Non-logical queries produce `method:attribute`. Logical queries
+     * (`and`, `or`, `elemMatch`) produce `method:attribute(child1|child2|…)`
+     * with children sorted so child order does not affect the shape.
+     *
+     * The characters `|`, `(`, `)` and `%` are percent-encoded inside attribute
+     * names. They are the delimiters of the shape grammar, so leaving them raw
+     * would let one query imitate another structure (an attribute named
+     * `email|equal:name` would otherwise read as two sibling queries).
+     *
+     * Implemented iteratively: walks the tree into a preorder list via a
+     * stack, then processes the reversed list so each node's children are
+     * always resolved before the node itself.
+     *
+     * @return string
+     */
+    public function shape(): string
+    {
+        // Delimiters of the shape grammar, plus the escape character itself.
+        $escapes = ['%' => '%25', '|' => '%7C', '(' => '%28', ')' => '%29'];
+
+        // 1. Preorder flatten the tree.
+        $nodes = [];
+        $stack = [$this];
+        while ($stack !== []) {
+            /** @var self $node */
+            $node = \array_pop($stack);
+            $nodes[] = $node;
+
+            if (!\in_array($node->method, self::LOGICAL_TYPES, true)) {
+                continue;
+            }
+            foreach ($node->values as $child) {
+                if ($child instanceof self) {
+                    $stack[] = $child;
+                }
+            }
+        }
+
+        // 2. Process reversed so children are always shaped before parents.
+        $shapes = [];
+        foreach (\array_reverse($nodes) as $node) {
+            // Attribute is empty for and/or; meaningful for elemMatch (the field being matched).
+            $shape = $node->method . ':' . \strtr($node->attribute, $escapes);
+
+            if (\in_array($node->method, self::LOGICAL_TYPES, true)) {
+                $childShapes = [];
+                foreach ($node->values as $child) {
+                    if ($child instanceof self) {
+                        $childShapes[] = $shapes[\spl_object_id($child)];
+                    }
+                }
+                \sort($childShapes, \SORT_STRING);
+                $shape .= '(' . \implode('|', $childShapes) . ')';
+            }
+
+            $shapes[\spl_object_id($node)] = $shape;
+        }
+
+        return $shapes[\spl_object_id($this)];
+    }
+
     /**
      * @return array
      */
diff --git a/tests/unit/QueryTest.php b/tests/unit/QueryTest.php
index e23193ecb..0f1f69726 100644
--- a/tests/unit/QueryTest.php
+++ b/tests/unit/QueryTest.php
@@ -468,4 +468,122 @@ public function testNewQueryTypesInTypesArray(): void
         $this->assertContains(Query::TYPE_NOT_BETWEEN, Query::TYPES);
         $this->assertContains(Query::TYPE_ORDER_RANDOM, Query::TYPES);
     }
+
+    public function testFingerprint(): void
+    {
+        $equalAlice = '{"method":"equal","attribute":"name","values":["Alice"]}';
+        $equalBob = '{"method":"equal","attribute":"name","values":["Bob"]}';
+        $equalEmail = '{"method":"equal","attribute":"email","values":["a@b.c"]}';
+        $notEqualAlice = '{"method":"notEqual","attribute":"name","values":["Alice"]}';
+        $gtAge18 = '{"method":"greaterThan","attribute":"age","values":[18]}';
+        $gtAge42 = '{"method":"greaterThan","attribute":"age","values":[42]}';
+
+        // Same shape, different values produce the same fingerprint
+        $a = Query::fingerprint([$equalAlice, $gtAge18]);
+        $b = Query::fingerprint([$equalBob, $gtAge42]);
+        $this->assertSame($a, $b);
+
+        // Different attribute produces different fingerprint
+        $c = Query::fingerprint([$equalEmail, $gtAge18]);
+        $this->assertNotSame($a, $c);
+
+        // Different method produces different fingerprint
+        $d = Query::fingerprint([$notEqualAlice, $gtAge18]);
+        $this->assertNotSame($a, $d);
+
+        // Order-independent
+        $e = Query::fingerprint([$gtAge18, $equalAlice]);
+        $this->assertSame($a, $e);
+
+        // Accepts parsed Query objects
+        $parsed = [Query::equal('name', ['Alice']), Query::greaterThan('age', 18)];
+        $f = Query::fingerprint($parsed);
+        $this->assertSame($a, $f);
+
+        // Empty array returns deterministic hash
+        $this->assertSame(\md5(''), Query::fingerprint([]));
+    }
+
+    public function testFingerprintNestedLogicalQueries(): void
+    {
+        // AND queries with different inner shapes produce different fingerprints
+        $andEqName = Query::and([Query::equal('name', ['Alice'])]);
+        $andEqEmail = Query::and([Query::equal('email', ['a@b.c'])]);
+        $this->assertNotSame(Query::fingerprint([$andEqName]), Query::fingerprint([$andEqEmail]));
+
+        // AND queries with same inner shape produce the same fingerprint (values differ)
+        $andEqNameBob = Query::and([Query::equal('name', ['Bob'])]);
+        $this->assertSame(Query::fingerprint([$andEqName]), Query::fingerprint([$andEqNameBob]));
+
+        // Order of children inside a logical query does not matter
+        $andA = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
+        $andB = Query::and([Query::greaterThan('age', 42), Query::equal('name', ['Bob'])]);
+        $this->assertSame(Query::fingerprint([$andA]), Query::fingerprint([$andB]));
+
+        // AND of two filters differs from OR of the same two filters
+        $orA = Query::or([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
+        $this->assertNotSame(Query::fingerprint([$andA]), Query::fingerprint([$orA]));
+
+        // AND with one child differs from AND with two children
+        $andOne = Query::and([Query::equal('name', ['Alice'])]);
+        $andTwo = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
+        $this->assertNotSame(Query::fingerprint([$andOne]), Query::fingerprint([$andTwo]));
+
+        // elemMatch attribute matters: same inner shape on different fields must NOT collide
+        $elemTags = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]);
+        $elemCategories = new Query(Query::TYPE_ELEM_MATCH, 'categories', [Query::equal('name', ['php'])]);
+        $this->assertNotSame(Query::fingerprint([$elemTags]), Query::fingerprint([$elemCategories]));
+
+        // elemMatch values-only change (same field, same child shape) still collides — as expected
+        $elemTagsOther = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['js'])]);
+        $this->assertSame(Query::fingerprint([$elemTags]), Query::fingerprint([$elemTagsOther]));
+    }
+
+    public function testFingerprintRejectsInvalidElements(): void
+    {
+        $this->expectException(QueryException::class);
+        Query::fingerprint([42]);
+    }
+
+    public function testFingerprintAttributeCannotImitateStructure(): void
+    {
+        // An attribute containing the sibling delimiter must not read as two queries
+        $pair = [Query::equal('email', ['a@b.c']), Query::equal('name', ['Alice'])];
+        $crafted = [Query::equal('email|equal:name', ['x'])];
+        $this->assertNotSame(Query::fingerprint($pair), Query::fingerprint($crafted));
+    }
+
+    public function testShape(): void
+    {
+        // Leaf queries
+        $this->assertSame('equal:name', Query::equal('name', ['Alice'])->shape());
+        $this->assertSame('greaterThan:age', Query::greaterThan('age', 18)->shape());
+
+        // Logical with empty attribute
+        $and = Query::and([Query::equal('name', ['Alice']), Query::greaterThan('age', 18)]);
+        $this->assertSame('and:(equal:name|greaterThan:age)', $and->shape());
+
+        // elemMatch preserves the attribute (the field being matched)
+        $elem = new Query(Query::TYPE_ELEM_MATCH, 'tags', [Query::equal('name', ['php'])]);
+        $this->assertSame('elemMatch:tags(equal:name)', $elem->shape());
+
+        // Grammar delimiters inside an attribute are percent-encoded
+        $this->assertSame('equal:a%28b%29%7Cc%25', Query::equal('a(b)|c%', ['x'])->shape());
+
+        // Deeply nested — iterative traversal must match recursive result
+        $deep = Query::and([
+            Query::or([
+                Query::equal('a', ['x']),
+                Query::and([
+                    Query::equal('b', ['y']),
+                    Query::lessThan('c', 5),
+                ]),
+            ]),
+            Query::greaterThan('d', 10),
+        ]);
+        $this->assertSame(
+            'and:(greaterThan:d|or:(and:(equal:b|lessThan:c)|equal:a))',
+            $deep->shape(),
+        );
+    }
 }