Skip to main content

drive/query/drive_document_count_query/
mod.rs

1//! Types and module structure for the `GetDocumentsCount` query.
2//!
3//! The implementation is split across siblings:
4//! - [`mode_detection`] — operator classification + `detect_mode`.
5//! - [`index_picker`] — covering-index pickers
6//!   (`find_countable_index_*`, `find_range_countable_index_*`).
7//! - [`path_query`] — the load-bearing prover/verifier-agreement
8//!   path-query builders (`aggregate_count_path_query`,
9//!   `distinct_count_path_query`, `range_clause_to_query_item`).
10//! - [`execute_point_lookup`] — Equal/In point-lookup execution
11//!   (`execute_no_proof`, `execute_with_proof`).
12//! - [`execute_range_count`] — range-mode execution + `RangeCountOptions`.
13//! - [`drive_dispatcher`] — `impl Drive` per-mode dispatchers +
14//!   `DocumentCountRequest` / `DocumentCountResponse` +
15//!   `execute_document_count_request`.
16//! - [`tests`] (cfg `server` + `test`) — integration tests.
17//!
18//! This file owns the three public types every other submodule
19//! references and the corresponding `mod` / `pub use` plumbing.
20
21use dpp::data_contract::document_type::{DocumentTypeRef, Index};
22
23use super::conditions::WhereClause;
24
25// Re-exports for the submodules and the `tests` module's
26// `use super::*;`. `WhereOperator` is used by every submodule that
27// builds path queries or executes; `QuerySyntaxError` is the canonical
28// error variant the mode detector and dispatchers surface.
29#[cfg(any(feature = "server", feature = "verify"))]
30pub use super::conditions::WhereOperator;
31#[cfg(any(feature = "server", feature = "verify"))]
32pub use crate::error::query::QuerySyntaxError;
33
34pub mod mode_detection;
35// Index pickers + path-query builders are reachable from both the
36// server prove path and the SDK proof verifier; their submodule cfgs
37// match.
38pub mod index_picker;
39pub mod path_query;
40
41// Server-side execution paths.
42#[cfg(feature = "server")]
43pub mod drive_dispatcher;
44#[cfg(feature = "server")]
45pub mod execute_point_lookup;
46#[cfg(feature = "server")]
47pub mod execute_range_count;
48#[cfg(feature = "server")]
49pub mod executors;
50
51#[cfg(feature = "server")]
52pub use drive_dispatcher::{DocumentCountRequest, DocumentCountResponse};
53#[cfg(feature = "server")]
54pub use execute_range_count::RangeCountOptions;
55
56/// Hard cap on entries the count fan-out arms ask the executor
57/// to return.
58///
59/// Count fan-out (`PerInValue`, and the Aggregate + range
60/// sub-case of `RangeNoProof`) emits at most one entry per `In`
61/// value, and `In` is structurally capped at 100 by
62/// [`super::conditions::WhereClause::in_values`]. This cap sits
63/// well above the real bound. Two reasons to pin it explicitly
64/// instead of leaning on the operator-tunable
65/// `default_query_limit`:
66///
67/// 1. `default_query_limit` is a documents-fetch knob — applying
68///    it to count fan-out can truncate aggregate sums below |In|
69///    under tighter operator tuning, silently producing wrong
70///    totals.
71/// 2. Pinning a number here keeps the dispatcher's correctness
72///    independent of operator configuration.
73///
74/// `1024` is high enough that the cap never fires under the
75/// current `WhereClause::in_values` policy. If a future code
76/// change makes it reachable, treat that as a signal to revisit
77/// the bound before raising the constant.
78///
79/// # Pattern: failsafe cap for structurally-bounded ops
80///
81/// This is the prototype of a small project convention: when an
82/// executor-level operation has a structural upper bound enforced
83/// upstream (here, `WhereClause::in_values()`'s 100-cap on the In
84/// array), pin a failsafe cap at the executor boundary that sits
85/// well above the upstream bound rather than reusing an unrelated
86/// operator-tunable limit. The failsafe never fires under the
87/// upstream constraint — it exists to (a) keep behavior
88/// independent of operator config, and (b) localize the blast
89/// radius if the upstream constraint ever loosens. Constants
90/// added under this pattern should follow the
91/// `MAX_<OPERATION>_AS_FAILSAFE` naming so the role is visible
92/// at the use site.
93#[cfg(feature = "server")]
94pub const MAX_LIMIT_AS_FAILSAFE: u32 = 1024;
95
96/// Platform-wide **maximum** outer-walk cap for carrier-aggregate
97/// range-outer proofs (chapter 30 G8: `outer_range_field > X AND
98/// inner_acor_field > Y` with `group_by = [outer_range_field]` and
99/// `prove = true`).
100///
101/// The cap bounds the proof size: bytes grow linearly with the
102/// number of outer matches (~1 700 B per outer key in this
103/// chapter's widget fixture; `10 × 1 700 B ≈ 17 KB` worst case).
104/// 10 keeps the worst-case proof comfortably inside Tier-1 of the
105/// visualizer's shareable-link guidance (< 20 KB).
106///
107/// **Caller semantics:**
108/// - `request.limit = None` → server uses `MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
109///   (the default).
110/// - `request.limit = Some(n)` with `n ≤ MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
111///   → accepted; the dispatcher passes `n` through to
112///   `SizedQuery::limit` so the prover walks exactly `n` outer matches.
113/// - `request.limit = Some(n)` with `n > MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
114///   → rejected with `InvalidLimit`. The cap is a hard ceiling: callers
115///   that want more results must call repeatedly with disjoint
116///   outer-range windows.
117///
118/// Why the ceiling is a hardcoded compile-time constant rather
119/// than `drive_config.max_query_limit` (the operator-tunable
120/// runtime value): on the prove path, `SizedQuery::limit` is
121/// part of the serialized `PathQuery` and feeds the merk-root
122/// reconstruction. Anchoring the ceiling to a compile-time
123/// constant guarantees prover and verifier agree on what the
124/// "default when None" value is, regardless of operator config
125/// (same rationale as `RangeDistinctProof`'s use of
126/// `crate::config::DEFAULT_QUERY_LIMIT`).
127pub const MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT: u16 = 10;
128
129#[cfg(feature = "server")]
130#[cfg(test)]
131mod tests;
132
133/// A query to count documents using CountTree elements in the index path.
134///
135/// This struct encapsulates all the information needed to perform a count
136/// query on a document type's countable index.
137#[derive(Debug, Clone)]
138pub struct DriveDocumentCountQuery<'a> {
139    /// The document type to count
140    pub document_type: DocumentTypeRef<'a>,
141    /// The contract id (32 bytes)
142    pub contract_id: [u8; 32],
143    /// The document type name
144    pub document_type_name: String,
145    /// The countable index to use
146    pub index: &'a Index,
147    /// The equality where clauses that match index prefix properties
148    pub where_clauses: Vec<WhereClause>,
149}
150
151/// An entry in a split count result, containing the serialized
152/// key(s) and the count of documents matching them.
153///
154/// For flat queries (per-`In`-value mode without a range, or
155/// per-distinct-value-in-range mode without an `In` on prefix) only
156/// `key` is meaningful and `in_key` is `None`.
157///
158/// For compound range-distinct queries (an `In` clause on a prefix
159/// property plus a range on the terminator) BOTH keys are carried:
160/// `in_key` is the In-fork's prefix value and `key` is the
161/// terminator value. Cross-fork aggregation is intentionally NOT
162/// done server-side — emitting the unmerged per-(in_key, key) shape
163/// lets `limit` push directly into grovedb (no pre-merge issue),
164/// keeps proof verification straightforward (no absence-proof
165/// gymnastics for omitted In branches), and gives callers strictly
166/// more information than a flat histogram. Callers reduce
167/// client-side when they want the sum.
168#[derive(Debug, Clone, PartialEq, Eq)]
169pub struct SplitCountEntry {
170    /// The serialized prefix key for compound queries (the `In`
171    /// value for this fork). `None` for flat queries.
172    pub in_key: Option<Vec<u8>>,
173    /// The serialized terminator/value key for this entry.
174    pub key: Vec<u8>,
175    /// The count of documents matching this `(in_key, key)` tuple
176    /// (or just `key` for flat queries).
177    ///
178    /// Three-valued by design:
179    /// - `Some(n)` with `n > 0` — verified count for an entry the
180    ///   underlying data path materialized.
181    /// - `Some(0)` — caller queried this branch and the executor
182    ///   confirmed zero matching documents. Emitted by the no-proof
183    ///   point-lookup path's aggregated total wrapper (a single
184    ///   summed entry whose value can be 0) and by the no-proof range
185    ///   executors when their walk returns nothing. Not emitted
186    ///   per-In-branch under the current shape — see `None` below.
187    /// - `None` — reserved for a future absence-proof variant. The
188    ///   current `point_lookup_count_path_query` doesn't set
189    ///   `absence_proofs_for_non_existing_searched_keys: true`, so
190    ///   absent In branches are **omitted from the verified entry
191    ///   list entirely** (grovedb's `verify_query` doesn't surface
192    ///   `(path, key, None)` triples for them). Callers that need to
193    ///   distinguish "queried but absent" diff the request's In array
194    ///   against the returned entries by key. The variant exists in
195    ///   the type signature so a future path-query change that flips
196    ///   the flag surfaces absences via `count: None` without a
197    ///   breaking struct change — distinguishable from `Some(0)`
198    ///   (which a zero-count CountTree could never produce on its own
199    ///   since zero-count CountTrees aren't materialized in merk).
200    pub count: Option<u64>,
201}
202
203/// SQL-shaped count-query mode — names the response shape the
204/// caller asked for via `(select, group_by)` on the wire.
205///
206/// **Two count-mode enums coexist in this module.** This one names
207/// the *output shape* the request produces (single aggregate vs
208/// per-group entries). [`DocumentCountMode`] below names the
209/// *executor strategy* (which proof primitive / which walk path
210/// Drive uses to compute that shape). `CountMode` lives on
211/// [`DocumentCountRequest`] as the caller-supplied contract;
212/// `DocumentCountMode` is derived from `(CountMode, where_clauses,
213/// prove)` by [`DriveDocumentCountQuery::detect_mode`] just before
214/// dispatch.
215///
216/// **Result shape vs. executor strategy.** Each variant names a
217/// result shape — the per-variant docstring lists the
218/// where-clause shapes that route to that result shape and
219/// notes which executor strategy
220/// [`DriveDocumentCountQuery::detect_mode`] picks for each.
221/// `(in_field, range_field)` combinations on the same request
222/// are accepted on multiple `CountMode` variants — the executor
223/// strategy distinguishes them. Upstream routing
224/// (drive-abci's `validate_and_route`) picks the `CountMode`
225/// from the caller's `group_by`; downstream `detect_mode`
226/// converts the `(CountMode, where_clauses, prove)` triple into
227/// the resolved [`DocumentCountMode`].
228#[derive(Debug, Clone, Copy, PartialEq, Eq)]
229pub enum CountMode {
230    /// `select=COUNT, group_by=[]`. Single u64 result.
231    ///
232    /// Where-clause shapes accepted:
233    /// - empty (relies on a `documentsCountable: true` doctype),
234    /// - Equal-only (fully covered by a `countable: true` index),
235    /// - one `In` (per-In fan-out, summed server-side),
236    /// - one range (uses `AggregateCountOnRange` for prove,
237    ///   `RangeNoProof` for no-proof),
238    /// - one `In` + one range on the no-proof path (per-In fan-out
239    ///   each doing a range walk; prove is rejected).
240    ///
241    /// `limit` is structurally meaningless (aggregate is one row)
242    /// and is rejected upstream when set.
243    Aggregate,
244
245    /// `select=COUNT, group_by=[in_field]`. One entry per `In` value.
246    ///
247    /// Where-clause shapes accepted:
248    /// - one `In` clause on `group_by[0]` (no range clause): the
249    ///   canonical shape — routes to `PointLookupProof` on the
250    ///   prove path, `PerInValue` on the no-proof path.
251    /// - one `In` on `group_by[0]` AND a range clause on a
252    ///   different field: routes to
253    ///   `RangeAggregateCarrierProof` on the prove path
254    ///   (grovedb #663 carrier-ACOR — one verified `u64` per
255    ///   In branch, range collapsed) and `RangeNoProof` on the
256    ///   no-prove path (per-In-branch range walk). Both produce
257    ///   entries that line up with the caller's GROUP BY shape.
258    ///
259    /// `limit` is rejected upstream when set. The In array is
260    /// already capped at 100 entries by `WhereClause::in_values()`,
261    /// so the result size is bounded by construction; a separate
262    /// `limit` would either be redundant (≤ 100) or would silently
263    /// truncate the proof to fewer In branches than the caller
264    /// asked for (because the PointLookupProof path can't represent
265    /// a partial-In-array selection in its `SizedQuery`). Callers
266    /// that want fewer branches narrow the In array directly.
267    GroupByIn,
268
269    /// `select=COUNT, group_by=[range_field]`. One entry per distinct
270    /// value within the range.
271    ///
272    /// Where-clause shapes accepted:
273    /// - one range clause on `group_by[0]` (no `In` clause):
274    ///   canonical RangeDistinctProof / RangeNoProof distinct.
275    /// - one range on `group_by[0]` AND an `In` clause on a
276    ///   different field: prove path keeps `RangeDistinctProof`
277    ///   with In-fanout via grovedb subquery; no-prove path uses
278    ///   `RangeNoProof` distinct on the merged result. Per-
279    ///   distinct-value entries cover both branches of the In.
280    /// - two range clauses on different fields, the second
281    ///   being `group_by[0]`: routes to
282    ///   `RangeAggregateCarrierProof` (outer range + inner-ACOR
283    ///   carrier per grovedb #664 outer-range cap). See
284    ///   `outer_range_plus_inner_range_with_prove_and_group_by_range_routes_to_carrier_proof`
285    ///   for the regression test pinning this shape.
286    ///
287    /// `limit` caps the number of distinct values; on the prove
288    /// path it's validated-not-clamped (oversized values rejected
289    /// with `InvalidLimit`).
290    GroupByRange,
291
292    /// `select=COUNT, group_by=[in_field, range_field]`. One entry
293    /// per `(in_key, range_key)` pair.
294    ///
295    /// Where-clause invariants: an `In` clause on `group_by[0]`
296    /// AND a range clause on `group_by[1]` (match-any over
297    /// the where-clauses list — clause ordering on the wire
298    /// doesn't affect routing).
299    /// `limit` is a **global cap on the emitted `(in_key, key)` lex
300    /// stream**, not per-In-branch. The executor pushes a single
301    /// `SizedQuery::limit` over the compound walk, so a request
302    /// with `|In| = 3` and `limit = 5` returns at most 5 entries
303    /// total across all In branches (ordered by `(in_key, key)`,
304    /// direction from the first `order_by` clause). On the prove
305    /// path it's validated-not-clamped (oversized values rejected
306    /// with `InvalidLimit`).
307    GroupByCompound,
308}
309
310impl CountMode {
311    /// `true` for [`Self::Aggregate`] (single-row response);
312    /// `false` for the three grouped variants. See each variant's
313    /// docstring for the per-shape semantics.
314    pub fn is_aggregate(self) -> bool {
315        matches!(self, Self::Aggregate)
316    }
317
318    /// `true` for [`Self::GroupByRange`] and [`Self::GroupByCompound`]
319    /// — the two variants whose proof shape requires per-distinct-
320    /// value `KVCount` ops. See each variant's docstring for the
321    /// per-shape proof routing.
322    pub fn requires_distinct_walk(self) -> bool {
323        matches!(self, Self::GroupByRange | Self::GroupByCompound)
324    }
325
326    /// `true` for [`Self::GroupByRange`] and [`Self::GroupByCompound`]
327    /// — the two variants whose result size isn't structurally
328    /// bounded. [`Self::Aggregate`] and [`Self::GroupByIn`] reject
329    /// `limit` upstream; see each variant's docstring for the
330    /// per-shape reasoning.
331    pub fn accepts_limit(self) -> bool {
332        matches!(self, Self::GroupByRange | Self::GroupByCompound)
333    }
334}
335
336/// Classification of a count query's shape, used to dispatch to the
337/// right executor. Returned by
338/// [`DriveDocumentCountQuery::detect_mode`].
339///
340/// The discriminator is purely a function of the where-clause
341/// operators + the caller's [`CountMode`] + `prove`; it does not
342/// depend on the contract's index set. Picking a covering index for
343/// the chosen mode is a separate step that requires the document
344/// type's `BTreeMap<String, Index>`.
345#[derive(Debug, Clone, Copy, PartialEq, Eq)]
346pub enum DocumentCountMode {
347    /// No range, no `In` — single summed entry with empty key. Reads
348    /// the `CountTree` count directly at the indexed path.
349    Total,
350    /// Exactly one `In` clause, no range — one entry per (deduped)
351    /// `In` value, each computed as the count at that single value.
352    /// The `In` doubles as the per-value split signal.
353    PerInValue,
354    /// Exactly one range clause, no proof — walks the property-name
355    /// `ProvableCountTree`'s children inside the range. Returns either
356    /// a single summed entry or per-distinct-value entries depending
357    /// on whether the caller's [`CountMode`] requires a distinct walk
358    /// ([`CountMode::GroupByRange`] / [`CountMode::GroupByCompound`])
359    /// or not ([`CountMode::Aggregate`]).
360    RangeNoProof,
361    /// Exactly one range clause + `prove = true` +
362    /// [`CountMode::Aggregate`] — produces a grovedb
363    /// `AggregateCountOnRange` proof that verifies to a single u64.
364    /// The merk-level primitive returns one aggregate; per-distinct-
365    /// value entries with proof go through [`Self::RangeDistinctProof`]
366    /// instead.
367    RangeProof,
368    /// Exactly one range clause + `prove = true` +
369    /// [`CountMode::GroupByRange`] or [`CountMode::GroupByCompound`]
370    /// — produces a regular range proof against the property-name
371    /// `ProvableCountTree`. The
372    /// proof's `KVCount(key, value, count)` ops carry per-distinct-
373    /// value counts, each cryptographically committed via
374    /// `node_hash_with_count` to the merk root. The verifier walks the
375    /// proof op stream and emits a per-key count map, no opt-in
376    /// aggregate-collapse wrapper. Proof size is O(distinct values
377    /// matched) rather than the O(log n) of [`Self::RangeProof`], but
378    /// still much smaller than materialize-and-count.
379    RangeDistinctProof,
380    /// No range clause + `prove = true` — produces a per-branch
381    /// `Element::CountTree` proof. Either an unfiltered total
382    /// (`documents_countable: true` fast path, proving the
383    /// doctype's primary-key CountTree directly) or a covered
384    /// Equal/`In` lookup against a `countable: true` index (proving
385    /// one CountTree element per matched branch via
386    /// [`DriveDocumentCountQuery::point_lookup_count_path_query`]).
387    /// Proof size is O(k × log n) where k is the number of covered
388    /// branches (1 for the empty-where fast path and Equal-only
389    /// fully-covered case; ≤ |In values| for In-on-prefix). No
390    /// document materialization, no `u16::MAX` matching-docs cap —
391    /// the merk-level `count_value` IS the result, the SDK
392    /// extracts it via `verify_point_lookup_count_proof`.
393    PointLookupProof,
394    /// Exactly one `In` clause + one range clause + `prove = true`
395    /// + [`CountMode::GroupByIn`] — produces a grovedb carrier
396    /// `AggregateCountOnRange` proof: one outer-key descent per
397    /// `In` value, each terminating in an ACOR boundary walk over
398    /// the per-branch range subtree. Returns one `(in_key, u64)`
399    /// pair per resolved In branch — same per-key aggregate
400    /// semantics as the no-proof per-In fan-out, just verifiable.
401    ///
402    /// Proof size is `O(|In values| · (log B + log C'))` where `B`
403    /// is the In-property's distinct-value count and `C'` is the
404    /// terminator subtree's distinct-value count. Smaller than the
405    /// alternative [`Self::RangeDistinctProof`] (which scales with
406    /// the number of distinct in-range terminator values per
407    /// branch, not per-branch log-bound boundary nodes) and
408    /// preserves per-In aggregate granularity that GROUP BY
409    /// `[in_field, range_field]` can't express.
410    ///
411    /// Path-query shape (see
412    /// [`DriveDocumentCountQuery::carrier_aggregate_count_path_query`]):
413    /// outer Keys = serialized In values; subquery_path = ranged
414    /// property name; subquery = ACOR(range). Verified via
415    /// [`grovedb::GroveDb::verify_aggregate_count_query_per_key`]
416    /// (returns `Vec<(Vec<u8>, u64)>`).
417    ///
418    /// Enabled by grovedb PR #663 ("allow AggregateCountOnRange as
419    /// carrier subquery"). Before that PR this shape was rejected
420    /// in [`Self::detect_mode`] with the message "range count
421    /// queries with an `in` clause are not supported on the
422    /// aggregate prove path".
423    RangeAggregateCarrierProof,
424}