Skip to main content

drive/query/drive_document_count_query/
mod.rs

1//! Types and module structure for the `GetDocumentsCount` query.
2//!
3//! The implementation is split across siblings:
4//! - [`mode_detection`] — operator classification + `detect_mode`.
5//! - [`index_picker`] — covering-index pickers
6//!   (`find_countable_index_*`, `find_range_countable_index_*`).
7//! - [`path_query`] — the load-bearing prover/verifier-agreement
8//!   path-query builders (`aggregate_count_path_query`,
9//!   `distinct_count_path_query`, `range_clause_to_query_item`).
10//! - [`execute_point_lookup`] — Equal/In point-lookup execution
11//!   (`execute_no_proof`, `execute_with_proof`).
12//! - [`execute_range_count`] — range-mode execution + `RangeCountOptions`.
13//! - [`drive_dispatcher`] — `impl Drive` per-mode dispatchers +
14//!   `DocumentCountRequest` / `DocumentCountResponse` +
15//!   `execute_document_count_request`.
16//! - [`tests`] (cfg `server` + `test`) — integration tests.
17//!
18//! This file owns the three public types every other submodule
19//! references and the corresponding `mod` / `pub use` plumbing.
20
21use dpp::data_contract::document_type::{DocumentTypeRef, Index};
22
23use super::conditions::WhereClause;
24
25// Re-exports for the submodules and the `tests` module's
26// `use super::*;`. `WhereOperator` is used by every submodule that
27// builds path queries or executes; `QuerySyntaxError` is the canonical
28// error variant the mode detector and dispatchers surface.
29#[cfg(any(feature = "server", feature = "verify"))]
30pub use super::conditions::WhereOperator;
31#[cfg(any(feature = "server", feature = "verify"))]
32pub use crate::error::query::QuerySyntaxError;
33
34pub mod mode_detection;
35// Index pickers + path-query builders are reachable from both the
36// server prove path and the SDK proof verifier; their submodule cfgs
37// match.
38pub mod index_picker;
39pub mod path_query;
40
41// Server-side execution paths.
42#[cfg(feature = "server")]
43pub mod drive_dispatcher;
44#[cfg(feature = "server")]
45pub mod execute_point_lookup;
46#[cfg(feature = "server")]
47pub mod execute_range_count;
48#[cfg(feature = "server")]
49pub mod executors;
50
51#[cfg(feature = "server")]
52pub use drive_dispatcher::{DocumentCountRequest, DocumentCountResponse};
53#[cfg(feature = "server")]
54pub use execute_range_count::RangeCountOptions;
55
56/// Hard cap on entries the count fan-out arms ask the executor
57/// to return.
58///
59/// Count fan-out (`PerInValue`, and the Aggregate + range
60/// sub-case of `RangeNoProof`) emits at most one entry per `In`
61/// value, and `In` is structurally capped at 100 by
62/// [`super::conditions::WhereClause::in_values`]. This cap sits
63/// well above the real bound. Two reasons to pin it explicitly
64/// instead of leaning on the operator-tunable
65/// `default_query_limit`:
66///
67/// 1. `default_query_limit` is a documents-fetch knob — applying
68///    it to count fan-out can truncate aggregate sums below |In|
69///    under tighter operator tuning, silently producing wrong
70///    totals.
71/// 2. Pinning a number here keeps the dispatcher's correctness
72///    independent of operator configuration.
73///
74/// `1024` is high enough that the cap never fires under the
75/// current `WhereClause::in_values` policy. If a future code
76/// change makes it reachable, treat that as a signal to revisit
77/// the bound before raising the constant.
78///
79/// # Pattern: failsafe cap for structurally-bounded ops
80///
81/// This is the prototype of a small project convention: when an
82/// executor-level operation has a structural upper bound enforced
83/// upstream (here, `WhereClause::in_values()`'s 100-cap on the In
84/// array), pin a failsafe cap at the executor boundary that sits
85/// well above the upstream bound rather than reusing an unrelated
86/// operator-tunable limit. The failsafe never fires under the
87/// upstream constraint — it exists to (a) keep behavior
88/// independent of operator config, and (b) localize the blast
89/// radius if the upstream constraint ever loosens. Constants
90/// added under this pattern should follow the
91/// `MAX_<OPERATION>_AS_FAILSAFE` naming so the role is visible
92/// at the use site.
93#[cfg(feature = "server")]
94pub const MAX_LIMIT_AS_FAILSAFE: u32 = 1024;
95
96/// Platform-wide **maximum** outer-walk cap for carrier-aggregate
97/// range-outer proofs (chapter 30 G8: `outer_range_field > X AND
98/// inner_acor_field > Y` with `group_by = [outer_range_field]` and
99/// `prove = true`).
100///
101/// The cap bounds the proof size: bytes grow linearly with the
102/// number of outer matches (~1 700 B per outer key in this
103/// chapter's widget fixture; `10 × 1 700 B ≈ 17 KB` worst case).
104/// 10 keeps the worst-case proof comfortably inside Tier-1 of the
105/// visualizer's shareable-link guidance (< 20 KB).
106///
107/// **Caller semantics:**
108/// - `request.limit = None` → server uses `MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
109///   (the default).
110/// - `request.limit = Some(n)` with `n ≤ MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
111///   → accepted; the dispatcher passes `n` through to
112///   `SizedQuery::limit` so the prover walks exactly `n` outer matches.
113/// - `request.limit = Some(n)` with `n > MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
114///   → rejected with `InvalidLimit`. The cap is a hard ceiling: callers
115///   that want more results must call repeatedly with disjoint
116///   outer-range windows.
117///
118/// Why the ceiling is a hardcoded compile-time constant rather
119/// than `drive_config.max_query_limit` (the operator-tunable
120/// runtime value): on the prove path, `SizedQuery::limit` is
121/// part of the serialized `PathQuery` and feeds the merk-root
122/// reconstruction. Anchoring the ceiling to a compile-time
123/// constant guarantees prover and verifier agree on what the
124/// "default when None" value is, regardless of operator config
125/// (same rationale as `RangeDistinctProof`'s use of
126/// `crate::config::DEFAULT_QUERY_LIMIT`).
127pub const MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT: u16 = 10;
128
129#[cfg(feature = "server")]
130#[cfg(test)]
131mod tests;
132
133/// A query to count documents using CountTree elements in the index path.
134///
135/// This struct encapsulates all the information needed to perform a count
136/// query on a document type's countable index.
137#[derive(Debug, Clone)]
138pub struct DriveDocumentCountQuery<'a> {
139    /// The document type to count
140    pub document_type: DocumentTypeRef<'a>,
141    /// The contract id (32 bytes)
142    pub contract_id: [u8; 32],
143    /// The document type name
144    pub document_type_name: String,
145    /// The countable index to use
146    pub index: &'a Index,
147    /// The equality where clauses that match index prefix properties
148    pub where_clauses: Vec<WhereClause>,
149}
150
151/// An entry in a split count result, containing the serialized
152/// key(s) and the count of documents matching them.
153///
154/// For flat queries (per-`In`-value mode without a range, or
155/// per-distinct-value-in-range mode without an `In` on prefix) only
156/// `key` is meaningful and `in_key` is `None`.
157///
158/// For compound range-distinct queries (an `In` clause on a prefix
159/// property plus a range on the terminator) BOTH keys are carried:
160/// `in_key` is the In-fork's prefix value and `key` is the
161/// terminator value. Cross-fork aggregation is intentionally NOT
162/// done server-side — emitting the unmerged per-(in_key, key) shape
163/// lets `limit` push directly into grovedb (no pre-merge issue),
164/// keeps proof verification straightforward (no absence-proof
165/// gymnastics for omitted In branches), and gives callers strictly
166/// more information than a flat histogram. Callers reduce
167/// client-side when they want the sum.
168#[derive(Debug, Clone, PartialEq, Eq)]
169pub struct SplitCountEntry {
170    /// The serialized prefix key for compound queries (the `In`
171    /// value for this fork). `None` for flat queries.
172    pub in_key: Option<Vec<u8>>,
173    /// The serialized terminator/value key for this entry.
174    pub key: Vec<u8>,
175    /// The count of documents matching this `(in_key, key)` tuple
176    /// (or just `key` for flat queries).
177    ///
178    /// Three-valued by design:
179    /// - `Some(n)` with `n > 0` — verified count for an entry the
180    ///   underlying data path materialized.
181    /// - `Some(0)` — caller queried this branch and the executor
182    ///   confirmed zero matching documents. Emitted by the no-proof
183    ///   point-lookup path's aggregated total wrapper (a single
184    ///   summed entry whose value can be 0) and by the no-proof range
185    ///   executors when their walk returns nothing. Not emitted
186    ///   per-In-branch under the current shape — see `None` below.
187    /// - `None` — reserved for a future absence-proof variant. The
188    ///   current `point_lookup_count_path_query` doesn't set
189    ///   `absence_proofs_for_non_existing_searched_keys: true`, so
190    ///   absent In branches are **omitted from the verified entry
191    ///   list entirely** (grovedb's `verify_query` doesn't surface
192    ///   `(path, key, None)` triples for them). Callers that need to
193    ///   distinguish "queried but absent" diff the request's In array
194    ///   against the returned entries by key. The variant exists in
195    ///   the type signature so a future path-query change that flips
196    ///   the flag surfaces absences via `count: None` without a
197    ///   breaking struct change — distinguishable from `Some(0)`
198    ///   (which a zero-count CountTree could never produce on its own
199    ///   since zero-count CountTrees aren't materialized in merk).
200    pub count: Option<u64>,
201}
202
203/// SQL-shaped count-query mode — names the response shape the
204/// caller asked for via `(select, group_by)` on the wire.
205///
206/// **Two count-mode enums coexist in this module.** This one names
207/// the *output shape* the request produces (single aggregate vs
208/// per-group entries). [`DocumentCountMode`] below names the
209/// *executor strategy* (which proof primitive / which walk path
210/// Drive uses to compute that shape). `CountMode` lives on
211/// [`DocumentCountRequest`] as the caller-supplied contract;
212/// `DocumentCountMode` is derived from `(CountMode, where_clauses,
213/// prove)` by [`DriveDocumentCountQuery::detect_mode`] just before
214/// dispatch.
215///
216/// The invariants below are enforced upstream (in drive-abci's
217/// `validate_and_route`) before a `DocumentCountRequest` is built.
218/// They're documented here so any new caller knows the
219/// shape-validity contract attached to each variant.
220#[derive(Debug, Clone, Copy, PartialEq, Eq)]
221pub enum CountMode {
222    /// `select=COUNT, group_by=[]`. Single u64 result.
223    ///
224    /// Where-clause shapes accepted:
225    /// - empty (relies on a `documentsCountable: true` doctype),
226    /// - Equal-only (fully covered by a `countable: true` index),
227    /// - one `In` (per-In fan-out, summed server-side),
228    /// - one range (uses `AggregateCountOnRange` for prove,
229    ///   `RangeNoProof` for no-proof),
230    /// - one `In` + one range on the no-proof path (per-In fan-out
231    ///   each doing a range walk; prove is rejected).
232    ///
233    /// `limit` is structurally meaningless (aggregate is one row)
234    /// and is rejected upstream when set.
235    Aggregate,
236
237    /// `select=COUNT, group_by=[in_field]`. One entry per `In` value.
238    ///
239    /// Where-clause invariants: exactly one `In` clause whose field
240    /// matches `group_by[0]`; no range clause.
241    ///
242    /// `limit` is rejected upstream when set. The In array is
243    /// already capped at 100 entries by `WhereClause::in_values()`,
244    /// so the result size is bounded by construction; a separate
245    /// `limit` would either be redundant (≤ 100) or would silently
246    /// truncate the proof to fewer In branches than the caller
247    /// asked for (because the PointLookupProof path can't represent
248    /// a partial-In-array selection in its `SizedQuery`). Callers
249    /// that want fewer branches narrow the In array directly.
250    GroupByIn,
251
252    /// `select=COUNT, group_by=[range_field]`. One entry per distinct
253    /// value within the range.
254    ///
255    /// Where-clause invariants: exactly one range clause whose field
256    /// matches `group_by[0]`; no `In` clause.
257    /// `limit` caps the number of distinct values; on the prove
258    /// path it's validated-not-clamped (oversized values rejected
259    /// with `InvalidLimit`).
260    GroupByRange,
261
262    /// `select=COUNT, group_by=[in_field, range_field]`. One entry
263    /// per `(in_key, range_key)` pair.
264    ///
265    /// Where-clause invariants: exactly one `In` clause on `group_by[0]`
266    /// AND exactly one range clause on `group_by[1]`.
267    /// `limit` is a **global cap on the emitted `(in_key, key)` lex
268    /// stream**, not per-In-branch. The executor pushes a single
269    /// `SizedQuery::limit` over the compound walk, so a request
270    /// with `|In| = 3` and `limit = 5` returns at most 5 entries
271    /// total across all In branches (ordered by `(in_key, key)`,
272    /// direction from the first `order_by` clause). On the prove
273    /// path it's validated-not-clamped (oversized values rejected
274    /// with `InvalidLimit`).
275    GroupByCompound,
276}
277
278impl CountMode {
279    /// `true` for [`Self::Aggregate`] (single-row response);
280    /// `false` for the three grouped variants. See each variant's
281    /// docstring for the per-shape semantics.
282    pub fn is_aggregate(self) -> bool {
283        matches!(self, Self::Aggregate)
284    }
285
286    /// `true` for [`Self::GroupByRange`] and [`Self::GroupByCompound`]
287    /// — the two variants whose proof shape requires per-distinct-
288    /// value `KVCount` ops. See each variant's docstring for the
289    /// per-shape proof routing.
290    pub fn requires_distinct_walk(self) -> bool {
291        matches!(self, Self::GroupByRange | Self::GroupByCompound)
292    }
293
294    /// `true` for [`Self::GroupByRange`] and [`Self::GroupByCompound`]
295    /// — the two variants whose result size isn't structurally
296    /// bounded. [`Self::Aggregate`] and [`Self::GroupByIn`] reject
297    /// `limit` upstream; see each variant's docstring for the
298    /// per-shape reasoning.
299    pub fn accepts_limit(self) -> bool {
300        matches!(self, Self::GroupByRange | Self::GroupByCompound)
301    }
302}
303
304/// Classification of a count query's shape, used to dispatch to the
305/// right executor. Returned by
306/// [`DriveDocumentCountQuery::detect_mode`].
307///
308/// The discriminator is purely a function of the where-clause
309/// operators + the caller's [`CountMode`] + `prove`; it does not
310/// depend on the contract's index set. Picking a covering index for
311/// the chosen mode is a separate step that requires the document
312/// type's `BTreeMap<String, Index>`.
313#[derive(Debug, Clone, Copy, PartialEq, Eq)]
314pub enum DocumentCountMode {
315    /// No range, no `In` — single summed entry with empty key. Reads
316    /// the `CountTree` count directly at the indexed path.
317    Total,
318    /// Exactly one `In` clause, no range — one entry per (deduped)
319    /// `In` value, each computed as the count at that single value.
320    /// The `In` doubles as the per-value split signal.
321    PerInValue,
322    /// Exactly one range clause, no proof — walks the property-name
323    /// `ProvableCountTree`'s children inside the range. Returns either
324    /// a single summed entry or per-distinct-value entries depending
325    /// on whether the caller's [`CountMode`] requires a distinct walk
326    /// ([`CountMode::GroupByRange`] / [`CountMode::GroupByCompound`])
327    /// or not ([`CountMode::Aggregate`]).
328    RangeNoProof,
329    /// Exactly one range clause + `prove = true` +
330    /// [`CountMode::Aggregate`] — produces a grovedb
331    /// `AggregateCountOnRange` proof that verifies to a single u64.
332    /// The merk-level primitive returns one aggregate; per-distinct-
333    /// value entries with proof go through [`Self::RangeDistinctProof`]
334    /// instead.
335    RangeProof,
336    /// Exactly one range clause + `prove = true` +
337    /// [`CountMode::GroupByRange`] or [`CountMode::GroupByCompound`]
338    /// — produces a regular range proof against the property-name
339    /// `ProvableCountTree`. The
340    /// proof's `KVCount(key, value, count)` ops carry per-distinct-
341    /// value counts, each cryptographically committed via
342    /// `node_hash_with_count` to the merk root. The verifier walks the
343    /// proof op stream and emits a per-key count map, no opt-in
344    /// aggregate-collapse wrapper. Proof size is O(distinct values
345    /// matched) rather than the O(log n) of [`Self::RangeProof`], but
346    /// still much smaller than materialize-and-count.
347    RangeDistinctProof,
348    /// No range clause + `prove = true` — produces a per-branch
349    /// `Element::CountTree` proof. Either an unfiltered total
350    /// (`documents_countable: true` fast path, proving the
351    /// doctype's primary-key CountTree directly) or a covered
352    /// Equal/`In` lookup against a `countable: true` index (proving
353    /// one CountTree element per matched branch via
354    /// [`DriveDocumentCountQuery::point_lookup_count_path_query`]).
355    /// Proof size is O(k × log n) where k is the number of covered
356    /// branches (1 for the empty-where fast path and Equal-only
357    /// fully-covered case; ≤ |In values| for In-on-prefix). No
358    /// document materialization, no `u16::MAX` matching-docs cap —
359    /// the merk-level `count_value` IS the result, the SDK
360    /// extracts it via `verify_point_lookup_count_proof`.
361    PointLookupProof,
362    /// Exactly one `In` clause + one range clause + `prove = true`
363    /// + [`CountMode::GroupByIn`] — produces a grovedb carrier
364    /// `AggregateCountOnRange` proof: one outer-key descent per
365    /// `In` value, each terminating in an ACOR boundary walk over
366    /// the per-branch range subtree. Returns one `(in_key, u64)`
367    /// pair per resolved In branch — same per-key aggregate
368    /// semantics as the no-proof per-In fan-out, just verifiable.
369    ///
370    /// Proof size is `O(|In values| · (log B + log C'))` where `B`
371    /// is the In-property's distinct-value count and `C'` is the
372    /// terminator subtree's distinct-value count. Smaller than the
373    /// alternative [`Self::RangeDistinctProof`] (which scales with
374    /// the number of distinct in-range terminator values per
375    /// branch, not per-branch log-bound boundary nodes) and
376    /// preserves per-In aggregate granularity that GROUP BY
377    /// `[in_field, range_field]` can't express.
378    ///
379    /// Path-query shape (see
380    /// [`DriveDocumentCountQuery::carrier_aggregate_count_path_query`]):
381    /// outer Keys = serialized In values; subquery_path = ranged
382    /// property name; subquery = ACOR(range). Verified via
383    /// [`grovedb::GroveDb::verify_aggregate_count_query_per_key`]
384    /// (returns `Vec<(Vec<u8>, u64)>`).
385    ///
386    /// Enabled by grovedb PR #663 ("allow AggregateCountOnRange as
387    /// carrier subquery"). Before that PR this shape was rejected
388    /// in [`Self::detect_mode`] with the message "range count
389    /// queries with an `in` clause are not supported on the
390    /// aggregate prove path".
391    RangeAggregateCarrierProof,
392}