drive/query/drive_document_count_query/mod.rs
1//! Types and module structure for the `GetDocumentsCount` query.
2//!
3//! The implementation is split across siblings:
4//! - [`mode_detection`] — operator classification + `detect_mode`.
5//! - [`index_picker`] — covering-index pickers
6//! (`find_countable_index_*`, `find_range_countable_index_*`).
7//! - [`path_query`] — the load-bearing prover/verifier-agreement
8//! path-query builders (`aggregate_count_path_query`,
9//! `distinct_count_path_query`, `range_clause_to_query_item`).
10//! - [`execute_point_lookup`] — Equal/In point-lookup execution
11//! (`execute_no_proof`, `execute_with_proof`).
12//! - [`execute_range_count`] — range-mode execution + `RangeCountOptions`.
13//! - [`drive_dispatcher`] — `impl Drive` per-mode dispatchers +
14//! `DocumentCountRequest` / `DocumentCountResponse` +
15//! `execute_document_count_request`.
16//! - [`tests`] (cfg `server` + `test`) — integration tests.
17//!
18//! This file owns the three public types every other submodule
19//! references and the corresponding `mod` / `pub use` plumbing.
20
21use dpp::data_contract::document_type::{DocumentTypeRef, Index};
22
23use super::conditions::WhereClause;
24
25// Re-exports for the submodules and the `tests` module's
26// `use super::*;`. `WhereOperator` is used by every submodule that
27// builds path queries or executes; `QuerySyntaxError` is the canonical
28// error variant the mode detector and dispatchers surface.
29#[cfg(any(feature = "server", feature = "verify"))]
30pub use super::conditions::WhereOperator;
31#[cfg(any(feature = "server", feature = "verify"))]
32pub use crate::error::query::QuerySyntaxError;
33
34pub mod mode_detection;
35// Index pickers + path-query builders are reachable from both the
36// server prove path and the SDK proof verifier; their submodule cfgs
37// match.
38pub mod index_picker;
39pub mod path_query;
40
41// Server-side execution paths.
42#[cfg(feature = "server")]
43pub mod drive_dispatcher;
44#[cfg(feature = "server")]
45pub mod execute_point_lookup;
46#[cfg(feature = "server")]
47pub mod execute_range_count;
48#[cfg(feature = "server")]
49pub mod executors;
50
51#[cfg(feature = "server")]
52pub use drive_dispatcher::{DocumentCountRequest, DocumentCountResponse};
53#[cfg(feature = "server")]
54pub use execute_range_count::RangeCountOptions;
55
56/// Hard cap on entries the count fan-out arms ask the executor
57/// to return.
58///
59/// Count fan-out (`PerInValue`, and the Aggregate + range
60/// sub-case of `RangeNoProof`) emits at most one entry per `In`
61/// value, and `In` is structurally capped at 100 by
62/// [`super::conditions::WhereClause::in_values`]. This cap sits
63/// well above the real bound. Two reasons to pin it explicitly
64/// instead of leaning on the operator-tunable
65/// `default_query_limit`:
66///
67/// 1. `default_query_limit` is a documents-fetch knob — applying
68/// it to count fan-out can truncate aggregate sums below |In|
69/// under tighter operator tuning, silently producing wrong
70/// totals.
71/// 2. Pinning a number here keeps the dispatcher's correctness
72/// independent of operator configuration.
73///
74/// `1024` is high enough that the cap never fires under the
75/// current `WhereClause::in_values` policy. If a future code
76/// change makes it reachable, treat that as a signal to revisit
77/// the bound before raising the constant.
78///
79/// # Pattern: failsafe cap for structurally-bounded ops
80///
81/// This is the prototype of a small project convention: when an
82/// executor-level operation has a structural upper bound enforced
83/// upstream (here, `WhereClause::in_values()`'s 100-cap on the In
84/// array), pin a failsafe cap at the executor boundary that sits
85/// well above the upstream bound rather than reusing an unrelated
86/// operator-tunable limit. The failsafe never fires under the
87/// upstream constraint — it exists to (a) keep behavior
88/// independent of operator config, and (b) localize the blast
89/// radius if the upstream constraint ever loosens. Constants
90/// added under this pattern should follow the
91/// `MAX_<OPERATION>_AS_FAILSAFE` naming so the role is visible
92/// at the use site.
93#[cfg(feature = "server")]
94pub const MAX_LIMIT_AS_FAILSAFE: u32 = 1024;
95
96/// Platform-wide **maximum** outer-walk cap for carrier-aggregate
97/// range-outer proofs (chapter 30 G8: `outer_range_field > X AND
98/// inner_acor_field > Y` with `group_by = [outer_range_field]` and
99/// `prove = true`).
100///
101/// The cap bounds the proof size: bytes grow linearly with the
102/// number of outer matches (~1 700 B per outer key in this
103/// chapter's widget fixture; `10 × 1 700 B ≈ 17 KB` worst case).
104/// 10 keeps the worst-case proof comfortably inside Tier-1 of the
105/// visualizer's shareable-link guidance (< 20 KB).
106///
107/// **Caller semantics:**
108/// - `request.limit = None` → server uses `MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
109/// (the default).
110/// - `request.limit = Some(n)` with `n ≤ MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
111/// → accepted; the dispatcher passes `n` through to
112/// `SizedQuery::limit` so the prover walks exactly `n` outer matches.
113/// - `request.limit = Some(n)` with `n > MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`
114/// → rejected with `InvalidLimit`. The cap is a hard ceiling: callers
115/// that want more results must call repeatedly with disjoint
116/// outer-range windows.
117///
118/// Why the ceiling is a hardcoded compile-time constant rather
119/// than `drive_config.max_query_limit` (the operator-tunable
120/// runtime value): on the prove path, `SizedQuery::limit` is
121/// part of the serialized `PathQuery` and feeds the merk-root
122/// reconstruction. Anchoring the ceiling to a compile-time
123/// constant guarantees prover and verifier agree on what the
124/// "default when None" value is, regardless of operator config
125/// (same rationale as `RangeDistinctProof`'s use of
126/// `crate::config::DEFAULT_QUERY_LIMIT`).
127pub const MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT: u16 = 10;
128
129#[cfg(feature = "server")]
130#[cfg(test)]
131mod tests;
132
133/// A query to count documents using CountTree elements in the index path.
134///
135/// This struct encapsulates all the information needed to perform a count
136/// query on a document type's countable index.
137#[derive(Debug, Clone)]
138pub struct DriveDocumentCountQuery<'a> {
139 /// The document type to count
140 pub document_type: DocumentTypeRef<'a>,
141 /// The contract id (32 bytes)
142 pub contract_id: [u8; 32],
143 /// The document type name
144 pub document_type_name: String,
145 /// The countable index to use
146 pub index: &'a Index,
147 /// The equality where clauses that match index prefix properties
148 pub where_clauses: Vec<WhereClause>,
149}
150
151/// An entry in a split count result, containing the serialized
152/// key(s) and the count of documents matching them.
153///
154/// For flat queries (per-`In`-value mode without a range, or
155/// per-distinct-value-in-range mode without an `In` on prefix) only
156/// `key` is meaningful and `in_key` is `None`.
157///
158/// For compound range-distinct queries (an `In` clause on a prefix
159/// property plus a range on the terminator) BOTH keys are carried:
160/// `in_key` is the In-fork's prefix value and `key` is the
161/// terminator value. Cross-fork aggregation is intentionally NOT
162/// done server-side — emitting the unmerged per-(in_key, key) shape
163/// lets `limit` push directly into grovedb (no pre-merge issue),
164/// keeps proof verification straightforward (no absence-proof
165/// gymnastics for omitted In branches), and gives callers strictly
166/// more information than a flat histogram. Callers reduce
167/// client-side when they want the sum.
168#[derive(Debug, Clone, PartialEq, Eq)]
169pub struct SplitCountEntry {
170 /// The serialized prefix key for compound queries (the `In`
171 /// value for this fork). `None` for flat queries.
172 pub in_key: Option<Vec<u8>>,
173 /// The serialized terminator/value key for this entry.
174 pub key: Vec<u8>,
175 /// The count of documents matching this `(in_key, key)` tuple
176 /// (or just `key` for flat queries).
177 ///
178 /// Three-valued by design:
179 /// - `Some(n)` with `n > 0` — verified count for an entry the
180 /// underlying data path materialized.
181 /// - `Some(0)` — caller queried this branch and the executor
182 /// confirmed zero matching documents. Emitted by the no-proof
183 /// point-lookup path's aggregated total wrapper (a single
184 /// summed entry whose value can be 0) and by the no-proof range
185 /// executors when their walk returns nothing. Not emitted
186 /// per-In-branch under the current shape — see `None` below.
187 /// - `None` — reserved for a future absence-proof variant. The
188 /// current `point_lookup_count_path_query` doesn't set
189 /// `absence_proofs_for_non_existing_searched_keys: true`, so
190 /// absent In branches are **omitted from the verified entry
191 /// list entirely** (grovedb's `verify_query` doesn't surface
192 /// `(path, key, None)` triples for them). Callers that need to
193 /// distinguish "queried but absent" diff the request's In array
194 /// against the returned entries by key. The variant exists in
195 /// the type signature so a future path-query change that flips
196 /// the flag surfaces absences via `count: None` without a
197 /// breaking struct change — distinguishable from `Some(0)`
198 /// (which a zero-count CountTree could never produce on its own
199 /// since zero-count CountTrees aren't materialized in merk).
200 pub count: Option<u64>,
201}
202
203/// SQL-shaped count-query mode — names the response shape the
204/// caller asked for via `(select, group_by)` on the wire.
205///
206/// **Two count-mode enums coexist in this module.** This one names
207/// the *output shape* the request produces (single aggregate vs
208/// per-group entries). [`DocumentCountMode`] below names the
209/// *executor strategy* (which proof primitive / which walk path
210/// Drive uses to compute that shape). `CountMode` lives on
211/// [`DocumentCountRequest`] as the caller-supplied contract;
212/// `DocumentCountMode` is derived from `(CountMode, where_clauses,
213/// prove)` by [`DriveDocumentCountQuery::detect_mode`] just before
214/// dispatch.
215///
216/// The invariants below are enforced upstream (in drive-abci's
217/// `validate_and_route`) before a `DocumentCountRequest` is built.
218/// They're documented here so any new caller knows the
219/// shape-validity contract attached to each variant.
220#[derive(Debug, Clone, Copy, PartialEq, Eq)]
221pub enum CountMode {
222 /// `select=COUNT, group_by=[]`. Single u64 result.
223 ///
224 /// Where-clause shapes accepted:
225 /// - empty (relies on a `documentsCountable: true` doctype),
226 /// - Equal-only (fully covered by a `countable: true` index),
227 /// - one `In` (per-In fan-out, summed server-side),
228 /// - one range (uses `AggregateCountOnRange` for prove,
229 /// `RangeNoProof` for no-proof),
230 /// - one `In` + one range on the no-proof path (per-In fan-out
231 /// each doing a range walk; prove is rejected).
232 ///
233 /// `limit` is structurally meaningless (aggregate is one row)
234 /// and is rejected upstream when set.
235 Aggregate,
236
237 /// `select=COUNT, group_by=[in_field]`. One entry per `In` value.
238 ///
239 /// Where-clause invariants: exactly one `In` clause whose field
240 /// matches `group_by[0]`; no range clause.
241 ///
242 /// `limit` is rejected upstream when set. The In array is
243 /// already capped at 100 entries by `WhereClause::in_values()`,
244 /// so the result size is bounded by construction; a separate
245 /// `limit` would either be redundant (≤ 100) or would silently
246 /// truncate the proof to fewer In branches than the caller
247 /// asked for (because the PointLookupProof path can't represent
248 /// a partial-In-array selection in its `SizedQuery`). Callers
249 /// that want fewer branches narrow the In array directly.
250 GroupByIn,
251
252 /// `select=COUNT, group_by=[range_field]`. One entry per distinct
253 /// value within the range.
254 ///
255 /// Where-clause invariants: exactly one range clause whose field
256 /// matches `group_by[0]`; no `In` clause.
257 /// `limit` caps the number of distinct values; on the prove
258 /// path it's validated-not-clamped (oversized values rejected
259 /// with `InvalidLimit`).
260 GroupByRange,
261
262 /// `select=COUNT, group_by=[in_field, range_field]`. One entry
263 /// per `(in_key, range_key)` pair.
264 ///
265 /// Where-clause invariants: exactly one `In` clause on `group_by[0]`
266 /// AND exactly one range clause on `group_by[1]`.
267 /// `limit` is a **global cap on the emitted `(in_key, key)` lex
268 /// stream**, not per-In-branch. The executor pushes a single
269 /// `SizedQuery::limit` over the compound walk, so a request
270 /// with `|In| = 3` and `limit = 5` returns at most 5 entries
271 /// total across all In branches (ordered by `(in_key, key)`,
272 /// direction from the first `order_by` clause). On the prove
273 /// path it's validated-not-clamped (oversized values rejected
274 /// with `InvalidLimit`).
275 GroupByCompound,
276}
277
278impl CountMode {
279 /// `true` for [`Self::Aggregate`] (single-row response);
280 /// `false` for the three grouped variants. See each variant's
281 /// docstring for the per-shape semantics.
282 pub fn is_aggregate(self) -> bool {
283 matches!(self, Self::Aggregate)
284 }
285
286 /// `true` for [`Self::GroupByRange`] and [`Self::GroupByCompound`]
287 /// — the two variants whose proof shape requires per-distinct-
288 /// value `KVCount` ops. See each variant's docstring for the
289 /// per-shape proof routing.
290 pub fn requires_distinct_walk(self) -> bool {
291 matches!(self, Self::GroupByRange | Self::GroupByCompound)
292 }
293
294 /// `true` for [`Self::GroupByRange`] and [`Self::GroupByCompound`]
295 /// — the two variants whose result size isn't structurally
296 /// bounded. [`Self::Aggregate`] and [`Self::GroupByIn`] reject
297 /// `limit` upstream; see each variant's docstring for the
298 /// per-shape reasoning.
299 pub fn accepts_limit(self) -> bool {
300 matches!(self, Self::GroupByRange | Self::GroupByCompound)
301 }
302}
303
304/// Classification of a count query's shape, used to dispatch to the
305/// right executor. Returned by
306/// [`DriveDocumentCountQuery::detect_mode`].
307///
308/// The discriminator is purely a function of the where-clause
309/// operators + the caller's [`CountMode`] + `prove`; it does not
310/// depend on the contract's index set. Picking a covering index for
311/// the chosen mode is a separate step that requires the document
312/// type's `BTreeMap<String, Index>`.
313#[derive(Debug, Clone, Copy, PartialEq, Eq)]
314pub enum DocumentCountMode {
315 /// No range, no `In` — single summed entry with empty key. Reads
316 /// the `CountTree` count directly at the indexed path.
317 Total,
318 /// Exactly one `In` clause, no range — one entry per (deduped)
319 /// `In` value, each computed as the count at that single value.
320 /// The `In` doubles as the per-value split signal.
321 PerInValue,
322 /// Exactly one range clause, no proof — walks the property-name
323 /// `ProvableCountTree`'s children inside the range. Returns either
324 /// a single summed entry or per-distinct-value entries depending
325 /// on whether the caller's [`CountMode`] requires a distinct walk
326 /// ([`CountMode::GroupByRange`] / [`CountMode::GroupByCompound`])
327 /// or not ([`CountMode::Aggregate`]).
328 RangeNoProof,
329 /// Exactly one range clause + `prove = true` +
330 /// [`CountMode::Aggregate`] — produces a grovedb
331 /// `AggregateCountOnRange` proof that verifies to a single u64.
332 /// The merk-level primitive returns one aggregate; per-distinct-
333 /// value entries with proof go through [`Self::RangeDistinctProof`]
334 /// instead.
335 RangeProof,
336 /// Exactly one range clause + `prove = true` +
337 /// [`CountMode::GroupByRange`] or [`CountMode::GroupByCompound`]
338 /// — produces a regular range proof against the property-name
339 /// `ProvableCountTree`. The
340 /// proof's `KVCount(key, value, count)` ops carry per-distinct-
341 /// value counts, each cryptographically committed via
342 /// `node_hash_with_count` to the merk root. The verifier walks the
343 /// proof op stream and emits a per-key count map, no opt-in
344 /// aggregate-collapse wrapper. Proof size is O(distinct values
345 /// matched) rather than the O(log n) of [`Self::RangeProof`], but
346 /// still much smaller than materialize-and-count.
347 RangeDistinctProof,
348 /// No range clause + `prove = true` — produces a per-branch
349 /// `Element::CountTree` proof. Either an unfiltered total
350 /// (`documents_countable: true` fast path, proving the
351 /// doctype's primary-key CountTree directly) or a covered
352 /// Equal/`In` lookup against a `countable: true` index (proving
353 /// one CountTree element per matched branch via
354 /// [`DriveDocumentCountQuery::point_lookup_count_path_query`]).
355 /// Proof size is O(k × log n) where k is the number of covered
356 /// branches (1 for the empty-where fast path and Equal-only
357 /// fully-covered case; ≤ |In values| for In-on-prefix). No
358 /// document materialization, no `u16::MAX` matching-docs cap —
359 /// the merk-level `count_value` IS the result, the SDK
360 /// extracts it via `verify_point_lookup_count_proof`.
361 PointLookupProof,
362 /// Exactly one `In` clause + one range clause + `prove = true`
363 /// + [`CountMode::GroupByIn`] — produces a grovedb carrier
364 /// `AggregateCountOnRange` proof: one outer-key descent per
365 /// `In` value, each terminating in an ACOR boundary walk over
366 /// the per-branch range subtree. Returns one `(in_key, u64)`
367 /// pair per resolved In branch — same per-key aggregate
368 /// semantics as the no-proof per-In fan-out, just verifiable.
369 ///
370 /// Proof size is `O(|In values| · (log B + log C'))` where `B`
371 /// is the In-property's distinct-value count and `C'` is the
372 /// terminator subtree's distinct-value count. Smaller than the
373 /// alternative [`Self::RangeDistinctProof`] (which scales with
374 /// the number of distinct in-range terminator values per
375 /// branch, not per-branch log-bound boundary nodes) and
376 /// preserves per-In aggregate granularity that GROUP BY
377 /// `[in_field, range_field]` can't express.
378 ///
379 /// Path-query shape (see
380 /// [`DriveDocumentCountQuery::carrier_aggregate_count_path_query`]):
381 /// outer Keys = serialized In values; subquery_path = ranged
382 /// property name; subquery = ACOR(range). Verified via
383 /// [`grovedb::GroveDb::verify_aggregate_count_query_per_key`]
384 /// (returns `Vec<(Vec<u8>, u64)>`).
385 ///
386 /// Enabled by grovedb PR #663 ("allow AggregateCountOnRange as
387 /// carrier subquery"). Before that PR this shape was rejected
388 /// in [`Self::detect_mode`] with the message "range count
389 /// queries with an `in` clause are not supported on the
390 /// aggregate prove path".
391 RangeAggregateCarrierProof,
392}