Skip to main content

drive/query/drive_document_count_query/
execute_range_count.rs

1//! Range execution paths for the count query.
2//!
3//! Three executors all keyed on a `range_countable: true` index:
4//! - [`DriveDocumentCountQuery::execute_range_count_no_proof`] — Rust-
5//!   side walk of the property-name `ProvableCountTree`'s children,
6//!   returning per-(in_key, key) entries (or a single sum) without a
7//!   proof.
8//! - [`DriveDocumentCountQuery::execute_aggregate_count_with_proof`] —
9//!   grovedb `AggregateCountOnRange` proof, returning a single u64.
10//! - [`DriveDocumentCountQuery::execute_distinct_count_with_proof`] —
11//!   regular range proof against the `ProvableCountTree`, returning
12//!   per-key `KVCount` ops bound to the merk root.
13//!
14//! Point-lookup execution (Equal/In with no range) lives in
15//! [`super::execute_point_lookup`](super::execute_point_lookup).
16//!
17//! Whole module is gated `feature = "server"` via the parent's
18//! `pub mod execute_range_count;` declaration.
19
20use super::super::conditions::{WhereClause, WhereOperator};
21use super::{DriveDocumentCountQuery, SplitCountEntry};
22use crate::drive::Drive;
23use crate::error::query::QuerySyntaxError;
24use crate::error::Error;
25use dpp::data_contract::document_type::methods::DocumentTypeV0Methods;
26use dpp::version::PlatformVersion;
27use grovedb::query_result_type::QueryResultType;
28use grovedb::TransactionArg;
29use grovedb_costs::CostContext;
30
31/// Pagination + ordering knobs for `execute_range_count_no_proof`.
32///
33/// Mirrors the protobuf request fields on
34/// `GetDocumentsCountRequestV0` so the drive-abci handler can pass them
35/// through unmodified. `distinct = false` collapses the range walk to a
36/// single summed entry; `distinct = true` returns one entry per distinct
37/// property value within the range.
38#[derive(Debug, Clone, Default)]
39pub struct RangeCountOptions {
40    /// When `true`, return one [`SplitCountEntry`] per distinct property
41    /// value within the range. When `false`, return a single entry
42    /// (empty `key`) summing all per-value counts.
43    pub distinct: bool,
44    /// Maximum number of entries to return. Only meaningful when
45    /// `distinct = true`. `None` means no limit.
46    ///
47    /// To paginate, callers narrow the range itself (`color >
48    /// <last-key-from-previous-page>`). There's no cursor field
49    /// because a single-`bytes` cursor would be ambiguous for
50    /// compound (`In + range + distinct`) queries whose natural sort
51    /// is `(in_key, key)`, and range narrowing has the same
52    /// expressivity for the simple cases.
53    pub limit: Option<u32>,
54    /// Sort order for distinct entries. `true` (default) is ascending by
55    /// serialized key bytes. Ignored when `distinct = false`.
56    pub order_by_ascending: bool,
57}
58
59impl DriveDocumentCountQuery<'_> {
60    /// Executes a range-aware count query against a `range_countable`
61    /// index. Path layout is `[contract_doc, doctype, prefix...,
62    /// range_prop_name]`, whose children are the per-value
63    /// `CountTree` leaves keyed by the range property's serialized
64    /// value.
65    ///
66    /// The caller picks the index via
67    /// [`Self::find_range_countable_index_for_where_clauses`]; this
68    /// method assumes:
69    /// - `self.index.range_countable == true`
70    /// - All `Equal` / `In` where clauses cover the index prefix
71    /// - Exactly one range-operator where clause hits the index's last
72    ///   property
73    ///
74    /// ## Execution strategies by mode
75    ///
76    /// - **Flat summed** (no `In`, `distinct = false`): single
77    ///   `query_aggregate_count` call against the merk-level
78    ///   `AggregateCountOnRange` primitive. O(log n).
79    /// - **Compound summed** (`In` on prefix, `distinct = false`):
80    ///   per-In-value fan-out — one `query_aggregate_count` call per
81    ///   matched In branch, summed in Rust. Bounded by the In
82    ///   array's 100-element cap (enforced by
83    ///   [`WhereClause::in_values`]) times O(log n), so worst-case
84    ///   work is 100 × O(log n) regardless of how many documents
85    ///   the range actually matches. Closes the request-amplification
86    ///   surface a pre-fix walk-and-sum implementation had: that
87    ///   path materialized every matched `(in_key, key)` element
88    ///   even though the response was still a single aggregate
89    ///   `u64`.
90    /// - **Distinct mode** (`distinct = true`, with or without
91    ///   `In` on prefix): walks the unified
92    ///   [`Self::distinct_count_path_query`] and emits one entry per
93    ///   matched `(in_key, key)` pair. The path query carries
94    ///   `options.limit` (clamped to `max_query_limit` upstream by
95    ///   the dispatcher) and `options.order_by_ascending`, so
96    ///   per-query work is O(limit × log n). Cross-fork aggregation
97    ///   is intentionally NOT performed server-side; callers reduce
98    ///   by `key` client-side if they want a flat histogram. See the
99    ///   book chapter ("No-Merge Compound Semantics") for the rationale.
100    ///
101    /// ## Returned entry shape
102    ///
103    /// When `options.distinct = false`, returns a single entry with
104    /// `in_key = None`, empty `key`, and `count` equal to the sum of
105    /// all matched per-value counts. When `options.distinct = true`,
106    /// returns one entry per emitted `(in_key, key)` pair, after
107    /// applying `order_by_ascending` and `limit` over the
108    /// lexicographic `(in_key, key)` tuple.
109    pub fn execute_range_count_no_proof(
110        &self,
111        drive: &Drive,
112        options: &RangeCountOptions,
113        transaction: TransactionArg,
114        platform_version: &PlatformVersion,
115    ) -> Result<Vec<SplitCountEntry>, Error> {
116        let drive_version = &platform_version.drive;
117        let has_in_on_prefix = self
118            .where_clauses
119            .iter()
120            .any(|wc| wc.operator == WhereOperator::In);
121
122        // Summed mode (both flat and compound `In + range`) goes
123        // through grovedb's `AggregateCountOnRange` primitive
124        // (`query_aggregate_count`), bounding per-query work to
125        // O(log n) per merk-tree fan-out. Compound mode loops over
126        // the In values (≤100 per the `in_values()` validator cap
127        // in `WhereClause::in_values()`) and issues one aggregate
128        // call per value, then sums the results — total bound is
129        // O(|In| × log n), independent of how many documents
130        // actually match the range.
131        //
132        // The pre-fix walk-and-sum path materialized every matched
133        // `(in_key, key)` element via `query_raw` to sum them in
134        // Rust. With one broad range × 100 In values that scans
135        // potentially millions of CountTree elements even though
136        // the response is still a single aggregate `u64` — a
137        // classic request-amplification surface on a public DAPI
138        // endpoint. The per-In fan-out closes that surface.
139        if !options.distinct {
140            if has_in_on_prefix {
141                let in_clause = self
142                    .where_clauses
143                    .iter()
144                    .find(|wc| wc.operator == WhereOperator::In)
145                    .ok_or_else(|| {
146                        Error::Query(QuerySyntaxError::InvalidWhereClauseComponents(
147                            "compound summed range count path requires an `in` clause; \
148                             dispatcher bug if reached without one",
149                        ))
150                    })?;
151                // `in_values()` enforces non-empty, ≤100, no-duplicates
152                // — same defensive cap every other In consumer in
153                // drive uses. Without it a single 64 MiB gRPC request
154                // could schedule arbitrarily many backend aggregate
155                // reads.
156                let in_values = in_clause.in_values().into_data_with_error()??;
157                let other_clauses: Vec<WhereClause> = self
158                    .where_clauses
159                    .iter()
160                    .filter(|wc| wc.operator != WhereOperator::In)
161                    .cloned()
162                    .collect();
163
164                let mut total: u64 = 0;
165                let mut seen_keys: std::collections::BTreeSet<Vec<u8>> =
166                    std::collections::BTreeSet::new();
167                for value in in_values.iter() {
168                    // Dedupe by serialized canonical key, not by raw
169                    // Value, so that distinct DPP values that
170                    // collapse to the same indexed bytes don't get
171                    // double-counted. `in_values()` already rejects
172                    // raw-Value duplicates, but this is defense-in-
173                    // depth against future Value variants that
174                    // serialize identically (e.g. integer vs
175                    // float-with-zero-fraction).
176                    let key_bytes = self.document_type.serialize_value_for_key(
177                        in_clause.field.as_str(),
178                        value,
179                        platform_version,
180                    )?;
181                    if !seen_keys.insert(key_bytes) {
182                        continue;
183                    }
184
185                    // Per-In-value query: replace the In clause with
186                    // an Equal on the specific value. The resulting
187                    // shape is flat (no In, Equal-prefix + range
188                    // terminator), so `aggregate_count_path_query`
189                    // accepts it and `query_aggregate_count` walks
190                    // boundary nodes in O(log n).
191                    let mut clauses_for_value = other_clauses.clone();
192                    clauses_for_value.push(WhereClause {
193                        field: in_clause.field.clone(),
194                        operator: WhereOperator::Equal,
195                        value: value.clone(),
196                    });
197                    let per_value_query = DriveDocumentCountQuery {
198                        document_type: self.document_type,
199                        contract_id: self.contract_id,
200                        document_type_name: self.document_type_name.clone(),
201                        index: self.index,
202                        where_clauses: clauses_for_value,
203                    };
204                    let path_query =
205                        per_value_query.aggregate_count_path_query(platform_version)?;
206                    // Destructure the `CostContext` explicitly rather than
207                    // calling `.unwrap()` on it: `CostContext::unwrap` is
208                    // infallible (it just drops the cost field), but the
209                    // visual pattern collides with `Option/Result::unwrap`
210                    // and makes review noisier. Cost is discarded here
211                    // because the per-mode dispatcher in `drive_dispatcher`
212                    // wraps these executors with its own fee accounting —
213                    // see the module-level docstring.
214                    let CostContext { value, cost: _ } = drive.grove.query_aggregate_count(
215                        &path_query,
216                        transaction,
217                        &drive_version.grove_version,
218                    );
219                    let count = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
220                    total = total.saturating_add(count);
221                }
222                return Ok(vec![SplitCountEntry {
223                    in_key: None,
224                    key: Vec::new(),
225                    // Range-summed total derived from the executor's
226                    // per-In aggregate fan-out — verified count.
227                    count: Some(total),
228                }]);
229            }
230            // Flat summed (no In on prefix): single aggregate read.
231            let path_query = self.aggregate_count_path_query(platform_version)?;
232            // See In-fan-out branch above for the destructure rationale.
233            let CostContext { value, cost: _ } = drive.grove.query_aggregate_count(
234                &path_query,
235                transaction,
236                &drive_version.grove_version,
237            );
238            let count = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
239            return Ok(vec![SplitCountEntry {
240                in_key: None,
241                key: Vec::new(),
242                // Single `AggregateCountOnRange` read — explicit
243                // verified count.
244                count: Some(count),
245            }]);
246        }
247
248        // Distinct mode (with or without In on prefix): walk and
249        // emit per-`(in_key, key)` entries. Bounded by the request's
250        // `limit` clause — the dispatcher already clamped that to
251        // `max_query_limit`, so this walk is O(limit × log n) and
252        // can't blow past the operator's DoS budget.
253        //
254        // Builds a single path query via the unified
255        // `distinct_count_path_query` builder. For an Equal-only
256        // prefix this collapses to a flat range-only query at the
257        // terminator's property-name subtree; for an In-on-prefix
258        // it becomes a compound query with one outer `Key` per In
259        // value (sorted lex-ascending by the builder) plus a
260        // `subquery_path`/`subquery` descending to the terminator's
261        // range item. The builder pushes the caller's `limit` and
262        // `order_by_ascending` directly into grovedb so the walk
263        // stops at `limit` elements in the requested direction —
264        // no Rust-side sort/reverse/truncate needed.
265        let (path_query_limit, left_to_right) =
266            (options.limit.map(|l| l as u16), options.order_by_ascending);
267        let path_query =
268            self.distinct_count_path_query(path_query_limit, left_to_right, platform_version)?;
269        let base_path_len = path_query.path.len();
270
271        let mut drive_operations = vec![];
272        let result = drive.grove_get_raw_path_query(
273            &path_query,
274            transaction,
275            // PathKeyElementTrio so we can recover the In value from
276            // the emitted element's full path (for compound queries
277            // the In value sits at `path[base_path_len]` — the first
278            // segment beyond the path query's `path`).
279            QueryResultType::QueryPathKeyElementTrioResultType,
280            &mut drive_operations,
281            drive_version,
282        );
283        let elements = match result {
284            Ok((elements, _)) => elements,
285            Err(Error::GroveDB(e))
286                if matches!(
287                    e.as_ref(),
288                    grovedb::Error::PathNotFound(_)
289                        | grovedb::Error::PathParentLayerNotFound(_)
290                        | grovedb::Error::PathKeyNotFound(_)
291                ) =>
292            {
293                // No matching prefix path — distinct mode returns
294                // an empty entry list. (Summed modes returned earlier
295                // via the aggregate fast path, so the empty case is
296                // distinct-only here.)
297                return Ok(Vec::new());
298            }
299            Err(e) => return Err(e),
300        };
301
302        // Walk emitted `(path, key, element)` triples and build the
303        // unmerged entry list. For compound (In-on-prefix) queries
304        // the In value sits at `path[base_path_len]`; for flat
305        // queries `path.len() == base_path_len` so `in_key` is
306        // `None`. We DO NOT collapse multiple emitted entries with
307        // the same `key` into one — that's the whole point of the
308        // no-merge contract.
309        let mut entries: Vec<SplitCountEntry> = Vec::new();
310        for triple in elements.to_path_key_elements() {
311            let (path, key, element) = triple;
312            let count = element.count_value_or_default();
313            if count == 0 {
314                continue;
315            }
316            let in_key = if has_in_on_prefix && path.len() > base_path_len {
317                Some(path[base_path_len].clone())
318            } else {
319                None
320            };
321            // Distinct-walk emits one entry per distinct value
322            // with its verified count; always `Some(_)`.
323            entries.push(SplitCountEntry {
324                in_key,
325                key,
326                count: Some(count),
327            });
328        }
329
330        // Distinct mode: grovedb already emitted entries in the
331        // requested direction (controlled by `left_to_right`) and
332        // truncated to the path-query limit, so we return the entry
333        // list as-is. The In keys are lex-sorted by the builder
334        // (see `distinct_count_path_query`), so the natural emit
335        // order is `(in_key_lex_asc, key_lex_asc)` for ascending
336        // and `(in_key_lex_desc, key_lex_desc)` for descending —
337        // the documented order contract holds by construction.
338        //
339        // For pagination, callers narrow the range bound itself
340        // (`color > <last-key>` for the next page) rather than
341        // passing a cursor — see `RangeCountOptions::limit` doc.
342        Ok(entries)
343    }
344
345    /// Generates a grovedb `AggregateCountOnRange` proof for a
346    /// range-count query against a `range_countable` index. The returned
347    /// proof bytes can be verified client-side via
348    /// `GroveDb::verify_aggregate_count_query`, which yields
349    /// `(root_hash, count)` — replacing the materialize-and-count proof
350    /// path that capped at `u16::MAX` documents.
351    ///
352    /// Limitations vs. [`Self::execute_range_count_no_proof`]:
353    /// - Returns ONLY the total count (a single number, no
354    ///   per-distinct-value entries) — `AggregateCountOnRange` is a
355    ///   single-aggregate primitive at the merk layer.
356    /// - Requires the prefix to resolve to exactly one path. `In` on
357    ///   prefix properties is not supported because grovedb's aggregate
358    ///   primitive only lifts a single inner range.
359    pub fn execute_aggregate_count_with_proof(
360        &self,
361        drive: &Drive,
362        transaction: TransactionArg,
363        platform_version: &PlatformVersion,
364    ) -> Result<Vec<u8>, Error> {
365        let drive_version = &platform_version.drive;
366        let path_query = self.aggregate_count_path_query(platform_version)?;
367        // Destructure rather than `.unwrap()` — see the In fan-out branch
368        // in `execute_range_count_no_proof` for rationale.
369        let CostContext { value, cost: _ } = drive.grove.get_proved_path_query(
370            &path_query,
371            None,
372            transaction,
373            &drive_version.grove_version,
374        );
375        let proof = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
376        Ok(proof)
377    }
378
379    /// Generates a regular grovedb range proof against this count
380    /// query's `range_countable` index — the distinct-counts-with-
381    /// proof companion to [`Self::execute_aggregate_count_with_proof`].
382    ///
383    /// No new prover code: the leaf is a `ProvableCountTree` and
384    /// merk's existing `prove_query` already emits `KVCount(key,
385    /// value, count)` per matched in-range key (via
386    /// `to_kv_count_node`). Each `count` is hash-bound to the merk
387    /// root via `node_hash_with_count`, so the per-key correctness
388    /// guarantee comes for free with the standard hash-chain check —
389    /// the SDK-side
390    /// [`drive_proof_verifier::verify_distinct_count_proof`] just
391    /// pulls the counts out of the proof's op stream after the
392    /// integrity check passes.
393    ///
394    /// Trade-off vs. the aggregate prove path:
395    /// - Returns per-distinct-value counts (one `(key, count)` per
396    ///   matched lot value), not just a single sum.
397    /// - Proof size is O(distinct values matched), not O(log n) — so
398    ///   ~1 `KVCount` op per matched key instead of subtree collapse
399    ///   via `HashWithCount`. Still strictly smaller than
400    ///   materialize-and-count, which would emit each underlying doc.
401    pub fn execute_distinct_count_with_proof(
402        &self,
403        drive: &Drive,
404        limit: u16,
405        left_to_right: bool,
406        transaction: TransactionArg,
407        platform_version: &PlatformVersion,
408    ) -> Result<Vec<u8>, Error> {
409        let drive_version = &platform_version.drive;
410        let path_query =
411            self.distinct_count_path_query(Some(limit), left_to_right, platform_version)?;
412        // Destructure rather than `.unwrap()` — see the In fan-out branch
413        // in `execute_range_count_no_proof` for rationale.
414        let CostContext { value, cost: _ } = drive.grove.get_proved_path_query(
415            &path_query,
416            None,
417            transaction,
418            &drive_version.grove_version,
419        );
420        let proof = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
421        Ok(proof)
422    }
423
424    /// Generates a grovedb **carrier** `AggregateCountOnRange` proof
425    /// for `In + range` queries with `group_by = [in_field]`. The
426    /// proof commits one aggregate count per resolved In branch
427    /// via grovedb's carrier-subquery composition
428    /// ([PR #663](https://github.com/dashpay/grovedb/pull/663)).
429    ///
430    /// Path query: see
431    /// [`Self::carrier_aggregate_count_path_query`].
432    ///
433    /// Trade-off vs. the alternative
434    /// [`Self::execute_distinct_count_with_proof`]
435    /// (`GroupByCompound` shape):
436    /// - **This** (carrier-ACOR): O(|In| · (log B + log C')) proof
437    ///   bytes. One commit per merk-tree boundary node per In
438    ///   branch — preserves the per-branch aggregate granularity
439    ///   that `group_by = [in_field, range_field]` can't express
440    ///   (the compound shape commits per-distinct-value-pair
441    ///   entries).
442    /// - **Alternative** (distinct compound): O(|In| · R · log C')
443    ///   where R is distinct in-range values per branch. Carries
444    ///   strictly more information (one `(in_key, range_key)`
445    ///   pair per resolved doc) at substantially larger bytes.
446    ///
447    /// Verified client-side via
448    /// [`grovedb::GroveDb::verify_aggregate_count_query_per_key`],
449    /// which returns `(RootHash, Vec<(Vec<u8>, u64)>)`.
450    pub fn execute_carrier_aggregate_count_with_proof(
451        &self,
452        drive: &Drive,
453        limit: Option<u16>,
454        transaction: TransactionArg,
455        platform_version: &PlatformVersion,
456    ) -> Result<Vec<u8>, Error> {
457        let drive_version = &platform_version.drive;
458        let path_query = self.carrier_aggregate_count_path_query(limit, platform_version)?;
459        // Same destructure pattern as the sibling aggregate / distinct
460        // executors. `get_proved_path_query` returns `CostContext<Result>`;
461        // ignoring the cost field is the same pattern those use today.
462        let CostContext { value, cost: _ } = drive.grove.get_proved_path_query(
463            &path_query,
464            None,
465            transaction,
466            &drive_version.grove_version,
467        );
468        let proof = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
469        Ok(proof)
470    }
471}