drive/query/drive_document_count_query/execute_range_count.rs
1//! Range execution paths for the count query.
2//!
3//! Three executors all keyed on a `range_countable: true` index:
4//! - [`DriveDocumentCountQuery::execute_range_count_no_proof`] — Rust-
5//! side walk of the property-name `ProvableCountTree`'s children,
6//! returning per-(in_key, key) entries (or a single sum) without a
7//! proof.
8//! - [`DriveDocumentCountQuery::execute_aggregate_count_with_proof`] —
9//! grovedb `AggregateCountOnRange` proof, returning a single u64.
10//! - [`DriveDocumentCountQuery::execute_distinct_count_with_proof`] —
11//! regular range proof against the `ProvableCountTree`, returning
12//! per-key `KVCount` ops bound to the merk root.
13//!
14//! Point-lookup execution (Equal/In with no range) lives in
15//! [`super::execute_point_lookup`](super::execute_point_lookup).
16//!
17//! Whole module is gated `feature = "server"` via the parent's
18//! `pub mod execute_range_count;` declaration.
19
20use super::super::conditions::{WhereClause, WhereOperator};
21use super::{DriveDocumentCountQuery, SplitCountEntry};
22use crate::drive::Drive;
23use crate::error::query::QuerySyntaxError;
24use crate::error::Error;
25use dpp::data_contract::document_type::methods::DocumentTypeV0Methods;
26use dpp::version::PlatformVersion;
27use grovedb::query_result_type::QueryResultType;
28use grovedb::TransactionArg;
29use grovedb_costs::CostContext;
30
31/// Pagination + ordering knobs for `execute_range_count_no_proof`.
32///
33/// Mirrors the protobuf request fields on
34/// `GetDocumentsCountRequestV0` so the drive-abci handler can pass them
35/// through unmodified. `distinct = false` collapses the range walk to a
36/// single summed entry; `distinct = true` returns one entry per distinct
37/// property value within the range.
38#[derive(Debug, Clone, Default)]
39pub struct RangeCountOptions {
40 /// When `true`, return one [`SplitCountEntry`] per distinct property
41 /// value within the range. When `false`, return a single entry
42 /// (empty `key`) summing all per-value counts.
43 pub distinct: bool,
44 /// Maximum number of entries to return. Only meaningful when
45 /// `distinct = true`. `None` means no limit.
46 ///
47 /// To paginate, callers narrow the range itself (`color >
48 /// <last-key-from-previous-page>`). There's no cursor field
49 /// because a single-`bytes` cursor would be ambiguous for
50 /// compound (`In + range + distinct`) queries whose natural sort
51 /// is `(in_key, key)`, and range narrowing has the same
52 /// expressivity for the simple cases.
53 pub limit: Option<u32>,
54 /// Sort order for distinct entries. `true` (default) is ascending by
55 /// serialized key bytes. Ignored when `distinct = false`.
56 pub order_by_ascending: bool,
57}
58
59impl DriveDocumentCountQuery<'_> {
60 /// Executes a range-aware count query against a `range_countable`
61 /// index. Path layout is `[contract_doc, doctype, prefix...,
62 /// range_prop_name]`, whose children are the per-value
63 /// `CountTree` leaves keyed by the range property's serialized
64 /// value.
65 ///
66 /// The caller picks the index via
67 /// [`Self::find_range_countable_index_for_where_clauses`]; this
68 /// method assumes:
69 /// - `self.index.range_countable == true`
70 /// - All `Equal` / `In` where clauses cover the index prefix
71 /// - Exactly one range-operator where clause hits the index's last
72 /// property
73 ///
74 /// ## Execution strategies by mode
75 ///
76 /// - **Flat summed** (no `In`, `distinct = false`): single
77 /// `query_aggregate_count` call against the merk-level
78 /// `AggregateCountOnRange` primitive. O(log n).
79 /// - **Compound summed** (`In` on prefix, `distinct = false`):
80 /// per-In-value fan-out — one `query_aggregate_count` call per
81 /// matched In branch, summed in Rust. Bounded by the In
82 /// array's 100-element cap (enforced by
83 /// [`WhereClause::in_values`]) times O(log n), so worst-case
84 /// work is 100 × O(log n) regardless of how many documents
85 /// the range actually matches. Closes the request-amplification
86 /// surface a pre-fix walk-and-sum implementation had: that
87 /// path materialized every matched `(in_key, key)` element
88 /// even though the response was still a single aggregate
89 /// `u64`.
90 /// - **Distinct mode** (`distinct = true`, with or without
91 /// `In` on prefix): walks the unified
92 /// [`Self::distinct_count_path_query`] and emits one entry per
93 /// matched `(in_key, key)` pair. The path query carries
94 /// `options.limit` (clamped to `max_query_limit` upstream by
95 /// the dispatcher) and `options.order_by_ascending`, so
96 /// per-query work is O(limit × log n). Cross-fork aggregation
97 /// is intentionally NOT performed server-side; callers reduce
98 /// by `key` client-side if they want a flat histogram. See the
99 /// book chapter ("No-Merge Compound Semantics") for the rationale.
100 ///
101 /// ## Returned entry shape
102 ///
103 /// When `options.distinct = false`, returns a single entry with
104 /// `in_key = None`, empty `key`, and `count` equal to the sum of
105 /// all matched per-value counts. When `options.distinct = true`,
106 /// returns one entry per emitted `(in_key, key)` pair, after
107 /// applying `order_by_ascending` and `limit` over the
108 /// lexicographic `(in_key, key)` tuple.
109 pub fn execute_range_count_no_proof(
110 &self,
111 drive: &Drive,
112 options: &RangeCountOptions,
113 transaction: TransactionArg,
114 platform_version: &PlatformVersion,
115 ) -> Result<Vec<SplitCountEntry>, Error> {
116 let drive_version = &platform_version.drive;
117 let has_in_on_prefix = self
118 .where_clauses
119 .iter()
120 .any(|wc| wc.operator == WhereOperator::In);
121
122 // Summed mode (both flat and compound `In + range`) goes
123 // through grovedb's `AggregateCountOnRange` primitive
124 // (`query_aggregate_count`), bounding per-query work to
125 // O(log n) per merk-tree fan-out. Compound mode loops over
126 // the In values (≤100 per the `in_values()` validator cap
127 // in `WhereClause::in_values()`) and issues one aggregate
128 // call per value, then sums the results — total bound is
129 // O(|In| × log n), independent of how many documents
130 // actually match the range.
131 //
132 // The pre-fix walk-and-sum path materialized every matched
133 // `(in_key, key)` element via `query_raw` to sum them in
134 // Rust. With one broad range × 100 In values that scans
135 // potentially millions of CountTree elements even though
136 // the response is still a single aggregate `u64` — a
137 // classic request-amplification surface on a public DAPI
138 // endpoint. The per-In fan-out closes that surface.
139 if !options.distinct {
140 if has_in_on_prefix {
141 let in_clause = self
142 .where_clauses
143 .iter()
144 .find(|wc| wc.operator == WhereOperator::In)
145 .ok_or_else(|| {
146 Error::Query(QuerySyntaxError::InvalidWhereClauseComponents(
147 "compound summed range count path requires an `in` clause; \
148 dispatcher bug if reached without one",
149 ))
150 })?;
151 // `in_values()` enforces non-empty, ≤100, no-duplicates
152 // — same defensive cap every other In consumer in
153 // drive uses. Without it a single 64 MiB gRPC request
154 // could schedule arbitrarily many backend aggregate
155 // reads.
156 let in_values = in_clause.in_values().into_data_with_error()??;
157 let other_clauses: Vec<WhereClause> = self
158 .where_clauses
159 .iter()
160 .filter(|wc| wc.operator != WhereOperator::In)
161 .cloned()
162 .collect();
163
164 let mut total: u64 = 0;
165 let mut seen_keys: std::collections::BTreeSet<Vec<u8>> =
166 std::collections::BTreeSet::new();
167 for value in in_values.iter() {
168 // Dedupe by serialized canonical key, not by raw
169 // Value, so that distinct DPP values that
170 // collapse to the same indexed bytes don't get
171 // double-counted. `in_values()` already rejects
172 // raw-Value duplicates, but this is defense-in-
173 // depth against future Value variants that
174 // serialize identically (e.g. integer vs
175 // float-with-zero-fraction).
176 let key_bytes = self.document_type.serialize_value_for_key(
177 in_clause.field.as_str(),
178 value,
179 platform_version,
180 )?;
181 if !seen_keys.insert(key_bytes) {
182 continue;
183 }
184
185 // Per-In-value query: replace the In clause with
186 // an Equal on the specific value. The resulting
187 // shape is flat (no In, Equal-prefix + range
188 // terminator), so `aggregate_count_path_query`
189 // accepts it and `query_aggregate_count` walks
190 // boundary nodes in O(log n).
191 let mut clauses_for_value = other_clauses.clone();
192 clauses_for_value.push(WhereClause {
193 field: in_clause.field.clone(),
194 operator: WhereOperator::Equal,
195 value: value.clone(),
196 });
197 let per_value_query = DriveDocumentCountQuery {
198 document_type: self.document_type,
199 contract_id: self.contract_id,
200 document_type_name: self.document_type_name.clone(),
201 index: self.index,
202 where_clauses: clauses_for_value,
203 };
204 let path_query =
205 per_value_query.aggregate_count_path_query(platform_version)?;
206 // Destructure the `CostContext` explicitly rather than
207 // calling `.unwrap()` on it: `CostContext::unwrap` is
208 // infallible (it just drops the cost field), but the
209 // visual pattern collides with `Option/Result::unwrap`
210 // and makes review noisier. Cost is discarded here
211 // because the per-mode dispatcher in `drive_dispatcher`
212 // wraps these executors with its own fee accounting —
213 // see the module-level docstring.
214 let CostContext { value, cost: _ } = drive.grove.query_aggregate_count(
215 &path_query,
216 transaction,
217 &drive_version.grove_version,
218 );
219 let count = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
220 total = total.saturating_add(count);
221 }
222 return Ok(vec![SplitCountEntry {
223 in_key: None,
224 key: Vec::new(),
225 // Range-summed total derived from the executor's
226 // per-In aggregate fan-out — verified count.
227 count: Some(total),
228 }]);
229 }
230 // Flat summed (no In on prefix): single aggregate read.
231 let path_query = self.aggregate_count_path_query(platform_version)?;
232 // See In-fan-out branch above for the destructure rationale.
233 let CostContext { value, cost: _ } = drive.grove.query_aggregate_count(
234 &path_query,
235 transaction,
236 &drive_version.grove_version,
237 );
238 let count = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
239 return Ok(vec![SplitCountEntry {
240 in_key: None,
241 key: Vec::new(),
242 // Single `AggregateCountOnRange` read — explicit
243 // verified count.
244 count: Some(count),
245 }]);
246 }
247
248 // Distinct mode (with or without In on prefix): walk and
249 // emit per-`(in_key, key)` entries. Bounded by the request's
250 // `limit` clause — the dispatcher already clamped that to
251 // `max_query_limit`, so this walk is O(limit × log n) and
252 // can't blow past the operator's DoS budget.
253 //
254 // Builds a single path query via the unified
255 // `distinct_count_path_query` builder. For an Equal-only
256 // prefix this collapses to a flat range-only query at the
257 // terminator's property-name subtree; for an In-on-prefix
258 // it becomes a compound query with one outer `Key` per In
259 // value (sorted lex-ascending by the builder) plus a
260 // `subquery_path`/`subquery` descending to the terminator's
261 // range item. The builder pushes the caller's `limit` and
262 // `order_by_ascending` directly into grovedb so the walk
263 // stops at `limit` elements in the requested direction —
264 // no Rust-side sort/reverse/truncate needed.
265 let (path_query_limit, left_to_right) =
266 (options.limit.map(|l| l as u16), options.order_by_ascending);
267 let path_query =
268 self.distinct_count_path_query(path_query_limit, left_to_right, platform_version)?;
269 let base_path_len = path_query.path.len();
270
271 let mut drive_operations = vec![];
272 let result = drive.grove_get_raw_path_query(
273 &path_query,
274 transaction,
275 // PathKeyElementTrio so we can recover the In value from
276 // the emitted element's full path (for compound queries
277 // the In value sits at `path[base_path_len]` — the first
278 // segment beyond the path query's `path`).
279 QueryResultType::QueryPathKeyElementTrioResultType,
280 &mut drive_operations,
281 drive_version,
282 );
283 let elements = match result {
284 Ok((elements, _)) => elements,
285 Err(Error::GroveDB(e))
286 if matches!(
287 e.as_ref(),
288 grovedb::Error::PathNotFound(_)
289 | grovedb::Error::PathParentLayerNotFound(_)
290 | grovedb::Error::PathKeyNotFound(_)
291 ) =>
292 {
293 // No matching prefix path — distinct mode returns
294 // an empty entry list. (Summed modes returned earlier
295 // via the aggregate fast path, so the empty case is
296 // distinct-only here.)
297 return Ok(Vec::new());
298 }
299 Err(e) => return Err(e),
300 };
301
302 // Walk emitted `(path, key, element)` triples and build the
303 // unmerged entry list. For compound (In-on-prefix) queries
304 // the In value sits at `path[base_path_len]`; for flat
305 // queries `path.len() == base_path_len` so `in_key` is
306 // `None`. We DO NOT collapse multiple emitted entries with
307 // the same `key` into one — that's the whole point of the
308 // no-merge contract.
309 let mut entries: Vec<SplitCountEntry> = Vec::new();
310 for triple in elements.to_path_key_elements() {
311 let (path, key, element) = triple;
312 let count = element.count_value_or_default();
313 if count == 0 {
314 continue;
315 }
316 let in_key = if has_in_on_prefix && path.len() > base_path_len {
317 Some(path[base_path_len].clone())
318 } else {
319 None
320 };
321 // Distinct-walk emits one entry per distinct value
322 // with its verified count; always `Some(_)`.
323 entries.push(SplitCountEntry {
324 in_key,
325 key,
326 count: Some(count),
327 });
328 }
329
330 // Distinct mode: grovedb already emitted entries in the
331 // requested direction (controlled by `left_to_right`) and
332 // truncated to the path-query limit, so we return the entry
333 // list as-is. The In keys are lex-sorted by the builder
334 // (see `distinct_count_path_query`), so the natural emit
335 // order is `(in_key_lex_asc, key_lex_asc)` for ascending
336 // and `(in_key_lex_desc, key_lex_desc)` for descending —
337 // the documented order contract holds by construction.
338 //
339 // For pagination, callers narrow the range bound itself
340 // (`color > <last-key>` for the next page) rather than
341 // passing a cursor — see `RangeCountOptions::limit` doc.
342 Ok(entries)
343 }
344
345 /// Generates a grovedb `AggregateCountOnRange` proof for a
346 /// range-count query against a `range_countable` index. The returned
347 /// proof bytes can be verified client-side via
348 /// `GroveDb::verify_aggregate_count_query`, which yields
349 /// `(root_hash, count)` — replacing the materialize-and-count proof
350 /// path that capped at `u16::MAX` documents.
351 ///
352 /// Limitations vs. [`Self::execute_range_count_no_proof`]:
353 /// - Returns ONLY the total count (a single number, no
354 /// per-distinct-value entries) — `AggregateCountOnRange` is a
355 /// single-aggregate primitive at the merk layer.
356 /// - Requires the prefix to resolve to exactly one path. `In` on
357 /// prefix properties is not supported because grovedb's aggregate
358 /// primitive only lifts a single inner range.
359 pub fn execute_aggregate_count_with_proof(
360 &self,
361 drive: &Drive,
362 transaction: TransactionArg,
363 platform_version: &PlatformVersion,
364 ) -> Result<Vec<u8>, Error> {
365 let drive_version = &platform_version.drive;
366 let path_query = self.aggregate_count_path_query(platform_version)?;
367 // Destructure rather than `.unwrap()` — see the In fan-out branch
368 // in `execute_range_count_no_proof` for rationale.
369 let CostContext { value, cost: _ } = drive.grove.get_proved_path_query(
370 &path_query,
371 None,
372 transaction,
373 &drive_version.grove_version,
374 );
375 let proof = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
376 Ok(proof)
377 }
378
379 /// Generates a regular grovedb range proof against this count
380 /// query's `range_countable` index — the distinct-counts-with-
381 /// proof companion to [`Self::execute_aggregate_count_with_proof`].
382 ///
383 /// No new prover code: the leaf is a `ProvableCountTree` and
384 /// merk's existing `prove_query` already emits `KVCount(key,
385 /// value, count)` per matched in-range key (via
386 /// `to_kv_count_node`). Each `count` is hash-bound to the merk
387 /// root via `node_hash_with_count`, so the per-key correctness
388 /// guarantee comes for free with the standard hash-chain check —
389 /// the SDK-side
390 /// [`drive_proof_verifier::verify_distinct_count_proof`] just
391 /// pulls the counts out of the proof's op stream after the
392 /// integrity check passes.
393 ///
394 /// Trade-off vs. the aggregate prove path:
395 /// - Returns per-distinct-value counts (one `(key, count)` per
396 /// matched lot value), not just a single sum.
397 /// - Proof size is O(distinct values matched), not O(log n) — so
398 /// ~1 `KVCount` op per matched key instead of subtree collapse
399 /// via `HashWithCount`. Still strictly smaller than
400 /// materialize-and-count, which would emit each underlying doc.
401 pub fn execute_distinct_count_with_proof(
402 &self,
403 drive: &Drive,
404 limit: u16,
405 left_to_right: bool,
406 transaction: TransactionArg,
407 platform_version: &PlatformVersion,
408 ) -> Result<Vec<u8>, Error> {
409 let drive_version = &platform_version.drive;
410 let path_query =
411 self.distinct_count_path_query(Some(limit), left_to_right, platform_version)?;
412 // Destructure rather than `.unwrap()` — see the In fan-out branch
413 // in `execute_range_count_no_proof` for rationale.
414 let CostContext { value, cost: _ } = drive.grove.get_proved_path_query(
415 &path_query,
416 None,
417 transaction,
418 &drive_version.grove_version,
419 );
420 let proof = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
421 Ok(proof)
422 }
423
424 /// Generates a grovedb **carrier** `AggregateCountOnRange` proof
425 /// for `In + range` queries with `group_by = [in_field]`. The
426 /// proof commits one aggregate count per resolved In branch
427 /// via grovedb's carrier-subquery composition
428 /// ([PR #663](https://github.com/dashpay/grovedb/pull/663)).
429 ///
430 /// Path query: see
431 /// [`Self::carrier_aggregate_count_path_query`].
432 ///
433 /// Trade-off vs. the alternative
434 /// [`Self::execute_distinct_count_with_proof`]
435 /// (`GroupByCompound` shape):
436 /// - **This** (carrier-ACOR): O(|In| · (log B + log C')) proof
437 /// bytes. One commit per merk-tree boundary node per In
438 /// branch — preserves the per-branch aggregate granularity
439 /// that `group_by = [in_field, range_field]` can't express
440 /// (the compound shape commits per-distinct-value-pair
441 /// entries).
442 /// - **Alternative** (distinct compound): O(|In| · R · log C')
443 /// where R is distinct in-range values per branch. Carries
444 /// strictly more information (one `(in_key, range_key)`
445 /// pair per resolved doc) at substantially larger bytes.
446 ///
447 /// Verified client-side via
448 /// [`grovedb::GroveDb::verify_aggregate_count_query_per_key`],
449 /// which returns `(RootHash, Vec<(Vec<u8>, u64)>)`.
450 pub fn execute_carrier_aggregate_count_with_proof(
451 &self,
452 drive: &Drive,
453 limit: Option<u16>,
454 transaction: TransactionArg,
455 platform_version: &PlatformVersion,
456 ) -> Result<Vec<u8>, Error> {
457 let drive_version = &platform_version.drive;
458 let path_query = self.carrier_aggregate_count_path_query(limit, platform_version)?;
459 // Same destructure pattern as the sibling aggregate / distinct
460 // executors. `get_proved_path_query` returns `CostContext<Result>`;
461 // ignoring the cost field is the same pattern those use today.
462 let CostContext { value, cost: _ } = drive.grove.get_proved_path_query(
463 &path_query,
464 None,
465 transaction,
466 &drive_version.grove_version,
467 );
468 let proof = value.map_err(|e| Error::GroveDB(Box::new(e)))?;
469 Ok(proof)
470 }
471}