Skip to main content

drive/query/drive_document_count_query/
mode_detection.rs

1//! Mode detection + operator classification for the count query.
2//!
3//! Pure functions on the where-clause shape + request flags — no
4//! Drive, no contract, no indexes. Used both server-side (to pick an
5//! executor) and verifier-side (to validate the request before
6//! attempting verification).
7
8use super::super::conditions::{WhereClause, WhereOperator};
9use super::{CountMode, DocumentCountMode, DriveDocumentCountQuery};
10#[cfg(any(feature = "server", feature = "verify"))]
11use crate::error::query::QuerySyntaxError;
12
13impl DriveDocumentCountQuery<'_> {
14    /// Returns `true` if the where-clause operator is one the count fast path
15    /// can serve via point-lookups in a CountTree.
16    ///
17    /// Today that's `Equal` (one path) and `In` (cartesian fork over the listed
18    /// values). Range operators (`>`, `<`, `Between*`, `StartsWith`) need a
19    /// boundary walk that the current PathQuery infrastructure cannot express;
20    /// callers detect those via [`Self::has_unsupported_operator`] and surface
21    /// an error instead of silently returning a wrong count.
22    ///
23    /// `pub(super)` so the sibling [`index_picker`](super::index_picker) module
24    /// can call it from `Self::is_indexable_for_count`; not part of the public
25    /// API.
26    pub(super) fn is_indexable_for_count(op: WhereOperator) -> bool {
27        matches!(op, WhereOperator::Equal | WhereOperator::In)
28    }
29
30    /// Returns `true` if `op` is a range operator that can be served by a
31    /// `range_countable` index walking the property-name `ProvableCountTree`'s
32    /// children. The non-prefix portion of a range count query carries
33    /// exactly one range operator on the index's last property.
34    pub fn is_range_operator(op: WhereOperator) -> bool {
35        matches!(
36            op,
37            WhereOperator::GreaterThan
38                | WhereOperator::GreaterThanOrEquals
39                | WhereOperator::LessThan
40                | WhereOperator::LessThanOrEquals
41                | WhereOperator::Between
42                | WhereOperator::BetweenExcludeBounds
43                | WhereOperator::BetweenExcludeLeft
44                | WhereOperator::BetweenExcludeRight
45                | WhereOperator::StartsWith
46        )
47    }
48
49    /// Returns `true` if any where clause uses an operator the count fast path
50    /// cannot serve. Callers should treat this as a query-rejection signal.
51    pub fn has_unsupported_operator(where_clauses: &[WhereClause]) -> bool {
52        where_clauses
53            .iter()
54            .any(|wc| !Self::is_indexable_for_count(wc.operator))
55    }
56
57    /// Classify a count query's mode from its where clauses + request flags.
58    ///
59    /// This is the protocol-version-agnostic shape detection that
60    /// decides which executor (one of the six
61    /// [`DocumentCountMode`] variants — `Total` / `PerInValue` /
62    /// `RangeNoProof` / `RangeProof` / `RangeDistinctProof` /
63    /// `PointLookupProof`) the request maps to. The returned
64    /// `DocumentCountMode` discriminates among the dispatcher's
65    /// match arms; concrete pagination / index-picker inputs flow
66    /// through the call sites separately.
67    ///
68    /// All validation that depends only on the where clauses + flags
69    /// (multiple range clauses, range mixed with `In`, distinct mode on
70    /// the prove path, distinct mode without a range clause, etc.) is
71    /// done here and surfaces as
72    /// [`QuerySyntaxError::InvalidWhereClauseComponents`]. Validation
73    /// that depends on the contract's index set (no covering index)
74    /// stays at the call site since it requires the
75    /// `&BTreeMap<String, Index>`.
76    ///
77    /// **Versioning note**: this function is the v0 routing table.
78    /// Production callers go through [`Self::detect_mode_versioned`]
79    /// (which dispatches on
80    /// `platform_version.drive.methods.document.query.detect_count_mode`)
81    /// so future protocol versions can change the routing table behind
82    /// a method-version bump. Tests that want to pin the v0 contract
83    /// directly call this function.
84    #[cfg(any(feature = "server", feature = "verify"))]
85    pub fn detect_mode(
86        where_clauses: &[WhereClause],
87        mode: CountMode,
88        prove: bool,
89    ) -> Result<DocumentCountMode, QuerySyntaxError> {
90        // The caller-supplied `CountMode` is the SQL-shape contract
91        // (Aggregate / GroupByIn / GroupByRange / GroupByCompound).
92        // Translate it back to the "distinct walk required?" boolean
93        // the per-mode tuple match below was written against. Pure
94        // mechanical mapping: the distinct walk is what the two
95        // range-grouped variants need.
96        let distinct = mode.requires_distinct_walk();
97        // Reject any operator that's neither an indexable point operator
98        // (Equal/In) nor a range operator. Defense-in-depth: the request
99        // shape forbids these elsewhere, but folding the check in here
100        // keeps the mode-detection contract self-contained.
101        //
102        // `startsWith` IS in `is_range_operator` and routes through the
103        // same `Range(a..b)` path as `betweenExcludeRight` — the
104        // half-open upper bound is computed by byte-incrementing the
105        // serialized prefix's last byte (see `range_clause_to_query_item`,
106        // mirroring `conditions.rs:1129`'s normal-docs encoding).
107        for wc in where_clauses {
108            if !Self::is_indexable_for_count(wc.operator) && !Self::is_range_operator(wc.operator) {
109                return Err(QuerySyntaxError::InvalidWhereClauseComponents(
110                    "count query supports only `==`, `in`, and range operators",
111                ));
112            }
113        }
114
115        let range_count = where_clauses
116            .iter()
117            .filter(|wc| Self::is_range_operator(wc.operator))
118            .count();
119        let in_count = where_clauses
120            .iter()
121            .filter(|wc| wc.operator == WhereOperator::In)
122            .count();
123
124        // `range_count > 1` is rejected for every shape EXCEPT the
125        // carrier-ACOR with outer range (G8): when the caller asks
126        // for `GroupByRange` + `prove = true` and there are exactly
127        // two range clauses on different fields (one on the index's
128        // first property — the carrier — and one on the terminator —
129        // the inner ACOR), grovedb's carrier-subquery composition
130        // ([PR #663](https://github.com/dashpay/grovedb/pull/663)
131        // shipped the carrier shape; [PR #664](https://github.com/dashpay/grovedb/pull/664)
132        // permits `SizedQuery::limit` on it) makes this a well-formed
133        // single-proof shape. The two ranges must be on distinct
134        // fields — combining two-sided ranges on the same field still
135        // routes through `between*` as before.
136        if range_count > 1 {
137            let two_ranges_on_distinct_fields = where_clauses
138                .iter()
139                .filter(|wc| Self::is_range_operator(wc.operator))
140                .map(|wc| wc.field.as_str())
141                .collect::<std::collections::BTreeSet<_>>()
142                .len()
143                == range_count;
144            if !(prove
145                && matches!(mode, CountMode::GroupByRange)
146                && range_count == 2
147                && in_count == 0
148                && two_ranges_on_distinct_fields)
149            {
150                return Err(QuerySyntaxError::InvalidWhereClauseComponents(
151                    "count query supports at most one range where-clause; combine \
152                     two-sided ranges via `between*` instead of separate `>` / `<` \
153                     clauses, or use `group_by = [outer_range_field]` with `prove = \
154                     true` for the carrier-aggregate shape with one outer range and \
155                     one inner ACOR range on a different field",
156                ));
157            }
158        }
159        if in_count > 1 {
160            return Err(QuerySyntaxError::InvalidWhereClauseComponents(
161                "count query supports at most one `in` where-clause; the In carries \
162                 the split property and only one split dimension is supported per request",
163            ));
164        }
165
166        let has_range = range_count == 1;
167        let has_in = in_count == 1;
168
169        // `range + In` on the prove path used to be rejected
170        // wholesale because grovedb's `AggregateCountOnRange`
171        // primitive wraps a single inner range and historically
172        // couldn't cartesian-fork. As of grovedb PR #663 it CAN
173        // appear under outer `Keys` via the carrier-subquery
174        // composition, so the rejection is now conditional:
175        // - `CountMode::GroupByIn` (single-field group_by on the
176        //   In field): routes to the new
177        //   `DocumentCountMode::RangeAggregateCarrierProof` —
178        //   produces one (in_key, u64) pair per In branch via a
179        //   carrier-ACOR PathQuery (see
180        //   [`super::path_query::carrier_aggregate_count_path_query`]).
181        // - `CountMode::GroupByRange` / `GroupByCompound` (distinct
182        //   modes): route to `RangeDistinctProof` as before.
183        // - `CountMode::Aggregate` (no group_by): still rejected.
184        //   With no group_by the caller asks for a single summed
185        //   count across all In branches, but the carrier-ACOR
186        //   primitive returns one count per branch — collapsing
187        //   that back to a single sum at the verifier requires
188        //   trusting the SDK to add them, which is exactly what
189        //   the prove path is supposed to avoid (the verifier
190        //   should get the consensus-committed answer, not
191        //   compute a derived one).
192        if has_range && has_in && prove && !distinct && !matches!(mode, CountMode::GroupByIn) {
193            return Err(QuerySyntaxError::InvalidWhereClauseComponents(
194                "range count queries with an `in` clause are not supported on the \
195                 aggregate prove path; use `group_by = [in_field]` for the carrier \
196                 per-In-aggregate proof (one u64 per branch), `group_by = [in_field, \
197                 range_field]` for the compound distinct-prove path (per-distinct-\
198                 value entries), or `prove = false` for the no-proof variant",
199            ));
200        }
201
202        if distinct && !has_range && range_count != 2 {
203            return Err(QuerySyntaxError::InvalidWhereClauseComponents(
204                "GROUP BY on a range field requires a range where-clause; the \
205                 range field must appear in `where` for the distinct walk to \
206                 have a window to iterate over",
207            ));
208        }
209
210        // G8 short-circuit: `GroupByRange + prove` with exactly two
211        // range clauses on distinct fields routes to the carrier
212        // ACOR shape. One range becomes the outer dimension of the
213        // carrier walk; the other range becomes the inner ACOR
214        // target. `SizedQuery::limit` caps the outer walk (per
215        // [grovedb PR #664](https://github.com/dashpay/grovedb/pull/664)).
216        // The validity of "which range is the outer / which is the
217        // inner" depends on the picked index — checked in the
218        // path-query builder against the index's property order.
219        if prove && matches!(mode, CountMode::GroupByRange) && range_count == 2 && in_count == 0 {
220            return Ok(DocumentCountMode::RangeAggregateCarrierProof);
221        }
222
223        Ok(match (has_range, has_in, prove, distinct) {
224            // Range + prove + distinct (with or without In on
225            // prefix): per-distinct-value counts come from a
226            // regular range proof against the property-name
227            // `ProvableCountTree`. With In on prefix the path
228            // query uses grovedb's subquery primitive to
229            // cartesian-fork; the verifier walks the same
230            // compound shape.
231            (true, _, true, true) => DocumentCountMode::RangeDistinctProof,
232            // Range + prove + summed (no In): `AggregateCountOnRange`
233            // collapse — single u64 verified out. The In case is
234            // rejected above.
235            (true, false, true, false) => DocumentCountMode::RangeProof,
236            // Range + no-proof: the executor uses the same
237            // `distinct_count_path_query` builder; In on prefix
238            // forks via grovedb subquery at execution time. Sum
239            // vs. distinct comes from `RangeCountOptions.distinct`
240            // applied to the merged result.
241            (true, _, false, _) => DocumentCountMode::RangeNoProof,
242            (false, true, false, _) => DocumentCountMode::PerInValue,
243            // `In` + `prove = true` (no range): route to the
244            // CountTree-element proof path. The shared
245            // `point_lookup_count_path_query` builder emits one
246            // `Element::CountTree` per matched In branch (via
247            // outer `Key`s + `[0]` subquery); the SDK's
248            // `verify_point_lookup_count_proof` extracts
249            // `count_value_or_default()` from each verified
250            // element, and the `FromProof<DocumentQuery>` impl
251            // for `DocumentSplitCounts` returns them as
252            // per-In-value entries. Proof size is O(|In values|
253            // × log n) — no document materialization, no
254            // `u16::MAX` cap on matching docs.
255            (false, true, true, _) => DocumentCountMode::PointLookupProof,
256            // No range, no In, `prove = true`: same CountTree-
257            // element proof shape — either the documents_countable
258            // primary-key CountTree fast path (empty where) or
259            // a single per-branch CountTree element for an
260            // Equal-only fully-covered query.
261            (false, false, true, _) => DocumentCountMode::PointLookupProof,
262            (false, false, false, _) => DocumentCountMode::Total,
263            // Range + In + prove + !distinct: the GroupByIn case
264            // routes to the new carrier-ACOR proof shape (grovedb
265            // PR #663); the Aggregate case is rejected above.
266            (true, true, true, false) if matches!(mode, CountMode::GroupByIn) => {
267                DocumentCountMode::RangeAggregateCarrierProof
268            }
269            (true, true, true, false) => {
270                unreachable!("range + In + prove + Aggregate is rejected before the dispatch match")
271            }
272        })
273    }
274
275    /// Versioned dispatcher around [`Self::detect_mode`]. Production
276    /// callers (the count dispatcher and the SDK's
277    /// `verify_count_query` helper) go through this so a future
278    /// protocol version can adjust the routing table behind a
279    /// method-version bump without an API churn for tests.
280    ///
281    /// Routes through
282    /// `platform_version.drive.methods.document.query.detect_count_mode`.
283    /// Today only `0` is defined and maps to [`Self::detect_mode`]
284    /// verbatim.
285    #[cfg(any(feature = "server", feature = "verify"))]
286    pub fn detect_mode_versioned(
287        where_clauses: &[WhereClause],
288        mode: CountMode,
289        prove: bool,
290        platform_version: &dpp::version::PlatformVersion,
291    ) -> Result<DocumentCountMode, QuerySyntaxError> {
292        match platform_version
293            .drive
294            .methods
295            .document
296            .query
297            .detect_count_mode
298        {
299            0 => Self::detect_mode(where_clauses, mode, prove),
300            version => Err(QuerySyntaxError::Unsupported(format!(
301                "detect_count_mode: unknown method version {version}; only 0 is supported"
302            ))),
303        }
304    }
305}