Loading...
Searching...
No Matches
UnionArray.h
Go to the documentation of this file.
1// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
2
3#ifndef AWKWARD_UNIONARRAY_H_
4#define AWKWARD_UNIONARRAY_H_
5
6#include <string>
7#include <memory>
8#include <vector>
9
10#include "awkward/common.h"
11#include "awkward/Slice.h"
12#include "awkward/Index.h"
13#include "awkward/Content.h"
14
15namespace awkward {
// UnionForm -- "Form describing UnionArray" (wording from this page's member index).
// NOTE(review): the class-head line (listing line 19) and all doc-comment lines
// are absent from this listing; only the member declarations below survive.
20 public:
// Creates a UnionForm from an identities flag, parameters, a form key, the
// Index::Form of the tags and of the index, and the list of content Forms.
23 UnionForm(bool has_identities,
24 const util::Parameters& parameters,
25 const FormKey& form_key,
26 Index::Form tags,
27 Index::Form index,
28 const std::vector<FormPtr>& contents);
29
// NOTE(review): the return-type lines (listing lines 30 and 33) are missing here;
// the member index on this page gives `Index::Form tags() const` and
// `Index::Form index() const`.
31 tags() const;
32
34 index() const;
35
// Accessors for the content Forms: the whole list, its size, and one entry.
36 const std::vector<FormPtr>
37 contents() const;
38
39 int64_t
40 numcontents() const;
41
42 const FormPtr
43 content(int64_t index) const;
44
// High-level Type describing this Form.
45 const TypePtr
46 type(const util::TypeStrs& typestrs) const override;
47
// Internal function to produce a JSON representation one node at a time.
48 void
49 tojson_part(ToJson& builder, bool verbose) const override;
50
// Copies this node without copying any nodes hierarchically nested within it.
51 const FormPtr
52 shallow_copy() const override;
53
// Copies this node, adding or replacing a form_key.
54 const FormPtr
55 with_form_key(const FormKey& form_key) const override;
56
// The parameter associated with key at the first level that has a non-null value.
57 const std::string
58 purelist_parameter(const std::string& key) const override;
59
// True if all nested lists down to the first RecordForm are RegularForm nodes.
60 bool
61 purelist_isregular() const override;
62
// The list-depth of this array, not counting any contained within a RecordForm.
63 int64_t
64 purelist_depth() const override;
65
// True if this dimension has option-type; false otherwise.
66 bool
67 dimension_optiontype() const override;
68
// Returns (a) the minimum list-depth and (b) the maximum list-depth.
69 const std::pair<int64_t, int64_t>
70 minmax_depth() const override;
71
// Returns (a) whether the list-depth "branches" and (b) an int64_t whose meaning
// is cut off in this page's brief -- TODO confirm against the full documentation.
72 const std::pair<bool, int64_t>
73 branch_depth() const override;
74
// Record/tuple field queries: number of fields, position of a key, name of a
// field index, key membership, list of keys, and whether the record is a tuple.
75 int64_t
76 numfields() const override;
77
78 int64_t
79 fieldindex(const std::string& key) const override;
80
81 const std::string
82 key(int64_t fieldindex) const override;
83
84 bool
85 haskey(const std::string& key) const override;
86
87 const std::vector<std::string>
88 keys() const override;
89
90 bool
91 istuple() const override;
92
// Returns true if this Form is equal to the other Form; the four flags select
// which aspects take part in the comparison (presumably -- verify in the .cpp).
93 bool
94 equal(const FormPtr& other,
95 bool check_identities,
96 bool check_parameters,
97 bool check_form_key,
98 bool compatibility_check) const override;
99
// Returns the Form that would result from a field-slice.
100 const FormPtr
101 getitem_field(const std::string& key) const override;
102
// Returns the Form that would result from a fields-slice.
103 const FormPtr
104 getitem_fields(const std::vector<std::string>& keys) const override;
105
106 private:
// State mirrored by the tags(), index() and contents() accessors above.
107 Index::Form tags_;
108 Index::Form index_;
109 const std::vector<FormPtr> contents_;
110 };
111
// UnionArrayOf<T, I>: represents heterogeneous data by interleaving several
// contents (wording from this page's member index). T is the element type of the
// tags Index and I the element type of the index Index, per the constructor below.
// NOTE(review): doc-comment lines and every hyperlinked line were dropped from
// this listing, so some declarations below are missing their declarator or
// return-type line; each such gap is flagged where it occurs.
// NOTE(review): listing line 126 (between the #ifdef/#endif) is missing;
// presumably the LIBAWKWARD_EXPORT_SYMBOL macro listed in the index -- confirm.
123 template <typename T, typename I>
124 class
125#ifdef AWKWARD_UNIONARRAY_NO_EXTERN_TEMPLATE
127#endif
128 UnionArrayOf: public Content {
129 public:
// NOTE(review): the declarator lines of the next two static helpers are missing.
// The member index lists two functions with this return type:
// `sparse_index(int64_t len)` (index[i] = i) and
// `regular_index(const IndexOf<T>& tags)`; which one comes first is not
// recoverable from this listing.
131 static const IndexOf<I>
133
135 static const IndexOf<I>
137
// Generates tags and index for concatenating arrays with axis != 0.
// NOTE(review): first declarator line is missing; the member index gives
// `nested_tags_index(const Index64& offsets, const std::vector<Index64>& counts)`.
139 static const std::pair<IndexOf<T>, IndexOf<I>>
141 const std::vector<Index64>& counts);
142
// Constructor. NOTE(review): `tags` is taken by const value while `index` is
// taken by const reference -- confirm whether the asymmetry is intentional.
157 UnionArrayOf<T, I>(const IdentitiesPtr& identities,
158 const util::Parameters& parameters,
159 const IndexOf<T> tags,
160 const IndexOf<I>& index,
161 const ContentPtrVec& contents);
162
// Small integers indicating which of the contents to draw from for each element.
169 const IndexOf<T>
170 tags() const;
171
// Positions within the contents to find each item.
175 const IndexOf<I>
176 index() const;
177
// std::vector of Content instances representing each of the possible types.
180 const ContentPtrVec
181 contents() const;
182
// The number of contents.
184 int64_t
185 numcontents() const;
186
// Returns contents[index].
188 const ContentPtr
189 content(int64_t index) const;
190
// Returns all items in the array corresponding to one of the contents.
195 const ContentPtr
196 project(int64_t index) const;
197
// If any of the contents is also a UnionArray, combines this array and its
// contents (the rest of the brief is truncated on this page).
210 const ContentPtr
211 simplify_uniontype(bool merge, bool mergebool) const;
212
// User-friendly name of this class: "UnionArray8_32", "UnionArray8_U32", or
// "UnionArray8_64".
215 const std::string
216 classname() const override;
217
// Assign a surrogate index of Identities to this array (in-place).
218 void
219 setidentities() override;
220
// Assign a specified set of Identities to this array (in-place).
221 void
222 setidentities(const IdentitiesPtr& identities) override;
223
// High-level Type describing this array.
224 const TypePtr
225 type(const util::TypeStrs& typestrs) const override;
226
// Low-level Form describing all the features of this array except the actual
// data buffers.
227 const FormPtr
228 form(bool materialize) const override;
229
// Returns the kernel library enum for all nested ptr_lib within the array's tree.
// NOTE(review): the return-type line (listing line 230) is missing; the member
// index gives `kernel::lib kernels() const override`.
231 kernels() const override;
232
// Accumulates all the unique ArrayCache objects from nested VirtualArray nodes.
233 void
234 caches(std::vector<ArrayCachePtr>& out) const override;
235
// Internal function to build an output string for tostring.
236 const std::string
237 tostring_part(const std::string& indent,
238 const std::string& pre,
239 const std::string& post) const override;
240
// Internal function to produce a JSON representation one node at a time.
241 void
242 tojson_part(ToJson& builder, bool include_beginendlist) const override;
243
// Internal function used to calculate nbytes.
244 void
245 nbytes_part(std::map<size_t, int64_t>& largest) const override;
246
// The number of elements in the array.
250 int64_t
251 length() const override;
252
// Copies this node without copying any nodes hierarchically nested within it.
253 const ContentPtr
254 shallow_copy() const override;
255
// Copies this node and all nodes hierarchically nested within it; the flags
// presumably select which associated buffers are copied too -- brief is truncated.
256 const ContentPtr
257 deep_copy(bool copyarrays,
258 bool copyindexes,
259 bool copyidentities) const override;
260
// Performs up-front validity checks so they need not be repeated per element.
261 void
262 check_for_iteration() const override;
263
// Internal function to get an empty slice (with the correct type).
264 const ContentPtr
265 getitem_nothing() const override;
266
// Element at a position, handling negative indexing and bounds-checking.
267 const ContentPtr
268 getitem_at(int64_t at) const override;
269
// Element at a position, without handling negative indexing or bounds-checking.
270 const ContentPtr
271 getitem_at_nowrap(int64_t at) const override;
272
// Subinterval of this array, handling negative indexing and bounds-checking
// like Python.
273 const ContentPtr
274 getitem_range(int64_t start, int64_t stop) const override;
275
// Subinterval of this array, without handling negative indexing or
// bounds-checking.
276 const ContentPtr
277 getitem_range_nowrap(int64_t start, int64_t stop) const override;
278
// This array with the first nested RecordArray replaced by the field at key.
279 const ContentPtr
280 getitem_field(const std::string& key) const override;
281
// Overload taking an extra `only_fields` Slice; no brief is given on this page.
282 const ContentPtr
283 getitem_field(const std::string& key,
284 const Slice& only_fields) const override;
285
// This array with the first nested RecordArray replaced by a RecordArray of a
// given subset of keys.
286 const ContentPtr
287 getitem_fields(const std::vector<std::string>& keys) const override;
288
// Overload taking an extra `only_fields` Slice; no brief is given on this page.
289 const ContentPtr
290 getitem_fields(const std::vector<std::string>& keys,
291 const Slice& only_fields) const override;
292
// Internal function that propagates a generic getitem request through one axis.
// NOTE(review): declarator line (listing line 294) is missing; presumably
// `getitem_next(const SliceItemPtr& head,` per the member index -- confirm.
293 const ContentPtr
295 const Slice& tail,
296 const Index64& advanced) const override;
297
// Returns an array of the same type with elements filtered, rearranged, and
// possibly duplicated according to `carry`.
298 const ContentPtr
299 carry(const Index64& carry, bool allow_lazy) const override;
300
// The list-depth of this array, not counting any contained within a RecordArray.
301 int64_t
302 purelist_depth() const override;
303
// Returns (a) the minimum list-depth and (b) the maximum list-depth of the array.
304 const std::pair<int64_t, int64_t>
305 minmax_depth() const override;
306
// Returns (a) whether the list-depth "branches" and (b) an int64_t whose meaning
// is cut off in this page's brief -- TODO confirm against the full documentation.
307 const std::pair<bool, int64_t>
308 branch_depth() const override;
309
// Record/tuple field queries: number of fields, position of a key, name of a
// field index, key membership, list of keys, and whether the record is a tuple.
310 int64_t
311 numfields() const override;
312
313 int64_t
314 fieldindex(const std::string& key) const override;
315
316 const std::string
317 key(int64_t fieldindex) const override;
318
319 bool
320 haskey(const std::string& key) const override;
321
322 const std::vector<std::string>
323 keys() const override;
324
325 bool
326 istuple() const override;
327
328 // operations
// Returns an error message if this array is invalid; otherwise an empty string.
329 const std::string
330 validityerror(const std::string& path) const override;
331
// Returns an equivalent array simplified at one level only.
336 const ContentPtr
337 shallow_simplify() const override;
338
// The length of this array if axis = 0, or the lengths of subarrays otherwise.
339 const ContentPtr
340 num(int64_t axis, int64_t depth) const override;
341
// Returns (a) an offsets Index and (b) a flattened version of the array at some
// axis depth.
342 const std::pair<Index64, ContentPtr>
343 offsets_and_flattened(int64_t axis, int64_t depth) const override;
344
// Returns true if this array can be merged with the other; false otherwise.
345 bool
346 mergeable(const ContentPtr& other, bool mergebool) const override;
347
// Returns true if this array has all the same buffers and parameters as other.
348 bool
349 referentially_equal(const ContentPtr& other) const override;
350
// Merges a single other with this array in reverse order: other first, this last.
351 const ContentPtr
352 reverse_merge(const ContentPtr& other) const override;
353
// Partitions this array plus a list of others into a head sequence and a tail
// sequence.
354 const std::pair<ContentPtrVec, ContentPtrVec>
355 merging_strategy(const ContentPtrVec& others) const override;
356
// Returns an array with this and the others concatenated (this first, others last).
357 const ContentPtr
358 mergemany(const ContentPtrVec& others) const override;
359
// Converts this array into a SliceItem that can be used in getitem.
360 const SliceItemPtr
361 asslice() const override;
362
// Returns this array with None values replaced by a given value.
363 const ContentPtr
364 fillna(const ContentPtr& value) const override;
365
// If axis = 0, returns a view of this array padded on the right with None values.
366 const ContentPtr
367 rpad(int64_t target, int64_t axis, int64_t depth) const override;
368
// If axis = 0, returns a view of this array padded on the right.
369 const ContentPtr
370 rpad_and_clip(int64_t target,
371 int64_t axis,
372 int64_t depth) const override;
373
// This array with one axis removed by applying a Reducer.
374 const ContentPtr
375 reduce_next(const Reducer& reducer,
376 int64_t negaxis,
377 const Index64& starts,
378 const Index64& shifts,
379 const Index64& parents,
380 int64_t outlength,
381 bool mask,
382 bool keepdims) const override;
383
// This array sorted.
384 const ContentPtr
385 sort_next(int64_t negaxis,
386 const Index64& starts,
387 const Index64& parents,
388 int64_t outlength,
389 bool ascending,
390 bool stable) const override;
391
// Sorted-indices counterpart of sort_next (page brief: "This array sorted indices").
392 const ContentPtr
393 argsort_next(int64_t negaxis,
394 const Index64& starts,
395 const Index64& shifts,
396 const Index64& parents,
397 int64_t outlength,
398 bool ascending,
399 bool stable) const override;
400
// A (possibly nested) array of integers indicating the positions of elements
// within each nested list.
401 const ContentPtr
402 localindex(int64_t axis, int64_t depth) const override;
403
// Tuples or records of all n-tuple combinations of list items at some axis depth.
404 const ContentPtr
405 combinations(int64_t n,
406 bool replacement,
407 const util::RecordLookupPtr& recordlookup,
408 const util::Parameters& parameters,
409 int64_t axis,
410 int64_t depth) const override;
411
// NOTE(review): the declarator line of each of the next four declarations
// (listing lines 413, 418, 423, 428) is missing. The member index lists four
// getitem_next overloads whose first parameter is `const SliceAt& at`,
// `const SliceRange& range`, `const SliceArray64& array` and
// `const SliceJagged64& jagged`; the order in which they appear here is not
// recoverable from this listing.
412 const ContentPtr
414 const Slice& tail,
415 const Index64& advanced) const override;
416
417 const ContentPtr
419 const Slice& tail,
420 const Index64& advanced) const override;
421
422 const ContentPtr
424 const Slice& tail,
425 const Index64& advanced) const override;
426
427 const ContentPtr
429 const Slice& tail,
430 const Index64& advanced) const override;
431
// Internal functions that propagate a jagged array (array with irregular-length
// dimensions) slice; one overload per slice-content type.
432 const ContentPtr
433 getitem_next_jagged(const Index64& slicestarts,
434 const Index64& slicestops,
435 const SliceArray64& slicecontent,
436 const Slice& tail) const override;
437
438 const ContentPtr
439 getitem_next_jagged(const Index64& slicestarts,
440 const Index64& slicestops,
441 const SliceMissing64& slicecontent,
442 const Slice& tail) const override;
443
444 const ContentPtr
445 getitem_next_jagged(const Index64& slicestarts,
446 const Index64& slicestops,
447 const SliceJagged64& slicecontent,
448 const Slice& tail) const override;
449
// Recursively copies components of the array to the memory of the given kernel
// library (the page brief mentions main memory to a GPU for kernel::lib::cuda).
450 const ContentPtr
451 copy_to(kernel::lib ptr_lib) const override;
452
// Changes the leaf types; the target is selected by `name`.
453 const ContentPtr
454 numbers_to_type(const std::string& name) const override;
455
// Returns 'true' if all components of the array are unique.
457 bool
458 is_unique() const override;
459
// Returns an array where all components are unique.
461 const ContentPtr
462 unique() const override;
463
// Returns 'true' if subranges are equal.
465 bool
466 is_subrange_equal(const Index64& start, const Index64& stop) const override;
467
468 protected:
// NOTE(review): declarator line (listing line 471) is missing; the member index
// gives `getitem_next_jagged_generic(const Index64& slicestarts, ...)`. Templated
// on the slice-content type S; presumably shared by the three
// getitem_next_jagged overloads above -- confirm in the .cpp.
469 template <typename S>
470 const ContentPtr
472 const Index64& slicestops,
473 const S& slicecontent,
474 const Slice& tail) const;
475
476 private:
// State mirrored by the tags(), index() and contents() accessors above.
477 const IndexOf<T> tags_;
478 const IndexOf<I> index_;
479 const ContentPtrVec contents_;
480 };
481
// Unless AWKWARD_UNIONARRAY_NO_EXTERN_TEMPLATE is defined, declare the three
// supported UnionArrayOf instantiations as `extern template`, so translation units
// including this header do not instantiate them implicitly. All three use int8_t
// tags, with int32_t, uint32_t or int64_t index.
// NOTE(review): presumably exactly one .cpp defines the macro and provides the
// explicit instantiations -- confirm.
482#ifndef AWKWARD_UNIONARRAY_NO_EXTERN_TEMPLATE
483 extern template class UnionArrayOf<int8_t, int32_t>;
484 extern template class UnionArrayOf<int8_t, uint32_t>;
485 extern template class UnionArrayOf<int8_t, int64_t>;
486#endif
487
491}
492
493#endif // AWKWARD_UNIONARRAY_H_
Abstract superclass of all array node types (flat hierarchy). Any Content can be nested within any ot...
Definition: Content.h:276
Abstract superclass of all array node forms, which expresses the nesting structure without any large ...
Definition: Content.h:39
A contiguous, one-dimensional array of integers used to represent data structures,...
Definition: Index.h:82
Form
Integer type of an Index, used by ListForm, IndexedForm, etc.
Definition: Index.h:34
Abstract class for all reducer algorithms.
Definition: Reducer.h:20
Represents an array of integers in a slice (possibly converted from an array of booleans).
Definition: Slice.h:229
Represents an integer in a tuple of slices passed to __getitem__ in Python.
Definition: Slice.h:58
Represents an array of nested lists, where the content may be SliceArrayOf, SliceMissingOf,...
Definition: Slice.h:515
Represents a SliceArrayOf, SliceMissingOf, or SliceJaggedOf with missing values: None (no equivalent ...
Definition: Slice.h:435
Represents a Python slice object (usual syntax: array[start:stop:step]).
Definition: Slice.h:93
A sequence of SliceItem objects representing a tuple passed to Python's __getitem__.
Definition: Slice.h:585
Definition: json.h:21
Represents heterogeneous data by interleaving several contents, indicating which is relevant at a giv...
Definition: UnionArray.h:128
int64_t purelist_depth() const override
The list-depth of this array, not counting any contained within a RecordArray.
const ContentPtr argsort_next(int64_t negaxis, const Index64 &starts, const Index64 &shifts, const Index64 &parents, int64_t outlength, bool ascending, bool stable) const override
The indices that would sort this array.
void caches(std::vector< ArrayCachePtr > &out) const override
Accumulates all the unique #ArrayCache objects from nested #VirtualArray nodes. (Uniqueness is determ...
const ContentPtr getitem_fields(const std::vector< std::string > &keys) const override
This array with the first nested RecordArray replaced by a RecordArray of a given subset of keys.
const ContentPtr getitem_next_jagged(const Index64 &slicestarts, const Index64 &slicestops, const SliceArray64 &slicecontent, const Slice &tail) const override
Internal function that propagates a jagged array (array with irregular-length dimensions) slice throu...
const ContentPtr getitem_at(int64_t at) const override
Returns the element at a given position in the array, handling negative indexing and bounds-checking ...
void setidentities() override
Assign a surrogate index of Identities to this array (in-place).
static const std::pair< IndexOf< T >, IndexOf< I > > nested_tags_index(const Index64 &offsets, const std::vector< Index64 > &counts)
Generates tags and index for concatenating arrays with axis != 0.
const std::pair< ContentPtrVec, ContentPtrVec > merging_strategy(const ContentPtrVec &others) const override
Partitions this array plus a list of others into a head sequence and a tail sequence:
const ContentPtr getitem_field(const std::string &key, const Slice &only_fields) const override
const ContentPtr getitem_next(const SliceArray64 &array, const Slice &tail, const Index64 &advanced) const override
Internal function that propagates a generic getitem request through one axis (including advanced inde...
const ContentPtr unique() const override
Returns an array where all components are unique.
const ContentPtr project(int64_t index) const
Returns all items in the array corresponding to one of the contents, in the order that they appear in...
const ContentPtr sort_next(int64_t negaxis, const Index64 &starts, const Index64 &parents, int64_t outlength, bool ascending, bool stable) const override
This array sorted.
const ContentPtr reverse_merge(const ContentPtr &other) const override
Merges a single other with this array in reverse order: other first, this last.
void tojson_part(ToJson &builder, bool include_beginendlist) const override
Internal function to produce a JSON representation one node at a time.
const ContentPtr getitem_fields(const std::vector< std::string > &keys, const Slice &only_fields) const override
int64_t length() const override
The number of elements in the array.
const ContentPtr deep_copy(bool copyarrays, bool copyindexes, bool copyidentities) const override
Copies this node and all nodes hierarchically nested within it, optionally copying the associated arr...
const std::string validityerror(const std::string &path) const override
Returns an error message if this array is invalid; otherwise, returns an empty string.
const ContentPtr getitem_next(const SliceAt &at, const Slice &tail, const Index64 &advanced) const override
Internal function that propagates a generic getitem request through one axis (including advanced inde...
bool istuple() const override
Returns true if the outermost RecordArray is a tuple.
const IndexOf< T > tags() const
Small integers indicating which of the contents to draw from for each element of the heterogeneous ar...
const ContentPtr getitem_nothing() const override
Internal function to get an empty slice (with the correct type).
kernel::lib kernels() const override
Returns the kernel library enum for all nested ptr_lib within the array's tree structure....
const ContentPtr simplify_uniontype(bool merge, bool mergebool) const
If any of the contents is also a UnionArray, combine this array and its contents into a single-level ...
const ContentPtr carry(const Index64 &carry, bool allow_lazy) const override
Returns an array of the same type with elements filtered, rearranged, and possibly duplicated by the ...
const ContentPtr combinations(int64_t n, bool replacement, const util::RecordLookupPtr &recordlookup, const util::Parameters &parameters, int64_t axis, int64_t depth) const override
Tuples or records of all n-tuple combinations of list items at some axis depth.
void setidentities(const IdentitiesPtr &identities) override
Assign a specified set of Identities to this array (in-place).
int64_t numcontents() const
The number of contents.
static const IndexOf< I > sparse_index(int64_t len)
Generates an index in which index[i] = i.
const ContentPtr localindex(int64_t axis, int64_t depth) const override
A (possibly nested) array of integers indicating the positions of elements within each nested list.
const ContentPtr getitem_range_nowrap(int64_t start, int64_t stop) const override
Subinterval of this array, without handling negative indexing or bounds-checking.
const std::pair< Index64, ContentPtr > offsets_and_flattened(int64_t axis, int64_t depth) const override
Returns (a) an offsets Index and (b) a flattened version of the array at some axis depth.
int64_t fieldindex(const std::string &key) const override
The position of a tuple or record key name if this array contains a RecordArray.
void check_for_iteration() const override
Performs up-front validity checks on an array so that they don't have to be checked in getitem_at_now...
const std::vector< std::string > keys() const override
A list of RecordArray keys or an empty list if this array does not contain a RecordArray.
const ContentPtr getitem_next_jagged_generic(const Index64 &slicestarts, const Index64 &slicestops, const S &slicecontent, const Slice &tail) const
const ContentPtr getitem_field(const std::string &key) const override
This array with the first nested RecordArray replaced by the field at key.
const std::pair< bool, int64_t > branch_depth() const override
Returns (a) whether the list-depth of this array "branches," or differs when followed through differe...
const ContentPtr rpad(int64_t target, int64_t axis, int64_t depth) const override
If axis = 0, returns a view of this array padded on the right with None values to have a minimum leng...
const ContentPtr getitem_next(const SliceItemPtr &head, const Slice &tail, const Index64 &advanced) const override
Internal function that propagates a generic getitem request through one axis (including advanced inde...
const ContentPtr getitem_at_nowrap(int64_t at) const override
Returns the element at a given position in the array, without handling negative indexing or bounds-ch...
void nbytes_part(std::map< size_t, int64_t > &largest) const override
Internal function used to calculate nbytes.
const ContentPtr mergemany(const ContentPtrVec &others) const override
Returns an array with this and the others concatenated (in order, this first, others last).
const std::string key(int64_t fieldindex) const override
The record name associated with a given field index or the tuple index as a string (e....
const ContentPtr content(int64_t index) const
Returns contents[index].
const ContentPtr shallow_copy() const override
Copies this node without copying any nodes hierarchically nested within it or any array/index/identit...
const std::pair< int64_t, int64_t > minmax_depth() const override
Returns (a) the minimum list-depth and (b) the maximum list-depth of the array, which can differ if t...
bool is_unique() const override
Returns 'true' if all components of the array are unique.
const FormPtr form(bool materialize) const override
Low-level Form describing all the features of this array except the actual data buffers (Index,...
const SliceItemPtr asslice() const override
Converts this array into a SliceItem that can be used in getitem.
const std::string tostring_part(const std::string &indent, const std::string &pre, const std::string &post) const override
Internal function to build an output string for tostring.
const ContentPtr rpad_and_clip(int64_t target, int64_t axis, int64_t depth) const override
If axis = 0, returns a view of this array padded on the right.
const ContentPtr getitem_next(const SliceJagged64 &jagged, const Slice &tail, const Index64 &advanced) const override
Internal function that propagates a generic getitem request through one axis (including advanced inde...
const ContentPtr getitem_range(int64_t start, int64_t stop) const override
Subinterval of this array, handling negative indexing and bounds-checking like Python.
bool is_subrange_equal(const Index64 &start, const Index64 &stop) const override
Returns 'true' if subranges are equal.
const ContentPtr fillna(const ContentPtr &value) const override
Returns this array with None values replaced by a given value.
int64_t numfields() const override
The number of fields in the first nested tuple or records or -1 if this array does not contain a Reco...
bool referentially_equal(const ContentPtr &other) const override
Returns true if this array has all the same buffers and parameters as other; false otherwise.
const TypePtr type(const util::TypeStrs &typestrs) const override
High-level Type describing this array.
const ContentPtr num(int64_t axis, int64_t depth) const override
The length of this array (as a NumpyArray scalar) if axis = 0 or the lengths of subarrays (as an arra...
bool mergeable(const ContentPtr &other, bool mergebool) const override
Returns true if this array can be merged with the other; false otherwise.
bool haskey(const std::string &key) const override
Returns true if the array contains a RecordArray with the specified key; false otherwise.
const ContentPtr getitem_next_jagged(const Index64 &slicestarts, const Index64 &slicestops, const SliceJagged64 &slicecontent, const Slice &tail) const override
Internal function that propagates a jagged array (array with irregular-length dimensions) slice throu...
const ContentPtr getitem_next_jagged(const Index64 &slicestarts, const Index64 &slicestops, const SliceMissing64 &slicecontent, const Slice &tail) const override
Internal function that propagates a jagged array (array with irregular-length dimensions) slice throu...
const ContentPtr getitem_next(const SliceRange &range, const Slice &tail, const Index64 &advanced) const override
Internal function that propagates a generic getitem request through one axis (including advanced inde...
const ContentPtr shallow_simplify() const override
Returns an equivalent array simplified at one level only using simplify_optiontype if an option-type ...
const IndexOf< I > index() const
Positions within the contents to find each item.
const ContentPtr reduce_next(const Reducer &reducer, int64_t negaxis, const Index64 &starts, const Index64 &shifts, const Index64 &parents, int64_t outlength, bool mask, bool keepdims) const override
This array with one axis removed by applying a Reducer (e.g. "sum", "max", "any", "all").
const ContentPtrVec contents() const
std::vector of Content instances representing each of the possible types.
const ContentPtr numbers_to_type(const std::string &name) const override
Changes the leaf types to the type given by name.
static const IndexOf< I > regular_index(const IndexOf< T > &tags)
Generates an index in which index[tags == i][i] = i.
const std::string classname() const override
User-friendly name of this class: "UnionArray8_32", "UnionArray8_U32", or "UnionArray8_64".
const ContentPtr copy_to(kernel::lib ptr_lib) const override
Recursively copies components of the array from main memory to a GPU (if ptr_lib == kernel::lib::cuda...
Form describing UnionArray.
Definition: UnionArray.h:19
int64_t purelist_depth() const override
The list-depth of this array, not counting any contained within a RecordForm.
const FormPtr with_form_key(const FormKey &form_key) const override
Copies this node, adding or replacing a form_key.
Index::Form tags() const
bool equal(const FormPtr &other, bool check_identities, bool check_parameters, bool check_form_key, bool compatibility_check) const override
Returns true if this Form is equal to the other Form; false otherwise.
bool istuple() const override
Returns true if the outermost RecordArray is a tuple.
const FormPtr shallow_copy() const override
Copies this node without copying any nodes hierarchically nested within it.
const FormPtr content(int64_t index) const
const std::vector< FormPtr > contents() const
void tojson_part(ToJson &builder, bool verbose) const override
Internal function to produce a JSON representation one node at a time.
int64_t numcontents() const
const FormPtr getitem_fields(const std::vector< std::string > &keys) const override
Returns the Form that would result from a fields-slice.
int64_t fieldindex(const std::string &key) const override
The position of a tuple or record key name if this array contains a RecordForm.
const std::vector< std::string > keys() const override
A list of RecordForm keys or an empty list if this Form does not contain a RecordForm.
const std::pair< bool, int64_t > branch_depth() const override
Returns (a) whether the list-depth of this array "branches," or differs when followed through differe...
const std::string key(int64_t fieldindex) const override
The record name associated with a given field index or the tuple index as a string (e....
const std::pair< int64_t, int64_t > minmax_depth() const override
Returns (a) the minimum list-depth and (b) the maximum list-depth of the array, which can differ if t...
const FormPtr getitem_field(const std::string &key) const override
Returns the Form that would result from a field-slice.
Index::Form index() const
UnionForm(bool has_identities, const util::Parameters &parameters, const FormKey &form_key, Index::Form tags, Index::Form index, const std::vector< FormPtr > &contents)
Creates a UnionForm. See UnionArray for documentation.
int64_t numfields() const override
The number of fields in the first nested tuple or records or -1 if this array does not contain a Reco...
const TypePtr type(const util::TypeStrs &typestrs) const override
High-level Type describing this Form.
const std::string purelist_parameter(const std::string &key) const override
The parameter associated with key at the first level that has a non-null value, descending only as de...
bool haskey(const std::string &key) const override
Returns true if the array contains a RecordForm with the specified key; false otherwise.
bool purelist_isregular() const override
Returns true if all nested lists down to the first RecordForm are RegularForm nodes; false otherwise.
bool dimension_optiontype() const override
Returns true if this dimension has option-type; false otherwise.
#define LIBAWKWARD_EXPORT_SYMBOL
Definition: common.h:45
int64_t len(const T &self)
Definition: content.h:46
lib
Definition: kernel-dispatch.h:20
std::map< std::string, std::string > Parameters
Definition: util.h:165
std::shared_ptr< RecordLookup > RecordLookupPtr
Definition: util.h:130
std::map< std::string, std::string > TypeStrs
Definition: util.h:215
Definition: BitMaskedArray.h:15
std::shared_ptr< std::string > FormKey
Definition: Content.h:19
std::vector< std::shared_ptr< Content > > ContentPtrVec
Definition: Content.h:16
std::shared_ptr< SliceItem > SliceItemPtr
Definition: Slice.h:15
std::shared_ptr< Content > ContentPtr
Definition: Content.h:15
std::shared_ptr< Form > FormPtr
Definition: Content.h:18
std::shared_ptr< Type > TypePtr
Definition: Content.h:23
std::shared_ptr< Identities > IdentitiesPtr
Definition: Identities.h:16