Scanner C++ API
sampler.h
1 /* Copyright 2016 Carnegie Mellon University
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #pragma once
17 
18 #include "scanner/engine/metadata.h"
19 #include "scanner/engine/table_meta_cache.h"
20 #include "scanner/util/common.h"
21 #include "scanner/util/profiler.h"
22 
23 #include <vector>
24 
25 namespace scanner {
26 namespace internal {
27 
/* Types of sampling
 - All: selects all rows from the table
 - Stride: selects every Nth row (with optional offset)
 - Range: selects all rows within [start, end)
 - Strided Range: selects every Nth row within [start, end)
 - Gather: selects an arbitrary set of rows

 Requiring access to more than metadata:
 - Filter: selects all rows where some predicate holds on one of the columns
 */
38 
40  public:
41  DomainSampler(const std::string& name)
42  : name_(name) {}
43 
44  virtual ~DomainSampler() {}
45 
46  const std::string& name() const { return name_; }
47 
48  virtual Result validate() = 0;
49 
50  virtual Result get_upstream_rows(const std::vector<i64>& downstream_rows,
51  std::vector<i64>& upstream_rows) const = 0;
52 
53  virtual Result get_num_downstream_rows(
54  i64 num_upstream_rows,
55  i64& num_downstream_rows) const = 0;
56 
57  virtual Result get_downstream_rows(
58  const std::vector<i64>& upstream_rows,
59  std::vector<i64>& downstream_rows,
60  std::vector<i64>& downstream_upstream_mapping) const = 0;
61 
62  protected:
63  std::string name_;
64 };
65 
66 Result
67 make_domain_sampler_instance(const std::string& sampler_type,
68  const std::vector<u8>& sampler_args,
69  DomainSampler*& sampler);
70 
72  std::vector<i64> rows;
73 };
74 
75 class Partitioner {
76  public:
77  Partitioner(const std::string& name, i64 num_rows)
78  : name_(name), num_rows_(num_rows) {}
79 
80  virtual ~Partitioner() {}
81 
82  const std::string& name() const { return name_; }
83 
84  virtual Result validate() = 0;
85 
86  virtual i64 total_rows() const = 0;
87 
88  virtual i64 total_groups() const = 0;
89 
90  virtual std::vector<i64> total_rows_per_group() const = 0;
91 
92  virtual PartitionGroup next_group() = 0;
93 
94  virtual void reset() = 0;
95 
96  virtual PartitionGroup group_at(i64 group_idx) = 0;
97 
98  virtual i64 offset_at_group(i64 group_idx) const = 0;
99 
100  protected:
101  std::string name_;
102  i64 num_rows_;
103 };
104 
105 Result make_partitioner_instance(const std::string& sampler_type,
106  const std::vector<u8>& sampler_args,
107  i64 num_rows, Partitioner*& partitioner);
108 }
109 }
Definition: sampler.h:75
Definition: sampler.h:71
Definition: database.cpp:36
Definition: sampler.h:39