Scanner C++ API
metadata.h
1 /* Copyright 2016 Carnegie Mellon University
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #pragma once
17 
18 #include "scanner/util/common.h"
19 #include "scanner/util/storehouse.h"
20 #include "storehouse/storage_backend.h"
21 
22 #include <set>
23 
24 namespace scanner {
25 namespace internal {
26 
/// Returns a reference to the database path string. The path helpers in this
/// header append file and directory names to it directly, without inserting a
/// separator.
const std::string& get_database_path();

/// Takes a path string by value.
/// NOTE(review): presumably this replaces the value reported by
/// get_database_path(); the definition is not in this header -- confirm.
void set_database_path(std::string path);

/// Returns the scanner path by value.
/// NOTE(review): this returns a `const std::string` copy while
/// get_database_path() returns a reference -- confirm the asymmetry is
/// intended.
const std::string get_scanner_path();

/// Takes a path string by value.
/// NOTE(review): presumably the counterpart of get_scanner_path() -- confirm.
void set_scanner_path(std::string path);
36 
37 inline std::string database_metadata_path() {
38  return get_database_path() + "db_metadata.bin";
39 }
40 
41 inline std::string table_megafile_path() {
42  return get_database_path() + "table_megafile.bin";
43 }
44 
45 inline std::string table_directory(i32 table_id) {
46  return get_database_path() + "tables/" + std::to_string(table_id);
47 }
48 
49 inline std::string table_descriptor_path(i32 table_id) {
50  return table_directory(table_id) + "/descriptor.bin";
51 }
52 
53 inline std::string table_item_output_path(i32 table_id, i32 column_id,
54  i32 item_id) {
55  return table_directory(table_id) + "/" + std::to_string(column_id) + "_" +
56  std::to_string(item_id) + ".bin";
57 }
58 
59 inline std::string table_item_video_metadata_path(i32 table_id, i32 column_id,
60  i32 item_id) {
61  return table_directory(table_id) + "/" + std::to_string(column_id) + "_" +
62  std::to_string(item_id) + "_video_metadata.bin";
63 }
64 
65 inline std::string table_item_metadata_path(i32 table_id, i32 column_id,
66  i32 item_id) {
67  return table_directory(table_id) + "/" + std::to_string(column_id) + "_" +
68  std::to_string(item_id) + "_metadata.bin";
69 }
70 
71 inline std::string bulk_job_directory(i32 bulk_job_id) {
72  return get_database_path() + "jobs/" + std::to_string(bulk_job_id);
73 }
74 
75 inline std::string bulk_job_descriptor_path(i32 bulk_job_id) {
76  return bulk_job_directory(bulk_job_id) + "/descriptor.bin";
77 }
78 
79 inline std::string bulk_job_master_profiler_path(i32 bulk_job_id) {
80  return bulk_job_directory(bulk_job_id) + "/profile_master.bin";
81 }
82 
83 inline std::string bulk_job_worker_profiler_path(i32 bulk_job_id, i32 node) {
84  return bulk_job_directory(bulk_job_id) + "/profile_" + std::to_string(node) +
85  ".bin";
86 }
87 
90 
/// Value wrapper around a single descriptor object of type T.
///
/// The descriptor is stored by value in a `mutable` member, so
/// get_descriptor() can hand out a non-const reference even when called on a
/// const Metadata.
template <typename T>
class Metadata {
 public:
  typedef T Descriptor;

  /// Leaves the wrapped descriptor default-initialized.
  Metadata() {}

  /// Stores a copy of `d` as the wrapped descriptor.
  Metadata(const Descriptor& d) : descriptor_(d) {}

  /// Path of the serialized descriptor. Only declared here; this header
  /// contains no generic definition for it.
  std::string descriptor_path() const;

  /// Returns a mutable reference to the wrapped descriptor.
  Descriptor& get_descriptor() const { return descriptor_; }

 protected:
  // Mutable so that the const accessor above can return a non-const
  // reference.
  mutable Descriptor descriptor_;
};
105 
/// Metadata wrapper for proto::DatabaseDescriptor.
///
/// In addition to the inherited descriptor it holds id/name lookup tables and
/// per-id "committed" flags for tables and bulk jobs (see the private
/// members). All member functions are only declared in this header.
/// NOTE(review): unlike the other metadata classes in this file, no default
/// constructor is declared in this view -- confirm that is intended.
class DatabaseMetadata : public Metadata<proto::DatabaseDescriptor> {
 public:
  /// Constructs from a descriptor.
  DatabaseMetadata(const Descriptor& descriptor);

  /// Hides Metadata::get_descriptor() with a version that returns a const
  /// reference.
  const Descriptor& get_descriptor() const;

  /// Static, argument-free variant that hides the non-static
  /// Metadata::descriptor_path().
  static std::string descriptor_path();

  /// Returns the table names by value.
  std::vector<std::string> table_names() const;

  // Table queries and updates, addressed either by name or by numeric id.
  bool has_table(const std::string& table) const;
  bool has_table(i32 table_id) const;
  i32 get_table_id(const std::string& table) const;
  const std::string& get_table_name(i32 table_id) const;
  /// NOTE(review): presumably returns the id assigned to the new table --
  /// confirm against the definition.
  i32 add_table(const std::string& table);
  void commit_table(i32 table_id);
  bool table_is_committed(i32 table_id) const;
  void remove_table(i32 table_id);

  /// Returns the bulk job names by const reference (table_names() above
  /// returns by value).
  const std::vector<std::string>& bulk_job_names() const;

  // Bulk job queries and updates, mirroring the table functions above.
  bool has_bulk_job(const std::string& job) const;
  bool has_bulk_job(i32 job_id) const;
  i32 get_bulk_job_id(const std::string& job_name) const;
  const std::string& get_bulk_job_name(i32 job_id) const;
  /// NOTE(review): presumably returns the id assigned to the new bulk job --
  /// confirm against the definition.
  i32 add_bulk_job(const std::string& job_name);
  void commit_bulk_job(i32 job_id);
  bool bulk_job_is_committed(i32 job_id) const;
  void remove_bulk_job(i32 job_id);

 private:
  // NOTE(review): presumably the ids handed out by the next add_table() /
  // add_bulk_job() call -- confirm.
  i32 next_table_id_;
  i32 next_bulk_job_id_;
  // Table lookup tables keyed by id and by name, plus a per-id flag.
  std::unordered_map<i32, std::string> table_id_names_;
  std::unordered_map<std::string, i32> table_name_ids_;
  std::unordered_map<i32, bool> table_committed_;

  // Bulk job lookup keyed by id, plus a per-id flag. There is no name->id map
  // for bulk jobs here, unlike for tables.
  std::unordered_map<i32, std::string> bulk_job_id_names_;
  std::unordered_map<i32, bool> bulk_job_committed_;
};
147 
/// Metadata wrapper for proto::VideoDescriptor.
///
/// Adds no data members of its own; it declares const accessors only.
/// NOTE(review): the accessors presumably read fields of the wrapped
/// descriptor; their definitions are not in this header -- confirm.
class VideoMetadata : public Metadata<proto::VideoDescriptor> {
 public:
  VideoMetadata();
  /// Constructs from a descriptor.
  VideoMetadata(const Descriptor& descriptor);

  /// Static variant keyed by (table, column, item) ids; hides the non-static
  /// Metadata::descriptor_path().
  static std::string descriptor_path(i32 table_id, i32 column_id, i32 item_id);

  // Identity of the item this descriptor belongs to.
  i32 table_id() const;
  i32 column_id() const;
  i32 item_id() const;
  // Frame count and frame geometry/format.
  i32 frames() const;
  i32 width() const;
  i32 height() const;
  i32 channels() const;
  proto::FrameType frame_type() const;
  proto::VideoDescriptor::VideoCodecType codec_type() const;
  // Per-encoded-video quantities, returned by value.
  i64 num_encoded_videos() const;
  std::vector<i64> frames_per_video() const;
  std::vector<i64> keyframes_per_video() const;
  std::vector<i64> size_per_video() const;

  // Sample-level index data, returned by value.
  std::vector<u64> keyframe_indices() const;
  std::vector<u64> sample_offsets() const;
  std::vector<u64> sample_sizes() const;
  std::vector<u8> metadata() const;
  std::string data_path() const;
  bool inplace() const;
};
176 
178  : public Metadata<proto::ImageFormatGroupDescriptor> {
179  public:
181  ImageFormatGroupMetadata(const Descriptor& descriptor);
182 
183  i32 num_images() const;
184  i32 width() const;
185  i32 height() const;
186  ImageEncodingType encoding_type() const;
187  ImageColorSpace color_space() const;
188  std::vector<i64> compressed_sizes() const;
189 };
190 
/// Metadata wrapper for proto::BulkJobDescriptor.
///
/// Besides the inherited descriptor it holds a column list, a column
/// name->id map, a table name list and a mutable per-table row-count map
/// (see the private members). Member functions are only declared here.
class BulkJobMetadata : public Metadata<proto::BulkJobDescriptor> {
 public:
  BulkJobMetadata();
  /// Constructs from a descriptor.
  BulkJobMetadata(const Descriptor& job);

  /// Static variant keyed by job id; hides the non-static
  /// Metadata::descriptor_path().
  static std::string descriptor_path(i32 job_id);

  i32 id() const;

  /// Returned by value.
  std::string name() const;

  i32 io_packet_size() const;

  i32 work_packet_size() const;

  i32 num_nodes() const;

  /// Returns the column list by const reference.
  const std::vector<proto::Column>& columns() const;

  /// Maps a column name to an id.
  /// NOTE(review): behavior for an unknown name is not visible here --
  /// confirm against the definition.
  i32 column_id(const std::string& column_name) const;

  // Disabled declarations, kept as in the original.
  // i64 rows_in_table(const std::string& name) const;

  // i64 total_rows() const;

 private:
  // NOTE(review): spelled with unqualified `Column`, while columns() returns
  // `proto::Column` and TableMetadata uses `proto::Column` -- presumably an
  // alias brought in by an included header; confirm.
  std::vector<Column> columns_;
  std::unordered_map<std::string, i32> column_ids_;
  // NOTE(review): the only declarations that mention per-table rows are the
  // disabled ones above; check whether these two members are still used.
  std::vector<std::string> table_names_;
  mutable std::unordered_map<std::string, i64> rows_in_table_;
};
222 
/// Metadata wrapper for proto::TableDescriptor.
///
/// Besides the inherited descriptor it holds a list of proto::Column.
/// Member functions are only declared here.
class TableMetadata : public Metadata<proto::TableDescriptor> {
 public:
  TableMetadata();
  /// Constructs from a descriptor.
  TableMetadata(const Descriptor& table);

  /// Static variant keyed by table id; hides the non-static
  /// Metadata::descriptor_path().
  static std::string descriptor_path(i32 table_id);

  i32 id() const;

  /// Returned by value.
  std::string name() const;

  i64 num_rows() const;

  /// Returned by value.
  /// NOTE(review): the meaning of "end rows" is not visible in this header --
  /// confirm against the definition.
  std::vector<i64> end_rows() const;

  /// Returns the column list by const reference.
  const std::vector<proto::Column>& columns() const;

  bool has_column(const std::string& name) const;

  // Conversions between a column's id and its name, and id -> type lookup.
  // NOTE(review): behavior for an unknown id or name is not visible here --
  // confirm against the definitions.
  std::string column_name(i32 column_id) const;

  i32 column_id(const std::string& name) const;

  ColumnType column_type(i32 column_id) const;

 private:
  std::vector<proto::Column> columns_;
};
251 
254 
/// Returns the fixed column name "index".
inline std::string index_column_name() {
  return std::string("index");
}
256 
/// Returns the fixed column name "frame".
inline std::string frame_column_name() {
  return std::string("frame");
}
258 
/// Returns the fixed column name "frame_info".
inline std::string frame_info_column_name() {
  return std::string("frame_info");
}
260 
263 
264 template <typename T>
265 void serialize_db_proto(storehouse::WriteFile* file, const T& descriptor) {
266  size_t size = descriptor.ByteSizeLong();
267  std::vector<u8> data(size);
268  descriptor.SerializeToArray(data.data(), size);
269  s_write(file, data.data(), size);
270 }
271 
272 template <typename T>
273 T deserialize_db_proto(storehouse::RandomReadFile* file, u64& pos) {
274  T descriptor;
275  uint64_t size;
276  BACKOFF_FAIL(file->get_size(size),
277  "while trying to get size for " + file->path());
278  std::vector<u8> data = storehouse::read_entire_file(
279  file, pos, std::max((size_t)size, (size_t)1024 * 1024));
280  descriptor.ParseFromArray(data.data(), data.size());
281  return descriptor;
282 }
283 
284 template <typename T>
285 void write_db_proto(storehouse::StorageBackend* storage, T db_proto) {
286  std::unique_ptr<storehouse::WriteFile> output_file;
287  const std::string& desc_path =
288  db_proto.Metadata<typename T::Descriptor>::descriptor_path();
289  BACKOFF_FAIL(make_unique_write_file(storage, desc_path, output_file),
290  "while trying to make write file for " + desc_path);
291  serialize_db_proto<typename T::Descriptor>(output_file.get(),
292  db_proto.get_descriptor());
293  BACKOFF_FAIL(output_file->save(),
294  "while trying to save " + output_file->path());
295 }
296 
297 template <typename T>
298 T read_db_proto(storehouse::StorageBackend* storage, const std::string& path) {
299  std::unique_ptr<storehouse::RandomReadFile> db_in_file;
300  BACKOFF_FAIL(make_unique_random_read_file(storage, path, db_in_file),
301  "while trying to make read file for " + path);
302  u64 pos = 0;
303  return T(deserialize_db_proto<typename T::Descriptor>(db_in_file.get(), pos));
304 }
305 
306 template <typename T>
307 using WriteFn = void (*)(storehouse::StorageBackend* storage, T db_proto);
308 
309 template <typename T>
310 using ReadFn = T (*)(storehouse::StorageBackend* storage,
311  const std::string& path);
312 
313 constexpr WriteFn<DatabaseMetadata> write_database_metadata =
314  write_db_proto<DatabaseMetadata>;
315 constexpr ReadFn<DatabaseMetadata> read_database_metadata =
316  read_db_proto<DatabaseMetadata>;
317 
/// Takes a storage backend and a read-only map from table id to
/// TableMetadata.
/// NOTE(review): presumably writes every entry to the file named by
/// table_megafile_path(); the definition is not in this header -- confirm.
void write_table_megafile(
    storehouse::StorageBackend* storage,
    const std::unordered_map<i32, TableMetadata>& table_metadata);

/// Takes a storage backend and a mutable map from table id to TableMetadata.
/// NOTE(review): presumably fills `table_metadata` from the file named by
/// table_megafile_path(); whether existing entries are cleared first is not
/// visible here -- confirm.
void read_table_megafile(
    storehouse::StorageBackend* storage,
    std::unordered_map<i32, TableMetadata>& table_metadata);
325 
326 constexpr WriteFn<BulkJobMetadata> write_bulk_job_metadata =
327  write_db_proto<BulkJobMetadata>;
328 constexpr ReadFn<BulkJobMetadata> read_bulk_job_metadata =
329  read_db_proto<BulkJobMetadata>;
330 
331 constexpr WriteFn<TableMetadata> write_table_metadata =
332  write_db_proto<TableMetadata>;
333 constexpr ReadFn<TableMetadata> read_table_metadata =
334  read_db_proto<TableMetadata>;
335 
336 constexpr WriteFn<VideoMetadata> write_video_metadata =
337  write_db_proto<VideoMetadata>;
338 constexpr ReadFn<VideoMetadata> read_video_metadata =
339  read_db_proto<VideoMetadata>;
340 }
341 }
Definition: metadata.h:191
std::string index_column_name()
Constants.
Definition: metadata.h:255
Definition: metadata.h:148
Definition: database.cpp:36
Definition: metadata.h:223
Definition: metadata.h:106
const std::string & get_database_path()
Path functions.
Definition: metadata.cpp:430
void serialize_db_proto(storehouse::WriteFile *file, const T &descriptor)
Helpers.
Definition: metadata.h:265
Common persistent data structs and their serialization helpers.
Definition: metadata.h:92