scannertools package

Submodules

scannertools.audio module

class scannertools.audio.Audio(audio_path)

Bases: object

Reference to an audio file on disk.

extract(path=None, ext='.wav', segment=None)
path()
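
A minimal usage sketch for the signatures above (the path is a placeholder, and the segment format is assumed to be a (start, end) pair in seconds, mirroring Video.extract below):

    from scannertools.audio import Audio

    audio = Audio('/path/to/recording.wav')
    # Extract the 5s-10s span into a new .wav file; extract() is assumed
    # to return the path of the file it writes.
    clip_path = audio.extract(ext='.wav', segment=(5, 10))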

scannertools.bboxes module

class scannertools.bboxes.BboxNMS(config)

Bases: scannerpy.kernel.Kernel

execute(*input_columns) → bytes

scannertools.clothing_detection module

class scannertools.clothing_detection.Clothing(predictions)

Bases: object

class scannertools.clothing_detection.ClothingDetectionPipeline(db)

Bases: scannertools.prelude.Pipeline

additional_sources = ['bboxes']
build_pipeline(adjust_bboxes=True)
fetch_resources()
job_suffix = 'clothing'
parser_fn()
run_opts = {'pipeline_instances_per_node': 1}
class scannertools.clothing_detection.DetectClothing(config)

Bases: scannertools.clothing_detection.TorchKernel

detect_edge_text(img, start_y=40)
execute(frame: scannerpy.common.FrameType, bboxes: bytes) → bytes
class scannertools.clothing_detection.TorchKernel(config)

Bases: scannerpy.kernel.Kernel

build_model()
close()
execute()
images_to_tensor(images)
scannertools.clothing_detection.parse_clothing(s, _proto)

scannertools.face_detection module

class scannertools.face_detection.FaceDetectionPipeline(db)

Bases: scannertools.prelude.Pipeline

build_pipeline()
fetch_resources()
job_suffix = 'face'
parser_fn()
run_opts = {'pipeline_instances_per_node': 1}
class scannertools.face_detection.MTCNNDetectFaces(config)

Bases: scannerpy.stdlib.tensorflow.TensorFlowKernel

build_graph()
execute(frame: scannerpy.common.FrameType) → bytes
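
A hedged usage sketch for this pipeline (not verbatim library code: the keyword arguments are inferred from Pipeline.base_sources in scannertools.prelude, frames is assumed to take one list of frame indices per video, and the path is a placeholder):

    from scannerpy import Database
    from scannertools.video import Video
    from scannertools.face_detection import FaceDetectionPipeline

    db = Database()
    video = Video('sample.mp4')

    # make_runner() wraps the pipeline class in a callable job runner.
    detect_faces = FaceDetectionPipeline.make_runner()
    faces = detect_faces(db, videos=[video], frames=[[0, 30, 60, 90]])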

scannertools.face_embedding module

class scannertools.face_embedding.EmbedFaces(config)

Bases: scannerpy.stdlib.tensorflow.TensorFlowKernel

build_graph()
execute(frame: scannerpy.common.FrameType, bboxes: bytes) → bytes
class scannertools.face_embedding.FaceEmbeddingPipeline(db)

Bases: scannertools.prelude.Pipeline

additional_sources = ['bboxes']
build_pipeline()
fetch_resources()
job_suffix = 'embed'
parser_fn()
run_opts = {'pipeline_instances_per_node': 1}
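
Because additional_sources = ['bboxes'], the runner is assumed to accept a bboxes keyword argument alongside the base sources, which allows chaining it with face detection. A sketch under that assumption:

    from scannerpy import Database
    from scannertools.video import Video
    from scannertools.face_detection import FaceDetectionPipeline
    from scannertools.face_embedding import FaceEmbeddingPipeline

    db = Database()
    video = Video('sample.mp4')  # placeholder path

    faces = FaceDetectionPipeline.make_runner()(db, videos=[video])
    embed_faces = FaceEmbeddingPipeline.make_runner()
    # Feed the detector's output in as the 'bboxes' source.
    embeddings = embed_faces(db, videos=[video], bboxes=faces)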

scannertools.gender_detection module

class scannertools.gender_detection.DetectGender(config)

Bases: scannerpy.kernel.Kernel

execute(frame: scannerpy.common.FrameType, bboxes: bytes) → bytes
class scannertools.gender_detection.GenderDetectionPipeline(db)

Bases: scannertools.prelude.Pipeline

additional_sources = ['bboxes']
build_pipeline()
fetch_resources()
job_suffix = 'gender'
parser_fn()
run_opts = {'pipeline_instances_per_node': 1}

scannertools.kube module

class scannertools.kube.CloudConfig(project: str, service_key: str = NOTHING, storage_key_id: str = NOTHING, storage_key_secret: str = NOTHING)

Bases: object

class scannertools.kube.Cluster(cloud_config, cluster_config)

Bases: object

cli()
create_object(template)
database()
delete()
get_by_owner(ty, owner, namespace='default')
get_credentials()
get_kube_info(kind, namespace='default')
get_object(info, name)
get_pod(deployment, namespace='default')
job_status()
make_container(name)
make_deployment(name, replicas)
master_address()
resize(size)
running(pool='default-pool')
start(reset=True)
class scannertools.kube.ClusterConfig(id: str, num_workers: int, master: scannertools.kube.MachineConfig, worker: scannertools.kube.MachineConfig, zone: str = 'us-east1-b', kube_version: str = '1.9.7-gke.3', workers_per_node: int = 1, autoscale: bool = False, no_workers_timeout: int = 600, scopes: list = ['https://www.googleapis.com/auth/compute', 'https://www.googleapis.com/auth/devstorage.read_write', 'https://www.googleapis.com/auth/logging.write', 'https://www.googleapis.com/auth/monitoring', 'https://www.googleapis.com/auth/pubsub', 'https://www.googleapis.com/auth/servicecontrol', 'https://www.googleapis.com/auth/service.management.readonly', 'https://www.googleapis.com/auth/trace.append'], scanner_config: str = '/root/.scanner/config.toml', pipelines: list = [])

Bases: object

class scannertools.kube.MachineConfig(type: scannertools.kube.MachineType, disk: int, gpu: int = 0, gpu_type: str = 'nvidia-tesla-p100', preemptible: bool = False)

Bases: object

price()
class scannertools.kube.MachineType

Bases: abc.ABC

get_cpu()
get_mem()
get_name()
class scannertools.kube.MachineTypeName(name: str)

Bases: scannertools.kube.MachineType

get_cpu()
get_mem()
get_name()
class scannertools.kube.MachineTypeQuantity(cpu: int, mem: int)

Bases: scannertools.kube.MachineType

get_cpu()
get_mem()
get_name()
scannertools.kube.master()
scannertools.kube.run(s, detach=False)
scannertools.kube.worker()
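
A cluster-provisioning sketch assembled from the constructor signatures above (the project id, machine type name, disk sizes, and worker count are placeholder values, and a configured GCP environment is assumed):

    from scannertools.kube import (
        CloudConfig, Cluster, ClusterConfig, MachineConfig, MachineTypeName)

    cloud = CloudConfig(project='my-gcp-project')
    config = ClusterConfig(
        id='scanner-cluster',
        num_workers=4,
        master=MachineConfig(type=MachineTypeName(name='n1-standard-16'), disk=100),
        worker=MachineConfig(type=MachineTypeName(name='n1-standard-16'), disk=100,
                             preemptible=True))

    cluster = Cluster(cloud, config)
    cluster.start()          # provision the Kubernetes cluster
    db = cluster.database()  # a scannerpy Database backed by the cluster
    # ... run pipelines against db ...
    cluster.delete()         # tear the cluster down when finished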

scannertools.object_detection module

class scannertools.object_detection.DetectObjects(config)

Bases: scannerpy.stdlib.tensorflow.TensorFlowKernel

build_graph()
execute(frame: scannerpy.common.FrameType) → bytes
class scannertools.object_detection.ObjectDetectionPipeline(db)

Bases: scannertools.prelude.Pipeline

Detects objects in a video.

Uses the SSD-Mobilenet architecture from the TensorFlow Object Detection API.

build_pipeline()
fetch_resources()
job_suffix = 'objdet'
parser_fn()
run_opts = {'pipeline_instances_per_node': 1}
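
A hedged usage sketch (keyword arguments inferred from the Pipeline base class; the path is a placeholder):

    from scannerpy import Database
    from scannertools.video import Video
    from scannertools.object_detection import ObjectDetectionPipeline

    db = Database()
    video = Video('sample.mp4')

    detect_objects = ObjectDetectionPipeline.make_runner()
    # Sample every 10th frame; one index list per input video (assumed).
    objects = detect_objects(
        db, videos=[video],
        frames=[list(range(0, video.num_frames(), 10))])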

scannertools.optical_flow module

class scannertools.optical_flow.OpticalFlowPipeline(db)

Bases: scannertools.prelude.Pipeline

Computes optical flow on a video.

Unlike other pipelines, the flow fields are not materialized into memory, as they are simply too large.

build_pipeline()
job_suffix = 'flow'
parser_fn()
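
A hedged usage sketch (same assumed runner convention as the other pipelines; the path is a placeholder):

    from scannerpy import Database
    from scannertools.video import Video
    from scannertools.optical_flow import OpticalFlowPipeline

    db = Database()
    video = Video('sample.mp4')

    compute_flow = OpticalFlowPipeline.make_runner()
    flows = compute_flow(db, videos=[video], frames=[[0, 1, 2, 3]])
    # Per the note above, the flow fields remain in Scanner storage
    # rather than being loaded wholesale into memory.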

scannertools.pose_detection module

class scannertools.pose_detection.PoseDetectionPipeline(db)

Bases: scannertools.prelude.Pipeline

build_pipeline()
fetch_resources()
job_suffix = 'pose'
parser_fn()
run_opts = {'work_packet_size': 8}

scannertools.prelude module

class scannertools.prelude.BoundOp(op, args)

Bases: object

class scannertools.prelude.CustomFormatter(fmt=None, datefmt=None, style='%')

Bases: logging.Formatter

format(record)

Format the specified record as text.

The record’s attribute dictionary is used as the operand to a string formatting operation which yields the returned string. Before formatting the dictionary, a couple of preparatory steps are carried out. The message attribute of the record is computed using LogRecord.getMessage(). If the formatting string uses the time (as determined by a call to usesTime()), formatTime() is called to format the event time. If there is exception information, it is formatted using formatException() and appended to the message.

class scannertools.prelude.DataSource

Bases: abc.ABC

load()
scanner_args(db)
scanner_source(db)
class scannertools.prelude.Pipeline(db)

Bases: abc.ABC

additional_sources = []
base_sources = ['videos', 'frames']
build_pipeline()
build_sink()
build_sources(videos=None, frames=None, **kwargs)
execute(source_args={}, pipeline_args={}, sink_args={}, output_args={}, run_opts={}, no_execute=False, cpu_only=False)
fetch_resources()
job_suffix = None
classmethod make_runner()
parse_output()
parser_fn = None
run_opts = {}
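
The attributes above imply a common calling convention: make_runner() wraps a Pipeline subclass in a function whose keyword arguments mirror base_sources plus any additional_sources, and forwards options such as run_opts and cpu_only to execute(). A sketch of that pattern (the forwarding behavior is assumed, and the video path is a placeholder):

    from scannerpy import Database
    from scannertools.video import Video
    from scannertools.face_detection import FaceDetectionPipeline

    db = Database()
    video = Video('sample.mp4')

    detect_faces = FaceDetectionPipeline.make_runner()
    faces = detect_faces(
        db,
        videos=[video],
        run_opts={'work_packet_size': 8},  # assumed to override class-level run_opts
        cpu_only=True)                     # assumed to disable GPU kernels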
class scannertools.prelude.ScannerColumn(column, parser)

Bases: scannertools.prelude.DataSource

load()
scanner_args(db)
scanner_source(db)
class scannertools.prelude.VideoOutputPipeline(db)

Bases: scannertools.prelude.Pipeline

parse_output(paths=None)
class scannertools.prelude.WithMany(*args)

Bases: object

scannertools.prelude.ffmpeg_extract(input_path, output_ext=None, output_path=None, segment=None)
scannertools.prelude.ffmpeg_fmt_time(t)
scannertools.prelude.get_storage()
scannertools.prelude.imwrite(path, img)
scannertools.prelude.init_storage(bucket=None)
scannertools.prelude.par_for(f, l, process=False, workers=None, progress=True)
scannertools.prelude.sample_video(delete=True)
scannertools.prelude.scanner_ingest(db, videos)
scannertools.prelude.tile(imgs, rows=None, cols=None)
scannertools.prelude.try_import(to_import, current_module)

scannertools.shot_detection module

class scannertools.shot_detection.ShotDetectionPipeline(db)

Bases: scannertools.prelude.Pipeline

build_pipeline()
job_suffix = 'hist'
parse_output()
parser_fn()

scannertools.tf_vis_utils module

A set of functions used for visualization.

These functions generally receive an image and perform some visualization on it. Most do not return a value; instead, they modify the image in place.

scannertools.tf_vis_utils.create_category_index(categories)

Creates a dictionary of COCO-compatible categories keyed by category id.

Parameters: categories – a list of dicts, each of which has the following keys:
  • ‘id’: (required) an integer id uniquely identifying this category.
  • ‘name’: (required) string representing the category name, e.g., ‘cat’, ‘dog’, ‘pizza’.
Returns: a dict containing the same entries as categories, but keyed by the ‘id’ field of each category.
Return type: category_index
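
A worked example of the documented transformation:

    from scannertools import tf_vis_utils

    categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}]
    index = tf_vis_utils.create_category_index(categories)
    assert index == {1: {'id': 1, 'name': 'cat'},
                     2: {'id': 2, 'name': 'dog'}}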
scannertools.tf_vis_utils.draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color='red', thickness=4, display_str_list=(), use_normalized_coordinates=True)

Adds a bounding box to an image.

Each string in display_str_list is displayed on a separate line above the bounding box in black text on a rectangle filled with the input ‘color’. If the top of the bounding box extends to the edge of the image, the strings are displayed below the bounding box.

Parameters:
  • image – a PIL.Image object.
  • ymin – ymin of bounding box.
  • xmin – xmin of bounding box.
  • ymax – ymax of bounding box.
  • xmax – xmax of bounding box.
  • color – color to draw bounding box. Default is red.
  • thickness – line thickness. Default value is 4.
  • display_str_list – list of strings to display in box (each to be shown on its own line).
  • use_normalized_coordinates – If True (default), treat coordinates ymin, xmin, ymax, xmax as relative to the image. Otherwise treat coordinates as absolute.
scannertools.tf_vis_utils.draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color='red', thickness=4, display_str_list=(), use_normalized_coordinates=True)

Adds a bounding box to an image (numpy array).

Parameters:
  • image – a numpy array with shape [height, width, 3].
  • ymin – ymin of bounding box in normalized coordinates (same below).
  • xmin – xmin of bounding box.
  • ymax – ymax of bounding box.
  • xmax – xmax of bounding box.
  • color – color to draw bounding box. Default is red.
  • thickness – line thickness. Default value is 4.
  • display_str_list – list of strings to display in box (each to be shown on its own line).
  • use_normalized_coordinates – If True (default), treat coordinates ymin, xmin, ymax, xmax as relative to the image. Otherwise treat coordinates as absolute.
scannertools.tf_vis_utils.draw_bounding_boxes_on_image(image, boxes, color='red', thickness=4, display_str_list_list=())

Draws bounding boxes on image.

Parameters:
  • image – a PIL.Image object.
  • boxes – a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). The coordinates are in normalized format between [0, 1].
  • color – color to draw bounding box. Default is red.
  • thickness – line thickness. Default value is 4.
  • display_str_list_list – a list of lists of strings: one list of display strings per bounding box. A box takes a list of strings because it may carry multiple labels.
Raises:

ValueError – if boxes is not a [N, 4] array

scannertools.tf_vis_utils.draw_bounding_boxes_on_image_array(image, boxes, color='red', thickness=4, display_str_list_list=())

Draws bounding boxes on image (numpy array).

Parameters:
  • image – a numpy array object.
  • boxes – a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). The coordinates are in normalized format between [0, 1].
  • color – color to draw bounding box. Default is red.
  • thickness – line thickness. Default value is 4.
  • display_str_list_list – a list of lists of strings: one list of display strings per bounding box. A box takes a list of strings because it may carry multiple labels.
Raises:

ValueError – if boxes is not a [N, 4] array

scannertools.tf_vis_utils.draw_bounding_boxes_on_image_tensors(images, boxes, classes, scores, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)

Draws bounding boxes on batch of image tensors.

Parameters:
  • images – A 4D uint8 image tensor of shape [N, H, W, C].
  • boxes – [N, max_detections, 4] float32 tensor of detection boxes.
  • classes – [N, max_detections] int tensor of detection classes. Note that classes are 1-indexed.
  • scores – [N, max_detections] float32 tensor of detection scores.
  • category_index – a dict that maps integer ids to category dicts, e.g. {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, …}
  • max_boxes_to_draw – Maximum number of boxes to draw on an image. Default 20.
  • min_score_thresh – Minimum score threshold for visualization. Default 0.2.
Returns:

4D image tensor of type uint8, with boxes drawn on top.

scannertools.tf_vis_utils.draw_keypoints_on_image(image, keypoints, color='red', radius=2, use_normalized_coordinates=True)

Draws keypoints on an image.

Parameters:
  • image – a PIL.Image object.
  • keypoints – a numpy array with shape [num_keypoints, 2].
  • color – color to draw the keypoints with. Default is red.
  • radius – keypoint radius. Default value is 2.
  • use_normalized_coordinates – if True (default), treat keypoint values as relative to the image. Otherwise treat them as absolute.
scannertools.tf_vis_utils.draw_keypoints_on_image_array(image, keypoints, color='red', radius=2, use_normalized_coordinates=True)

Draws keypoints on an image (numpy array).

Parameters:
  • image – a numpy array with shape [height, width, 3].
  • keypoints – a numpy array with shape [num_keypoints, 2].
  • color – color to draw the keypoints with. Default is red.
  • radius – keypoint radius. Default value is 2.
  • use_normalized_coordinates – if True (default), treat keypoint values as relative to the image. Otherwise treat them as absolute.
scannertools.tf_vis_utils.draw_mask_on_image_array(image, mask, color='red', alpha=0.7)

Draws mask on an image.

Parameters:
  • image – uint8 numpy array with shape (img_height, img_width, 3)
  • mask – a uint8 numpy array of shape (img_height, img_width) with values of either 0 or 1.
  • color – color to draw the mask with. Default is red.
  • alpha – transparency value between 0 and 1. (default: 0.7)
Raises:

ValueError – On incorrect data type for image or masks.

scannertools.tf_vis_utils.encode_image_array_as_png_str(image)

Encodes a numpy array into a PNG string.

Parameters: image – a numpy array with shape [height, width, 3].
Returns: PNG encoded image string.
scannertools.tf_vis_utils.parse_labelmap(path)

Parse a label map (.pbtxt) into a list of categories.

Parameters: path – file path to the .pbtxt file
Returns: a list of dicts, each of which has the following keys:
  • ‘id’: an integer id uniquely identifying this category.
  • ‘name’: string representing the category name, e.g., ‘cat’, ‘dog’, ‘pizza’.
Return type: categories
scannertools.tf_vis_utils.save_image_array_as_png(image, output_path)

Saves an image (represented as a numpy array) to PNG.

Parameters:
  • image – a numpy array with shape [height, width, 3].
  • output_path – path to which image should be written.
scannertools.tf_vis_utils.visualize_boxes_and_labels_on_image_array(image, boxes, classes, scores, category_index, instance_masks=None, keypoints=None, use_normalized_coordinates=False, max_boxes_to_draw=20, min_score_thresh=0.5, agnostic_mode=False, line_thickness=4)

Overlay labeled boxes on an image with formatted scores and label names.

This function groups boxes that correspond to the same location, creates a display string for each detection, and overlays these on the image. Note that this function modifies the image in place and returns that same image.

Parameters:
  • image – uint8 numpy array with shape (img_height, img_width, 3)
  • boxes – a numpy array of shape [N, 4]
  • classes – a numpy array of shape [N]. Note that class indices are 1-based, and match the keys in the label map.
  • scores – a numpy array of shape [N] or None. If scores is None, this function assumes the boxes to be plotted are groundtruth boxes and plots them all as black with no classes or scores.
  • category_index – a dict containing category dictionaries (each holding a category index ‘id’ and a category name ‘name’) keyed by category indices.
  • instance_masks – a numpy array of shape [N, image_height, image_width], can be None
  • keypoints – a numpy array of shape [N, num_keypoints, 2], can be None
  • use_normalized_coordinates – whether boxes is to be interpreted as normalized coordinates or not.
  • max_boxes_to_draw – maximum number of boxes to visualize. If None, draw all boxes.
  • min_score_thresh – minimum score threshold for a box to be visualized
  • agnostic_mode – boolean (default: False) controlling whether to evaluate in class-agnostic mode or not. This mode will display scores but ignore classes.
  • line_thickness – integer (default: 4) controlling line width of the boxes.
Returns:

uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
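
A minimal sketch against the signature above, drawing two synthetic detections on a blank image:

    import numpy as np
    from scannertools import tf_vis_utils

    image = np.zeros((480, 640, 3), dtype=np.uint8)
    boxes = np.array([[0.1, 0.1, 0.5, 0.5],    # (ymin, xmin, ymax, xmax)
                      [0.4, 0.6, 0.9, 0.95]])
    classes = np.array([1, 2])
    scores = np.array([0.9, 0.75])
    category_index = {1: {'id': 1, 'name': 'cat'},
                      2: {'id': 2, 'name': 'dog'}}

    annotated = tf_vis_utils.visualize_boxes_and_labels_on_image_array(
        image, boxes, classes, scores, category_index,
        use_normalized_coordinates=True, min_score_thresh=0.5)
    # image is modified in place; annotated is the same array.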

scannertools.video module

class scannertools.video.Video(path, scanner_name=None)

Bases: object

Reference to a video file on disk.

Currently only supports mp4.

__init__(path, scanner_name=None)
Parameters: path (str) – Path to video file
audio()

Extract the audio from the video.

Returns: Reference to the audio file.
Return type: Audio
duration()
Returns: Length of the video in seconds.
Return type: int
extract(path=None, ext='.mp4', segment=None)

Extract an mp4 out of the video.

Parameters:
  • path (str, optional) – Path to write the video.
  • ext (str, optional) – Video extension to write
  • segment (Tuple(int, int), optional) – Start/end in seconds
Returns:

Path to the created video

Return type:

str

fps()
Returns: Frames per second of the video.
Return type: float
frame(number=None, time=None)

Extract a single frame from the video into memory.

Exactly one of number or time should be specified.

Parameters:
  • number (int, optional) – The index of the frame to access.
  • time (float, optional) – The time in seconds of the frame to access.
Returns:

(h x w x 3) np.uint8 image.

Return type:

np.array

frames(numbers=None, times=None)

Extract multiple frames from the video into memory.

Parameters:
  • numbers (List[int], optional) – The indices of the frames to access.
  • times (List[float], optional) – The times in seconds of the frames to access.
Returns:

List of (h x w x 3) np.uint8 images.

Return type:

List[np.array]

height()
Returns: Height in pixels of the video.
Return type: int
montage(frames, rows=None, cols=None)

Create a tiled montage of frames in the video.

Parameters:
  • frames (List[int]) – List of frame indices.
  • rows (int, optional) – Number of rows in the montage.
  • cols (int, optional) – Number of columns in the montage.
Returns:

Image of the montage.

Return type:

np.array

num_frames()
Returns: Number of frames in the video.
Return type: int
path()
Returns: Video file path.
Return type: str
scanner_name()
Returns: Name of the video file in the Scanner database.
Return type: str
width()
Returns: Width in pixels of the video.
Return type: int
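
A usage sketch covering the accessors above (the path is a placeholder):

    from scannertools.video import Video

    video = Video('sample.mp4')  # mp4 only, per the note above
    print(video.width(), video.height(), video.fps(), video.num_frames())

    frame = video.frame(time=1.5)                  # one (h x w x 3) np.uint8 array
    grid = video.montage([0, 30, 60, 90], rows=2)  # tiled montage image
    clip_path = video.extract(segment=(10, 20))    # writes a 10s clip, returns its path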

scannertools.vis module

class scannertools.vis.BboxDraw(config)

Bases: scannerpy.kernel.Kernel

execute(frame: scannerpy.common.FrameType, bboxes: bytes) → scannerpy.common.FrameType
class scannertools.vis.DrawBboxesPipeline(db)

Bases: scannertools.prelude.VideoOutputPipeline

additional_sources = ['bboxes']
build_pipeline()
fetch_resources()
job_suffix = 'draw_bboxes'
class scannertools.vis.DrawFlowsPipeline(db)

Bases: scannertools.prelude.VideoOutputPipeline

additional_sources = ['flows']
build_pipeline()
job_suffix = 'draw_flow'
class scannertools.vis.DrawPosesPipeline(db)

Bases: scannertools.prelude.VideoOutputPipeline

additional_sources = ['poses']
build_pipeline()
job_suffix = 'draw_pose'
scannertools.vis.flow_draw(config, frame: scannerpy.common.FrameType, flow: scannerpy.common.FrameType) → scannerpy.common.FrameType
scannertools.vis.pose_draw(config, frame: scannerpy.common.FrameType, poses: bytes) → scannerpy.common.FrameType
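
A hedged sketch chaining detection and visualization (runner keyword arguments are assumed from the Pipeline conventions, and output paths are left to the pipeline's defaults, per VideoOutputPipeline.parse_output above):

    from scannerpy import Database
    from scannertools.video import Video
    from scannertools.face_detection import FaceDetectionPipeline
    from scannertools.vis import DrawBboxesPipeline

    db = Database()
    video = Video('sample.mp4')

    faces = FaceDetectionPipeline.make_runner()(db, videos=[video])
    draw_bboxes = DrawBboxesPipeline.make_runner()
    # Render the detected boxes onto the video frames.
    draw_bboxes(db, videos=[video], bboxes=faces)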

Module contents