from functools import cached_property
import numpy as np
import rospy
import scipy
import tf
import tf2_py
from rospy import Subscriber
from sensor_msgs.msg import CameraInfo
from tf import TransformListener
from tf.transformations import *
from soccer_common.transformation import Transformation
[docs]class Camera:
"""
This is a reusable class that instantiates an instance of a Camera object that listens to the camera related topics
related to a robot and has useful functions that use geometry to determine the 3d/2d projection and location of things
"""
HORIZONTAL_FOV = 1.39626
[docs] def __init__(self, robot_name: str):
"""
Initializes the camera object
:param robot_name: Name of the robot, to be used in subscribers
"""
self.robot_name = robot_name #: Name of the robot
self.pose = Transformation() #: Pose of the camera
self.pose_base_link_straight = Transformation() #: Pose of the camera
self.camera_info = None #: Camera info object recieved from the subscriber
self.horizontalFOV = Camera.HORIZONTAL_FOV
self.focal_length = 3.67 #: Focal length of the camera (meters) distance to the camera plane as projected in 3D
self.camera_info_subscriber = Subscriber("/" + robot_name + "/camera/camera_info", CameraInfo, self.cameraInfoCallback)
self.tf_listener = TransformListener()
self.init_time = rospy.Time.now()
[docs] def ready(self) -> bool:
"""
Function to determine when the camera object has recieved the necessary information and is ready to be used
:return: True if the camera is ready, else False
"""
return self.pose is not None and self.resolution_x is not None and self.resolution_y is not None and self.camera_info is not None
[docs] def reset_position(self, from_world_frame=False, timestamp=rospy.Time(0), camera_frame="/camera", skip_if_not_found=False):
"""
Resets the position of the camera, it uses a series of methods that fall back on each other to get the location of the camera
:param from_world_frame: If this is set to true, the camera position transformation will be from the world instead of the robot odom frame
:param timestamp: What time do we want the camera tf frame, rospy.Time(0) if get the latest transform
:param camera_frame: The name of the camera frame
:param skip_if_not_found: If set to true, then will not wait if it cannot find the camera transform after the specified duration (1 second), it will just return
"""
if from_world_frame:
try:
self.tf_listener.waitForTransform("world", self.robot_name + camera_frame, timestamp, rospy.Duration(nsecs=1000000))
(trans, rot) = self.tf_listener.lookupTransform("world", self.robot_name + camera_frame, timestamp)
self.pose = Transformation(trans, rot)
return
except (
tf2_py.LookupException,
tf.LookupException,
tf.ConnectivityException,
tf.ExtrapolationException,
tf2_py.TransformException,
) as ex:
rospy.logerr_throttle(5, f"Unable to find transformation from world to {self.robot_name + camera_frame}")
pass
else:
try:
# Find the odom to base_footprint and publish straight base footprint
self.tf_listener.waitForTransform(self.robot_name + "/odom", self.robot_name + "/base_footprint", timestamp, rospy.Duration(secs=1))
(trans, rot) = self.tf_listener.lookupTransform(self.robot_name + "/odom", self.robot_name + "/base_footprint", timestamp)
world_to_base_link = Transformation(trans, rot)
e = world_to_base_link.orientation_euler
e[1] = 0
e[2] = 0
world_to_base_link.orientation_euler = e
self.pose_base_link_straight = world_to_base_link
# Calculate the camera transformation
self.tf_listener.waitForTransform(self.robot_name + "/odom", self.robot_name + camera_frame, timestamp, rospy.Duration(secs=1))
(trans, rot) = self.tf_listener.lookupTransform(self.robot_name + "/odom", self.robot_name + camera_frame, timestamp)
world_to_camera = Transformation(trans, rot)
camera_to_base_link = scipy.linalg.inv(world_to_base_link) @ world_to_camera
self.pose = camera_to_base_link
return
except (
tf2_py.LookupException,
tf.LookupException,
tf.ConnectivityException,
tf.ExtrapolationException,
tf2_py.TransformException,
) as ex:
rospy.logerr_throttle(5, f"Unable to find transformation from world to {self.robot_name + camera_frame}")
pass
[docs] def cameraInfoCallback(self, camera_info: CameraInfo):
"""
Callback function for the camera info subscriber
:param camera_info: from the camera info topic
"""
self.camera_info = camera_info
@cached_property
def resolution_x(self) -> int:
"""
The X resolution of the camera or the width of the screen in pixels
:return: width in pixels
"""
return self.camera_info.width
@cached_property
def resolution_y(self):
"""
The Y resolution of the camera or the height of the screen in pixels
:return: height in pixels
"""
return self.camera_info.height
[docs] def findFloorCoordinate(self, pos: [int]) -> [int]:
"""
From a camera pixel, get a coordinate on the floor
:param pos: The position on the screen in pixels (x, y)
:return: The 3D coordinate of the pixel as projected to the floor
"""
tx, ty = self.imageToWorldFrame(pos[0], pos[1])
pixel_pose = Transformation(position=(self.focal_length, tx, ty))
camera_pose = self.pose
pixel_world_pose = camera_pose @ pixel_pose
ratio = (camera_pose.position[2] - pixel_world_pose.position[2]) / self.pose.position[2] # TODO Fix divide by 0 problem
x_delta = (pixel_world_pose.position[0] - camera_pose.position[0]) / ratio
y_delta = (pixel_world_pose.position[1] - camera_pose.position[1]) / ratio
return [x_delta + camera_pose.position[0], y_delta + camera_pose.position[1], 0]
[docs] def findCameraCoordinate(self, pos: [int]) -> [int]:
"""
From a 3d position on the field, get the camera coordinate, opposite of :func:`~soccer_common.Camera.findFloorCoordinate`
:param pos: The 3D coordinate of the object
:return: The 2D pixel (x, y) on the camera, if the object was projected on the camera
"""
pos3d = Transformation(pos)
camera_pose = self.pose
pos3d_tr = np.linalg.inv(camera_pose) @ pos3d
return self.findCameraCoordinateFixedCamera(pos3d_tr.position)
[docs] def findCameraCoordinateFixedCamera(self, pos: [int]) -> [int]:
"""
Helper function for :func:`~soccer_common.Camera.findCameraCoordinate`, finds the camera coordinate if the camera were fixed at the origin
:param pos: The 3D coordinate of the object
:return: The 2D pixel (x, y) on the camera, if the object was projected on the camera and the camera is placed at the origin
"""
pos = Transformation(pos)
ratio = self.focal_length / pos.position[0]
tx = pos.position[1] * ratio
ty = pos.position[2] * ratio
x, y = self.worldToImageFrame(tx, ty)
return [x, y]
@cached_property
def verticalFOV(self):
"""
The vertical field of vision of the camera.
See `Field of View <https://en.wikipedia.org/wiki/Field_of_view>`_
"""
return 2 * math.atan(math.tan(self.horizontalFOV * 0.5) * (self.resolution_y / self.resolution_x))
@cached_property
def imageSensorHeight(self):
"""
The height of the image sensor (m)
"""
return math.tan(self.verticalFOV / 2.0) * 2.0 * self.focal_length
@cached_property
def imageSensorWidth(self):
"""
The width of the image sensor (m)
"""
return math.tan(self.horizontalFOV / 2.0) * 2.0 * self.focal_length
@cached_property
def pixelHeight(self):
"""
The height of a pixel in real 3d measurements (m)
"""
return self.imageSensorHeight / self.resolution_y
@cached_property
def pixelWidth(self):
"""
The wdith of a pixel in real 3d measurements (m)
"""
return self.imageSensorWidth / self.resolution_x
pass
[docs] def imageToWorldFrame(self, pixel_x: int, pixel_y: int) -> tuple:
"""
From image pixel coordinates, get the coordinates of the pixel as if they have been projected ot the camera plane, which is
positioned at (0,0) in 3D world coordinates
https://docs.google.com/presentation/d/10DKYteySkw8dYXDMqL2Klby-Kq4FlJRnc4XUZyJcKsw/edit#slide=id.g163680c589a_0_0
:param pixel_x: x pixel of the camera
:param pixel_y: y pixel of the camera
:return: 3D position (X, Y) of the pixel in meters
"""
return (
(self.resolution_x / 2.0 - (pixel_x + 0.5)) * self.pixelWidth,
(self.resolution_y / 2.0 - (pixel_y + 0.5)) * self.pixelHeight,
)
[docs] def worldToImageFrame(self, pos_x: float, pos_y: float) -> tuple:
"""
Reverse function for :func:`~soccer_common.Camera.imageToWorldFrame`, takes the 3D world coordinates of the camera plane
and returns pixels
:param pos_x: X position of the pixel on the world plane in meters
:param pos_y: Y position of the pixel on the world plane in meters
:return: Tuple (x, y) of the pixel coordinates of in the image
"""
return (
(self.resolution_x / 2.0 + pos_x / self.pixelWidth) - 0.5,
(self.resolution_y / 2.0 + pos_y / self.pixelHeight) - 0.5,
)
[docs] def calculateBoundingBoxesFromBall(self, ball_position: Transformation, ball_radius: float = 0.07):
"""
Takes a 3D ball transformation and returns the bounding boxes of the ball if seen on camera
:param ball_position: 3D coordinates of the ball stored in the :class:`Transformation` format
:param ball_radius: The radious of the ball in centimeters
:return: The bounding boxes of the ball on the camera in the format [[x1,y1], [x1,y1]] which are the top left
and bottom right of the bounding box respectively
"""
camera_pose = self.pose
pos3d_tr = np.linalg.inv(camera_pose) @ ball_position
x = pos3d_tr.position[0]
y = -pos3d_tr.position[1]
z = -pos3d_tr.position[2]
r = ball_radius
thetay = math.atan2(y, x)
dy = math.sqrt(x**2 + y**2)
phiy = math.asin(r / dy)
xyfar = [x - math.sin(thetay + phiy) * r, y + math.cos(thetay + phiy) * r]
xynear = [x + math.sin(thetay - phiy) * r, y - math.cos(thetay - phiy) * r]
thetaz = math.atan2(z, x)
dz = math.sqrt(x**2 + z**2)
phiz = math.asin(r / dz)
xzfar = [x - math.sin(thetaz + phiz) * r, z + math.cos(thetaz + phiz) * r]
xznear = [x + math.sin(thetaz - phiz) * r, z - math.cos(thetaz - phiz) * r]
ball_right_point = [xyfar[0], xyfar[1], z]
ball_left_point = [xynear[0], xynear[1], z]
ball_bottom_point = [xzfar[0], y, xzfar[1]]
ball_top_point = [xznear[0], y, xznear[1]]
ball_left_point_cam = self.findCameraCoordinateFixedCamera(ball_left_point)
ball_right_point_cam = self.findCameraCoordinateFixedCamera(ball_right_point)
ball_top_point_cam = self.findCameraCoordinateFixedCamera(ball_top_point)
ball_bottom_point_cam = self.findCameraCoordinateFixedCamera(ball_bottom_point)
left_border_x = ball_left_point_cam[0]
right_border_x = ball_right_point_cam[0]
top_border_y = ball_top_point_cam[1]
bottom_border_y = ball_bottom_point_cam[1]
bounding_box = [[left_border_x, top_border_y], [right_border_x, bottom_border_y]]
return bounding_box
[docs] def calculateBallFromBoundingBoxes(self, ball_radius: float = 0.07, bounding_boxes: [float] = []) -> Transformation:
"""
Reverse function for :func:`~soccer_common.Camera.calculateBoundingBoxesFromBall`, takes the bounding boxes
of the ball as seen on the camera and return the 3D position of the ball assuming that the ball is on the ground
:param ball_radius: The radius of the ball in meters
:param bounding_boxes: The bounding boxes of the ball on the camera in the format [[x1,y1], [x1,y1]] which are the top left and bottom right of the bounding box respectively
:return: 3D coordinates of the ball stored in the :class:`Transformation` format
"""
# bounding boxes [(y1, z1), (y2, z2)]
r = ball_radius
y1 = bounding_boxes[0][0]
z1 = bounding_boxes[0][1]
y2 = bounding_boxes[1][0]
z2 = bounding_boxes[1][1]
# Assuming the ball is a sphere, the bounding box must be a square, averaging the borders
ym = (y1 + y2) / 2
zm = (z1 + z2) / 2
length = z2 - z1
width = y2 - y1
y1 = ym - (width / 2)
z1 = zm - (length / 2)
y2 = ym + (width / 2)
z2 = zm + (length / 2)
y1w, z1w = self.imageToWorldFrame(y1, z1)
y2w, z2w = self.imageToWorldFrame(y2, z2)
y1w = -y1w
z1w = -z1w
y2w = -y2w
z2w = -z2w
f = self.focal_length
thetay1 = math.atan2(y1w, f)
thetay2 = math.atan2(y2w, f)
thetayy = (thetay2 - thetay1) / 2
thetay = thetay1 + thetayy
dy = r / math.sin(thetayy)
xy = (math.cos(thetay) * dy, math.sin(thetay) * dy)
thetaz1 = math.atan2(z1w, f)
thetaz2 = math.atan2(z2w, f)
thetazz = (thetaz2 - thetaz1) / 2
thetaz = thetaz1 + thetazz
dz = r / math.sin(thetazz)
xz = (math.cos(thetaz) * dz, math.sin(thetaz) * dz)
ball_x = xy[0]
ball_y = xy[1]
ball_z = xz[1]
tr = Transformation([ball_x, -ball_y, -ball_z])
tr_cam = self.pose @ tr
return tr_cam
[docs] def calculateHorizonCoverArea(self) -> int:
"""
Given the camera's position, return the area that is covered by the horizon (that is not the field area) in pixels from the top position
:return: Pixel length from the top of the image to the point where it meets the horizon
"""
pitch = self.pose.orientation_euler[1]
d = math.sin(pitch) * self.focal_length
(r, h) = self.worldToImageFrame(0, -d)
return int(min(max(0, h), self.resolution_y))