# Source code for soccer_common.camera

import math
from functools import cached_property

import numpy as np
import rospy
import scipy
import scipy.linalg
import tf
import tf2_py
from rospy import Subscriber
from sensor_msgs.msg import CameraInfo
from tf import TransformListener
from tf.transformations import *

from soccer_common.transformation import Transformation


class Camera:
    """
    Reusable camera model for a robot.

    The object subscribes to the robot's camera info topic, looks up the camera pose through tf and offers
    geometric helpers that convert between image pixels and 3D positions (floor projection, ball bounding
    boxes, horizon line).

    Note: the resolution and all quantities derived from it (field of view, sensor size, pixel size) are
    ``cached_property`` values. They are computed from the first values they see and are not refreshed if
    ``camera_info``, ``horizontalFOV`` or ``focal_length`` change afterwards.
    """

    HORIZONTAL_FOV = 1.39626  #: Default horizontal field of view, in radians (fed to math.tan below)

    #: tf errors that :func:`reset_position` treats as "transform not available"
    _TF_LOOKUP_ERRORS = (
        tf2_py.LookupException,
        tf.LookupException,
        tf.ConnectivityException,
        tf.ExtrapolationException,
        tf2_py.TransformException,
    )

    def __init__(self, robot_name: str):
        """
        Initializes the camera object, subscribes to ``/<robot_name>/camera/camera_info`` and creates a tf listener

        :param robot_name: Name of the robot, to be used in subscribers and as the prefix of the tf frames
        """

        self.robot_name = robot_name  #: Name of the robot
        self.pose = Transformation()  #: Pose of the camera, refreshed by reset_position
        self.pose_base_link_straight = Transformation()  #: odom -> base_footprint pose with euler components 1 and 2 zeroed
        self.camera_info = None  #: Camera info object received from the subscriber, None until the first message
        self.horizontalFOV = Camera.HORIZONTAL_FOV
        self.focal_length = 3.67  #: Focal length of the camera (meters) distance to the camera plane as projected in 3D

        self.camera_info_subscriber = Subscriber("/" + robot_name + "/camera/camera_info", CameraInfo, self.cameraInfoCallback)

        self.tf_listener = TransformListener()

        self.init_time = rospy.Time.now()

    def ready(self) -> bool:
        """
        Function to determine when the camera object has received the necessary information and is ready to be used

        :return: True if the camera is ready, else False
        """
        # camera_info has to be tested before the resolution properties: they dereference camera_info and
        # would raise AttributeError (instead of reporting "not ready") while no message has arrived yet.
        if self.pose is None or self.camera_info is None:
            return False
        return self.resolution_x is not None and self.resolution_y is not None

    def reset_position(self, from_world_frame=False, timestamp=rospy.Time(0), camera_frame="/camera", skip_if_not_found=False):
        """
        Refreshes the pose of the camera from tf. If the transform cannot be obtained the error is logged
        (throttled to once every 5 seconds) and the previous pose is kept.

        :param from_world_frame: If this is set to true, the camera pose is looked up from the ``world`` frame (waiting at most 1 ms).
            Otherwise it is computed relative to the straightened ``<robot>/base_footprint`` pose in the ``<robot>/odom`` frame
            (waiting at most 1 second per lookup), which is also stored in ``pose_base_link_straight``
        :param timestamp: What time do we want the camera tf frame, rospy.Time(0) if get the latest transform
        :param camera_frame: The name of the camera frame, appended to the robot name
        :param skip_if_not_found: Currently not read by this method: a failed lookup always logs and returns
        """
        camera_frame_id = self.robot_name + camera_frame

        if from_world_frame:
            try:
                self.tf_listener.waitForTransform("world", camera_frame_id, timestamp, rospy.Duration(nsecs=1000000))
                (trans, rot) = self.tf_listener.lookupTransform("world", camera_frame_id, timestamp)
                self.pose = Transformation(trans, rot)
            except self._TF_LOOKUP_ERRORS:
                rospy.logerr_throttle(5, f"Unable to find transformation from world to {camera_frame_id}")
            return

        odom_frame_id = self.robot_name + "/odom"
        base_frame_id = self.robot_name + "/base_footprint"
        try:
            # Find the odom to base_footprint and keep a straightened copy of it
            self.tf_listener.waitForTransform(odom_frame_id, base_frame_id, timestamp, rospy.Duration(secs=1))
            (trans, rot) = self.tf_listener.lookupTransform(odom_frame_id, base_frame_id, timestamp)
            odom_to_base_straight = Transformation(trans, rot)
            # Index 1 is read as the pitch elsewhere in this class; presumably only the heading is kept
            # here - TODO confirm the euler ordering against Transformation.
            euler = odom_to_base_straight.orientation_euler
            euler[1] = 0
            euler[2] = 0
            odom_to_base_straight.orientation_euler = euler
            # Stored before the camera lookup, so it is updated even if that second lookup fails
            self.pose_base_link_straight = odom_to_base_straight

            # Calculate the camera transformation relative to the straightened base
            self.tf_listener.waitForTransform(odom_frame_id, camera_frame_id, timestamp, rospy.Duration(secs=1))
            (trans, rot) = self.tf_listener.lookupTransform(odom_frame_id, camera_frame_id, timestamp)
            odom_to_camera = Transformation(trans, rot)

            self.pose = scipy.linalg.inv(odom_to_base_straight) @ odom_to_camera
        except self._TF_LOOKUP_ERRORS:
            rospy.logerr_throttle(5, f"Unable to find transformation from {odom_frame_id} to {base_frame_id} or {camera_frame_id}")

    def cameraInfoCallback(self, camera_info: CameraInfo):
        """
        Callback function for the camera info subscriber, stores the latest message

        :param camera_info: from the camera info topic
        """
        self.camera_info = camera_info

    @cached_property
    def resolution_x(self) -> int:
        """
        The X resolution of the camera or the width of the screen in pixels.
        Requires ``camera_info`` to be set; the value is cached after the first successful read

        :return: width in pixels
        """
        return self.camera_info.width

    @cached_property
    def resolution_y(self):
        """
        The Y resolution of the camera or the height of the screen in pixels.
        Requires ``camera_info`` to be set; the value is cached after the first successful read

        :return: height in pixels
        """
        return self.camera_info.height

    def findFloorCoordinate(self, pos: [int]) -> [int]:
        """
        From a camera pixel, get a coordinate on the floor (the z = 0 plane of the frame ``pose`` is expressed in)

        :param pos: The position on the screen in pixels (x, y)
        :return: The 3D coordinate [x, y, 0] of the pixel as projected to the floor
        """
        tx, ty = self.imageToWorldFrame(pos[0], pos[1])
        # The pixel sits on the image plane, focal_length in front of the camera along its x axis
        pixel_pose = Transformation(position=(self.focal_length, tx, ty))
        camera_pose = self.pose
        pixel_world_pose = camera_pose @ pixel_pose
        # Height dropped between the camera and the pixel, as a fraction of the camera height. The ray is
        # stretched by 1 / ratio to reach z = 0. Still undefined when the camera height is 0 or when the
        # ray is parallel to the floor (ratio == 0).
        ratio = (camera_pose.position[2] - pixel_world_pose.position[2]) / self.pose.position[2]  # TODO Fix divide by 0 problem
        x_delta = (pixel_world_pose.position[0] - camera_pose.position[0]) / ratio
        y_delta = (pixel_world_pose.position[1] - camera_pose.position[1]) / ratio

        return [x_delta + camera_pose.position[0], y_delta + camera_pose.position[1], 0]

    def findCameraCoordinate(self, pos: [int]) -> [int]:
        """
        From a 3d position on the field, get the camera coordinate, opposite of :func:`~soccer_common.Camera.findFloorCoordinate`

        :param pos: The 3D coordinate of the object
        :return: The 2D pixel (x, y) on the camera, if the object was projected on the camera
        """
        pos3d = Transformation(pos)
        camera_pose = self.pose
        # Express the point in the camera frame before projecting it
        pos3d_tr = np.linalg.inv(camera_pose) @ pos3d

        return self.findCameraCoordinateFixedCamera(pos3d_tr.position)

    def findCameraCoordinateFixedCamera(self, pos: [int]) -> [int]:
        """
        Helper function for :func:`~soccer_common.Camera.findCameraCoordinate`, finds the camera coordinate if the camera were fixed at the origin

        :param pos: The 3D coordinate of the object in the camera frame; its first component is the depth and must be non-zero
        :return: The 2D pixel (x, y) on the camera, if the object was projected on the camera and the camera is placed at the origin
        """

        pos = Transformation(pos)

        # Similar triangles: scale the point onto the image plane at depth focal_length
        ratio = self.focal_length / pos.position[0]

        tx = pos.position[1] * ratio
        ty = pos.position[2] * ratio
        x, y = self.worldToImageFrame(tx, ty)
        return [x, y]

    @cached_property
    def verticalFOV(self):
        """
        The vertical field of vision of the camera, derived from the horizontal one and the image aspect ratio.
        See `Field of View <https://en.wikipedia.org/wiki/Field_of_view>`_
        """
        return 2 * math.atan(math.tan(self.horizontalFOV * 0.5) * (self.resolution_y / self.resolution_x))

    @cached_property
    def imageSensorHeight(self):
        """
        The height of the image sensor (m)
        """
        return math.tan(self.verticalFOV / 2.0) * 2.0 * self.focal_length

    @cached_property
    def imageSensorWidth(self):
        """
        The width of the image sensor (m)
        """
        return math.tan(self.horizontalFOV / 2.0) * 2.0 * self.focal_length

    @cached_property
    def pixelHeight(self):
        """
        The height of a pixel in real 3d measurements (m)
        """
        return self.imageSensorHeight / self.resolution_y

    @cached_property
    def pixelWidth(self):
        """
        The width of a pixel in real 3d measurements (m)
        """
        return self.imageSensorWidth / self.resolution_x

    def imageToWorldFrame(self, pixel_x: int, pixel_y: int) -> tuple:
        """
        From image pixel coordinates, get the coordinates of the pixel as if they have been projected to the camera plane, which is
        positioned at (0,0) in 3D world coordinates. The pixel center (hence the + 0.5) is measured from the image center and
        the result grows towards decreasing pixel indices.
        https://docs.google.com/presentation/d/10DKYteySkw8dYXDMqL2Klby-Kq4FlJRnc4XUZyJcKsw/edit#slide=id.g163680c589a_0_0

        :param pixel_x: x pixel of the camera
        :param pixel_y: y pixel of the camera
        :return: 3D position (X, Y) of the pixel in meters
        """
        return (
            (self.resolution_x / 2.0 - (pixel_x + 0.5)) * self.pixelWidth,
            (self.resolution_y / 2.0 - (pixel_y + 0.5)) * self.pixelHeight,
        )

    def worldToImageFrame(self, pos_x: float, pos_y: float) -> tuple:
        """
        Reverse function for  :func:`~soccer_common.Camera.imageToWorldFrame`, takes the 3D world coordinates of the camera plane
        and returns pixels.

        NOTE(review): the sign convention is the opposite of :func:`~soccer_common.Camera.imageToWorldFrame` (the result grows
        with the input here), so a round trip mirrors the pixel unless the caller negates the coordinates, as the ball
        bounding box helpers of this class do.

        :param pos_x: X position of the pixel on the world plane in meters
        :param pos_y: Y position of the pixel on the world plane in meters
        :return: Tuple (x, y) of the (fractional) pixel coordinates in the image
        """
        return (
            (self.resolution_x / 2.0 + pos_x / self.pixelWidth) - 0.5,
            (self.resolution_y / 2.0 + pos_y / self.pixelHeight) - 0.5,
        )

    def calculateBoundingBoxesFromBall(self, ball_position: Transformation, ball_radius: float = 0.07):
        """
        Takes a 3D ball transformation and returns the bounding boxes of the ball if seen on camera

        :param ball_position: 3D coordinates of the ball stored in the :class:`Transformation` format
        :param ball_radius: The radius of the ball, in the same units as the position (meters). The ball must be further than
            one radius from the camera in both the horizontal and the vertical plane, otherwise math.asin raises ValueError
        :return: The bounding boxes of the ball on the camera in the format [[x1,y1], [x2,y2]] which are the top left
        and bottom right of the bounding box respectively
        """

        camera_pose = self.pose
        pos3d_tr = np.linalg.inv(camera_pose) @ ball_position

        # Ball center in the camera frame; y and z are negated to match the sign convention of worldToImageFrame
        x = pos3d_tr.position[0]
        y = -pos3d_tr.position[1]
        z = -pos3d_tr.position[2]
        r = ball_radius

        # Horizontal (x-y) plane: bearing of the center and half-angle subtended by the ball give the two tangent points
        thetay = math.atan2(y, x)
        dy = math.sqrt(x**2 + y**2)
        phiy = math.asin(r / dy)

        xyfar = [x - math.sin(thetay + phiy) * r, y + math.cos(thetay + phiy) * r]
        xynear = [x + math.sin(thetay - phiy) * r, y - math.cos(thetay - phiy) * r]

        # Vertical (x-z) plane: same construction
        thetaz = math.atan2(z, x)
        dz = math.sqrt(x**2 + z**2)
        phiz = math.asin(r / dz)

        xzfar = [x - math.sin(thetaz + phiz) * r, z + math.cos(thetaz + phiz) * r]
        xznear = [x + math.sin(thetaz - phiz) * r, z - math.cos(thetaz - phiz) * r]

        ball_right_point = [xyfar[0], xyfar[1], z]
        ball_left_point = [xynear[0], xynear[1], z]
        ball_bottom_point = [xzfar[0], y, xzfar[1]]
        ball_top_point = [xznear[0], y, xznear[1]]

        ball_left_point_cam = self.findCameraCoordinateFixedCamera(ball_left_point)
        ball_right_point_cam = self.findCameraCoordinateFixedCamera(ball_right_point)
        ball_top_point_cam = self.findCameraCoordinateFixedCamera(ball_top_point)
        ball_bottom_point_cam = self.findCameraCoordinateFixedCamera(ball_bottom_point)

        # Only one pixel coordinate of each tangent point is needed for the box
        left_border_x = ball_left_point_cam[0]
        right_border_x = ball_right_point_cam[0]
        top_border_y = ball_top_point_cam[1]
        bottom_border_y = ball_bottom_point_cam[1]

        bounding_box = [[left_border_x, top_border_y], [right_border_x, bottom_border_y]]

        return bounding_box

    def calculateBallFromBoundingBoxes(self, ball_radius: float = 0.07, bounding_boxes: [float] = ()) -> Transformation:
        """
        Reverse function for  :func:`~soccer_common.Camera.calculateBoundingBoxesFromBall`, takes the bounding boxes
        of the ball as seen on the camera and return the 3D position of the ball

        :param ball_radius: The radius of the ball in meters
        :param bounding_boxes: The bounding boxes of the ball on the camera in the format [[x1,y1], [x2,y2]] which are the top left and
            bottom right of the bounding box respectively. Required in practice: the empty default raises IndexError, and a box of
            zero width or height raises ZeroDivisionError
        :return: 3D coordinates of the ball stored in the :class:`Transformation` format
        """

        # bounding boxes [(y1, z1), (y2, z2)]
        r = ball_radius

        y1 = bounding_boxes[0][0]
        z1 = bounding_boxes[0][1]
        y2 = bounding_boxes[1][0]
        z2 = bounding_boxes[1][1]

        # Assuming the ball is a sphere, the bounding box must be a square, averaging the borders
        # NOTE(review): as written this rebuilds each border from the box center and its own width/length, so
        # the borders come out unchanged (up to float rounding) - confirm whether a real squaring step was intended.
        ym = (y1 + y2) / 2
        zm = (z1 + z2) / 2
        length = z2 - z1
        width = y2 - y1
        y1 = ym - (width / 2)
        z1 = zm - (length / 2)
        y2 = ym + (width / 2)
        z2 = zm + (length / 2)

        # Borders on the image plane, negated to the convention used by calculateBoundingBoxesFromBall
        y1w, z1w = self.imageToWorldFrame(y1, z1)
        y2w, z2w = self.imageToWorldFrame(y2, z2)
        y1w = -y1w
        z1w = -z1w
        y2w = -y2w
        z2w = -z2w

        f = self.focal_length

        # Horizontal plane: half the angle between the two borders is the half-angle subtended by the ball,
        # which gives the distance to its center (r / sin) along the mean bearing
        thetay1 = math.atan2(y1w, f)
        thetay2 = math.atan2(y2w, f)

        thetayy = (thetay2 - thetay1) / 2
        thetay = thetay1 + thetayy

        dy = r / math.sin(thetayy)

        xy = (math.cos(thetay) * dy, math.sin(thetay) * dy)

        # Vertical plane: same construction
        thetaz1 = math.atan2(z1w, f)
        thetaz2 = math.atan2(z2w, f)

        thetazz = (thetaz2 - thetaz1) / 2
        thetaz = thetaz1 + thetazz

        dz = r / math.sin(thetazz)

        xz = (math.cos(thetaz) * dz, math.sin(thetaz) * dz)

        # Depth and lateral offset come from the horizontal plane, only the height from the vertical one
        ball_x = xy[0]
        ball_y = xy[1]
        ball_z = xz[1]

        tr = Transformation([ball_x, -ball_y, -ball_z])
        tr_cam = self.pose @ tr

        return tr_cam

    def calculateHorizonCoverArea(self) -> int:
        """
        Given the camera's position, return the area that is covered by the horizon (that is not the field area) in pixels from the top position

        :return: Pixel length from the top of the image to the point where it meets the horizon, clamped to [0, resolution_y]
        """

        pitch = self.pose.orientation_euler[1]
        d = math.sin(pitch) * self.focal_length

        # Only the vertical pixel coordinate is of interest
        (_, h) = self.worldToImageFrame(0, -d)
        return int(min(max(0, h), self.resolution_y))