other samples
Contents
other samples
Sample 1
import math
def reward_function(params):
    """Reward staying on track, driving fast, and pointing along the track.

    The reward is the unweighted sum of three terms:

    * wheels: 1 when ``params['all_wheels_on_track']`` is truthy, else 0;
    * speed: ``params['speed'] / 4``;
    * direction: cosine of the angle between ``params['heading']``
      (degrees) and the segment joining the two closest waypoints, so it
      ranges from 1 (aligned) down to -1 (pointing the opposite way).

    Args:
        params: Mapping providing ``all_wheels_on_track``, ``speed``,
            ``waypoints`` (indexable sequence of ``(x, y)`` points),
            ``closest_waypoints`` (pair of indices into ``waypoints``,
            previous then next) and ``heading``.

    Returns:
        The summed reward as a ``float``.
    """
    # ---- [reward_wheels] all_wheels_on_track ----
    reward_wheels = 1 if params['all_wheels_on_track'] else 0

    # ---- [reward_speed] speed ----
    # NOTE(review): dividing by 4 presumably normalizes against a 4 m/s top
    # speed -- confirm against the configured action space.
    reward_speed = params['speed'] / 4

    # ---- [reward_direction] direction_diff ----
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']

    # The track direction is taken from the segment between the waypoint
    # behind the agent and the one ahead of it.
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1],
                   next_point[0] - prev_point[0])
    )

    # Fold the raw difference into [0, 180] so that e.g. 170 vs -170 degrees
    # counts as 20 degrees apart rather than 340.
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    # Cosine gives a smooth score: 1 when aligned, 0 when perpendicular,
    # -1 when facing backwards.
    reward_direction = math.cos(math.radians(direction_diff))

    # ---- [reward weight sum] ----
    # Each weight is 1; they are spelled out so they can be tuned per term.
    reward = 1 * reward_wheels + 1 * reward_speed + 1 * reward_direction
    return float(reward)
Sample 2
import math
def reward_function(params):
    """Reward keeping near the centre line and steering at upcoming waypoints.

    The reward is the sum of two terms:

    * position (``reward_1``): 1 when all wheels are on the track and the
      agent is within a quarter of the track width from the centre, 0.5 when
      on the track but farther out, 0.1 when any wheel is off the track;
    * direction (``reward_2``): the mean, over the next 7 waypoints, of the
      cosine of the angle between ``params['heading']`` (degrees) and the
      bearing from the agent's ``(x, y)`` position to that waypoint.

    Args:
        params: Mapping providing ``all_wheels_on_track``,
            ``distance_from_center``, ``track_width``, ``waypoints``
            (indexable sequence of ``(x, y)`` points), ``closest_waypoints``
            (pair of indices; the second one is used as the first look-ahead
            target), ``heading``, ``x`` and ``y``.

    Returns:
        The summed reward as a ``float``.
    """
    # ---- [reward_1] track position ----
    marker = 0.25 * params['track_width']
    if params['all_wheels_on_track']:
        if params['distance_from_center'] <= marker:
            reward_1 = 1
        else:
            reward_1 = 0.5
    else:
        reward_1 = 0.1

    # ---- [reward_direction] direction_diff ----
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']
    x = params['x']
    y = params['y']

    # Look-ahead indices wrap modulo len(waypoints) - 1.
    # NOTE(review): presumably the last waypoint duplicates the first on a
    # closed loop, which is why one entry is excluded -- confirm.
    # Clamped to 1 so a single-waypoint list does not divide by zero.
    waypoints_count = max(len(waypoints) - 1, 1)
    lookahead_count = 7

    reward_2 = 0
    for offset in range(lookahead_count):
        target = waypoints[(closest_waypoints[1] + offset) % waypoints_count]

        # Bearing from the agent's current position to the target waypoint.
        target_direction = math.degrees(
            math.atan2(target[1] - y, target[0] - x)
        )

        # Fold the raw difference into [0, 180] degrees.
        direction_diff = abs(target_direction - heading)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff

        # Cosine score: 1 when pointing at the target, -1 when pointing away.
        reward_2 += math.cos(math.radians(direction_diff))
    reward_2 /= lookahead_count

    # ---- [reward weight sum] ----
    reward = reward_1 + reward_2
    return float(reward)