In the context of learning rate (“LR”) scheduling according to an exponential learning rate schedule, i.e. decaying the learning rate exponentially over iterations, we have the following closed-form expression for the LR at time step (iteration) $t$, denoted $\eta_t$, given the base LR (i.e. the LR at time step zero) $\eta_0$ and a per-step decay factor $\gamma$:

$$\eta_t = \eta_0 \cdot \gamma^t$$
(Reminder: We’re zero-based indexing here, so we start at $t = 0$.)
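
As a quick sanity check, here is a minimal sketch (my addition, not part of the original listing) confirming that the closed form agrees with applying the multiplicative decay one step at a time:

ETA_0 = 1e-3  # base LR (eta_0)
GAMMA = 0.99  # arbitrary decay factor, for illustration only

lr = ETA_0
for t in range(10):
    # Closed form and iterative decay should coincide (up to float error).
    assert abs(lr - ETA_0 * GAMMA**t) < 1e-12
    lr *= GAMMA  # one exponential decay step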

Rearranging, we get the expression for the scaling factor, $\gamma$, given a target learning rate $\eta_T$ which we would like to reach after $T$ time steps, which for us will be $\eta_T = 10^{-5}$ (or 1e-5):

$$\gamma = \exp\!\left(\frac{\ln(\eta_T / \eta_0)}{T}\right) = \left(\frac{\eta_T}{\eta_0}\right)^{1/T}$$

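For example, with $\eta_0 = 10^{-3}$, $\eta_T = 10^{-5}$, and $T = 1000$, this gives $\gamma = \exp(\ln(10^{-2}) / 1000) \approx 0.995405$, i.e. the LR shrinks by about 0.46% per step.
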
#!/usr/bin/env python
 
from math import exp, log
 
 
def compute_exp_lr_decay_gamma(lr_base: float, lr_target: float, timesteps: int) -> float:
    # Decay factor gamma such that lr_base * gamma**timesteps == lr_target.
    return exp(log(lr_target / lr_base) / timesteps)
 
 
def get_gamma_ratio(Ts: tuple[int, int], lr_base: float, lr_target: float) -> float:
    # Ratio gamma(Ts[0]) / gamma(Ts[1]) computed directly: since
    # gamma(T) = exp(s / T) with s = log(lr_target / lr_base), the ratio
    # collapses to exp(s * (1 / Ts[0] - 1 / Ts[1])).
    s = log(lr_target / lr_base)
    return exp(s * (1 / Ts[0] - 1 / Ts[1]))
 
 
# Example usage: compute_exp_lr_decay_gamma
 
LR_BASE = 1e-3
LR_TARGET = 1e-5
TIMESTEP_RANGE = [10, 100, 500, 1000, 2500, 5000, 10_000, 100_000]
 
for timesteps in TIMESTEP_RANGE:
    gamma = compute_exp_lr_decay_gamma(LR_BASE, LR_TARGET, timesteps)
    print(f"T: {timesteps} -> gamma: {gamma}")
 
print("-" * 80)
 
# Example usage: get_gamma_ratio
 
LR_BASE = 1e-3
LR_TARGET = 1e-5
T1 = 10
T2 = 50
 
gamma1 = compute_exp_lr_decay_gamma(LR_BASE, LR_TARGET, T1)
gamma2 = compute_exp_lr_decay_gamma(LR_BASE, LR_TARGET, T2)
print(f"T: {T1} -> gamma: {gamma1}")
print(f"T: {T2} -> gamma: {gamma2}")
print(f"T ratio: {get_gamma_ratio((T1, T2), LR_BASE, LR_TARGET)}")
print(f"T ratio: {gamma1 / gamma2}")
print(f"T ratio: {get_gamma_ratio((T2, T1), LR_BASE, LR_TARGET)}")
print(f"T ratio: {gamma2 / gamma1}")

Expression rearrangement steps