ams-slave-22/Core/Src/ClockSync.c
2022-08-03 00:52:28 +02:00

329 lines
12 KiB
C

#include "ClockSync.h"
/**
* @file ClockSync.c
* @author Jasper v. Blanckenburg (j.blanckenburg@fasttube.de)
* @brief Clock synchronization mechanism -- slave side
* @version 0.1
* @date 2022-08-02
*
* @copyright Copyright (c) 2022
*
* OVERVIEW
* =========
* The slaves use the STM's internal clock (HSI), which is -- especially at
* higher temperatures -- quite inaccurate (±4% over the STM's working
* temperature range, according to the datasheet).
*
* Since the CAN bitrate is directly determined from the HSI (through prescaling
* & time quanta), an inaccurate HSI means an inaccurate CAN bitrate. Especially
* once the battery heats up, this leads to packet loss and ultimately the CAN
* transceiver entering Bus-Off due to too many transmission errors.
*
* The easy fix would be to use an external clock (HSE), i.e. a quartz crystal.
* Although a crystal is present on the slaves, it does not work on every one
* and is mounted on the (inaccessible) underside. Thus, we need to make do with
* the HSI.
*
* Fortunately, the HSI frequency can be trimmed through the HSITRIM bits in the
* RCC_CR register (see STM32F412 reference manual, section 6.2.2; as well as
* STM AN5067).
*
* The HSITRIM register provides the mechanism for manipulating the HSI
* frequency, however we still need to determine what value to manipulate it to.
* Since we don't really care about the absolute accuracy of the slaves' clocks,
* but rather their relative accuracy to the other nodes on the CAN bus
* (especially the master), we can synchronize the clocks to one another via
* timed CAN frames.
*
* TIMED CAN FRAMES
* ================
* As the master is the least affected by the battery heating up, and also had
* the most accurate clock during testing, we use it to generate the timed
* frames.
*
* It sends frames from timer interrupts and with high priority (low ID) to
* ensure minimal deviation from their intended frequency. It sends two separate
* kinds of frames: CLOCK_SYNC and MASTER_HEARTBEAT.
*
* The MASTER_HEARTBEAT frames are sent every 100 ms. Their purpose is simply to
* reliably have messages on the bus, so that the slaves can tell whether they
* are roughly in sync with the master by checking for their reception (see the
* FREQUENCY HOPPING section).
*
* The CLOCK_SYNC frames are sent every 1000 ms. They serve as the external
* clock source. The slaves continually trim their HSI according to the time
* they measure between two CLOCK_SYNC frames (see the NORMAL OPERATION section).
*
* FREQUENCY HOPPING
* =================
* If the HSI is very out of sync with the master's clock (e.g. because the AMS
* was restarted with a warm battery), the slaves don't receive any CAN packets
* from the master and thus can't rely on the CLOCK_SYNC frames for
* synchronization. In this case, they rely on what is essentially frequency
* hopping.
*
* The frequency hopping mechanism has two stages: One to get in the right
* ballpark, and one to make the communication reliable enough for normal
* operation.
*
* STAGE 1
* -------
* Stage 1 trims the HSI until at least one MASTER_HEARTBEAT frame has been
* received. The frequency alternates between lower and higher values, i.e. if
* the trim was initially 16, it will go through the following values:
*
* 16 -> 14 -> 18 -> 12 -> 20 -> 10 -> 22 -> ...
*
* Once a MASTER_HEARTBEAT frame has been received, the slave transitions to
* stage 2.
*
* STAGE 2
* -------
* Stage 2 trims the HSI further until at least three consecutive
* MASTER_HEARTBEAT frames have been received. The frequency alternates in the
* same fashion as in stage 1, but now around the frequency where a
* MASTER_HEARTBEAT frame was received in stage 1, and more slowly.
*
* Once three consecutive MASTER_HEARTBEAT frames have been received, the slave
* transitions to normal operation.
*
* NORMAL OPERATION
* ================
* During normal operation, the HSI is continually trimmed so that CLOCK_SYNC
* frames are received every 1000 ms. Since the slave measures time in
* milliseconds (via the HAL_GetTick() function), this allows a measurement
* accuracy of 0.1%. Each increment of HSITRIM should account for an approximate
* 0.3% increase in the clock frequency, according to AN5067, so the 0.1%
* accuracy is more than enough for accurate trimming.
*
* By counting the ticks between two CLOCK_SYNC frames, the slave determines its
* actual HSI frequency (assuming the master clock is accurate):
*
* f_real = 16 MHz * measured_ticks / 1000
*
* If the real frequency differs from the target frequency (16 MHz) by more than
* the trim delta, the trim is incremented or decremented accordingly.
*
* The trim delta is determined dynamically: It is the difference between the
* real frequency before and after each trim.
*
* If the slave misses two consecutive CLOCK_SYNC frames for whatever reason, it
* returns to (stage 1) frequency hopping.
*/
#include "AMS_CAN.h"
#include "stm32f412rx.h"
#include "stm32f4xx_hal.h"
#include "stm32f4xx_hal_can.h"
#include <stdint.h>
// Current state of the clock sync state machine. Starts out in stage 1
// frequency hopping and is only written by clock_sync_update() in this file.
ClockSyncState clock_sync_state = CLOCK_SYNC_FREQ_HOPPING_STAGE1;
// HAL_GetTick() value stored by clock_sync_handle_clock_sync_frame(). The
// value 0 is treated there as "no CLOCK_SYNC frame received yet".
static uint32_t last_clock_sync_frame_time = 0;
// HAL_GetTick() value stored by clock_sync_handle_master_heartbeat().
static uint32_t last_master_heartbeat_time = 0;
// Incremented once per call to clock_sync_handle_master_heartbeat().
static uint32_t master_heartbeat_counter = 0;
// HSI trim captured when a frequency hopping stage is entered; the hopping
// trims are computed relative to this value.
static uint32_t freq_hopping_start_trim = 0;
// Index of the next stage 1 hop; reset when stage 1 is entered and when the
// next hop distance would exceed RCC_CR_HSITRIM_MAX.
static uint32_t freq_hopping_iteration = 0;
// HAL_GetTick() value at the start of the current stage 2 attempt.
static uint32_t freq_hopping_stage2_start_time = 0;
// Value of master_heartbeat_counter at the start of the current stage 2
// attempt.
static uint32_t freq_hopping_stage2_start_counter = 0;
// Number of stage 2 attempts that timed out since stage 2 was entered.
static uint32_t freq_hopping_stage2_attempts = 0;
/**
 * @brief Run one step of the clock sync state machine.
 *
 * Calls the update function of the current state to obtain the next state. If
 * the state changes, the matching start function of the new state is invoked
 * before the new state is stored in clock_sync_state.
 */
void clock_sync_update() {
  ClockSyncState upcoming;

  // Ask the active state which state should follow.
  if (clock_sync_state == CLOCK_SYNC_NORMAL_OPERATION) {
    upcoming = clock_sync_update_normal_operation();
  } else if (clock_sync_state == CLOCK_SYNC_FREQ_HOPPING_STAGE1) {
    upcoming = clock_sync_update_freq_hopping_stage1();
  } else if (clock_sync_state == CLOCK_SYNC_FREQ_HOPPING_STAGE2) {
    upcoming = clock_sync_update_freq_hopping_stage2();
  } else {
    // Unknown state: fall back to the first frequency hopping stage.
    upcoming = CLOCK_SYNC_FREQ_HOPPING_STAGE1;
  }

  // On a transition, let the new state initialize itself.
  if (upcoming != clock_sync_state) {
    if (upcoming == CLOCK_SYNC_NORMAL_OPERATION) {
      clock_sync_start_normal_operation();
    } else if (upcoming == CLOCK_SYNC_FREQ_HOPPING_STAGE1) {
      clock_sync_start_freq_hopping_stage1();
    } else if (upcoming == CLOCK_SYNC_FREQ_HOPPING_STAGE2) {
      clock_sync_start_freq_hopping_stage2();
    }
  }

  clock_sync_state = upcoming;
}
/**
 * @brief Entry hook for the normal operation state.
 *
 * Intentionally empty: entering normal operation needs no initialization.
 */
void clock_sync_start_normal_operation() {
  // Nothing to set up.
}
/**
 * @brief Entry hook for frequency hopping stage 1.
 *
 * Restarts the hop sequence and remembers the current HSI trim as the value
 * the hops are computed relative to.
 */
void clock_sync_start_freq_hopping_stage1() {
  freq_hopping_iteration = 0;
  freq_hopping_start_trim = get_hsi_trim();
}
/**
 * @brief Entry hook for frequency hopping stage 2.
 *
 * Clears the attempt count, snapshots the heartbeat counter, and records the
 * current HSI trim and tick as the reference for the first stage 2 attempt.
 */
void clock_sync_start_freq_hopping_stage2() {
  freq_hopping_stage2_attempts = 0;
  freq_hopping_stage2_start_counter = master_heartbeat_counter;
  freq_hopping_start_trim = get_hsi_trim();
  freq_hopping_stage2_start_time = HAL_GetTick();
}
/**
 * @brief Update step for the normal operation state.
 *
 * @return CLOCK_SYNC_FREQ_HOPPING_STAGE1 if the last master heartbeat is older
 *         than MASTER_HEARTBEAT_DESYNC_THRESH ticks or the CAN transmit error
 *         counter exceeds CLOCK_SYNC_MAX_TRANSMIT_ERRORS; otherwise
 *         CLOCK_SYNC_NORMAL_OPERATION.
 */
ClockSyncState clock_sync_update_normal_operation() {
  const uint32_t tick = HAL_GetTick();
  // Extract the transmit error counter (TEC) field of the CAN error status
  // register.
  const uint8_t tec =
      (ams_can_handle->Instance->ESR & CAN_ESR_TEC_Msk) >> CAN_ESR_TEC_Pos;

  if (tick - last_master_heartbeat_time > MASTER_HEARTBEAT_DESYNC_THRESH) {
    // Heartbeats stopped arriving.
    return CLOCK_SYNC_FREQ_HOPPING_STAGE1;
  }
  if (tec > CLOCK_SYNC_MAX_TRANSMIT_ERRORS) {
    // Too many transmit errors.
    return CLOCK_SYNC_FREQ_HOPPING_STAGE1;
  }
  return CLOCK_SYNC_NORMAL_OPERATION;
}
/**
 * @brief Update step for frequency hopping stage 1.
 *
 * Moves on to stage 2 once a CLOCK_SYNC frame timestamp is more recent than
 * CLOCK_SYNC_SANITY_INTERVAL_MAX ticks. Otherwise, if no master heartbeat was
 * seen within MASTER_HEARTBEAT_SANITY_INTERVAL_MAX ticks, applies the next
 * hopping trim and advances the hop index.
 *
 * @return CLOCK_SYNC_FREQ_HOPPING_STAGE2 on the transition described above,
 *         CLOCK_SYNC_FREQ_HOPPING_STAGE1 otherwise.
 */
ClockSyncState clock_sync_update_freq_hopping_stage1() {
  const uint32_t tick = HAL_GetTick();

  if (tick - last_clock_sync_frame_time < CLOCK_SYNC_SANITY_INTERVAL_MAX) {
    // We are at least close to re-sync'ing, go to stage 2
    return CLOCK_SYNC_FREQ_HOPPING_STAGE2;
  }

  if (tick - last_master_heartbeat_time <=
      MASTER_HEARTBEAT_SANITY_INTERVAL_MAX) {
    // A heartbeat arrived recently; keep the current trim.
    return CLOCK_SYNC_FREQ_HOPPING_STAGE1;
  }

  // No recent heartbeat: hop to the next trim value.
  set_hsi_trim(calculate_freq_hopping_trim(freq_hopping_iteration));
  freq_hopping_iteration++;

  const uint32_t hop_after_next = freq_hopping_iteration + 1;
  if (hop_after_next * FREQ_HOPPING_TRIM_STEPS > RCC_CR_HSITRIM_MAX) {
    // The next delta would be too large, start again
    freq_hopping_iteration = 0;
  }
  return CLOCK_SYNC_FREQ_HOPPING_STAGE1;
}
/**
 * @brief Update step for frequency hopping stage 2.
 *
 * Succeeds once more than FREQ_HOPPING_STAGE2_FRAMES heartbeats were counted
 * since the current attempt started. When an attempt times out, the attempt
 * count is increased and either a new trim is tried or, after more than
 * FREQ_HOPPING_STAGE2_MAX_ATTEMPTS attempts, stage 1 is resumed.
 *
 * @return The state the state machine should be in next.
 */
ClockSyncState clock_sync_update_freq_hopping_stage2() {
  const uint32_t heartbeats_seen =
      master_heartbeat_counter - freq_hopping_stage2_start_counter;
  if (heartbeats_seen > FREQ_HOPPING_STAGE2_FRAMES) {
    // We've re-sync'd!
    return CLOCK_SYNC_NORMAL_OPERATION;
  }

  const uint32_t tick = HAL_GetTick();
  if (tick - freq_hopping_stage2_start_time <=
      FREQ_HOPPING_STAGE2_FRAMES * MASTER_HEARTBEAT_SANITY_INTERVAL_MAX) {
    // The current attempt has not timed out yet; keep waiting.
    return CLOCK_SYNC_FREQ_HOPPING_STAGE2;
  }

  freq_hopping_stage2_attempts++;
  if (freq_hopping_stage2_attempts > FREQ_HOPPING_STAGE2_MAX_ATTEMPTS) {
    // Looks like we're not really close to sync'ing, go back to stage 1
    return CLOCK_SYNC_FREQ_HOPPING_STAGE1;
  }

  // We haven't received all heartbeats, trim further and start a new attempt.
  set_hsi_trim(calculate_freq_hopping_trim(freq_hopping_stage2_attempts));
  freq_hopping_stage2_start_counter = master_heartbeat_counter;
  freq_hopping_stage2_start_time = tick;
  return CLOCK_SYNC_FREQ_HOPPING_STAGE2;
}
/**
 * @brief Handle a received CLOCK_SYNC frame.
 *
 * If the state machine is in normal operation and the frame passes the sanity
 * checks below, the HSI frequency is estimated from the number of ticks that
 * elapsed since the previous CLOCK_SYNC frame, and the HSI trim is adjusted by
 * at most one step towards CLOCK_TARGET_FREQ. The reception tick and the frame
 * counter are recorded on every call, whether or not the checks pass.
 *
 * @param counter Counter value carried by the frame; consecutive frames are
 *                expected to differ by one (mod 2^8).
 */
void clock_sync_handle_clock_sync_frame(uint8_t counter) {
// Frequency estimate from the frame at which the most recent trim was applied.
static uint32_t f_pre_trim = CLOCK_TARGET_FREQ;
// Non-zero if the previous accepted frame caused a trim, i.e. this frame is
// the first measurement taken with the new trim.
static int32_t trimmed_last_frame = 0;
// Estimated frequency change caused by one trim step; starts at HSI_TRIM_FREQ
// and is re-measured after each trim.
static int32_t last_trim_delta = HSI_TRIM_FREQ;
// Counter value of the previously received frame.
static uint8_t last_clock_sync_frame_counter = 0;
uint32_t now = HAL_GetTick();
// Ticks elapsed since the previous CLOCK_SYNC frame.
uint32_t n_measured = now - last_clock_sync_frame_time;
// Stored in a uint8_t, so the expected value wraps from 255 to 0.
uint8_t expected_counter = last_clock_sync_frame_counter + 1;
/* Sanity checks:
* - Are we actually in normal operation mode?
* - Have we received a sync frame before?
* - Did the counter increment by one (mod 2^8)? I.e., did we miss a frame?
* - Is the measured time elapsed within feasible bounds?
*/
if (clock_sync_state == CLOCK_SYNC_NORMAL_OPERATION &&
last_clock_sync_frame_time != 0 && counter == expected_counter &&
n_measured >= CLOCK_SYNC_SANITY_INTERVAL_MIN &&
n_measured <= CLOCK_SYNC_SANITY_INTERVAL_MAX) {
// Scale the measured tick count to a frequency: measuring exactly
// CLOCK_SYNC_INTERVAL ticks yields CLOCK_TARGET_FREQ (up to the rounding of
// the integer division).
uint32_t f_real = n_measured * (CLOCK_TARGET_FREQ / CLOCK_SYNC_INTERVAL);
if (trimmed_last_frame) {
// Update trim delta
last_trim_delta = f_pre_trim - f_real;
// Keep the delta strictly positive: a zero measurement falls back to
// HSI_TRIM_FREQ (presumably non-zero -- TODO confirm) so that the division
// below has a usable divisor, and a negative one is replaced by its
// magnitude.
if (last_trim_delta == 0) {
last_trim_delta = HSI_TRIM_FREQ;
} else if (last_trim_delta < 0) {
last_trim_delta = -last_trim_delta;
}
trimmed_last_frame = 0;
}
// Positive when the estimated frequency is below the target.
int32_t delta_f = CLOCK_TARGET_FREQ - f_real;
// Integer division truncates towards zero, so an error smaller than one trim
// step results in no trim.
int32_t delta_quants = delta_f / last_trim_delta;
if (delta_quants != 0) {
// We were able to receive the frame, so we should be reasonably close. It
// should thus be enough to trim by -1 or 1.
int32_t trim_delta = (delta_quants < 0) ? -1 : 1;
trim_hsi_by(trim_delta);
// Remember the pre-trim estimate so the next frame can measure the effect of
// this trim step.
f_pre_trim = f_real;
trimmed_last_frame = 1;
}
}
// Always remember this frame, even if it was not used for trimming.
last_clock_sync_frame_time = now;
last_clock_sync_frame_counter = counter;
}
/**
 * @brief Handle a received MASTER_HEARTBEAT frame.
 *
 * Stores the current tick as the time of the last heartbeat and increments the
 * heartbeat counter.
 */
void clock_sync_handle_master_heartbeat() {
  const uint32_t received_at = HAL_GetTick();
  last_master_heartbeat_time = received_at;
  master_heartbeat_counter += 1;
}
/**
 * @brief Read the current HSI trim value.
 *
 * @return The HSITRIM field of RCC->CR, shifted down to bit 0.
 */
uint8_t get_hsi_trim() {
  const uint32_t trim_field = RCC->CR & RCC_CR_HSITRIM_Msk;
  return (uint8_t)(trim_field >> RCC_CR_HSITRIM_Pos);
}
/**
 * @brief Write a new HSI trim value.
 *
 * Performs a single read-modify-write of RCC->CR that replaces only the
 * HSITRIM field; bits of @p trim that do not fit the field are discarded.
 *
 * @param trim New trim value.
 */
void set_hsi_trim(uint8_t trim) {
  // Position the new trim and drop anything outside the HSITRIM field.
  const uint32_t trim_bits =
      ((uint32_t)trim << RCC_CR_HSITRIM_Pos) & RCC_CR_HSITRIM_Msk;
  // Read the register once and keep every bit except the old trim.
  const uint32_t preserved_bits = RCC->CR & ~RCC_CR_HSITRIM_Msk;
  RCC->CR = preserved_bits | trim_bits;
}
/**
 * @brief Adjust the HSI trim relative to its current value.
 *
 * The resulting trim is clamped to the range 0..RCC_CR_HSITRIM_MAX before it
 * is written.
 *
 * @param delta Signed number of trim steps to add to the current trim.
 */
void trim_hsi_by(int32_t delta) {
  int32_t target = get_hsi_trim();
  target += delta;

  int32_t clamped = target;
  if (target > RCC_CR_HSITRIM_MAX) {
    clamped = RCC_CR_HSITRIM_MAX;
  } else if (target < 0) {
    clamped = 0;
  }

  set_hsi_trim(clamped);
}
/**
 * @brief Compute the HSI trim for one frequency hopping step.
 *
 * The hop distance is (freq_hopping_iteration + 1) * FREQ_HOPPING_TRIM_STEPS.
 * It is subtracted from freq_hopping_start_trim for even iterations and added
 * for odd ones. Results outside 0..RCC_CR_HSITRIM_MAX wrap around to the other
 * end of the range.
 *
 * @param freq_hopping_iteration Zero-based hop index. Note that this parameter
 *        shadows the file-scope variable of the same name; only the argument
 *        is used here.
 * @return The trim value to apply, always within 0..RCC_CR_HSITRIM_MAX.
 */
uint8_t calculate_freq_hopping_trim(uint32_t freq_hopping_iteration) {
  // Number of distinct trim values; used to wrap around the valid range.
  const int32_t trim_range = (int32_t)RCC_CR_HSITRIM_MAX + 1;

  int32_t trim_delta =
      (int32_t)((freq_hopping_iteration + 1) * FREQ_HOPPING_TRIM_STEPS);
  if (freq_hopping_iteration % 2 == 0) {
    // Even iterations hop below the start trim, odd ones above it.
    trim_delta = -trim_delta;
  }

  // Do the addition in signed arithmetic so a hop below zero is represented
  // as a negative number instead of relying on unsigned wrap-around.
  int32_t new_trim = (int32_t)freq_hopping_start_trim + trim_delta;

  // Wrap into 0..RCC_CR_HSITRIM_MAX. The remainder keeps the sign of the
  // dividend, so negative results need one extra correction.
  new_trim %= trim_range;
  if (new_trim < 0) {
    new_trim += trim_range;
  }

  // The original implementation fell off the end of the function without
  // returning, leaving the callers with an indeterminate trim value.
  return (uint8_t)new_trim;
}