Raspberry Pi 4 B chainloader in under 300 lines of code
A compact remix of the chainloader from the rust-raspberrypi-OS-tutorials repository.
Code
Link the program to start at `0x2000000`, but set the physical start address to `0x200000`, which is where the Raspberry Pi boot firmware loads the given kernel image. In `linker.ld`:
/* Linker script for the chainloader.
 * The image is linked to run at 0x2000000, while its entry point is declared
 * at 0x200000; the assembly stub copies the image from where it is executing
 * to the link address before any absolute address is used for code. */
/* Declared entry address of the image. */
_phys_binary_begin = 0x200000;
ENTRY(_phys_binary_begin)
SECTIONS {
/* Link (run-time) address: every symbol below is placed from here on. */
. = 0x2000000;
/* Start of the range the stub copies to the link address. */
_binary_start = .;
.text : {
/* Keep the stub's section first so that _start is the first code in the image. */
KEEP(*(.text._start))
*(.text*)
}
.rodata : {
*(.rodata*)
}
.data : {
*(.data*)
}
/* The stub copies 8 bytes per iteration, so the end of the copied range is 8-byte aligned. */
. = ALIGN(8);
_binary_end = .;
/* .bss occupies no space in the image (NOLOAD); the stub zeroes
 * [_bss_start, _bss_end) in 16-byte steps, hence the 16-byte alignment
 * of both boundaries. */
.bss (NOLOAD) : ALIGN(16) {
_bss_start = .;
*(.bss*);
. = ALIGN(16);
_bss_end = .;
}
. = ALIGN(8);
/* Reserve 0x1000 bytes after .bss; the stub loads _stack_top into sp. */
. += 0x1000;
_stack_top = .;
}
We use relocations in the assembly stub to copy the chainloader binary to the address it was linked at (`0x2000000`), initialize the stack, and jump to `chainloader_main`. In `stub.s`:
// ld_abs reg, adr: load the absolute (link-time) address of symbol `adr` into `reg`.
// The address is assembled from three 16-bit pieces: movz sets bits 47:32 and
// zeroes the rest of the register, the two movk instructions fill in bits 31:16
// and 15:0. The result does not depend on where the code is currently running.
.macro ld_abs reg, adr
movz \reg, #:abs_g2:\adr
movk \reg, #:abs_g1_nc:\adr
movk \reg, #:abs_g0_nc:\adr
.endm
// ld_rel reg, adr: load the PC-relative address of symbol `adr` into `reg`.
// adrp yields the 4 KiB page of the symbol relative to the current PC and the
// add supplies the low 12 bits, so the result follows the location the code is
// actually executing from rather than the address it was linked at.
.macro ld_rel reg, adr
adrp \reg, \adr
add \reg, \reg, #:lo12:\adr
.endm
// Entry stub: parks every core except the one whose MPIDR_EL1 low byte is zero,
// zeroes .bss, copies the image to its link address, sets up the stack and
// jumps to chainloader_main at its link-time address.
.section .text._start
.global _start
_start:
// Read the core's affinity register and keep only its low 8 bits.
mrs x0, mpidr_el1
and x0, x0, #0xff;
// Only the core reporting 0 continues; all others wait in the loop below.
cbz x0, 1f
0:
wfe
b 0b
1:
// .bss lives at its link-time address, so use absolute addresses.
ld_abs x0, _bss_start
ld_abs x1, _bss_end
clear_bss:
// Zero 16 bytes per iteration until x0 reaches _bss_end
// (both bounds are 16-byte aligned by the linker script).
cmp x0, x1
b.eq copy_binary
stp xzr, xzr, [x0], #16
b clear_bss
copy_binary:
// x0 = where the image currently is (PC-relative),
// x1 = where it was linked to run, x2 = link-time end of the image.
ld_rel x0, _binary_start
ld_abs x1, _binary_start
ld_abs x2, _binary_end
copy_loop:
// Copy 8 bytes at a time while the destination is below _binary_end.
ldr x3, [x0], #8
str x3, [x1], #8
cmp x1, x2
b.lo copy_loop
// Point sp at the top of the area reserved by the linker script.
ld_abs x0, _stack_top
mov sp, x0
// Continue in the relocated copy: branch to the link-time address of the Rust entry point.
ld_abs x1, chainloader_main
br x1
// Not reached by the branch above; spins forever if execution ever gets here.
2:
b 2b
Minimal UART (PL011) setup is needed in order for the chainloader to obtain the payload kernel. The code assumes that the UART clock rate is configured to $48~\mathrm{MHz}$ and sets the baud rate to $115200~\mathrm{baud}$. In `main.rs`:
#![no_std]
#![no_main]
#![feature(const_mut_refs)]
use core::arch::global_asm;
use core::panic::PanicInfo;
global_asm!(include_str!("stub.s"));
/// Base address that every `Offset` value is added to by `mmio_read`/`mmio_write`.
const MMIO_BASE: usize = 0xFE00_0000;
/// Address the received payload is written to and finally jumped to.
const LOAD_ADDR: usize = 0x0020_0000;
/// UART flag register bit 3; polled until clear to wait for the UART to go idle.
const FR_BUSY: u8 = 1 << 3;
/// UART flag register bit 4; `read_byte` waits while it is set (nothing to read yet).
const FR_RXFE: u8 = 1 << 4;
/// UART flag register bit 5; `write_byte` waits while it is set (no room to transmit).
const FR_TXFF: u8 = 1 << 5;
/// Offset of the GPIO register block from the MMIO base.
const GPIO_OFFSET: isize = 0x20_0000;
/// Offset of the UART0 (PL011) register block from the MMIO base.
const UART0_OFFSET: isize = 0x20_1000;
/// Offset of the mailbox register block from the MMIO base.
const MBOX_OFFSET: isize = 0xB880;

/// Byte offsets of peripheral registers, relative to the MMIO base address.
///
/// Variants keep the hardware register names (hence the non-camel-case allow);
/// most of them are listed for reference only and are never used by the
/// chainloader (hence the dead-code allow). Cast a variant with `as usize`
/// to obtain the offset.
#[allow(dead_code, non_camel_case_types)]
enum Offset {
    // GPIO block
    GPIO_BASE = GPIO_OFFSET,
    GPFSEL1 = GPIO_OFFSET + 0x04,
    GPIO_PUP_PDN_CNTRL_REG0 = GPIO_OFFSET + 0xE4,
    GPPUD = GPIO_OFFSET + 0x94,
    GPPUDCLK0 = GPIO_OFFSET + 0x98,

    // UART0 block; the register at the block base is the one bytes are
    // read from and written to.
    UART0_BASE = UART0_OFFSET,
    UART0_RSRECR = UART0_OFFSET + 0x04,
    UART0_FR = UART0_OFFSET + 0x18,
    UART0_ILPR = UART0_OFFSET + 0x20,
    UART0_IBRD = UART0_OFFSET + 0x24,
    UART0_FBRD = UART0_OFFSET + 0x28,
    UART0_LCRH = UART0_OFFSET + 0x2c,
    UART0_CR = UART0_OFFSET + 0x30,
    UART0_IFLS = UART0_OFFSET + 0x34,
    UART0_IMSC = UART0_OFFSET + 0x38,
    UART0_RIS = UART0_OFFSET + 0x3c,
    UART0_MIS = UART0_OFFSET + 0x40,
    UART0_ICR = UART0_OFFSET + 0x44,
    UART0_DMACR = UART0_OFFSET + 0x48,
    UART0_ITCR = UART0_OFFSET + 0x80,
    UART0_ITIP = UART0_OFFSET + 0x84,
    UART0_ITOP = UART0_OFFSET + 0x88,
    UART0_TDR = UART0_OFFSET + 0x8C,

    // Mailbox block
    MBOX_BASE = MBOX_OFFSET, // MBOX_READ
    MBOX_STATUS = MBOX_OFFSET + 0x18,
    MBOX_WRITE = MBOX_OFFSET + 0x20,
}
#[inline(always)]
fn mmio_write(offset: usize, data: T) {
unsafe {
let base = MMIO_BASE as *mut u8;
let adr = base.add(offset) as *mut T;
adr.write_volatile(data)
}
}
#[inline(always)]
fn mmio_read(offset: usize) -> T {
unsafe {
let base = MMIO_BASE as *mut u8;
let adr = base.add(offset) as *mut T;
adr.read_volatile()
}
}
#[inline(always)]
fn flush() {
while mmio_read::(Offset::UART0_FR as usize) & FR_BUSY != 0 {}
}
/// Sends one byte over the UART and waits until it has left the device.
#[inline(always)]
fn write_byte(byte: u8) {
    // Hold off while the transmit side reports that it cannot take more data.
    while (read_flag_register() & FR_TXFF) == FR_TXFF {}

    let data_register = Offset::UART0_BASE as usize;
    mmio_write(data_register, byte);

    // Do not return before the UART has finished transmitting.
    while (read_flag_register() & FR_BUSY) == FR_BUSY {}
}
/// Blocks until the UART has received a byte, then returns it.
#[inline(always)]
fn read_byte() -> u8 {
    loop {
        // `FR_RXFE` set means there is nothing to read yet; keep polling.
        if read_flag_register() & FR_RXFE == 0 {
            return mmio_read(Offset::UART0_BASE as usize);
        }
    }
}
/// Returns the current value of the UART flag register as an 8-bit read.
#[inline(always)]
fn read_flag_register() -> u8 {
    let flag_register = Offset::UART0_FR as usize;
    mmio_read::<u8>(flag_register)
}
// We require 115_200 baud rate and UARTCLK is set to 48 MHz in config.txt
// Baud Rate divisor: 48_000_000/(16*115_200)=26.041667.
// Integer part: 26
// Fractional part: 0.041667
// Fractional part m: int((0.041667*64)+0.5)=3
// Generated baud rate divider: 3+16/64=26.046875
// Generated baud rate: 48_000_000/(16*26.046875)=115_176
fn uart_init() {
let mut r: u32 = mmio_read::(Offset::GPFSEL1 as usize);
r = (r | (1 << 17) | (1 << 14)) & !(0b11 << 15) & !(0b11 << 12);
mmio_write(Offset::GPFSEL1 as usize, r);
mmio_write(
Offset::GPIO_PUP_PDN_CNTRL_REG0 as usize,
((0b01 << 30) | (0b01 << 28)) as u32,
);
flush();
mmio_write(Offset::UART0_CR as usize, 0 as u16);
let icr_val: u16 = mmio_read::(Offset::UART0_ICR as usize);
mmio_write(Offset::UART0_ICR as usize, icr_val & 0xf800u16);
mmio_write(Offset::UART0_IBRD as usize, 26u16);
mmio_write(Offset::UART0_FBRD as usize, 3u8);
mmio_write(
Offset::UART0_LCRH as usize,
((1 << 4) | (1 << 5) | (1 << 6)) as u8,
);
mmio_write(
Offset::UART0_CR as usize,
((1 << 0) | (1 << 8) | (1 << 9)) as u32,
);
}
fn uart_reset() {
mmio_write::(Offset::UART0_CR as usize, 0);
mmio_write::(Offset::UART0_ICR as usize, 0x7FF);
mmio_write::(Offset::UART0_IBRD as usize, 0);
mmio_write::(Offset::UART0_FBRD as usize, 0);
mmio_write::(Offset::UART0_LCRH as usize, 0);
mmio_write::(Offset::UART0_IFLS as usize, 0);
mmio_write::(Offset::UART0_DMACR as usize, 0);
mmio_write::(Offset::UART0_CR as usize, 0);
flush()
}
/// Rust entry point, reached from the assembly stub.
///
/// Protocol spoken over the UART:
/// 1. send three `0x03` bytes to ask the host for a payload,
/// 2. receive the payload size as four bytes, least significant byte first,
/// 3. acknowledge with the two bytes `OK`,
/// 4. receive exactly that many bytes and store them starting at `LOAD_ADDR`,
/// 5. reset the UART and jump to `LOAD_ADDR`.
///
/// NOTE(review): the announced size is not bounds-checked before the payload
/// is written to memory.
#[no_mangle]
pub extern "C" fn chainloader_main() -> ! {
    uart_init();

    // Payload request: three times the byte 0x03.
    write_byte(3);
    write_byte(3);
    write_byte(3);

    // Array elements are evaluated left to right, so the first byte received
    // ends up as the least significant one.
    let size = u32::from_le_bytes([read_byte(), read_byte(), read_byte(), read_byte()]);

    write_byte(b'O');
    write_byte(b'K');

    let kernel_addr = LOAD_ADDR as *mut u8;
    for index in 0..size {
        let byte = read_byte();
        // SAFETY: relies on `LOAD_ADDR..LOAD_ADDR + size` being writable
        // memory that is not otherwise in use; this is not verified here.
        unsafe { kernel_addr.add(index as usize).write_volatile(byte) };
    }

    // Drain the UART and hand it over unconfigured.
    flush();
    uart_reset();

    // SAFETY: relies on the bytes just stored at `LOAD_ADDR` being executable
    // code whose entry point is its first instruction and which never returns.
    let kernel = unsafe { core::mem::transmute::<*mut u8, fn() -> !>(kernel_addr) };
    kernel()
}
/// Panic handler required in a `no_std` binary: ignores the panic details and
/// spins forever.
#[panic_handler]
fn panic(_panic_info: &PanicInfo) -> ! {
    loop {}
}
And finally, a Python script I use to push the payload image:
import argparse
import math
import serial
import struct
import time
# Serial link to the board: 115200 baud, 8 data bits, 1 stop bit.
# No read timeout is configured, so reads block until data arrives;
# writes give up after 0.1 seconds.
ser = serial.Serial(
    port="/dev/ttyUSB0",
    baudrate=115200,
    bytesize=serial.EIGHTBITS,
    stopbits=serial.STOPBITS_ONE,
    write_timeout=0.1,
)
def parse_arguments():
    """Parse the command line of the image pusher.

    Returns:
        argparse.Namespace: its ``image`` attribute holds the path of the
        kernel image to send.

    Raises:
        SystemExit: raised by argparse when ``--image`` is missing or the
        command line is otherwise invalid.
    """
    parser = argparse.ArgumentParser(prog='push_image',
                                     description="push kernel image to Raspberry Pi 4 B")
    # Required: without a path the script would only fail much later, when it
    # tries to open ``None`` as a file.
    parser.add_argument("--image", type=str, required=True, help="path to kernel image")
    return parser.parse_args()
def wait_for_payload_signal():
    """Wait up to 20 seconds for the chainloader's payload request.

    The request is three consecutive 0x03 bytes; any other byte received in
    between restarts the count.

    Returns:
        bool: True once the request has been seen, False (after printing a
        message) if it did not arrive in time.
    """
    duration = 20
    poll_interval = 0.5
    deadline = time.monotonic() + duration
    count = 0

    # The port is opened without a read timeout, so a plain read() would block
    # forever on a silent line and the deadline would never be checked. Use a
    # short timeout while waiting and restore the previous setting afterwards.
    previous_timeout = ser.timeout
    ser.timeout = poll_interval
    try:
        while time.monotonic() < deadline:
            byte = ser.read(1)
            if byte == b'\x03':
                count += 1
                if count == 3:
                    return True
            elif byte:
                # Some other byte arrived: the run of 0x03 bytes is broken.
                # An empty result is just a read timeout and changes nothing.
                count = 0
    finally:
        ser.timeout = previous_timeout

    print("Did not receive payload signal in 20 seconds.")
    return False
def push_image():
    """Send a kernel image to the chainloader and then echo the board's output.

    Steps: wait for the payload request, send the image size as a 4-byte
    little-endian unsigned integer, expect the reply ``OK``, send the image in
    512-byte chunks, then print every line received from the board until the
    script is interrupted. The serial port is closed on every way out.
    """
    args = parse_arguments()
    image = args.image
    try:
        if not wait_for_payload_signal():
            return

        with open(image, 'rb') as f:
            buf = f.read()
        size = len(buf)

        # Unsigned, because the chainloader assembles the size into a u32.
        ser.write(struct.pack("<I", size))
        print(size)

        # The second byte is only read if the first one matched.
        if ser.read() != b'O' or ser.read() != b'K':
            print("Chainloader failed to read size")
            return
        print("size written.")

        chunk_size = 512
        total_chunks = math.ceil(size / chunk_size)
        for c, start in enumerate(range(0, size, chunk_size), start=1):
            ser.write(buf[start:start + chunk_size])
            print(f"{c}/{total_chunks} chunks written")
        print("image written.")

        # Act as a simple terminal for whatever the payload prints.
        while True:
            line = ser.readline()
            if line:
                try:
                    print(line.decode('utf-8').strip())
                except UnicodeDecodeError:
                    # Deliberately skip lines that are not valid UTF-8.
                    pass
    finally:
        ser.close()
# Only start pushing when executed as a script, not when imported.
if __name__ == "__main__":
    push_image()