Never mind, I'm dumb. BRA has a delay slot, so the mov.l at 0x28 executes before the branch is taken: we're actually loading r2 with 0x8100004 before entering the loop at 0x2e. I'll paste my whole disassembly here, with a few annotations:
Code:
default.bin: file format binary
Disassembly of section .data:
00000000 <.data>:
! dumping to stack frame
0: 2f e6 mov.l r14,@-r15
2: 4f 22 sts.l pr,@-r15
4: 2f 46 mov.l r4,@-r15
6: b0 0d bsr 0x24
8: 6e 53 mov r5,r14
a: e6 01 mov #1,r6
c: e4 00 mov #0,r4
e: b0 21 bsr 0x54
10: 65 63 mov r6,r5
12: d7 03 mov.l 0x20,r7 ! 300080
14: 65 ed extu.w r14,r5
16: 64 f6 mov.l @r15+,r4
18: 4f 26 lds.l @r15+,pr
1a: 47 2b jmp @r7
1c: 6e f6 mov.l @r15+,r14
1e: 00 00 .word 0x0000
20: 00 30 .word 0x0030
22: 00 80 .word 0x0080
! subroutine 1
24: e6 00 mov #0,r6
26: a0 02 bra 0x2e
28: d2 0e mov.l 0x64,r2 ! 8100004
2a: 22 62 mov.l r6,@r2
2c: 72 04 add #4,r2
2e: d5 0e mov.l 0x68,r5 ! 8100004
30: 32 52 cmp/hs r5,r2
32: 8b fa bf 0x2a ! jump if r2 < r5
34: d6 0d mov.l 0x6c,r6 ! 8100004
36: a0 02 bra 0x3e
38: d5 0e mov.l 0x74,r5 ! 300084
3a: 26 22 mov.l r2,@r6
3c: 76 04 add #4,r6
3e: d4 0c mov.l 0x70,r4 ! 8100004
40: 36 42 cmp/hs r4,r6
42: 8b fa bf 0x3a
44: d1 02 mov.l 0x50,r1 ! 8100000
46: d5 07 mov.l 0x64,r5 ! 8100004
48: 34 58 sub r5,r4
4a: 74 04 add #4,r4
4c: 00 0b rts
4e: 21 42 mov.l r4,@r1
50: 08 10 .word 0x0810
52: 00 00 .word 0x0000
54: d2 02 mov.l 0x60,r2 ! 80020070
56: e0 29 mov #41,r0
58: 42 2b jmp @r2
5a: 00 09 nop
5c: 00 00 .word 0x0000
5e: 00 00 .word 0x0000
60: 80 02 mov.b r0,@(2,r0)
62: 00 70 .word 0x0070
64: 08 10 .word 0x0810
66: 00 04 mov.b r0,@(r0,r0)
68: 08 10 .word 0x0810
6a: 00 04 mov.b r0,@(r0,r0)
6c: 08 10 .word 0x0810
6e: 00 04 mov.b r0,@(r0,r0)
70: 08 10 .word 0x0810
72: 00 04 mov.b r0,@(r0,r0)
74: 00 30 .word 0x0030
76: 00 84 mov.b r8,@(r0,r0)
...
80: 00 0b rts
82: 00 09 nop