@@ -370,28 +370,59 @@ riscv_encode(void *simple lzma_attribute((__unused__)),
370
370
// The loop is advanced by 2 bytes every iteration since the
371
371
// instruction stream may include 16-bit instructions (C extension).
372
372
for (i = 0 ; i <= size ; i += 2 ) {
373
- uint32_t inst = read32le (buffer + i );
373
+ uint32_t inst = buffer [i ];
374
+
375
+ if (inst == 0xEF ) {
376
+ // JAL
377
+ const uint32_t b1 = buffer [i + 1 ];
378
+
379
+ // Only filter rd=x1(ra) and rd=x5(t0).
380
+ if ((b1 & 0x0D ) != 0 )
381
+ continue ;
374
382
375
- if ((inst & 0xDFF ) == 0x0EF ) {
376
- // JAL with rd=x1(ra) or rd=x5(t0)
377
- //
378
383
// The 20-bit immediate is in four pieces.
379
384
// The encoder stores it in big endian form
380
385
// since it improves compression slightly.
381
- uint32_t addr
382
- = ((inst & 0x80000000 ) >> 11 )
383
- | ((inst & 0x7FE00000 ) >> 20 )
384
- | ((inst & 0x00100000 ) >> 9 )
385
- | (inst & 0x000FF000 );
386
+ const uint32_t b2 = buffer [i + 2 ];
387
+ const uint32_t b3 = buffer [i + 3 ];
388
+ const uint32_t pc = now_pos + (uint32_t )i ;
389
+
390
+ // The following chart shows the highest three bytes of JAL, focusing on
391
+ // the 20-bit immediate field [31:12]. The first row of numbers is the
392
+ // bit position in a 32-bit little endian instruction. The second row of
393
+ // numbers shows the order of the immediate field in a J-type instruction.
394
+ // The last row is the bit number in each byte.
395
+ //
396
+ // To determine the amount to shift each bit, subtract the value in
397
+ // the last row from the value in the second-to-last row. If the number
398
+ // is positive, shift left. If negative, shift right.
399
+ //
400
+ // For example, at the rightmost side of the chart, bit 4 of b1 is
401
+ // bit 12 of the address. Thus that bit needs to be shifted left
402
+ // by 12 - 4 = 8 bits to put it in the right place in the addr variable.
403
+ //
404
+ // NOTE: The immediate of a J-type instruction holds bits [20:1] of
405
+ // the address. Bit [0] is always 0 and is not part of the immediate.
406
+ //
407
+ // | b3 | b2 | b1 |
408
+ // | 31 30 29 28 27 26 25 24 | 23 22 21 20 19 18 17 16 | 15 14 13 12 x x x x |
409
+ // | 20 10 9 8 7 6 5 4 | 3 2 1 11 19 18 17 16 | 15 14 13 12 x x x x |
410
+ // | 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 4 x x x x |
386
411
387
- addr += now_pos + (uint32_t )i ;
412
+ uint32_t addr = ((b1 & 0xF0 ) << 8 )
413
+ | ((b2 & 0x0F ) << 16 )
414
+ | ((b2 & 0x10 ) << 7 )
415
+ | ((b2 & 0xE0 ) >> 4 )
416
+ | ((b3 & 0x7F ) << 4 )
417
+ | ((b3 & 0x80 ) << 13 );
388
418
389
- inst = (inst & 0xFFF )
390
- | ((addr & 0x1E0000 ) >> 5 )
391
- | ((addr & 0x01FE00 ) << 7 )
392
- | ((addr & 0x0001FE ) << 23 );
419
+ addr += pc ;
393
420
394
- write32le (buffer + i , inst );
421
+ buffer [i + 1 ] = (uint8_t )((b1 & 0x0F )
422
+ | ((addr >> 13 ) & 0xF0 ));
423
+
424
+ buffer [i + 2 ] = (uint8_t )(addr >> 9 );
425
+ buffer [i + 3 ] = (uint8_t )(addr >> 1 );
395
426
396
427
// The "-2" is included because the for-loop will
397
428
// always increment by 2. In this case, we want to
@@ -401,7 +432,10 @@ riscv_encode(void *simple lzma_attribute((__unused__)),
401
432
402
433
} else if ((inst & 0x7F ) == 0x17 ) {
403
434
// AUIPC
404
- //
435
+ inst |= (uint32_t )buffer [i + 1 ] << 8 ;
436
+ inst |= (uint32_t )buffer [i + 2 ] << 16 ;
437
+ inst |= (uint32_t )buffer [i + 3 ] << 24 ;
438
+
405
439
// Branch based on AUIPC's rd. The bitmask test does
406
440
// the same thing as this:
407
441
//
@@ -587,30 +621,50 @@ riscv_decode(void *simple lzma_attribute((__unused__)),
587
621
588
622
size_t i ;
589
623
for (i = 0 ; i <= size ; i += 2 ) {
590
- uint32_t inst = read32le ( buffer + i ) ;
624
+ uint32_t inst = buffer [ i ] ;
591
625
592
- if ((inst & 0xDFF ) == 0x0EF ) {
593
- // JAL with rd=x1(ra) or rd=x5(t0)
594
- uint32_t addr
595
- = ((inst << 5 ) & 0x1E0000 )
596
- | ((inst >> 7 ) & 0x01FE00 )
597
- | ((inst >> 23 ) & 0x0001FE );
626
+ if (inst == 0xEF ) {
627
+ // JAL
628
+ const uint32_t b1 = buffer [i + 1 ];
598
629
599
- addr -= now_pos + (uint32_t )i ;
630
+ // Only filter rd=x1(ra) and rd=x5(t0).
631
+ if ((b1 & 0x0D ) != 0 )
632
+ continue ;
600
633
601
- inst = (inst & 0xFFF )
602
- | ((addr << 11 ) & 0x80000000 )
603
- | ((addr << 20 ) & 0x7FE00000 )
604
- | ((addr << 9 ) & 0x00100000 )
605
- | ( addr & 0x000FF000 );
634
+ const uint32_t b2 = buffer [i + 2 ];
635
+ const uint32_t b3 = buffer [i + 3 ];
636
+ const uint32_t pc = now_pos + (uint32_t )i ;
637
+
638
+ // | b3 | b2 | b1 |
639
+ // | 31 30 29 28 27 26 25 24 | 23 22 21 20 19 18 17 16 | 15 14 13 12 x x x x |
640
+ // | 20 10 9 8 7 6 5 4 | 3 2 1 11 19 18 17 16 | 15 14 13 12 x x x x |
641
+ // | 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 4 x x x x |
642
+
643
+ uint32_t addr = ((b1 & 0xF0 ) << 13 )
644
+ | (b2 << 9 ) | (b3 << 1 );
645
+
646
+ addr -= pc ;
647
+
648
+ buffer [i + 1 ] = (uint8_t )((b1 & 0x0F )
649
+ | ((addr >> 8 ) & 0xF0 ));
650
+
651
+ buffer [i + 2 ] = (uint8_t )(((addr >> 16 ) & 0x0F )
652
+ | ((addr >> 7 ) & 0x10 )
653
+ | ((addr << 4 ) & 0xE0 ));
654
+
655
+ buffer [i + 3 ] = (uint8_t )(((addr >> 4 ) & 0x7F )
656
+ | ((addr >> 13 ) & 0x80 ));
606
657
607
- write32le (buffer + i , inst );
608
658
i += 4 - 2 ;
609
659
610
660
} else if ((inst & 0x7F ) == 0x17 ) {
611
661
// AUIPC
612
662
uint32_t inst2 ;
613
663
664
+ inst |= (uint32_t )buffer [i + 1 ] << 8 ;
665
+ inst |= (uint32_t )buffer [i + 2 ] << 16 ;
666
+ inst |= (uint32_t )buffer [i + 3 ] << 24 ;
667
+
614
668
if (inst & 0xE80 ) {
615
669
// AUIPC's rd doesn't equal x0 or x2.
616
670
0 commit comments