Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ default = ["std"]
# implementation uses C intrinsics and requires a C compiler.
neon = []

# The RVV (RISC-V Vector) implementation does not participate in dynamic feature
# detection. If "rvv" is on, RVV support is assumed. The RVV implementation uses
# C intrinsics and requires a C compiler with RVV support.
rvv = []

# The Wasm SIMD implementation does not participate in dynamic feature detection,
# which is currently x86-only. If "wasm_simd" is on, Wasm SIMD support is assumed.
# Note that not all Wasm implementations support the Wasm SIMD specification.
Expand Down
1 change: 1 addition & 0 deletions b3sum/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ edition = "2024"

[features]
neon = ["blake3/neon"]
rvv = ["blake3/rvv"]
prefer_intrinsics = ["blake3/prefer_intrinsics"]
pure = ["blake3/pure"]

Expand Down
130 changes: 130 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,113 @@ fn is_wasm32() -> bool {
target_components()[0] == "wasm32"
}

fn is_riscv64() -> bool {
target_components()[0].starts_with("riscv64")
}

fn is_cross_compiling() -> bool {
let host = env::var("HOST").unwrap();
let target = env::var("TARGET").unwrap();
host != target
}

fn test_rvv_runtime_support() -> bool {
use std::fs;
use std::io::Write;
use std::process::Command;

let out_dir = env::var("OUT_DIR").unwrap();
let test_c = format!("{}/test_rvv.c", out_dir);
let test_bin = format!("{}/test_rvv", out_dir);

let test_code = b"
#include <riscv_vector.h>
#include <stdint.h>

int main() {
size_t vl = __riscv_vsetvlmax_e32m1();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a single intrinsic. Does it cover all extensions used by this implementation or maybe more of them need to be added?

Copy link
Author

@MahnoKropotkinvich MahnoKropotkinvich Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This time all functions used in the arch specific code are tested.

if (vl == 0) return 1;

vuint32m1_t v32_a = __riscv_vmv_v_x_u32m1(42, vl);
vuint32m1_t v32_b = __riscv_vmv_v_x_u32m1(17, vl);
vuint32m1_t v32_sum = __riscv_vadd_vv_u32m1(v32_a, v32_b, vl);
vuint32m1_t v32_xor = __riscv_vxor_vv_u32m1(v32_a, v32_b, vl);
vuint32m1_t v32_or = __riscv_vor_vv_u32m1(v32_a, v32_b, vl);
vuint32m1_t v32_srl = __riscv_vsrl_vx_u32m1(v32_a, 4, vl);
vuint32m1_t v32_sll = __riscv_vsll_vx_u32m1(v32_a, 4, vl);

uint32_t data32[16] = {0};
vuint32m1_t v32_loaded = __riscv_vle32_v_u32m1(data32, vl);
__riscv_vsse32_v_u32m1(data32, sizeof(uint32_t) * 2, v32_sum, vl);

uint64_t data64[16] = {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60};
vuint64m2_t v64 = __riscv_vle64_v_u64m2(data64, vl);
vuint64m2_t v64_add = __riscv_vadd_vx_u64m2(v64, 100, vl);

uint32_t src_data[16] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160};
vuint32m1_t v32_indexed = __riscv_vluxei64_v_u32m1((const uint32_t *)src_data, v64, vl);

uint32_t result_sum[16] = {0};
uint32_t result_indexed[16] = {0};
__riscv_vse32_v_u32m1(result_sum, v32_sum, vl);
__riscv_vse32_v_u32m1(result_indexed, v32_indexed, vl);

if (result_sum[0] != 59) return 2;
if (result_indexed[0] != 10) return 3;
if (vl > 1 && result_indexed[1] != 20) return 4;

return 0;
}
";

let mut f = match fs::File::create(&test_c) {
Ok(f) => f,
Err(e) => {
println!("cargo:warning=Failed to create RVV test file: {}", e);
return false;
}
};

if let Err(e) = f.write_all(test_code) {
println!("cargo:warning=Failed to write RVV test file: {}", e);
return false;
}

drop(f);

let mut build = cc::Build::new();
build.flag("-march=rv64gcv");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is certainly testing something, but it is testing rv64gcv rather than the actual target the code will be compiled for. For example, rv64imv should theoretically work too, and maybe even rv64izve64x is sufficient.

Even though it is not possible to check for vector extension even in Nightly Rust right now, this will serve as a piece of documentation for things actually used in the code. Intrinsics above are already good, but actual extensions will be even better.

let compiler = build.get_compiler();

let mut cmd = compiler.to_command();
cmd.arg(&test_c).arg("-o").arg(&test_bin);

if !cmd.status().map(|s| s.success()).unwrap_or(false) {
return false;
}

match Command::new(&test_bin).output() {
Ok(output) => output.status.success(),
Err(_) => false,
}
}

fn is_rvv() -> bool {
if defined("CARGO_FEATURE_RVV") {
return true;
}

if is_pure() {
return false;
}

if !is_cross_compiling() && is_riscv64() && is_little_endian() {
return test_rvv_runtime_support();
}

false
}
Comment on lines +102 to +207
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is very limited, I'd be nicer to detect the availability of extensions rather than parsing target triple for just a few known good values. I have a custom target that is called riscv64-unknown-none-abundance and it will support vector extension in the future, but this feature detection will be unable to take advantage of it automatically.

The fact that override exists helps, but it'd be much nicer if feature detection just worked out of the box.


fn endianness() -> String {
let endianness = env::var("CARGO_CFG_TARGET_ENDIAN").unwrap();
assert!(endianness == "little" || endianness == "big");
Expand Down Expand Up @@ -300,6 +407,14 @@ fn build_wasm32_simd() {
println!("cargo:rustc-cfg=blake3_wasm32_simd");
}

fn build_rvv_c_intrinsics() {
let mut build = new_build();
build.file("c/blake3_rvv.c");
build.flag("-march=rv64gcv");
build.define("BLAKE3_USE_RVV", "1");
build.compile("blake3_rvv");
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
// As of Rust 1.80, unrecognized config names are warnings. Give Cargo all of our config names.
let all_cfgs = [
Expand All @@ -312,6 +427,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
"blake3_avx512_ffi",
"blake3_neon",
"blake3_wasm32_simd",
"blake3_rvv",
];
for cfg_name in all_cfgs {
// TODO: Switch this whole file to the new :: syntax when our MSRV reaches 1.77.
Expand All @@ -327,6 +443,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
panic!("It doesn't make sense to enable both \"no_neon\" and \"neon\".");
}

if is_pure() && is_rvv() {
panic!("It doesn't make sense to enable both \"pure\" and \"rvv\".");
}

if is_x86_64() || is_x86_32() {
let support = c_compiler_support();
if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
Expand Down Expand Up @@ -361,6 +481,16 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
build_wasm32_simd();
}

if is_rvv() && is_big_endian() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_rvv() should check for endianness too in the autodetection case. Otherwise it'll be impossible to compile for big-endian RISC-V targets, however uncommon they are.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay.

panic!("The RVV implementation doesn't support big-endian RISC-V.")
}

// Enable RVV if explicitly requested via feature flag
if is_riscv64() && is_rvv() {
println!("cargo:rustc-cfg=blake3_rvv");
build_rvv_c_intrinsics();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it should always build RVV support unless pure feature is selected. When RVV support is detected during compile time corresponding implementation will be used unconditionally, but if not, I think it still makes sense to do runtime CPU feature detection unless it is not possible.

Copy link
Author

@MahnoKropotkinvich MahnoKropotkinvich Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, although RISC-V does provide an x86 cpuid-like functionality, it's M-mode only (misa register). Or we can query the OS for supported ISAs, but the querying API varies by OS. On Linux, it's getauxval(AT_HWCAP), on FreeBSD, it's elf_aux_info. And I think we shouldn't assume that BLAKE3 won't run on bare metal, so runtime ISA dispatching should account for bare metal, Linux, and FreeBSD environments—it's too complicated.
See also: https://news.ycombinator.com/item?id=24002931

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. And target_feature in Rust doesn't seem to expose v even in Nightly. Very unfortunate.

That said, you could still conditionally check for features in OS-specific way when compiled for the OS.


// The `cc` crate doesn't automatically emit rerun-if directives for the
// environment variables it supports, in particular for $CC. We expect to
// do a lot of benchmarking across different compilers, so we explicitly
Expand Down
96 changes: 95 additions & 1 deletion c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ add_subdirectory(dependencies)
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
set(BLAKE3_X86_NAMES i686 x86 X86)
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
set(BLAKE3_RISCV64_NAMES riscv64 RISCV64 rv64)
# default SIMD compiler flag configuration (can be overriden by toolchains or CLI)
if(MSVC)
set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
Expand Down Expand Up @@ -76,9 +77,80 @@ elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
# 32-bit ARMv8 needs NEON to be enabled explicitly
set(BLAKE3_CFLAGS_NEON "-mfpu=neon" CACHE STRING "the compiler flags to enable NEON")
endif()

if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_RISCV64_NAMES)
if(DEFINED BLAKE3_CFLAGS_RVV)
message(STATUS "Using user-specified RVV flags: ${BLAKE3_CFLAGS_RVV}")
elseif(NOT CMAKE_CROSSCOMPILING)
message(STATUS "Native RISC-V build: testing RVV runtime support...")

file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_rvv.c "
#include <riscv_vector.h>
#include <stdint.h>

int main() {
size_t vl = __riscv_vsetvlmax_e32m1();
if (vl == 0) return 1;

vuint32m1_t v32_a = __riscv_vmv_v_x_u32m1(42, vl);
vuint32m1_t v32_b = __riscv_vmv_v_x_u32m1(17, vl);
vuint32m1_t v32_sum = __riscv_vadd_vv_u32m1(v32_a, v32_b, vl);
vuint32m1_t v32_xor = __riscv_vxor_vv_u32m1(v32_a, v32_b, vl);
vuint32m1_t v32_or = __riscv_vor_vv_u32m1(v32_a, v32_b, vl);
vuint32m1_t v32_srl = __riscv_vsrl_vx_u32m1(v32_a, 4, vl);
vuint32m1_t v32_sll = __riscv_vsll_vx_u32m1(v32_a, 4, vl);

uint32_t data32[16] = {0};
vuint32m1_t v32_loaded = __riscv_vle32_v_u32m1(data32, vl);
__riscv_vsse32_v_u32m1(data32, sizeof(uint32_t) * 2, v32_sum, vl);

uint64_t data64[16] = {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60};
vuint64m2_t v64 = __riscv_vle64_v_u64m2(data64, vl);
vuint64m2_t v64_add = __riscv_vadd_vx_u64m2(v64, 100, vl);

uint32_t src_data[16] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160};
vuint32m1_t v32_indexed = __riscv_vluxei64_v_u32m1((const uint32_t *)src_data, v64, vl);

uint32_t result_sum[16] = {0};
uint32_t result_indexed[16] = {0};
__riscv_vse32_v_u32m1(result_sum, v32_sum, vl);
__riscv_vse32_v_u32m1(result_indexed, v32_indexed, vl);

if (result_sum[0] != 59) return 2;
if (result_indexed[0] != 10) return 3;
if (vl > 1 && result_indexed[1] != 20) return 4;

return 0;
}
")

try_run(
BLAKE3_RVV_RUN_RESULT
BLAKE3_RVV_COMPILE_RESULT
${CMAKE_CURRENT_BINARY_DIR}/try_rvv
${CMAKE_CURRENT_BINARY_DIR}/test_rvv.c
COMPILE_DEFINITIONS -march=rv64gcv
COMPILE_OUTPUT_VARIABLE BLAKE3_RVV_COMPILE_OUTPUT
)

if(BLAKE3_RVV_COMPILE_RESULT AND BLAKE3_RVV_RUN_RESULT EQUAL 0)
set(BLAKE3_CFLAGS_RVV "-march=rv64gcv" CACHE STRING "the compiler flags to enable RVV")
message(STATUS "RVV runtime support verified")
elseif(BLAKE3_RVV_COMPILE_RESULT)
message(STATUS "RVV compiled but failed to run (CPU doesn't support it)")
else()
message(STATUS "RVV compilation failed")
endif()
else()
message(WARNING
"Cross-compiling for RISC-V: Cannot auto-detect RVV support.\n"
"RVV will be DISABLED. To enable, specify:\n"
" cmake -DBLAKE3_CFLAGS_RVV=\"-march=rv64gcv\" ..")
endif()
endif()
endif()

mark_as_advanced(BLAKE3_CFLAGS_SSE2 BLAKE3_CFLAGS_SSE4.1 BLAKE3_CFLAGS_AVX2 BLAKE3_CFLAGS_AVX512 BLAKE3_CFLAGS_NEON)
mark_as_advanced(BLAKE3_CFLAGS_SSE2 BLAKE3_CFLAGS_SSE4.1 BLAKE3_CFLAGS_AVX2 BLAKE3_CFLAGS_AVX512 BLAKE3_CFLAGS_NEON BLAKE3_CFLAGS_RVV)
mark_as_advanced(BLAKE3_AMD64_ASM_SOURCES)

message(STATUS "BLAKE3 SIMD configuration: ${CMAKE_C_COMPILER_ARCHITECTURE_ID}")
Expand Down Expand Up @@ -113,6 +185,10 @@ elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
OR CMAKE_SIZEOF_VOID_P EQUAL 8))
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_RISCV64_NAMES
AND DEFINED BLAKE3_CFLAGS_RVV)
set(BLAKE3_SIMD_TYPE "rvv-intrinsics" CACHE STRING "the SIMD acceleration type to use")

else()
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
endif()
Expand Down Expand Up @@ -205,9 +281,26 @@ elseif(BLAKE3_SIMD_TYPE STREQUAL "neon-intrinsics")
set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
endif()

elseif(BLAKE3_SIMD_TYPE STREQUAL "rvv-intrinsics")
set(BLAKE3_SIMD_RVV_INTRINSICS ON)

target_sources(blake3 PRIVATE
blake3_rvv.c
)
target_compile_definitions(blake3 PRIVATE
BLAKE3_USE_RVV=1
)

if (DEFINED BLAKE3_CFLAGS_RVV)
set_source_files_properties(blake3_rvv.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_RVV}")
# blake3_dispatch.c also needs RVV flags to call __riscv_vsetvlmax_e32m1()
set_source_files_properties(blake3_dispatch.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_RVV}")
endif()

elseif(BLAKE3_SIMD_TYPE STREQUAL "none")
target_compile_definitions(blake3 PRIVATE
BLAKE3_USE_NEON=0
BLAKE3_USE_RVV=0
BLAKE3_NO_SSE2
BLAKE3_NO_SSE41
BLAKE3_NO_AVX2
Expand Down Expand Up @@ -365,6 +458,7 @@ install(FILES "${CMAKE_BINARY_DIR}/libblake3.pc"
add_feature_info("AMD64 assembly" BLAKE3_SIMD_AMD64_ASM "The library uses hand written amd64 SIMD assembly.")
add_feature_info("x86 SIMD intrinsics" BLAKE3_SIMD_X86_INTRINSICS "The library uses x86 SIMD intrinsics.")
add_feature_info("NEON SIMD intrinsics" BLAKE3_SIMD_NEON_INTRINSICS "The library uses NEON SIMD intrinsics.")
add_feature_info("RVV SIMD intrinsics" BLAKE3_SIMD_RVV_INTRINSICS "The library uses RISC-V Vector Extension (RVV) intrinsics.")
add_feature_info("oneTBB parallelism" BLAKE3_USE_TBB "The library uses oneTBB parallelism.")
feature_summary(WHAT ENABLED_FEATURES)

Expand Down
14 changes: 14 additions & 0 deletions c/blake3_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#endif
#endif

#if BLAKE3_USE_RVV == 1
#include <riscv_vector.h>
#endif

#if !defined(BLAKE3_ATOMICS)
#if defined(__has_include)
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
Expand Down Expand Up @@ -294,6 +298,12 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
return;
#endif

#if BLAKE3_USE_RVV == 1
blake3_hash_many_rvv(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
#endif

blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
Expand Down Expand Up @@ -327,6 +337,10 @@ size_t blake3_simd_degree(void) {
#endif
#if BLAKE3_USE_NEON == 1
return 4;
#endif
#if BLAKE3_USE_RVV == 1
// RVV vector length is dynamic, query it at runtime.
return __riscv_vsetvlmax_e32m1();
#endif
return 1;
}
10 changes: 10 additions & 0 deletions c/blake3_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ enum blake3_flags {
#define MAX_SIMD_DEGREE 16
#elif BLAKE3_USE_NEON == 1
#define MAX_SIMD_DEGREE 4
#elif BLAKE3_USE_RVV == 1
#define MAX_SIMD_DEGREE 64
#else
#define MAX_SIMD_DEGREE 1
#endif
Expand Down Expand Up @@ -326,6 +328,14 @@ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
uint8_t flags_end, uint8_t *out);
#endif

#if BLAKE3_USE_RVV == 1
void blake3_hash_many_rvv(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#endif

#ifdef __cplusplus
}
#endif
Expand Down
Loading