Skip to content

Commit 7efa421

Browse files
committed
Fast, SIMD-accelerated Unicode & base64 processing
Unicode (UTF8, UTF16, UTF32) and Base64 processing at billions of characters per second in PHP using the simdutf library. https://github.com/simdutf/simdutf
0 parents  commit 7efa421

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+78892
-0
lines changed

.github/workflows/integration.yml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
name: Tests
2+
on: [push, pull_request, workflow_dispatch]
3+
4+
jobs:
5+
Ubuntu:
6+
strategy:
7+
fail-fast: false
8+
matrix:
9+
php-version:
10+
- "8.1"
11+
- "8.2"
12+
- "8.3"
13+
- "8.4"
14+
os: [ubuntu-latest]
15+
experimental: [false]
16+
runs-on: ${{ matrix.os }}
17+
name: Ubuntu (${{ matrix.php-version }})
18+
continue-on-error: ${{ matrix.experimental }}
19+
steps:
20+
- name: "Checkout"
21+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
22+
23+
- name: "Install PHP"
24+
uses: shivammathur/setup-php@9e72090525849c5e82e596468b86eb55e9cc5401 # v2.32.0
25+
with:
26+
php-version: "${{ matrix.php-version }}"
27+
extensions: mbstring
28+
# for correct php-config extension dir, see https://github.com/shivammathur/setup-php/issues/147
29+
tools: pecl, phpize, php-config
30+
31+
- name: "Build extension"
32+
# use 'php run-tests.php' instead of 'make test' to ensure all extensions are loaded from conf.d
33+
run: |
34+
export NO_INTERACTION=true
35+
export REPORT_EXIT_STATUS=1
36+
phpize
37+
./configure
38+
make -j$(nproc)
39+
sudo make install
40+
echo "extension=simdutf.so" | sudo tee /etc/php/${{ matrix.php-version }}/cli/conf.d/20-simdutf.ini
41+
php run-tests.php || exit 1
42+
43+
- name: "Show"
44+
run: "php -dextension=simdutf.so --ri simdutf"
45+
46+
- name: "Error log"
47+
if: ${{ failure() }}
48+
run: "ls -1t tests/*.log | xargs -d'\n' cat"
49+
50+
- name: "Error diff"
51+
if: ${{ failure() }}
52+
run: |
53+
for FILE in $(find tests -name '*.diff'); do
54+
echo $FILE
55+
cat $FILE
56+
echo
57+
done
58+
59+
# macOS job: builds the extension with phpize and runs the phpt test-suite
# against every supported PHP version.
macOS:
  runs-on: macos-latest
  continue-on-error: false
  strategy:
    fail-fast: false
    matrix:
      php: ['8.1', '8.2', '8.3', '8.4']
  steps:
    - name: Checkout
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

    - name: Install PHP ${{ matrix.php }}
      uses: shivammathur/setup-php@9e72090525849c5e82e596468b86eb55e9cc5401 # v2.32.0
      with:
        php-version: ${{ matrix.php }}
        coverage: none
        tools: none

    - name: Build simdutf
      # 'nproc' is a GNU coreutils tool and is not guaranteed to exist on macOS;
      # when it is missing the substitution is empty and make runs with an
      # unbounded '-j'. 'sysctl -n hw.logicalcpu' is the native equivalent.
      run: |
        phpize
        ./configure
        make -j"$(sysctl -n hw.logicalcpu)"

    - name: Run tests
      # 'make test' is ok on macOS since there's no conf.d directory to scan for INIs
      run: |
        export NO_INTERACTION=true
        export REPORT_EXIT_STATUS=1
        make test TESTS="--show-diff -j2"

.gitignore

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
*.swp
2+
*.swo
3+
*.tgz
4+
.deps
5+
acinclude.m4
6+
aclocal.m4
7+
autom4te.cache/
8+
build/
9+
config.h
10+
config.log
11+
config.nice
12+
config.status
13+
config.guess
14+
config.h.in
15+
config.sub
16+
configure
17+
configure.in
18+
install-sh
19+
ltmain.sh
20+
missing
21+
mkinstalldirs
22+
run-tests.php
23+
Makefile
24+
Makefile.*
25+
!Makefile.frag
26+
libtool
27+
*~
28+
modules/
29+
.libs/
30+
*.la
31+
*.lo
32+
*.dep
33+
php_test_results_*.txt
34+
!tests/**/*.phpt
35+
!tests/_files/
36+
*~
37+
configure.ac
38+
/cmake-*
39+
/run-php
40+
benchmark/vendor
41+
composer.lock
42+
.idea
43+
.DS_Store

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
## 1.0.0 - 2025-04-14
2+
- First release for Packagist

README.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
`awesome/simdutf`
2+
==================
3+
4+
[![Tests status](https://github.com/awesomized/simdutf-php-ext/workflows/Tests/badge.svg)](https://github.com/awesomized/simdutf-php-ext/actions?query=workflow%3ATests)
5+
[![Latest Stable Version](https://img.shields.io/packagist/v/awesome/simdutf)](https://packagist.org/packages/awesome/simdutf)
6+
7+
Unicode and Base64 routines at billions of characters per second in PHP using the [simdutf](https://github.com/simdutf/simdutf) library.
8+
9+
Accelerates Unicode routines (UTF8, UTF16, UTF32) and Base64 using SSE2, AVX2, NEON, AVX-512, RISC-V Vector Extension, LoongArch64, etc.
10+
11+
The `simdutf` library is already part of Node.js, Bun, WebKit, Chromium, Cloudflare Workers, and more. Now it can be used with PHP.
12+
13+
## Related SIMD-accelerated PHP extensions
14+
* [crc_fast](https://packagist.org/packages/awesome/crc_fast) PHP extension for SIMD-accelerated CRC calculations
15+
at >100GiB/s.
16+
* [simdjson_plus](https://packagist.org/packages/awesome/simdjson_plus) PHP extension for parsing gigabytes of JSON per second using the
17+
[simdjson](https://github.com/simdjson/simdjson) project.
18+
19+
## Changes
20+
21+
See the [change log](CHANGELOG.md).
22+
23+
## Installing
24+
25+
Use [PIE](https://github.com/php/pie) (the PHP Installer for Extensions, built on [Composer](https://getcomposer.org)) to install this extension:
26+
27+
```bash
28+
pie install awesome/simdutf
29+
```
30+
31+
## Building
32+
33+
Like most `PHP` extensions, you can also build it yourself:
34+
35+
```bash
36+
$ phpize
37+
$ ./configure
38+
$ make
39+
$ make test
40+
$ make install
41+
```
42+
43+
And add the following line to your `php.ini`:
44+
45+
```
46+
extension=simdutf.so
47+
```
48+
49+
## Usage
50+
51+
Supplies all the [simdutf API](https://github.com/simdutf/simdutf#api) functions as PHP functions in the `SimdUtf` namespace (e.g., `simdutf::validate_utf8()` becomes `\SimdUtf\validate_utf8()`).
52+
53+
See the [stubs file](simdutf.stub.php) for a complete list of functions.
54+
55+
## References
56+
* [simdutf](https://github.com/simdutf/simdutf) the `simdutf` library
57+
* [simdutf-rs](https://github.com/Nugine/simdutf-rs) the Rust bindings for `simdutf`

composer.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
    "name": "awesome/simdutf",
    "description": "Unicode routines (UTF8, UTF16, UTF32) and Base64 for PHP: billions of characters per second using SIMD - SSE2, AVX2, NEON, AVX-512, RISC-V Vector Extension, LoongArch64, POWER.",
    "type": "php-ext",
    "keywords": ["utf8", "utf16", "utf32", "base64", "simd"],
    "homepage": "https://github.com/awesomized/simdutf-php-ext",
    "license": [
        "Apache-2.0",
        "MIT"
    ],
    "authors": [
        {
            "name": "Don MacAskill",
            "homepage": "https://github.com/onethumb",
            "role": "Developer"
        }
    ],
    "support": {
        "issues": "https://github.com/awesomized/simdutf-php-ext/issues",
        "source": "https://github.com/awesomized/simdutf-php-ext"
    },
    "php-ext": {
        "extension-name": "simdutf"
    },
    "prefer-stable": true,
    "config": {
        "sort-packages": true
    },
    "extra": {
        "branch-alias": {
            "dev-main": "1.0.x-dev"
        }
    },
    "require": {
        "php": "^8.1"
    }
}

config.m4

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
dnl config.m4 for extension simdutf
2+
3+
PHP_ARG_ENABLE(simdutf, whether to enable simdutf, [ --enable-simdutf Enable simdutf])
4+
5+
if test "$PHP_SIMDUTF" != "no"; then
6+
7+
PHP_REQUIRE_CXX()
8+
9+
AC_MSG_CHECKING([PHP version])
10+
11+
if test -z "$PHP_CONFIG"; then
12+
AC_MSG_ERROR([php-config not found])
13+
fi
14+
php_version=`$PHP_CONFIG --vernum`
15+
16+
if test -z "$php_version"; then
17+
AC_MSG_ERROR([failed to detect PHP version, please report])
18+
fi
19+
20+
if test "$php_version" -lt "80100"; then
21+
AC_MSG_ERROR([You need at least PHP 8.1.0 to be able to use this version of simdutf. PHP $php_version found])
22+
else
23+
AC_MSG_RESULT([$php_version, ok])
24+
fi
25+
26+
dnl Mark symbols hidden by default if the compiler (for example, gcc >= 4)
27+
dnl supports it. This can help reduce the binary size and startup time.
28+
AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],
29+
[CXXFLAGS="$CXXFLAGS -fvisibility=hidden"])
30+
31+
AC_DEFINE(HAVE_SIMDUTF, 1, [whether simdutf is enabled])
32+
dnl PHP is written in C and loads this C++ module, so C++ exceptions must never escape into PHP; errors are handled manually.
33+
dnl NOTE: the flags passed below only select C++17 and enable the static TSRMLS cache; they do not disable exceptions or any simdutf development checks.
34+
PHP_NEW_EXTENSION(simdutf, [
35+
php_simdutf.cpp \
36+
src/php_base64.cpp \
37+
src/php_validate.cpp \
38+
src/php_count.cpp \
39+
src/php_convert_latin1.cpp \
40+
src/php_convert_utf8.cpp \
41+
src/php_convert_utf16.cpp \
42+
src/php_convert_utf32.cpp \
43+
src/php_length.cpp \
44+
src/php_endianness.cpp \
45+
src/php_trim.cpp \
46+
src/php_encoding.cpp \
47+
src/simdutf.cpp],
48+
$ext_shared,, "-std=c++17 -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1", cxx)
49+
50+
PHP_ADD_INCLUDE($ext_srcdir/src)
51+
52+
PHP_INSTALL_HEADERS([ext/simdutf], [php_simdutf.h])
53+
PHP_ADD_MAKEFILE_FRAGMENT
54+
PHP_ADD_BUILD_DIR(src, 1)
55+
fi

config.w32

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// config.w32 for extension simdutf (Windows build configuration).
ARG_ENABLE("simdutf", "whether to enable simdutf support", "no");

if (PHP_SIMDUTF == "yes") {
	AC_DEFINE('HAVE_SIMDUTF', 1, 'Have simdutf support', false);

	// NOTE: The linker requires that the files have different basenames, so simdutf.cpp was
	// renamed to php_simdutf.cpp
	EXTENSION('simdutf',
		'php_simdutf.cpp',
		'yes',
		'/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 /std:c++latest');

	// Keep this list in sync with the sources passed to PHP_NEW_EXTENSION in config.m4.
	ADD_SOURCES(configure_module_dirname + '/src',
		'php_base64.cpp php_validate.cpp php_count.cpp php_convert_latin1.cpp ' +
		'php_convert_utf8.cpp php_convert_utf16.cpp php_convert_utf32.cpp ' +
		'php_length.cpp php_endianness.cpp php_trim.cpp php_encoding.cpp simdutf.cpp',
		'simdutf');

	// Mirror PHP_ADD_INCLUDE($ext_srcdir/src) from config.m4 so that headers
	// living under src/ resolve from php_simdutf.cpp as well.
	ADD_FLAG('CFLAGS_SIMDUTF', '/I' + configure_module_dirname + ' /I' + configure_module_dirname + '/src');

	// Same header set that config.m4 installs.
	PHP_INSTALL_HEADERS('ext/simdutf', 'php_simdutf.h');
}
// vim:ft=javascript

php_simdutf.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
extern "C" {
2+
#ifdef HAVE_CONFIG_H
3+
#include "config.h"
4+
#endif
5+
6+
#include "php.h"
7+
#include "zend_exceptions.h"
8+
#include "main/SAPI.h"
9+
#include "ext/standard/info.h"
10+
11+
#include "php_simdutf.h"
12+
}
13+
14+
#include "simdutf.h"
15+
#include "simdutf_arginfo.h"
16+
17+
/*
 * Request startup handler.
 *
 * In a thread-safe (ZTS) build of a shared extension, the static TSRMLS cache
 * defined by ZEND_TSRMLS_CACHE_DEFINE() at the bottom of this file has to be
 * refreshed for the thread serving the request. The guard must use the same
 * COMPILE_DL_SIMDUTF macro that protects ZEND_TSRMLS_CACHE_DEFINE(); with any
 * other name the refresh is silently compiled out.
 *
 * Always returns SUCCESS.
 */
PHP_RINIT_FUNCTION(simdutf)
{
#if defined(ZTS) && defined(COMPILE_DL_SIMDUTF)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif
	return SUCCESS;
}
24+
25+
/*
 * Module info handler: prints the simdutf section of phpinfo() / `php --ri simdutf`.
 * Reports the extension version and support URL, the bundled simdutf library
 * version, and the name and description of the active simdutf implementation.
 */
PHP_MINFO_FUNCTION(simdutf)
{
	const simdutf::implementation *active = simdutf::get_active_implementation();

	/* Hold the strings in locals so the c_str() pointers stay valid while each row is printed. */
	const auto active_name = active->name();
	const auto active_description = active->description();

	php_info_print_table_start();
	php_info_print_table_row(2, "simdutf support", "enabled");
	php_info_print_table_row(2, "simdutf extension version", PHP_SIMDUTF_VERSION);
	php_info_print_table_row(2, "simdutf extension support", PHP_SIMDUTF_SUPPORT_URL);
	php_info_print_table_row(2, "simdutf library version", SIMDUTF_VERSION);
	php_info_print_table_row(2, "simdutf implementation name", active_name.c_str());
	php_info_print_table_row(2, "simdutf implementation description", active_description.c_str());
	php_info_print_table_end();
}
38+
39+
/* Module initialization */
40+
/*
 * Module startup handler: registers the SIMDUTF_ENCODING_* integer constants.
 * Always returns SUCCESS.
 */
PHP_MINIT_FUNCTION(simdutf)
{
	/* Every encoding constant is registered with the same flags. */
	const int encoding_flags = CONST_CS | CONST_PERSISTENT;

	REGISTER_LONG_CONSTANT("SIMDUTF_ENCODING_UNSPECIFIED", PHP_SIMDUTF_ENCODING_UNSPECIFIED, encoding_flags);
	REGISTER_LONG_CONSTANT("SIMDUTF_ENCODING_UTF8", PHP_SIMDUTF_ENCODING_UTF8, encoding_flags);
	REGISTER_LONG_CONSTANT("SIMDUTF_ENCODING_UTF16_LE", PHP_SIMDUTF_ENCODING_UTF16_LE, encoding_flags);
	REGISTER_LONG_CONSTANT("SIMDUTF_ENCODING_UTF16_BE", PHP_SIMDUTF_ENCODING_UTF16_BE, encoding_flags);
	REGISTER_LONG_CONSTANT("SIMDUTF_ENCODING_UTF32_LE", PHP_SIMDUTF_ENCODING_UTF32_LE, encoding_flags);
	REGISTER_LONG_CONSTANT("SIMDUTF_ENCODING_UTF32_BE", PHP_SIMDUTF_ENCODING_UTF32_BE, encoding_flags);

	return SUCCESS;
}
52+
53+
/* Module entry */
54+
zend_module_entry simdutf_module_entry = {
55+
STANDARD_MODULE_HEADER,
56+
"simdutf", /* extension name */
57+
ext_functions, /* function table; not defined in this file (presumably generated into simdutf_arginfo.h) */
58+
PHP_MINIT(simdutf), /* module startup: registers the SIMDUTF_ENCODING_* constants */
59+
NULL, /* no module shutdown handler */
60+
PHP_RINIT(simdutf), /* request startup */
61+
NULL, /* no request shutdown handler */
62+
PHP_MINFO(simdutf), /* phpinfo() section */
63+
PHP_SIMDUTF_VERSION, /* extension version string */
64+
STANDARD_MODULE_PROPERTIES
65+
};
66+
67+
/* Only when built as a dynamically loadable (shared) extension: define the static TSRMLS cache for ZTS builds and export get_module(). */
#ifdef COMPILE_DL_SIMDUTF
68+
#ifdef ZTS
69+
ZEND_TSRMLS_CACHE_DEFINE();
70+
#endif
71+
ZEND_GET_MODULE(simdutf)
72+
#endif

0 commit comments

Comments
 (0)