Skip to content

Added initial support for Bytes type #2831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions integration_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -595,6 +595,7 @@ RUN(NAME test_set_discard LABELS cpython llvm llvm_jit)
RUN(NAME test_set_from_list LABELS cpython llvm llvm_jit)
RUN(NAME test_set_clear LABELS cpython llvm)
RUN(NAME test_set_pop LABELS cpython llvm)
RUN(NAME test_bytes_01 LABELS cpython llvm llvm_jit)
RUN(NAME test_global_set LABELS cpython llvm llvm_jit)
RUN(NAME test_for_loop LABELS cpython llvm llvm_jit c)
RUN(NAME modules_01 LABELS cpython llvm llvm_jit c wasm wasm_x86 wasm_x64)
20 changes: 20 additions & 0 deletions integration_tests/test_bytes_01.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
def f():
    # Declares three local `bytes` variables initialised from literals:
    # two single-line literals and one triple-quoted, multi-line literal.
    # Nothing is returned; the function only exercises declaration and
    # initialisation of `bytes` values.
    a: bytes = b"This is a test string"
    b: bytes = b"This is another test string"
    # The continuation lines of the literal below start at column 0 on
    # purpose: any leading whitespace would become part of the bytes value.
    c: bytes = b"""Bigger test string with docstrings
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
eiusmod tempor incididunt ut labore et dolore magna aliqua. """


def g(a: bytes) -> bytes:
    # Identity function: returns its `bytes` argument unchanged, exercising
    # `bytes` as both a parameter type and a return type.
    return a


def h() -> bytes:
    # Declares a `bytes` variable without an initialiser, assigns it the
    # result of calling `g` with a bytes literal, then returns a separate
    # bytes literal that contains an escaped backslash.
    bar: bytes
    # `bar` is not read afterwards; the assignment itself is what is being
    # exercised (a call returning `bytes` stored into a declared variable).
    bar = g(b"fiwabcd")
    return b"12jw19\\xq0"


# Script entry: run both bytes scenarios at import time; return values are discarded.
f()
h()
2 changes: 2 additions & 0 deletions src/libasr/ASR.asdl
Original file line number Diff line number Diff line change
@@ -140,6 +140,7 @@ expr
| StringOrd(expr arg, ttype type, expr? value)
| StringChr(expr arg, ttype type, expr? value)
| StringFormat(expr fmt, expr* args, string_format_kind kind, ttype type, expr? value)
| BytesConstant(string s, ttype type)
| CPtrCompare(expr left, cmpop op, expr right, ttype type, expr? value)
| SymbolicCompare(expr left, cmpop op, expr right, ttype type, expr? value)
| DictConstant(expr* keys, expr* values, ttype type)
@@ -198,6 +199,7 @@ ttype
| Real(int kind)
| Complex(int kind)
| Character(int kind, int len, expr? len_expr)
| Byte(int kind, int len, expr? len_expr)
| Logical(int kind)
| Set(ttype type)
| List(ttype type)
3 changes: 2 additions & 1 deletion src/libasr/asdl_cpp.py
Original file line number Diff line number Diff line change
@@ -2,8 +2,9 @@
Generate C++ AST node definitions from an ASDL description.
"""

import sys
import os
import sys

import asdl


42 changes: 41 additions & 1 deletion src/libasr/asr_utils.h
Original file line number Diff line number Diff line change
@@ -207,6 +207,9 @@ static inline int extract_kind_from_ttype_t(const ASR::ttype_t* type) {
case ASR::ttypeType::Character: {
return ASR::down_cast<ASR::Character_t>(type)->m_kind;
}
case ASR::ttypeType::Byte: {
return ASR::down_cast<ASR::Byte_t>(type)->m_kind;
}
case ASR::ttypeType::Logical: {
return ASR::down_cast<ASR::Logical_t>(type)->m_kind;
}
@@ -251,6 +254,10 @@ static inline void set_kind_to_ttype_t(ASR::ttype_t* type, int kind) {
ASR::down_cast<ASR::Character_t>(type)->m_kind = kind;
break;
}
case ASR::ttypeType::Byte: {
ASR::down_cast<ASR::Byte_t>(type)->m_kind = kind;
break;
}
case ASR::ttypeType::Logical: {
ASR::down_cast<ASR::Logical_t>(type)->m_kind = kind;
break;
@@ -542,6 +549,9 @@ static inline std::string type_to_str(const ASR::ttype_t *t)
case ASR::ttypeType::Character: {
return "character";
}
case ASR::ttypeType::Byte: {
return "byte";
}
case ASR::ttypeType::Tuple: {
return "tuple";
}
@@ -990,7 +1000,8 @@ static inline bool is_value_constant(ASR::expr_t *a_value) {
case ASR::exprType::ImpliedDoLoop:
case ASR::exprType::PointerNullConstant:
case ASR::exprType::ArrayConstant:
case ASR::exprType::StringConstant: {
case ASR::exprType::StringConstant:
case ASR::exprType::BytesConstant: {
return true;
}
case ASR::exprType::RealBinOp:
@@ -1421,6 +1432,9 @@ static inline std::string get_type_code(const ASR::ttype_t *t, bool use_undersco
case ASR::ttypeType::Character: {
return "str";
}
case ASR::ttypeType::Byte: {
return "bytes";
}
case ASR::ttypeType::Tuple: {
ASR::Tuple_t *tup = ASR::down_cast<ASR::Tuple_t>(t);
std::string result = "tuple";
@@ -1608,6 +1622,9 @@ static inline std::string type_to_str_python(const ASR::ttype_t *t,
case ASR::ttypeType::Character: {
return "str";
}
case ASR::ttypeType::Byte: {
return "bytes";
}
case ASR::ttypeType::Tuple: {
ASR::Tuple_t *tup = ASR::down_cast<ASR::Tuple_t>(t);
std::string result = "tuple[";
@@ -2148,6 +2165,7 @@ inline size_t extract_dimensions_from_ttype(ASR::ttype_t *x,
case ASR::ttypeType::Real:
case ASR::ttypeType::Complex:
case ASR::ttypeType::Character:
case ASR::ttypeType::Byte:
case ASR::ttypeType::Logical:
case ASR::ttypeType::StructType:
case ASR::ttypeType::Enum:
@@ -2419,6 +2437,7 @@ inline bool ttype_set_dimensions(ASR::ttype_t** x,
case ASR::ttypeType::Real:
case ASR::ttypeType::Complex:
case ASR::ttypeType::Character:
case ASR::ttypeType::Byte:
case ASR::ttypeType::Logical:
case ASR::ttypeType::StructType:
case ASR::ttypeType::Enum:
@@ -2540,6 +2559,12 @@ static inline ASR::ttype_t* duplicate_type(Allocator& al, const ASR::ttype_t* t,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
break;
}
case ASR::ttypeType::Byte: {
ASR::Byte_t* tnew = ASR::down_cast<ASR::Byte_t>(t);
t_ = ASRUtils::TYPE(ASR::make_Byte_t(al, t->base.loc,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
break;
}
case ASR::ttypeType::StructType: {
ASR::StructType_t* tnew = ASR::down_cast<ASR::StructType_t>(t);
t_ = ASRUtils::TYPE(ASR::make_StructType_t(al, t->base.loc,
@@ -2696,6 +2721,11 @@ static inline ASR::ttype_t* duplicate_type_without_dims(Allocator& al, const ASR
return ASRUtils::TYPE(ASR::make_Character_t(al, loc,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
}
case ASR::ttypeType::Byte: {
ASR::Byte_t* tnew = ASR::down_cast<ASR::Byte_t>(t);
return ASRUtils::TYPE(ASR::make_Byte_t(al, loc,
tnew->m_kind, tnew->m_len, tnew->m_len_expr));
}
case ASR::ttypeType::StructType: {
ASR::StructType_t* tstruct = ASR::down_cast<ASR::StructType_t>(t);
return ASRUtils::TYPE(ASR::make_StructType_t(al, t->base.loc,
@@ -3123,6 +3153,11 @@ inline bool types_equal(ASR::ttype_t *a, ASR::ttype_t *b,
ASR::Character_t *b2 = ASR::down_cast<ASR::Character_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::Byte) : {
ASR::Byte_t *a2 = ASR::down_cast<ASR::Byte_t>(a);
ASR::Byte_t *b2 = ASR::down_cast<ASR::Byte_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::List) : {
ASR::List_t *a2 = ASR::down_cast<ASR::List_t>(a);
ASR::List_t *b2 = ASR::down_cast<ASR::List_t>(b);
@@ -3306,6 +3341,11 @@ inline bool types_equal_with_substitution(ASR::ttype_t *a, ASR::ttype_t *b,
ASR::Character_t *b2 = ASR::down_cast<ASR::Character_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::Byte) : {
ASR::Byte_t *a2 = ASR::down_cast<ASR::Byte_t>(a);
ASR::Byte_t *b2 = ASR::down_cast<ASR::Byte_t>(b);
return (a2->m_kind == b2->m_kind);
}
case (ASR::ttypeType::List) : {
ASR::List_t *a2 = ASR::down_cast<ASR::List_t>(a);
ASR::List_t *b2 = ASR::down_cast<ASR::List_t>(b);
91 changes: 89 additions & 2 deletions src/libasr/codegen/asr_to_llvm.cpp
Original file line number Diff line number Diff line change
@@ -82,6 +82,23 @@ void string_init(llvm::LLVMContext &context, llvm::Module &module,
builder.CreateCall(fn, args);
}

/// Emits a call to the runtime routine `_lfortran_bytes_init`, passing
/// `arg_size` and `arg_bytes` as its two arguments.
///
/// The callee is looked up in `module` by name. When no declaration exists
/// yet, one is created with external linkage and the variadic signature
/// `void (i32, i8*, ...)`; later calls reuse that declaration.
///
/// @param context   LLVM context used to obtain the parameter/return types.
/// @param module    Module searched for (and, if needed, given) the declaration.
/// @param builder   IR builder at whose insertion point the call is emitted.
/// @param arg_size  First argument forwarded to the runtime routine.
/// @param arg_bytes Second argument forwarded to the runtime routine.
void bytes_init(llvm::LLVMContext &context, llvm::Module &module,
        llvm::IRBuilder<> &builder, llvm::Value* arg_size, llvm::Value* arg_bytes) {
    const std::string runtime_name = "_lfortran_bytes_init";
    llvm::Function *callee = module.getFunction(runtime_name);
    if (callee == nullptr) {
        // First use in this module: declare the runtime function.
        std::vector<llvm::Type*> param_types = {
            llvm::Type::getInt32Ty(context),
            llvm::Type::getInt8PtrTy(context)
        };
        llvm::FunctionType *signature = llvm::FunctionType::get(
            llvm::Type::getVoidTy(context), param_types, /*isVarArg=*/true);
        callee = llvm::Function::Create(signature,
            llvm::Function::ExternalLinkage, runtime_name, module);
    }
    std::vector<llvm::Value*> call_args;
    call_args.push_back(arg_size);
    call_args.push_back(arg_bytes);
    builder.CreateCall(callee, call_args);
}

class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
{
private:
@@ -143,7 +160,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
bool prototype_only;
llvm::StructType *complex_type_4, *complex_type_8;
llvm::StructType *complex_type_4_ptr, *complex_type_8_ptr;
llvm::PointerType *character_type;
llvm::PointerType *character_type, *byte_type;
llvm::PointerType *list_type;
std::vector<std::string> struct_type_stack;

@@ -910,6 +927,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
complex_type_4_ptr = llvm_utils->complex_type_4_ptr;
complex_type_8_ptr = llvm_utils->complex_type_8_ptr;
character_type = llvm_utils->character_type;
byte_type = llvm_utils->character_type;
list_type = llvm::Type::getInt8PtrTy(context);

llvm::Type* bound_arg = static_cast<llvm::Type*>(arr_descr->get_dimension_descriptor_type(true));
@@ -948,7 +966,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
prototype_only = false;

// TODO: handle dependencies across modules and main program

;
// Then do all the modules in the right order
std::vector<std::string> build_order
= determine_module_dependencies(x);
@@ -2879,6 +2897,25 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
}
}
llvm_symtab[h] = ptr;
} else if (x.m_type->type == ASR::ttypeType::Byte) {
llvm::Constant *ptr = module->getOrInsertGlobal(x.m_name,
character_type);
if (!external) {
if (init_value) {
module->getNamedGlobal(x.m_name)->setInitializer(
init_value);
} else {
module->getNamedGlobal(x.m_name)->setInitializer(
llvm::Constant::getNullValue(character_type)
);
ASR::Byte_t *t = down_cast<ASR::Byte_t>(x.m_type);
if( t->m_len >= 0 ) {
strings_to_be_allocated.insert(std::pair(ptr, llvm::ConstantInt::get(
context, llvm::APInt(32, t->m_len+1))));
}
}
}
llvm_symtab[h] = ptr;
} else if( x.m_type->type == ASR::ttypeType::CPtr ) {
llvm::Type* void_ptr = llvm::Type::getVoidTy(context)->getPointerTo();
llvm::Constant *ptr = module->getOrInsertGlobal(x.m_name,
@@ -3889,6 +3926,36 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
} else {
throw CodeGenError("Unsupported len value in ASR " + std::to_string(strlen));
}
} else if (is_a<ASR::Byte_t>(*v->m_type) && !is_array_type && !is_list) {
ASR::Byte_t *t = down_cast<ASR::Byte_t>(v->m_type);
target_var = ptr;
int byte_len = t->m_len;
if (byte_len >= 0 || byte_len == -3) {
llvm::Value *arg_size;
if (byte_len == -3) {
LCOMPILERS_ASSERT(t->m_len_expr)
this->visit_expr(*t->m_len_expr);
arg_size = builder->CreateAdd(builder->CreateSExtOrTrunc(tmp,
llvm::Type::getInt32Ty(context)),
llvm::ConstantInt::get(context, llvm::APInt(32, 1)) );
} else {
// Compile time length
arg_size = llvm::ConstantInt::get(context,
llvm::APInt(32, byte_len+1));
}
llvm::Value *init_value = LLVM::lfortran_malloc(context, *module, *builder, arg_size);
string_init(context, *module, *builder, arg_size, init_value);
builder->CreateStore(init_value, target_var);
if (v->m_intent == intent_local) {
strings_to_be_deallocated.push_back(al, CreateLoad(target_var));
}
} else if (byte_len == -2) {
// Allocatable string. Initialize to `nullptr` (unallocated)
llvm::Value *init_value = llvm::Constant::getNullValue(type);
builder->CreateStore(init_value, target_var);
} else {
throw CodeGenError("Unsupported bytes len value in ASR " + std::to_string(byte_len));
}
} else if (is_list) {
ASR::List_t* asr_list = ASR::down_cast<ASR::List_t>(v->m_type);
std::string type_code = ASRUtils::get_type_code(asr_list->m_type);
@@ -7072,6 +7139,10 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
tmp = builder->CreateGlobalStringPtr(x.m_s);
}

// Lowers a bytes literal: passes the literal's text `x.m_s` to
// CreateGlobalStringPtr and leaves the resulting pointer in `tmp`,
// where the visitor keeps the value of the expression it just visited.
void visit_BytesConstant(const ASR::BytesConstant_t &x) {
    auto *literal_ptr = builder->CreateGlobalStringPtr(x.m_s);
    tmp = literal_ptr;
}

inline void fetch_ptr(ASR::Variable_t* x) {
uint32_t x_h = get_hash((ASR::asr_t*)x);
LCOMPILERS_ASSERT(llvm_symtab.find(x_h) != llvm_symtab.end());
@@ -7128,6 +7199,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
case ASR::ttypeType::Complex:
case ASR::ttypeType::StructType:
case ASR::ttypeType::Character:
case ASR::ttypeType::Byte:
case ASR::ttypeType::Logical:
case ASR::ttypeType::Class: {
if( t2->type == ASR::ttypeType::StructType ) {
@@ -8848,6 +8920,21 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
target_type = character_type;
break;
}
case (ASR::ttypeType::Byte) : {
ASR::Variable_t *orig_arg = nullptr;
if( func_subrout->type == ASR::symbolType::Function ) {
ASR::Function_t* func = down_cast<ASR::Function_t>(func_subrout);
orig_arg = ASRUtils::EXPR2VAR(func->m_args[i]);
} else {
throw CodeGenError("ICE: expected func_subrout->type == ASR::symbolType::Function.");
}
if (orig_arg->m_abi == ASR::abiType::BindC) {
character_bindc = true;
}

target_type = character_type;
break;
}
case (ASR::ttypeType::Logical) :
target_type = llvm::Type::getInt1Ty(context);
break;
Loading