Espresso 0.0.2a

This commit is contained in:
2026-02-12 20:33:46 -06:00
parent c0dc95e255
commit 021fdbbcef
26 changed files with 452 additions and 27315 deletions

View File

@ -301,14 +301,14 @@ void print_lint(int64_t value)
void print_hex(uint32_t value, int width, bool uppercase)
{
const char* hex_chars = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
char buffer[9]; // 8 hex digits max for 32-bit
char buffer[9]; /* 8 hex digits max for 32-bit */
int i = 0;
do
{
buffer[i++] = hex_chars[value & 0xF];
value >>= 4;
} while (value || i < width); // ensure at least 'width' digits
} while (value || i < width); /* ensure at least 'width' digits */
while (i--)
{

View File

@ -172,9 +172,9 @@ char* strchr(const char* s, int c)
{
while (*s)
{
if (*s == (char)c)
if (*s == (char) c)
{
return (char*)s;
return (char*) s;
}
s++;
}
@ -182,6 +182,22 @@ char* strchr(const char* s, int c)
return NULL;
}
/*
 * Count how many characters of the string `s` compare equal to `c` once `c`
 * has been converted to char. Scanning stops at the first zero byte, which is
 * never counted, so asking for c == 0 yields 0.
 */
int num_strchr(const char* s, int c)
{
    const char target = (char) c;
    int count = 0;

    for (const char* p = s; *p != '\0'; ++p)
    {
        /* the comparison evaluates to 0 or 1, so add it straight in */
        count += (*p == target);
    }

    return count;
}
void* memset(void* dst, int c, size_t n)
{
@ -201,10 +217,10 @@ void* memset(void* dst, int c, size_t n)
void* memcpy(void *dst, const void *src, uint32_t n)
{
/*if (sse_initialized > 1)
if (sse_initialized > 1)
{
return sse2_memcpy(dst, src, n);
}*/
}
char *d = dst;
const char *s = src;

View File

@ -1,6 +1,8 @@
#include <types.h>
#include <stdio.h>
#include <emmintrin.h> /* SSE2 intrinsics, TODO: use these in all functions, currently only used in memclr_sse2 */
#include <vector_extensions/sse.h>
@ -20,7 +22,7 @@ void enable_sse(void)
}
/* Basic SSE test: add two arrays of 4 floats using xmm registers */
__attribute__((force_align_arg_pointer))
__attribute__((force_align_arg_pointer, target("sse2")))
int32_t test_sse(void)
{
float a[4] __attribute__((aligned(16))) = {1.0f, 2.0f, 3.0f, 4.0f};
@ -45,7 +47,7 @@ int32_t test_sse(void)
return 0;
}
__attribute__((force_align_arg_pointer))
__attribute__((force_align_arg_pointer, target("sse2")))
void sse2_add_double_arrays(double *dst, const double *a, const double *b, size_t count)
{
for (size_t i = 0; i < count; i += 2)
@ -62,7 +64,7 @@ void sse2_add_double_arrays(double *dst, const double *a, const double *b, size_
}
}
__attribute__((force_align_arg_pointer))
__attribute__((force_align_arg_pointer, target("sse2")))
void sse2_add_int64_arrays(int64_t *dst, const int64_t *a, const int64_t *b, size_t count)
{
for (size_t i = 0; i < count; i += 2)
@ -79,7 +81,7 @@ void sse2_add_int64_arrays(int64_t *dst, const int64_t *a, const int64_t *b, siz
}
}
__attribute__((force_align_arg_pointer))
__attribute__((force_align_arg_pointer, target("sse2")))
void sse2_add_int32_arrays(int32_t *dst, const int32_t *a, const int32_t *b, size_t count)
{
for (size_t i = 0; i < count; i += 4)
@ -96,7 +98,7 @@ void sse2_add_int32_arrays(int32_t *dst, const int32_t *a, const int32_t *b, siz
}
}
__attribute__((force_align_arg_pointer))
__attribute__((force_align_arg_pointer, target("sse2")))
void *sse2_memcpy(void *dst, const void *src, uint32_t n)
{
uint8_t *d = (uint8_t *)dst;
@ -136,7 +138,7 @@ void *sse2_memcpy(void *dst, const void *src, uint32_t n)
}
__attribute__((force_align_arg_pointer))
__attribute__((force_align_arg_pointer, target("sse2")))
char *sse2_strncpy(char *dest, const char *src, uint32_t n)
{
uint32_t i = 0;
@ -218,48 +220,31 @@ void int_vector_to_double_vector(const int32_t *src, double *dst)
);
}
void * memclr_sse2(const void * const m_start, const size_t m_count)
__attribute__((force_align_arg_pointer, target("sse2")))
void* memclr_sse2(void *m_start, size_t m_count)
{
/* "i" is our counter of how many bytes we've cleared */
size_t i;
unsigned char *dst = m_start;
size_t i = 0;
/* find out if "m_start" is aligned on a SSE_XMM_SIZE boundary */
if ((size_t)m_start & (SSE_XMM_SIZE - 1))
while ((uintptr_t)(dst + i) & 15 && i < m_count)
{
i = 0;
/* we need to clear byte-by-byte until "m_start" is aligned on an SSE_XMM_SIZE boundary */
/* ... and lets make sure we don't copy 'too' many bytes (i < m_count) */
while (((size_t)m_start + i) & (SSE_XMM_SIZE - 1) && i < m_count)
{
asm volatile ("stosb;" :: "D"((size_t)m_start + i), "a"(0));
i++;
}
dst[i++] = 0;
}
else
__m128i zero = _mm_setzero_si128();
for (; i + 64 <= m_count; i += 64)
{
/* if "m_start" was aligned, set our count to 0 */
i = 0;
_mm_store_si128((__m128i *)(dst + i + 0), zero);
_mm_store_si128((__m128i *)(dst + i + 16), zero);
_mm_store_si128((__m128i *)(dst + i + 32), zero);
_mm_store_si128((__m128i *)(dst + i + 48), zero);
}
asm volatile ("pxor %%xmm0,%%xmm0"::); /* zero out XMM0 */
/* clear 64-byte chunks of memory (4 16-byte operations) */
for(; i + 64 <= m_count; i += 64)
for (; i < m_count; ++i)
{
asm volatile (" movdqa %%xmm0, 0(%0); " /* move 16 bytes from XMM0 to %0 + 0 */
" movdqa %%xmm0, 16(%0); "
" movdqa %%xmm0, 32(%0); "
" movdqa %%xmm0, 48(%0); "
:: "r"((size_t)m_start + i));
dst[i] = 0;
}
/* copy the remaining bytes (if any) */
asm volatile (" rep stosb; " :: "a"((size_t)(0)), "D"(((size_t)m_start) + i), "c"(m_count - i));
/* "i" will contain the total amount of bytes that were actually transfered */
i += m_count - i;
/* we return "m_start" + the amount of bytes that were transfered */
return (void *)(((size_t)m_start) + i);
return m_start;
}