This commit is contained in:
2025-05-28 14:41:02 -05:00
commit 6d366537dd
73 changed files with 4448 additions and 0 deletions

190
lib/fs/ramfs/ramfs.c Normal file
View File

@ -0,0 +1,190 @@
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <fs/ramfs.h>
static ramfs_file_t ramfs_files[RAMFS_MAX_FILES];
static ramfs_directory_t root_directory = { .name = "/", .file_count = 0 };
void ramfs_init()
{
memset(ramfs_files, 0, sizeof(ramfs_files));
root_directory.file_count = 0;
}
ramfs_file_t* ramfs_create_file(const char* name, uint32_t size, uint8_t permissions)
{
for (uint32_t i = 0; i < RAMFS_MAX_FILES; i++) {
if (ramfs_files[i].name[0] == '\0') {
// Create the file
ramfs_file_t* file = &ramfs_files[i];
strncpy(file->name, name, RAMFS_MAX_PATH_LEN);
file->size = size;
file->data = (size > 0) ? malloc(size) : NULL;
file->permissions = permissions;
if (root_directory.file_count < RAMFS_MAX_FILES) {
root_directory.files[root_directory.file_count++] = file;
}
return file;
}
}
return NULL;
}
bool ramfs_delete_file(const char* name)
{
for (uint32_t i = 0; i < root_directory.file_count; i++) {
if (strcmp(root_directory.files[i]->name, name) == 0) {
free(root_directory.files[i]->data);
root_directory.files[i] = root_directory.files[root_directory.file_count - 1];
root_directory.file_count--;
return true;
}
}
return false;
}
ramfs_file_t* ramfs_find_file(const char* name)
{
for (uint32_t i = 0; i < root_directory.file_count; i++) {
if (strcmp(root_directory.files[i]->name, name) == 0) {
return root_directory.files[i];
}
}
return NULL;
}
bool ramfs_resize_file(const char* name, uint32_t new_size)
{
ramfs_file_t* file = ramfs_find_file(name);
if (file)
{
file->data = realloc(file->data, new_size);
file->size = new_size;
return true;
}
return false;
}
bool ramfs_append_to_file(const char* name, const uint8_t* data, uint32_t data_size)
{
ramfs_file_t* file = ramfs_find_file(name);
if (file && (file->permissions & RAMFS_PERM_WRITE))
{
file->data = realloc(file->data, file->size + data_size + 1);
if (!file->data) return false;
memcpy(file->data + file->size, data, data_size);
file->size += data_size;
file->data[file->size] = '\0';
return true;
}
return false;
}
bool ramfs_create_directory(const char* name)
{
if (root_directory.file_count < RAMFS_MAX_FILES) {
ramfs_directory_t* dir = (ramfs_directory_t*)malloc(sizeof(ramfs_directory_t));
strncpy(dir->name, name, RAMFS_MAX_PATH_LEN);
dir->file_count = 0;
root_directory.files[root_directory.file_count++] = (ramfs_file_t*)dir;
return true;
}
return false;
}
bool ramfs_delete_directory(const char* name)
{
for (uint32_t i = 0; i < root_directory.file_count; i++) {
if (strcmp(root_directory.files[i]->name, name) == 0) {
// Delete the directory (along with its files)
ramfs_directory_t* dir = (ramfs_directory_t*)root_directory.files[i];
for (uint32_t j = 0; j < dir->file_count; j++) {
free(dir->files[j]->data);
}
free(dir);
root_directory.files[i] = root_directory.files[root_directory.file_count - 1];
root_directory.file_count--;
return true;
}
}
return false;
}
bool ramfs_check_permissions(const char* name, uint8_t required_permissions)
{
ramfs_file_t* file = ramfs_find_file(name);
if (file) {
return (file->permissions & required_permissions) == required_permissions;
}
return false;
}
void ramfs_list_files()
{
printf("Files in RAMFS:\n");
for (uint32_t i = 0; i < root_directory.file_count; i++) {
printf("%s (Size: %u bytes)\n", root_directory.files[i]->name, root_directory.files[i]->size);
}
}
void ramfs_list_directories()
{
printf("Directories in RAMFS:\n");
for (uint32_t i = 0; i < root_directory.file_count; i++) {
if (root_directory.files[i]->data != NULL) {
printf("%s\n", root_directory.files[i]->name);
}
}
}
bool ramfs_read_file(const char* name, uint8_t* buffer, uint32_t buffer_size)
{
ramfs_file_t* file = ramfs_find_file(name);
if (file && (file->permissions & RAMFS_PERM_READ))
{
uint32_t bytes_to_read = (file->size < buffer_size) ? file->size : buffer_size;
memcpy(buffer, file->data, bytes_to_read);
return true;
}
return false;
}
bool ramfs_write_file(const char* name, const uint8_t* data, uint32_t size)
{
return ramfs_append_to_file(name, data, size);
}
bool ramfs_overwrite_file(const char* name, const uint8_t* data, uint32_t size)
{
ramfs_file_t* file = ramfs_find_file(name);
if (!file || !(file->permissions & RAMFS_PERM_WRITE)) {
return false;
}
// Allocate space for data + null terminator
uint8_t* new_data = realloc(file->data, size + 1);
if (!new_data && size > 0) {
return false; // realloc failed
}
file->data = new_data;
if (size > 0 && data) {
memcpy(file->data, data, size);
}
file->data[size] = '\0'; // Ensure null-terminated
file->size = size;
return true;
}

26
lib/math/software_float.c Normal file
View File

@ -0,0 +1,26 @@
#include <math/software_float.h>
/* A float = FLOAT_SIGN * FLOAT_MANTISSA * (2 ^ FLOAT_EXPONENT) */
soft_float32_t decode_float(uint32_t raw)
{
soft_float32_t f;
f.sign = (raw >> 31) & 1;
f.exponent = (raw >> 23) & 0xFF;
f.mantissa = raw & 0x7FFFFF;
return f;
}
soft_float64_t decode_double(uint64_t raw)
{
soft_float64_t d;
d.sign = (raw >> 63) & 1;
d.exponent = (raw >> 52) & 0x7FF;
d.mantissa = raw & 0xFFFFFFFFFFFFF;
return d;
}

136
lib/mm/heap.c Normal file
View File

@ -0,0 +1,136 @@
#include <stdio.h>
#include <string.h>
#include <mm/heap.h>
#include <mm/pmm.h>
#include <mm/paging.h>
#define ALIGNMENT 8
#define ALIGN(size) (((size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
typedef struct block_header {
size_t size;
struct block_header* next;
int free;
} block_header_t;
#define BLOCK_SIZE sizeof(block_header_t)
static uint8_t* heap_base = (uint8_t*)HEAP_START;
static uint8_t* heap_end = (uint8_t*)(HEAP_START + HEAP_SIZE);
static block_header_t* free_list = NULL;
void heap_init(void)
{
free_list = (block_header_t*)heap_base;
free_list->size = HEAP_SIZE - BLOCK_SIZE;
free_list->next = NULL;
free_list->free = 1;
}
void* malloc(size_t size)
{
size = ALIGN(size);
block_header_t* curr = free_list;
while (curr)
{
if (curr->free && curr->size >= size)
{
/* Split if there's space for another block */
if (curr->size >= size + BLOCK_SIZE + ALIGNMENT)
{
block_header_t* new_block = (block_header_t*)((uint8_t*)curr + BLOCK_SIZE + size);
new_block->size = curr->size - size - BLOCK_SIZE;
new_block->next = curr->next;
new_block->free = 1;
curr->next = new_block;
curr->size = size;
}
curr->free = 0;
return (void*)((uint8_t*)curr + BLOCK_SIZE);
}
curr = curr->next;
}
printd("Malloc failed due to lack of free memory\n");
return NULL;
}
void* calloc(size_t nmemb, size_t size)
{
size_t total = nmemb * size;
void* ptr = malloc(total);
if (ptr)
{
memset(ptr, 0, total);
}
return ptr;
}
void* realloc(void* ptr, size_t size)
{
if (!ptr)
{
return malloc(size);
}
if (size == 0)
{
free(ptr);
return NULL;
}
block_header_t* block = (block_header_t*)((uint8_t*)ptr - BLOCK_SIZE);
if (block->size >= size)
{
return ptr;
}
void* new_ptr = malloc(size);
if (new_ptr) {
memcpy(new_ptr, ptr, block->size);
free(ptr);
}
return new_ptr;
}
void free(void* ptr)
{
if (!ptr)
{
return;
}
block_header_t* block = (block_header_t*)((uint8_t*)ptr - BLOCK_SIZE);
block->free = 1;
/* Forward coalescing */
if (block->next && block->next->free)
{
block->size += BLOCK_SIZE + block->next->size;
block->next = block->next->next;
}
/* Backward coalescing */
block_header_t* prev = NULL;
block_header_t* curr = free_list;
while (curr && curr != block)
{
prev = curr;
curr = curr->next;
}
if (prev && prev->free)
{
prev->size += BLOCK_SIZE + block->size;
prev->next = block->next;
}
}

82
lib/mm/paging.c Normal file
View File

@ -0,0 +1,82 @@
#include <string.h>
#include <stdio.h>
#include <mm/paging.h>
#include <mm/pmm.h>
#define PAGE_DIRECTORY_ENTRIES 1024
#define PAGE_TABLE_ENTRIES 1024
#define PAGE_SIZE 4096
typedef uint32_t page_directory_entry_t;
typedef uint32_t page_table_entry_t;
static page_directory_entry_t* page_directory = (page_directory_entry_t*)0x9C000; /* Must be page-aligned */
static page_table_entry_t* page_tables[PAGE_DIRECTORY_ENTRIES];
extern void _enable_paging_asm(void);
void paging_init(void)
{
/* Allocate and clear the page directory */
page_directory = (uint32_t*)alloc_page();
memset(page_directory, 0, 4096);
/* Allocate and set up the first identity-mapped page table (04MB) */
uint32_t* first_table = (uint32_t*)alloc_page();
memset(first_table, 0, 4096);
for (uint32_t i = 0; i < 1024; i++)
{
first_table[i] = (i * 0x1000) | 3; /* Present | RW */
}
page_directory[0] = ((uint32_t)first_table) | 3;
/* --- Map 4MB heap at 0xC0000000 --- */
uint32_t* heap_table = (uint32_t*)alloc_page();
memset(heap_table, 0, 4096);
for (uint32_t i = 0; i < 1024; i++) /* 256 pages = 1MB */
{
uint32_t phys = (uint32_t)alloc_page();
if (phys == 0)
{
printf("Out of physical memory during heap mapping!\n");
while (1);
}
heap_table[i] = phys | 3; /* Present | RW */
}
/* 0xC0000000 >> 22 == 768 */
page_directory[768] = ((uint32_t)heap_table) | 3;
// Load page directory
asm volatile ("mov %0, %%cr3" : : "r"(page_directory));
/* Enable paging */
_enable_paging_asm();
}
void map_page(void* phys_addr, void* virt_addr)
{
uint32_t pd_index = ((uint32_t)virt_addr >> 22) & 0x3FF;
uint32_t pt_index = ((uint32_t)virt_addr >> 12) & 0x3FF;
/* Allocate page table if necessary */
if (!(page_directory[pd_index] & 1))
{
void* pt_phys = alloc_page();
page_tables[pd_index] = (page_table_entry_t*)((uint32_t)pt_phys + 0xC0000000); /* Map it higher */
for (int i = 0; i < PAGE_TABLE_ENTRIES; i++)
{
page_tables[pd_index][i] = 0x00000002; /* Not present */
}
page_directory[pd_index] = ((uint32_t)pt_phys) | 0x3; /* Present, R/W */
}
page_table_entry_t* page_table = (page_table_entry_t*)((page_directory[pd_index] & 0xFFFFF000) | 0xC0000000);
page_table[pt_index] = ((uint32_t)phys_addr & 0xFFFFF000) | 0x3; /* Present, R/W */
asm volatile ("invlpg (%0)" :: "r" (virt_addr) : "memory");
}

79
lib/mm/pmm.c Normal file
View File

@ -0,0 +1,79 @@
#include <stddef.h>
#include <mm/pmm.h>
#define PAGE_SIZE 4096
#define BITMAP_SIZE (1024 * 1024) /* Supports up to 4GB RAM (1 bit per page) */
static uint8_t bitmap[BITMAP_SIZE / 8];
static uint32_t total_pages;
static uint32_t used_pages = 0;
static inline void set_bit(uint32_t idx)
{
bitmap[idx / 8] |= (1 << (idx % 8));
}
static inline void clear_bit(uint32_t idx)
{
bitmap[idx / 8] &= ~(1 << (idx % 8));
}
static inline int test_bit(uint32_t idx)
{
return (bitmap[idx / 8] >> (idx % 8)) & 1;
}
void pmm_init(multiboot_info_t* mb_info)
{
total_pages = 0x100000; /* 4GB / 4KB = 1M pages */
for (uint32_t i = 0; i < total_pages / 8; i++)
{
bitmap[i] = 0xFF; /* Mark all as used */
}
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
while ((uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length)
{
if (mmap->type == 1) /* Usable */
{
uint64_t base = mmap->addr;
uint64_t len = mmap->len;
for (uint64_t addr = base; addr < base + len; addr += PAGE_SIZE)
{
if (addr >= 0x210000) /* Skip first 2.1MB, or ≈ 2.06MiB */
{
uint32_t idx = addr / PAGE_SIZE;
if (idx >= total_pages)
{
continue; /* skip entries above 4GB */
}
clear_bit(idx);
used_pages--;
}
}
}
mmap = (multiboot_memory_map_t*)((uint32_t)mmap + mmap->size + sizeof(mmap->size));
}
}
void* alloc_page(void) {
for (uint32_t i = 0; i < total_pages; i++)
{
if (!test_bit(i))
{
set_bit(i);
used_pages++;
return (void*)(i * PAGE_SIZE);
}
}
return NULL; /* Out of memory */
}
void free_page(void* ptr)
{
uint32_t idx = (uint32_t)ptr / PAGE_SIZE;
clear_bit(idx);
}

183
lib/printf.c Normal file
View File

@ -0,0 +1,183 @@
#include <stdarg.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>
#include <printf.h>
void printwc(const char* str, uint8_t color)
{
uint8_t c = terminal_getcolor();
terminal_setcolor(color);
printf(str);
terminal_setcolor(c);
}
void printd(const char* str)
{
terminal_debug_writestring(str);
}
void printdc(const char* str, uint8_t color)
{
uint8_t c = terminal_getcolor();
terminal_setcolor(color);
printd(str);
terminal_setcolor(c);
}
void printf(const char* format, ...) {
va_list args;
va_start(args, format);
for (size_t i = 0; format[i] != '\0'; ++i) {
if (format[i] == '%' && format[i + 1] != '\0') {
++i;
// Parse width (only supports zero-padding like %04x)
int width = 0;
if (format[i] == '0') {
++i;
while (format[i] >= '0' && format[i] <= '9') {
width = width * 10 + (format[i] - '0');
++i;
}
}
switch (format[i]) {
case 's': {
const char* str = va_arg(args, const char*);
terminal_writestring(str ? str : "(null)");
break;
}
case 'c': {
char c = (char) va_arg(args, int);
terminal_putchar(c);
break;
}
case 'd':
case 'i': {
int32_t val = va_arg(args, int32_t);
print_int(val);
break;
}
case 'u': {
uint32_t val = va_arg(args, uint32_t);
print_uint(val);
break;
}
case 'x': {
uint32_t val = va_arg(args, uint32_t);
print_hex(val, width, false);
break;
}
case 'X': {
uint32_t val = va_arg(args, uint32_t);
print_hex(val, width, true);
break;
}
case 'p': {
void* ptr = va_arg(args, void*);
terminal_writestring("0x");
print_hex((uint32_t)(uintptr_t)ptr, 8, true); // assumes 32-bit pointer
break;
}
case 'f':
case 'F': {
double val = va_arg(args, double); // double is promoted from float
print_double(val, 2); // 2 decimal places
break;
}
case '%': {
terminal_putchar('%');
break;
}
default: {
terminal_putchar('%');
terminal_putchar(format[i]);
break;
}
}
} else {
terminal_putchar(format[i]);
}
}
va_end(args);
}
void print_double(double value, int precision)
{
// Handle the integer part
int32_t integer_part = (int32_t)value;
double fractional_part = value - integer_part;
// Print the integer part
print_int(integer_part);
// Print the decimal point
terminal_putchar('.');
// Print the fractional part (scaled up)
fractional_part *= 1;
for (int i = 0; i < precision; i++) {
fractional_part *= 10;
}
int32_t frac_int = (int32_t)fractional_part;
print_int(frac_int);
}
void print_uint(uint32_t value) {
char buffer[11]; // Enough for 32-bit unsigned int
int i = 0;
do {
buffer[i++] = '0' + (value % 10);
value /= 10;
} while (value > 0);
while (i--) {
terminal_putchar(buffer[i]);
}
}
void print_int(int32_t value) {
char buffer[12]; // Enough for 32-bit signed int (-2147483648)
int i = 0;
uint32_t u;
if (value < 0) {
terminal_putchar('-');
u = (uint32_t)(-value);
} else {
u = (uint32_t)value;
}
// Convert to string in reverse
do {
buffer[i++] = '0' + (u % 10);
u /= 10;
} while (u > 0);
// Print in correct order
while (i--) {
terminal_putchar(buffer[i]);
}
}
void print_hex(uint32_t value, int width, bool uppercase)
{
const char* hex_chars = uppercase ? "0123456789ABCDEF" : "0123456789abcdef";
char buffer[9]; // 8 hex digits max for 32-bit
int i = 0;
do {
buffer[i++] = hex_chars[value & 0xF];
value >>= 4;
} while (value || i < width); // ensure at least 'width' digits
while (i--) {
terminal_putchar(buffer[i]);
}
}

5
lib/processes.c Normal file
View File

@ -0,0 +1,5 @@
#include <stdint.h>
#include <processes.h>

160
lib/string.c Normal file
View File

@ -0,0 +1,160 @@
#include <string.h>
#include <vector_extentions/sse.h>
extern int16_t sse_initialized;
size_t strlen(const char* str)
{
size_t len = 0;
while (str[len]) len++;
return len;
}
size_t strcmp(const char *s1, const char *s2)
{
int32_t i = 0;
while ((s1[i] == s2[i]))
{
if (s2[i++] == 0)
{
return 0;
}
}
return 1;
}
int32_t strncmp(const char *s1, const char *s2, size_t n)
{
while (n--)
{
unsigned char c1 = (unsigned char)*s1++;
unsigned char c2 = (unsigned char)*s2++;
if (c1 != c2)
{
return c1 - c2;
}
if (c1 == '\0')
{
break;
}
}
return 0;
}
size_t strcpy(char *dst, const char *src)
{
int32_t i = 0;
while ((*dst++ = *src++) != 0)
i++;
return i;
}
char *strncpy(char *dest, const char *src, uint32_t n)
{
if (sse_initialized > 0)
{
return sse2_strncpy(dest, src, n);
}
uint32_t i = 0;
for (; i < n && src[i]; ++i)
dest[i] = src[i];
for (; i < n; ++i)
dest[i] = '\0';
return dest;
}
void strcat(char *dest, const char *src)
{
char *end = (char *)dest + strlen(dest);
memcpy((void *)end, (void *)src, strlen(src));
end = end + strlen(src);
*end = '\0';
}
void *memset(void *dst, char c, uint32_t n)
{
char *temp = dst;
for (; n != 0; n--) *temp++ = c;
return dst;
}
void *memcpy(void *dst, const void *src, uint32_t n)
{
if (sse_initialized > 0)
{
return sse2_memcpy(dst, src, n);
}
char *ret = dst;
char *p = dst;
const char *q = src;
while (n--)
*p++ = *q++;
return ret;
}
int32_t memcmp(uint8_t *s1, uint8_t *s2, uint32_t n)
{
while (n--)
{
if (*s1 != *s2)
return 0;
s1++;
s2++;
}
return 1;
}
void* memclr(const void * const m_start, const size_t m_count)
{
if (sse_initialized > 0)
{
return memclr_sse2(m_start, m_count);
}
return memset(m_start, '\0', (uint32_t)m_count);
}
int32_t isspace(char c)
{
return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
}
int32_t isalpha(char c)
{
return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}
int32_t ischar(int32_t c)
{
return ((c >= 32) && (c <= 126));
}
char upper(char c)
{
if ((c >= 'a') && (c <= 'z'))
return (c - 32);
return c;
}
char toupper(char c)
{
if ((c >= 'a') && (c <= 'z'))
{
return (c - 32);
}
return c;
}
char lower(char c)
{
if ((c >= 'A') && (c <= 'Z'))
return (c + 32);
return c;
}

4
lib/syscall.c Normal file
View File

@ -0,0 +1,4 @@
#include <stdint.h>
#include <syscall.h>

263
lib/vector_extentions/sse.c Normal file
View File

@ -0,0 +1,263 @@
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdio.h>
#include <vector_extentions/sse.h>
void enable_sse(void) {
uint32_t cr0, cr4;
__asm__ volatile ("mov %%cr0, %0" : "=r"(cr0));
cr0 &= ~(1 << 2); // EM = 0
cr0 |= (1 << 1); // MP = 1
__asm__ volatile ("mov %0, %%cr0" :: "r"(cr0));
__asm__ volatile ("mov %%cr4, %0" : "=r"(cr4));
cr4 |= (1 << 9); // OSFXSR = 1
cr4 |= (1 << 10); // OSXMMEXCPT = 1
__asm__ volatile ("mov %0, %%cr4" :: "r"(cr4));
}
// Basic SSE test: add two arrays of 4 floats using xmm registers
__attribute__((force_align_arg_pointer))
int32_t test_sse(void) {
float a[4] __attribute__((aligned(16))) = {1.0f, 2.0f, 3.0f, 4.0f};
float b[4] __attribute__((aligned(16))) = {5.0f, 6.0f, 7.0f, 8.0f};
float result[4] __attribute__((aligned(16)));
asm volatile (
"movaps %1, %%xmm0\n\t"
"movaps %2, %%xmm2\n\t"
"addps %%xmm2, %%xmm0\n\t"
"movaps %%xmm0, %0\n\t"
: "=m" (result)
: "m" (a), "m" (b)
: "xmm0", "xmm2"
);
if (result[0] != 6.0 || result[1] != 8.0 || result[2] != 10.0 || result[3] != 12.0)
{
return -1;
}
return 0;
}
__attribute__((force_align_arg_pointer))
void sse2_add_double_arrays(double *dst, const double *a, const double *b, size_t count)
{
for (size_t i = 0; i < count; i += 2)
{
asm volatile (
"movapd (%1), %%xmm0\n\t"
"movapd (%2), %%xmm1\n\t"
"addpd %%xmm1, %%xmm0\n\t"
"movapd %%xmm0, (%0)\n\t"
:
: "r"(dst + i), "r"(a + i), "r"(b + i)
: "xmm0", "xmm1", "memory"
);
}
}
__attribute__((force_align_arg_pointer))
void sse2_add_int64_arrays(int64_t *dst, const int64_t *a, const int64_t *b, size_t count)
{
for (size_t i = 0; i < count; i += 2)
{
asm volatile (
"movdqa (%1), %%xmm0\n\t"
"movdqa (%2), %%xmm1\n\t"
"paddq %%xmm1, %%xmm0\n\t"
"movdqa %%xmm0, (%0)\n\t"
:
: "r"(dst + i), "r"(a + i), "r"(b + i)
: "xmm0", "xmm1", "memory"
);
}
}
__attribute__((force_align_arg_pointer))
void sse2_add_int32_arrays(int32_t *dst, const int32_t *a, const int32_t *b, size_t count)
{
for (size_t i = 0; i < count; i += 4)
{
asm volatile (
"movdqa (%1), %%xmm0\n\t"
"movdqa (%2), %%xmm1\n\t"
"paddd %%xmm1, %%xmm0\n\t"
"movdqa %%xmm0, (%0)\n\t"
:
: "r"(dst + i), "r"(a + i), "r"(b + i)
: "xmm0", "xmm1", "memory"
);
}
}
__attribute__((force_align_arg_pointer))
void *sse2_memcpy(void *dst, const void *src, uint32_t n)
{
uintptr_t i;
/* Align to 16 bytes if necessary */
while (((uintptr_t)dst & 15) && n > 0)
{
*((uint8_t*)dst) = *((uint8_t*)src);
dst = (uint8_t*)dst + 1;
src = (uint8_t*)src + 1;
n--;
}
/* Copy 16-byte chunks with SSE2 */
for (i = 0; i + 15 < n; i += 16)
{
asm volatile (
"movdqa (%1), %%xmm0\n\t" /* Load 16 bytes from source into xmm0 */
"movdqa %%xmm0, (%0)\n\t" /* Store 16 bytes to destination */
:
: "r"(dst + i), "r"(src + i)
: "xmm0", "memory"
);
}
/* Handle remaining bytes (less than 16) */
while (n > 0)
{
*((uint8_t*)dst) = *((uint8_t*)src);
dst = (uint8_t*)dst + 1;
src = (uint8_t*)src + 1;
n--;
}
return dst; /* Return pointer to destination */
}
__attribute__((force_align_arg_pointer))
char *sse2_strncpy(char *dest, const char *src, uint32_t n)
{
uint32_t i = 0;
/* Align initial copy */
while (((uintptr_t)(dest + i) & 15) && i < n && src[i]) {
dest[i] = src[i];
i++;
}
/* Bulk copy in 16-byte blocks */
for (; i + 15 < n; i += 16)
{
asm volatile (
"movdqu (%1), %%xmm0\n\t"
"movdqu %%xmm0, (%0)\n\t"
:
: "r"(dest + i), "r"(src + i)
: "xmm0", "memory"
);
/* Manually check if any nulls in the just-copied block */
for (int j = 0; j < 16; ++j)
{
if (src[i + j] == '\0')
{
/* Null found: pad the rest with zeros */
for (int k = j + 1; k < 16 && i + k < n; ++k)
{
dest[i + k] = '\0';
}
i += 16;
goto tail;
}
}
}
tail:
/* Final bytes */
for (; i < n && src[i]; ++i)
{
dest[i] = src[i];
}
for (; i < n; ++i)
{
dest[i] = '\0';
}
return dest;
}
__attribute__((force_align_arg_pointer))
void double_vector_to_int_vector(const double *src, int32_t *dst)
{
asm volatile (
"pxor %%xmm0, %%xmm0\n\t" /* zero xmm0 */
"cvttpd2dq (%1), %%xmm1\n\t" /* convert src to int32s */
"movq %%xmm1, %%xmm0\n\t" /* move low 64 bits (2 ints) to xmm0 */
"movdqa %%xmm0, (%0)\n\t" /* store result */
:
: "r"(dst), "r"(src)
: "xmm0", "xmm1", "memory"
);
}
__attribute__((force_align_arg_pointer))
void int_vector_to_double_vector(const int32_t *src, double *dst)
{
asm volatile (
"movq (%1), %%xmm0\n\t" /* Load 2 int32s (64 bits) into xmm0 */
"cvtdq2pd %%xmm0, %%xmm1\n\t" /* Convert to 2 doubles */
"movapd %%xmm1, (%0)\n\t" /* Store to destination */
:
: "r"(dst), "r"(src)
: "xmm0", "xmm1", "memory"
);
}
void * memclr_sse2(const void * const m_start, const size_t m_count)
{
/* "i" is our counter of how many bytes we've cleared */
size_t i;
/* find out if "m_start" is aligned on a SSE_XMM_SIZE boundary */
if ((size_t)m_start & (SSE_XMM_SIZE - 1))
{
i = 0;
/* we need to clear byte-by-byte until "m_start" is aligned on an SSE_XMM_SIZE boundary */
/* ... and lets make sure we don't copy 'too' many bytes (i < m_count) */
while (((size_t)m_start + i) & (SSE_XMM_SIZE - 1) && i < m_count)
{
asm volatile ("stosb;" :: "D"((size_t)m_start + i), "a"(0));
i++;
}
}
else
{
/* if "m_start" was aligned, set our count to 0 */
i = 0;
}
asm volatile ("pxor %%xmm0,%%xmm0"::); /* zero out XMM0 */
/* clear 64-byte chunks of memory (4 16-byte operations) */
for(; i + 64 <= m_count; i += 64)
{
asm volatile (" movdqa %%xmm0, 0(%0); " /* move 16 bytes from XMM0 to %0 + 0 */
" movdqa %%xmm0, 16(%0); "
" movdqa %%xmm0, 32(%0); "
" movdqa %%xmm0, 48(%0); "
:: "r"((size_t)m_start + i));
}
/* copy the remaining bytes (if any) */
asm volatile (" rep stosb; " :: "a"((size_t)(0)), "D"(((size_t)m_start) + i), "c"(m_count - i));
/* "i" will contain the total amount of bytes that were actually transfered */
i += m_count - i;
/* we return "m_start" + the amount of bytes that were transfered */
return (void *)(((size_t)m_start) + i);
}