Everyone should be familiar with the concept of brute forcing. In short, it is the act of trying every possible combination until one succeeds. It is most often used in password cracking / hash guessing.
We can brute force all kinds of problems in our lives; the only drawback is how much time we are willing to spend on it. And I was curious whether we could write a simple Hello world! program in C this way.
To achieve this we should:
- Iterate a string of valid C code characters.
- Try to compile it.
- Try to run it.
- Check whether the output matches a desired string, in this case `Hello world`.
I wrote a simple C program that does all those steps in a loop until the code reaches 100 characters in length, just to check how long it would take.
[expand]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
/* Size in bytes of the candidate-source buffer and the output-line buffer used in main(). */
#define MAX_BUFF_LEN 100
/* File the current candidate program is written to and that the compile command is run on. */
#define HELLO_C_FILE_NAME ("hello.c")
/* Output line that a candidate program must print for the search to stop. */
#define DESIRED_STRING ("Hello world\n")
/* A known-good hello-world source; not referenced by the code shown in this file. */
#define TEST_HELLO_WORLD ("#include <stdio.h>\nvoid main(){printf(\"Hello world\\n\");}")
/*
 * Advance `str` in place to the next candidate string.
 *
 * The string is treated like an odometer over the printable ASCII range
 * ' ' (32) .. '~' (126), with the last character as the least significant
 * digit. When every position already holds '~', all positions are reset to
 * ' ' and the string grows by one character. An empty string becomes " ".
 *
 * str      NUL-terminated string to advance.
 * max_len  capacity of the buffer behind `str` in bytes, including the
 *          terminating NUL.
 *
 * Returns 0 on success. Returns -1, leaving `str` untouched, when `str` is
 * NULL, when no terminator is found within `max_len` bytes, or when growing
 * the string would not fit in `max_len` bytes.
 */
static inline int inc_string(char *str, unsigned int max_len)
{
    const char low_limit = 32;   /* space ' ' */
    const char high_limit = 126; /* tilde '~' */
    const char *terminator;
    size_t len;
    size_t pos;

    if (str == NULL || max_len == 0) {
        return -1;
    }

    /* Bounded length lookup: never read past the caller's buffer. */
    terminator = memchr(str, '\0', max_len);
    if (terminator == NULL) {
        return -1;
    }
    len = (size_t)(terminator - str);

    /* Skip the saturated tail; `pos` ends one past the digit that takes the carry. */
    pos = len;
    while (pos > 0 && str[pos - 1] >= high_limit) {
        pos--;
    }

    if (pos > 0) {
        /* Bump the carry digit and only then reset the tail, so '~' remains
         * reachable in every position, not just the last one. */
        str[pos - 1]++;
        memset(str + pos, low_limit, len - pos);
        return 0;
    }

    /* Every position is saturated (or the string is empty): the next
     * candidate is len + 1 spaces, which needs len + 2 bytes with the NUL. */
    if (len + 2 > max_len) {
        return -1;
    }
    memset(str, low_limit, len + 1);
    str[len + 1] = '\0';
    return 0;
}
/*
 * Load the whole file at `path` into `str` as a NUL-terminated string.
 *
 * path     file to read.
 * str      destination buffer.
 * max_len  capacity of `str` in bytes, including the terminating NUL.
 *
 * Returns 1 on success and -1 on failure. Failure covers: the file cannot be
 * opened or measured, it is empty, it does not fit in `max_len` bytes together
 * with the terminator, or the read comes up short. After a short read the
 * contents of `str` are unspecified; in the other failure cases `str` is not
 * touched.
 */
static inline int read_file(const char * path, char * str, unsigned int max_len)
{
    FILE *fp;
    long file_size;
    int rc = -1;

    fp = fopen(path, "r");
    if (fp == NULL) {
        perror("Error readind previous hello.c code!");
        return -1;
    }

    if (fseek(fp, 0L, SEEK_END) != 0) {
        perror("Error measuring 'hello.c'");
        goto out;
    }
    file_size = ftell(fp);
    if (file_size < 0) {
        perror("Error measuring 'hello.c'");
        goto out;
    }
    rewind(fp);

    /* `>=` rather than `>`: one byte must stay free for the terminator. */
    if ((unsigned long)file_size >= max_len) {
        printf("File 'hello.c' biger then max_len. Missing null terminator?\n");
        goto out;
    }

    /* An empty file is reported as a read error, as fread() of zero bytes
     * never returns the requested item count. */
    if (file_size == 0 || fread(str, (size_t)file_size, 1, fp) != 1) {
        printf("Error reading 'hello.c'\n");
        goto out;
    }

    /* fread() does not terminate the data; do it explicitly. */
    str[file_size] = '\0';
    rc = 1;

out:
    fclose(fp);
    return rc;
}
static inline int write_file(char * path, char * str, unsigned int max_len)
{
FILE *fp;
fp = fopen(path, "w");
if (fp == NULL) {
perror("Error readind previous hello.c code!");
return(-1);
}
fprintf(fp, "%s", str);
fclose(fp);
return 1;
}
/*
 * Brute-force driver: repeatedly compile and run the candidate stored in
 * HELLO_C_FILE_NAME, stop when one of its output lines equals DESIRED_STRING,
 * otherwise advance to the next candidate string and write it back to the file.
 *
 * The search resumes from whatever HELLO_C_FILE_NAME currently contains; if
 * that file cannot be loaded, it starts from a single space.
 *
 * Returns 0 when a matching program is found, EXIT_FAILURE when the candidate
 * space is exhausted or an I/O step fails.
 */
int main(void)
{
    char buff[MAX_BUFF_LEN] = " \0";
    char ret_buff[MAX_BUFF_LEN];
    char command[100];
    FILE *fp;

    snprintf(command, sizeof(command), "tcc -run %s 2> /dev/null", HELLO_C_FILE_NAME);

    if (read_file(HELLO_C_FILE_NAME, buff, MAX_BUFF_LEN) < 0) {
        /* No usable saved state: restart from the first candidate and put it
         * on disk so the file being compiled matches `buff`. */
        buff[0] = ' ';
        buff[1] = '\0';
        if (write_file(HELLO_C_FILE_NAME, buff, MAX_BUFF_LEN) < 0) {
            return EXIT_FAILURE;
        }
    }

    for (;;) {
        fp = popen(command, "r");
        if (fp == NULL) {
            perror("popen");
            return EXIT_FAILURE;
        }

        /* fgets() blocks until the child writes or closes its stdout, so a
         * candidate that never terminates stalls the search here. */
        while (fgets(ret_buff, sizeof(ret_buff), fp) != NULL) {
            if (strcmp(ret_buff, DESIRED_STRING) == 0) {
                printf("YESSS\n");
                fflush(stdout);
                pclose(fp);
                return 0;
            }
        }
        pclose(fp);

        if (inc_string(buff, MAX_BUFF_LEN) != 0) {
            /* Without this check the loop would re-run the same candidate forever. */
            fprintf(stderr, "No more candidates fit in %d bytes\n", MAX_BUFF_LEN);
            return EXIT_FAILURE;
        }
        if (write_file(HELLO_C_FILE_NAME, buff, MAX_BUFF_LEN) < 0) {
            return EXIT_FAILURE;
        }
    }
}
[/expand]
I compiled it with the `gcc -Ofast` flag, ran it on a RAM disk mounted with `sudo mount -t tmpfs -o size=10m ramdisk ./ramdisk`, and waited…
And it never finished. After two hours it reached a string of five characters. So my curiosity is fulfilled and now I know that the GitHub Copilot or an army of interns is the closest we will get to automatic code generation.